blob: 46a8dd2c4e2b40af3f569077fa28150aeea2d906 [file] [log] [blame]
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
* Copyright 2017 RackTop Systems.
*/
/*
* The objective of this program is to provide a DMU/ZAP/SPA stress test
* that runs entirely in userland, is easy to use, and easy to extend.
*
* The overall design of the ztest program is as follows:
*
* (1) For each major functional area (e.g. adding vdevs to a pool,
* creating and destroying datasets, reading and writing objects, etc)
* we have a simple routine to test that functionality. These
* individual routines do not have to do anything "stressful".
*
* (2) We turn these simple functionality tests into a stress test by
* running them all in parallel, with as many threads as desired,
* and spread across as many datasets, objects, and vdevs as desired.
*
* (3) While all this is happening, we inject faults into the pool to
* verify that self-healing data really works.
*
* (4) Every time we open a dataset, we change its checksum and compression
* functions. Thus even individual objects vary from block to block
* in which checksum they use and whether they're compressed.
*
* (5) To verify that we never lose on-disk consistency after a crash,
* we run the entire test in a child of the main process.
* At random times, the child self-immolates with a SIGKILL.
* This is the software equivalent of pulling the power cord.
* The parent then runs the test again, using the existing
* storage pool, as many times as desired. If backwards compatibility
* testing is enabled ztest will sometimes run the "older" version
* of ztest after a SIGKILL.
*
* (6) To verify that we don't have future leaks or temporal incursions,
* many of the functional tests record the transaction group number
* as part of their data. When reading old data, they verify that
* the transaction group number is less than the current, open txg.
* If you add a new test, please do this if applicable.
*
* When run with no arguments, ztest runs for about five minutes and
* produces no output if successful. To get a little bit of information,
* specify -V. To get more information, specify -VV, and so on.
*
* To turn this into an overnight stress test, use -T to specify run time.
*
* You can ask more more vdevs [-v], datasets [-d], or threads [-t]
* to increase the pool capacity, fanout, and overall stress level.
*
* Use the -k option to set the desired frequency of kills.
*
* When ztest invokes itself it passes all relevant information through a
* temporary file which is mmap-ed in the child process. This allows shared
* memory to survive the exec syscall. The ztest_shared_hdr_t struct is always
* stored at offset 0 of this file and contains information on the size and
* number of shared structures in the file. The information stored in this file
* must remain backwards compatible with older versions of ztest so that
* ztest can invoke them during backwards compatibility testing (-B).
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/dmu_objset.h>
#include <sys/poll.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/zio.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_file.h>
#include <sys/vdev_initialize.h>
#include <sys/spa_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_scan.h>
#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>
#include <sys/dsl_userhold.h>
#include <sys/abd.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <umem.h>
#include <dlfcn.h>
#include <ctype.h>
#include <math.h>
#include <sys/fs/zfs.h>
#include <libnvpair.h>
#include <libcmdutils.h>
static int ztest_fd_data = -1;
static int ztest_fd_rand = -1;
typedef struct ztest_shared_hdr {
uint64_t zh_hdr_size;
uint64_t zh_opts_size;
uint64_t zh_size;
uint64_t zh_stats_size;
uint64_t zh_stats_count;
uint64_t zh_ds_size;
uint64_t zh_ds_count;
} ztest_shared_hdr_t;
static ztest_shared_hdr_t *ztest_shared_hdr;
typedef struct ztest_shared_opts {
char zo_pool[ZFS_MAX_DATASET_NAME_LEN];
char zo_dir[ZFS_MAX_DATASET_NAME_LEN];
char zo_alt_ztest[MAXNAMELEN];
char zo_alt_libpath[MAXNAMELEN];
uint64_t zo_vdevs;
uint64_t zo_vdevtime;
size_t zo_vdev_size;
int zo_ashift;
int zo_mirrors;
int zo_raidz;
int zo_raidz_parity;
int zo_datasets;
int zo_threads;
uint64_t zo_passtime;
uint64_t zo_killrate;
int zo_verbose;
int zo_init;
uint64_t zo_time;
uint64_t zo_maxloops;
uint64_t zo_metaslab_force_ganging;
} ztest_shared_opts_t;
static const ztest_shared_opts_t ztest_opts_defaults = {
.zo_pool = { 'z', 't', 'e', 's', 't', '\0' },
.zo_dir = { '/', 't', 'm', 'p', '\0' },
.zo_alt_ztest = { '\0' },
.zo_alt_libpath = { '\0' },
.zo_vdevs = 5,
.zo_ashift = SPA_MINBLOCKSHIFT,
.zo_mirrors = 2,
.zo_raidz = 4,
.zo_raidz_parity = 1,
.zo_vdev_size = SPA_MINDEVSIZE * 4, /* 256m default size */
.zo_datasets = 7,
.zo_threads = 23,
.zo_passtime = 60, /* 60 seconds */
.zo_killrate = 70, /* 70% kill rate */
.zo_verbose = 0,
.zo_init = 1,
.zo_time = 300, /* 5 minutes */
.zo_maxloops = 50, /* max loops during spa_freeze() */
.zo_metaslab_force_ganging = 32 << 10
};
extern uint64_t metaslab_force_ganging;
extern uint64_t metaslab_df_alloc_threshold;
extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern boolean_t zfs_abd_scatter_enabled;
extern boolean_t zfs_force_some_double_word_sm_entries;
static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
typedef struct ztest_shared_ds {
uint64_t zd_seq;
} ztest_shared_ds_t;
static ztest_shared_ds_t *ztest_shared_ds;
#define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d])
#define BT_MAGIC 0x123456789abcdefULL
#define MAXFAULTS() \
(MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1)
enum ztest_io_type {
ZTEST_IO_WRITE_TAG,
ZTEST_IO_WRITE_PATTERN,
ZTEST_IO_WRITE_ZEROES,
ZTEST_IO_TRUNCATE,
ZTEST_IO_SETATTR,
ZTEST_IO_REWRITE,
ZTEST_IO_TYPES
};
typedef struct ztest_block_tag {
uint64_t bt_magic;
uint64_t bt_objset;
uint64_t bt_object;
uint64_t bt_offset;
uint64_t bt_gen;
uint64_t bt_txg;
uint64_t bt_crtxg;
} ztest_block_tag_t;
typedef struct bufwad {
uint64_t bw_index;
uint64_t bw_txg;
uint64_t bw_data;
} bufwad_t;
/*
* XXX -- fix zfs range locks to be generic so we can use them here.
*/
typedef enum {
RL_READER,
RL_WRITER,
RL_APPEND
} rl_type_t;
typedef struct rll {
void *rll_writer;
int rll_readers;
kmutex_t rll_lock;
kcondvar_t rll_cv;
} rll_t;
typedef struct rl {
uint64_t rl_object;
uint64_t rl_offset;
uint64_t rl_size;
rll_t *rl_lock;
} rl_t;
#define ZTEST_RANGE_LOCKS 64
#define ZTEST_OBJECT_LOCKS 64
/*
* Object descriptor. Used as a template for object lookup/create/remove.
*/
typedef struct ztest_od {
uint64_t od_dir;
uint64_t od_object;
dmu_object_type_t od_type;
dmu_object_type_t od_crtype;
uint64_t od_blocksize;
uint64_t od_crblocksize;
uint64_t od_gen;
uint64_t od_crgen;
char od_name[ZFS_MAX_DATASET_NAME_LEN];
} ztest_od_t;
/*
* Per-dataset state.
*/
typedef struct ztest_ds {
ztest_shared_ds_t *zd_shared;
objset_t *zd_os;
krwlock_t zd_zilog_lock;
zilog_t *zd_zilog;
ztest_od_t *zd_od; /* debugging aid */
char zd_name[ZFS_MAX_DATASET_NAME_LEN];
kmutex_t zd_dirobj_lock;
rll_t zd_object_lock[ZTEST_OBJECT_LOCKS];
rll_t zd_range_lock[ZTEST_RANGE_LOCKS];
} ztest_ds_t;
/*
* Per-iteration state.
*/
typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
typedef struct ztest_info {
ztest_func_t *zi_func; /* test function */
uint64_t zi_iters; /* iterations per execution */
uint64_t *zi_interval; /* execute every <interval> seconds */
} ztest_info_t;
typedef struct ztest_shared_callstate {
uint64_t zc_count; /* per-pass count */
uint64_t zc_time; /* per-pass time */
uint64_t zc_next; /* next time to call this function */
} ztest_shared_callstate_t;
static ztest_shared_callstate_t *ztest_shared_callstate;
#define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c])
/*
* Note: these aren't static because we want dladdr() to work.
*/
ztest_func_t ztest_dmu_read_write;
ztest_func_t ztest_dmu_write_parallel;
ztest_func_t ztest_dmu_object_alloc_free;
ztest_func_t ztest_dmu_commit_callbacks;
ztest_func_t ztest_zap;
ztest_func_t ztest_zap_parallel;
ztest_func_t ztest_zil_commit;
ztest_func_t ztest_zil_remount;
ztest_func_t ztest_dmu_read_write_zcopy;
ztest_func_t ztest_dmu_objset_create_destroy;
ztest_func_t ztest_dmu_prealloc;
ztest_func_t ztest_fzap;
ztest_func_t ztest_dmu_snapshot_create_destroy;
ztest_func_t ztest_dsl_prop_get_set;
ztest_func_t ztest_spa_prop_get_set;
ztest_func_t ztest_spa_create_destroy;
ztest_func_t ztest_fault_inject;
ztest_func_t ztest_ddt_repair;
ztest_func_t ztest_dmu_snapshot_hold;
ztest_func_t ztest_scrub;
ztest_func_t ztest_dsl_dataset_promote_busy;
ztest_func_t ztest_vdev_attach_detach;
ztest_func_t ztest_vdev_LUN_growth;
ztest_func_t ztest_vdev_add_remove;
ztest_func_t ztest_vdev_aux_add_remove;
ztest_func_t ztest_split_pool;
ztest_func_t ztest_reguid;
ztest_func_t ztest_spa_upgrade;
ztest_func_t ztest_device_removal;
ztest_func_t ztest_remap_blocks;
ztest_func_t ztest_spa_checkpoint_create_discard;
ztest_func_t ztest_initialize;
uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
uint64_t zopt_often = 1ULL * NANOSEC; /* every second */
uint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */
uint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */
ztest_info_t ztest_info[] = {
{ ztest_dmu_read_write, 1, &zopt_always },
{ ztest_dmu_write_parallel, 10, &zopt_always },
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
{ ztest_dmu_commit_callbacks, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
{ ztest_split_pool, 1, &zopt_always },
{ ztest_zil_commit, 1, &zopt_incessant },
{ ztest_zil_remount, 1, &zopt_sometimes },
{ ztest_dmu_read_write_zcopy, 1, &zopt_often },
{ ztest_dmu_objset_create_destroy, 1, &zopt_often },
{ ztest_dsl_prop_get_set, 1, &zopt_often },
{ ztest_spa_prop_get_set, 1, &zopt_sometimes },
#if 0
{ ztest_dmu_prealloc, 1, &zopt_sometimes },
#endif
{ ztest_fzap, 1, &zopt_sometimes },
{ ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
{ ztest_spa_create_destroy, 1, &zopt_sometimes },
{ ztest_fault_inject, 1, &zopt_sometimes },
{ ztest_ddt_repair, 1, &zopt_sometimes },
{ ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
{ ztest_reguid, 1, &zopt_rarely },
{ ztest_scrub, 1, &zopt_rarely },
{ ztest_spa_upgrade, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
{ ztest_vdev_attach_detach, 1, &zopt_sometimes },
{ ztest_vdev_LUN_growth, 1, &zopt_rarely },
{ ztest_vdev_add_remove, 1,
&ztest_opts.zo_vdevtime },
{ ztest_vdev_aux_add_remove, 1,
&ztest_opts.zo_vdevtime },
{ ztest_device_removal, 1, &zopt_sometimes },
{ ztest_remap_blocks, 1, &zopt_sometimes },
{ ztest_spa_checkpoint_create_discard, 1, &zopt_rarely },
{ ztest_initialize, 1, &zopt_sometimes }
};
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
/*
* The following struct is used to hold a list of uncalled commit callbacks.
* The callbacks are ordered by txg number.
*/
typedef struct ztest_cb_list {
kmutex_t zcl_callbacks_lock;
list_t zcl_callbacks;
} ztest_cb_list_t;
/*
* Stuff we need to share writably between parent and child.
*/
typedef struct ztest_shared {
boolean_t zs_do_init;
hrtime_t zs_proc_start;
hrtime_t zs_proc_stop;
hrtime_t zs_thread_start;
hrtime_t zs_thread_stop;
hrtime_t zs_thread_kill;
uint64_t zs_enospc_count;
uint64_t zs_vdev_next_leaf;
uint64_t zs_vdev_aux;
uint64_t zs_alloc;
uint64_t zs_space;
uint64_t zs_splits;
uint64_t zs_mirrors;
uint64_t zs_metaslab_sz;
uint64_t zs_metaslab_df_alloc_threshold;
uint64_t zs_guid;
} ztest_shared_t;
#define ID_PARALLEL -1ULL
static char ztest_dev_template[] = "%s/%s.%llua";
static char ztest_aux_template[] = "%s/%s.%s.%llu";
ztest_shared_t *ztest_shared;
static spa_t *ztest_spa = NULL;
static ztest_ds_t *ztest_ds;
static kmutex_t ztest_vdev_lock;
static kmutex_t ztest_checkpoint_lock;
static boolean_t ztest_device_removal_active = B_FALSE;
/*
* The ztest_name_lock protects the pool and dataset namespace used by
* the individual tests. To modify the namespace, consumers must grab
* this lock as writer. Grabbing the lock as reader will ensure that the
* namespace does not change while the lock is held.
*/
static krwlock_t ztest_name_lock;
static boolean_t ztest_dump_core = B_TRUE;
static boolean_t ztest_exiting;
/* Global commit callback list */
static ztest_cb_list_t zcl;
enum ztest_object {
ZTEST_META_DNODE = 0,
ZTEST_DIROBJ,
ZTEST_OBJECTS
};
static void usage(boolean_t) __NORETURN;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
* debugging facilities.
*/
const char *
_umem_debug_init()
{
return ("default,verbose"); /* $UMEM_DEBUG setting */
}
const char *
_umem_logging_init(void)
{
return ("fail,contents"); /* $UMEM_LOGGING setting */
}
#define FATAL_MSG_SZ 1024
char *fatal_msg;
static void
fatal(int do_perror, char *message, ...)
{
va_list args;
int save_errno = errno;
char buf[FATAL_MSG_SZ];
(void) fflush(stdout);
va_start(args, message);
(void) sprintf(buf, "ztest: ");
/* LINTED */
(void) vsprintf(buf + strlen(buf), message, args);
va_end(args);
if (do_perror) {
(void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf),
": %s", strerror(save_errno));
}
(void) fprintf(stderr, "%s\n", buf);
fatal_msg = buf; /* to ease debugging */
if (ztest_dump_core)
abort();
exit(3);
}
static int
str2shift(const char *buf)
{
const char *ends = "BKMGTPEZ";
int i;
if (buf[0] == '\0')
return (0);
for (i = 0; i < strlen(ends); i++) {
if (toupper(buf[0]) == ends[i])
break;
}
if (i == strlen(ends)) {
(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n",
buf);
usage(B_FALSE);
}
if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) {
return (10*i);
}
(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf);
usage(B_FALSE);
/* NOTREACHED */
}
static uint64_t
nicenumtoull(const char *buf)
{
char *end;
uint64_t val;
val = strtoull(buf, &end, 0);
if (end == buf) {
(void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf);
usage(B_FALSE);
} else if (end[0] == '.') {
double fval = strtod(buf, &end);
fval *= pow(2, str2shift(end));
if (fval > UINT64_MAX) {
(void) fprintf(stderr, "ztest: value too large: %s\n",
buf);
usage(B_FALSE);
}
val = (uint64_t)fval;
} else {
int shift = str2shift(end);
if (shift >= 64 || (val << shift) >> shift != val) {
(void) fprintf(stderr, "ztest: value too large: %s\n",
buf);
usage(B_FALSE);
}
val <<= shift;
}
return (val);
}
static void
usage(boolean_t requested)
{
const ztest_shared_opts_t *zo = &ztest_opts_defaults;
char nice_vdev_size[NN_NUMBUF_SZ];
char nice_force_ganging[NN_NUMBUF_SZ];
FILE *fp = requested ? stdout : stderr;
nicenum(zo->zo_vdev_size, nice_vdev_size, sizeof (nice_vdev_size));
nicenum(zo->zo_metaslab_force_ganging, nice_force_ganging,
sizeof (nice_force_ganging));
(void) fprintf(fp, "Usage: %s\n"
"\t[-v vdevs (default: %llu)]\n"
"\t[-s size_of_each_vdev (default: %s)]\n"
"\t[-a alignment_shift (default: %d)] use 0 for random\n"
"\t[-m mirror_copies (default: %d)]\n"
"\t[-r raidz_disks (default: %d)]\n"
"\t[-R raidz_parity (default: %d)]\n"
"\t[-d datasets (default: %d)]\n"
"\t[-t threads (default: %d)]\n"
"\t[-g gang_block_threshold (default: %s)]\n"
"\t[-i init_count (default: %d)] initialize pool i times\n"
"\t[-k kill_percentage (default: %llu%%)]\n"
"\t[-p pool_name (default: %s)]\n"
"\t[-f dir (default: %s)] file directory for vdev files\n"
"\t[-V] verbose (use multiple times for ever more blather)\n"
"\t[-E] use existing pool instead of creating new one\n"
"\t[-T time (default: %llu sec)] total run time\n"
"\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n"
"\t[-P passtime (default: %llu sec)] time per pass\n"
"\t[-B alt_ztest (default: <none>)] alternate ztest path\n"
"\t[-o variable=value] ... set global variable to an unsigned\n"
"\t 32-bit integer value\n"
"\t[-h] (print help)\n"
"",
zo->zo_pool,
(u_longlong_t)zo->zo_vdevs, /* -v */
nice_vdev_size, /* -s */
zo->zo_ashift, /* -a */
zo->zo_mirrors, /* -m */
zo->zo_raidz, /* -r */
zo->zo_raidz_parity, /* -R */
zo->zo_datasets, /* -d */
zo->zo_threads, /* -t */
nice_force_ganging, /* -g */
zo->zo_init, /* -i */
(u_longlong_t)zo->zo_killrate, /* -k */
zo->zo_pool, /* -p */
zo->zo_dir, /* -f */
(u_longlong_t)zo->zo_time, /* -T */
(u_longlong_t)zo->zo_maxloops, /* -F */
(u_longlong_t)zo->zo_passtime);
exit(requested ? 0 : 1);
}
static void
process_options(int argc, char **argv)
{
char *path;
ztest_shared_opts_t *zo = &ztest_opts;
int opt;
uint64_t value;
char altdir[MAXNAMELEN] = { 0 };
bcopy(&ztest_opts_defaults, zo, sizeof (*zo));
while ((opt = getopt(argc, argv,
"v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:o:")) != EOF) {
value = 0;
switch (opt) {
case 'v':
case 's':
case 'a':
case 'm':
case 'r':
case 'R':
case 'd':
case 't':
case 'g':
case 'i':
case 'k':
case 'T':
case 'P':
case 'F':
value = nicenumtoull(optarg);
}
switch (opt) {
case 'v':
zo->zo_vdevs = value;
break;
case 's':
zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value);
break;
case 'a':
zo->zo_ashift = value;
break;
case 'm':
zo->zo_mirrors = value;
break;
case 'r':
zo->zo_raidz = MAX(1, value);
break;
case 'R':
zo->zo_raidz_parity = MIN(MAX(value, 1), 3);
break;
case 'd':
zo->zo_datasets = MAX(1, value);
break;
case 't':
zo->zo_threads = MAX(1, value);
break;
case 'g':
zo->zo_metaslab_force_ganging =
MAX(SPA_MINBLOCKSIZE << 1, value);
break;
case 'i':
zo->zo_init = value;
break;
case 'k':
zo->zo_killrate = value;
break;
case 'p':
(void) strlcpy(zo->zo_pool, optarg,
sizeof (zo->zo_pool));
break;
case 'f':
path = realpath(optarg, NULL);
if (path == NULL) {
(void) fprintf(stderr, "error: %s: %s\n",
optarg, strerror(errno));
usage(B_FALSE);
} else {
(void) strlcpy(zo->zo_dir, path,
sizeof (zo->zo_dir));
}
break;
case 'V':
zo->zo_verbose++;
break;
case 'E':
zo->zo_init = 0;
break;
case 'T':
zo->zo_time = value;
break;
case 'P':
zo->zo_passtime = MAX(1, value);
break;
case 'F':
zo->zo_maxloops = MAX(1, value);
break;
case 'B':
(void) strlcpy(altdir, optarg, sizeof (altdir));
break;
case 'o':
if (set_global_var(optarg) != 0)
usage(B_FALSE);
break;
case 'h':
usage(B_TRUE);
break;
case '?':
default:
usage(B_FALSE);
break;
}
}
zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1);
zo->zo_vdevtime =
(zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs :
UINT64_MAX >> 2);
if (strlen(altdir) > 0) {
char *cmd;
char *realaltdir;
char *bin;
char *ztest;
char *isa;
int isalen;
cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
VERIFY(NULL != realpath(getexecname(), cmd));
if (0 != access(altdir, F_OK)) {
ztest_dump_core = B_FALSE;
fatal(B_TRUE, "invalid alternate ztest path: %s",
altdir);
}
VERIFY(NULL != realpath(altdir, realaltdir));
/*
* 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest".
* We want to extract <isa> to determine if we should use
* 32 or 64 bit binaries.
*/
bin = strstr(cmd, "/usr/bin/");
ztest = strstr(bin, "/ztest");
isa = bin + 9;
isalen = ztest - isa;
(void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest),
"%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa);
(void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath),
"%s/usr/lib/%.*s", realaltdir, isalen, isa);
if (0 != access(zo->zo_alt_ztest, X_OK)) {
ztest_dump_core = B_FALSE;
fatal(B_TRUE, "invalid alternate ztest: %s",
zo->zo_alt_ztest);
} else if (0 != access(zo->zo_alt_libpath, X_OK)) {
ztest_dump_core = B_FALSE;
fatal(B_TRUE, "invalid alternate lib directory %s",
zo->zo_alt_libpath);
}
umem_free(cmd, MAXPATHLEN);
umem_free(realaltdir, MAXPATHLEN);
}
}
static void
ztest_kill(ztest_shared_t *zs)
{
zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
/*
* Before we kill off ztest, make sure that the config is updated.
* See comment above spa_write_cachefile().
*/
mutex_enter(&spa_namespace_lock);
spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE);
mutex_exit(&spa_namespace_lock);
zfs_dbgmsg_print(FTAG);
(void) kill(getpid(), SIGKILL);
}
static uint64_t
ztest_random(uint64_t range)
{
uint64_t r;
ASSERT3S(ztest_fd_rand, >=, 0);
if (range == 0)
return (0);
if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
fatal(1, "short read from /dev/urandom");
return (r % range);
}
/* ARGSUSED */
static void
ztest_record_enospc(const char *s)
{
ztest_shared->zs_enospc_count++;
}
static uint64_t
ztest_get_ashift(void)
{
if (ztest_opts.zo_ashift == 0)
return (SPA_MINBLOCKSHIFT + ztest_random(5));
return (ztest_opts.zo_ashift);
}
static nvlist_t *
make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
{
char pathbuf[MAXPATHLEN];
uint64_t vdev;
nvlist_t *file;
if (ashift == 0)
ashift = ztest_get_ashift();
if (path == NULL) {
path = pathbuf;
if (aux != NULL) {
vdev = ztest_shared->zs_vdev_aux;
(void) snprintf(path, sizeof (pathbuf),
ztest_aux_template, ztest_opts.zo_dir,
pool == NULL ? ztest_opts.zo_pool : pool,
aux, vdev);
} else {
vdev = ztest_shared->zs_vdev_next_leaf++;
(void) snprintf(path, sizeof (pathbuf),
ztest_dev_template, ztest_opts.zo_dir,
pool == NULL ? ztest_opts.zo_pool : pool, vdev);
}
}
if (size != 0) {
int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
if (fd == -1)
fatal(1, "can't open %s", path);
if (ftruncate(fd, size) != 0)
fatal(1, "can't ftruncate %s", path);
(void) close(fd);
}
VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
return (file);
}
static nvlist_t *
make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
uint64_t ashift, int r)
{
nvlist_t *raidz, **child;
int c;
if (r < 2)
return (make_vdev_file(path, aux, pool, size, ashift));
child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);
for (c = 0; c < r; c++)
child[c] = make_vdev_file(path, aux, pool, size, ashift);
VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
VDEV_TYPE_RAIDZ) == 0);
VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
ztest_opts.zo_raidz_parity) == 0);
VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
child, r) == 0);
for (c = 0; c < r; c++)
nvlist_free(child[c]);
umem_free(child, r * sizeof (nvlist_t *));
return (raidz);
}
static nvlist_t *
make_vdev_mirror(char *path, char *aux, char *pool, size_t size,
uint64_t ashift, int r, int m)
{
nvlist_t *mirror, **child;
int c;
if (m < 1)
return (make_vdev_raidz(path, aux, pool, size, ashift, r));
child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
for (c = 0; c < m; c++)
child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r);
VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
VDEV_TYPE_MIRROR) == 0);
VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
child, m) == 0);
for (c = 0; c < m; c++)
nvlist_free(child[c]);
umem_free(child, m * sizeof (nvlist_t *));
return (mirror);
}
static nvlist_t *
make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift,
int log, int r, int m, int t)
{
nvlist_t *root, **child;
int c;
ASSERT(t > 0);
child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
for (c = 0; c < t; c++) {
child[c] = make_vdev_mirror(path, aux, pool, size, ashift,
r, m);
VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
log) == 0);
}
VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
child, t) == 0);
for (c = 0; c < t; c++)
nvlist_free(child[c]);
umem_free(child, t * sizeof (nvlist_t *));
return (root);
}
/*
* Find a random spa version. Returns back a random spa version in the
* range [initial_version, SPA_VERSION_FEATURES].
*/
static uint64_t
ztest_random_spa_version(uint64_t initial_version)
{
uint64_t version = initial_version;
if (version <= SPA_VERSION_BEFORE_FEATURES) {
version = version +
ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1);
}
if (version > SPA_VERSION_BEFORE_FEATURES)
version = SPA_VERSION_FEATURES;
ASSERT(SPA_VERSION_IS_SUPPORTED(version));
return (version);
}
static int
ztest_random_blocksize(void)
{
uint64_t block_shift;
/*
* Choose a block size >= the ashift.
* If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks.
*/
int maxbs = SPA_OLD_MAXBLOCKSHIFT;
if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE)
maxbs = 20;
block_shift = ztest_random(maxbs - ztest_spa->spa_max_ashift + 1);
return (1 << (SPA_MINBLOCKSHIFT + block_shift));
}
static int
ztest_random_ibshift(void)
{
return (DN_MIN_INDBLKSHIFT +
ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1));
}
static uint64_t
ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
{
uint64_t top;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *tvd;
ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
do {
top = ztest_random(rvd->vdev_children);
tvd = rvd->vdev_child[top];
} while (!vdev_is_concrete(tvd) || (tvd->vdev_islog && !log_ok) ||
tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
return (top);
}
static uint64_t
ztest_random_dsl_prop(zfs_prop_t prop)
{
uint64_t value;
do {
value = zfs_prop_random_value(prop, ztest_random(-1ULL));
} while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF);
return (value);
}
static int
ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
boolean_t inherit)
{
const char *propname = zfs_prop_to_name(prop);
const char *valname;
char setpoint[MAXPATHLEN];
uint64_t curval;
int error;
error = dsl_prop_set_int(osname, propname,
(inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value);
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
return (error);
}
ASSERT0(error);
VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint));
if (ztest_opts.zo_verbose >= 6) {
VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
(void) printf("%s %s = %s at '%s'\n",
osname, propname, valname, setpoint);
}
return (error);
}
static int
ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value)
{
spa_t *spa = ztest_spa;
nvlist_t *props = NULL;
int error;
VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
error = spa_prop_set(spa, props);
nvlist_free(props);
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
return (error);
}
ASSERT0(error);
return (error);
}
static void
ztest_rll_init(rll_t *rll)
{
rll->rll_writer = NULL;
rll->rll_readers = 0;
mutex_init(&rll->rll_lock, NULL, USYNC_THREAD, NULL);
cv_init(&rll->rll_cv, NULL, USYNC_THREAD, NULL);
}
static void
ztest_rll_destroy(rll_t *rll)
{
ASSERT(rll->rll_writer == NULL);
ASSERT(rll->rll_readers == 0);
mutex_destroy(&rll->rll_lock);
cv_destroy(&rll->rll_cv);
}
static void
ztest_rll_lock(rll_t *rll, rl_type_t type)
{
mutex_enter(&rll->rll_lock);
if (type == RL_READER) {
while (rll->rll_writer != NULL)
cv_wait(&rll->rll_cv, &rll->rll_lock);
rll->rll_readers++;
} else {
while (rll->rll_writer != NULL || rll->rll_readers)
cv_wait(&rll->rll_cv, &rll->rll_lock);
rll->rll_writer = curthread;
}
mutex_exit(&rll->rll_lock);
}
static void
ztest_rll_unlock(rll_t *rll)
{
mutex_enter(&rll->rll_lock);
if (rll->rll_writer) {
ASSERT(rll->rll_readers == 0);
rll->rll_writer = NULL;
} else {
ASSERT(rll->rll_readers != 0);
ASSERT(rll->rll_writer == NULL);
rll->rll_readers--;
}
if (rll->rll_writer == NULL && rll->rll_readers == 0)
cv_broadcast(&rll->rll_cv);
mutex_exit(&rll->rll_lock);
}
static void
ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
{
rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
ztest_rll_lock(rll, type);
}
static void
ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
{
rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
ztest_rll_unlock(rll);
}
static rl_t *
ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
uint64_t size, rl_type_t type)
{
uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
rl_t *rl;
rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
rl->rl_object = object;
rl->rl_offset = offset;
rl->rl_size = size;
rl->rl_lock = rll;
ztest_rll_lock(rll, type);
return (rl);
}
static void
ztest_range_unlock(rl_t *rl)
{
rll_t *rll = rl->rl_lock;
ztest_rll_unlock(rll);
umem_free(rl, sizeof (*rl));
}
static void
ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os)
{
zd->zd_os = os;
zd->zd_zilog = dmu_objset_zil(os);
zd->zd_shared = szd;
dmu_objset_name(os, zd->zd_name);
if (zd->zd_shared != NULL)
zd->zd_shared->zd_seq = 0;
rw_init(&zd->zd_zilog_lock, NULL, USYNC_THREAD, NULL);
mutex_init(&zd->zd_dirobj_lock, NULL, USYNC_THREAD, NULL);
for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
ztest_rll_init(&zd->zd_object_lock[l]);
for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
ztest_rll_init(&zd->zd_range_lock[l]);
}
static void
ztest_zd_fini(ztest_ds_t *zd)
{
mutex_destroy(&zd->zd_dirobj_lock);
for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
ztest_rll_destroy(&zd->zd_object_lock[l]);
for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
ztest_rll_destroy(&zd->zd_range_lock[l]);
}
#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
static uint64_t
ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
{
uint64_t txg;
int error;
/*
* Attempt to assign tx to some transaction group.
*/
error = dmu_tx_assign(tx, txg_how);
if (error) {
if (error == ERESTART) {
ASSERT(txg_how == TXG_NOWAIT);
dmu_tx_wait(tx);
} else {
ASSERT3U(error, ==, ENOSPC);
ztest_record_enospc(tag);
}
dmu_tx_abort(tx);
return (0);
}
txg = dmu_tx_get_txg(tx);
ASSERT(txg != 0);
return (txg);
}
static void
ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
{
uint64_t *ip = buf;
uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
while (ip < ip_end)
*ip++ = value;
}
static boolean_t
ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
{
uint64_t *ip = buf;
uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
uint64_t diff = 0;
while (ip < ip_end)
diff |= (value - *ip++);
return (diff == 0);
}
static void
ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
{
bt->bt_magic = BT_MAGIC;
bt->bt_objset = dmu_objset_id(os);
bt->bt_object = object;
bt->bt_offset = offset;
bt->bt_gen = gen;
bt->bt_txg = txg;
bt->bt_crtxg = crtxg;
}
static void
ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
{
ASSERT3U(bt->bt_magic, ==, BT_MAGIC);
ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
ASSERT3U(bt->bt_object, ==, object);
ASSERT3U(bt->bt_offset, ==, offset);
ASSERT3U(bt->bt_gen, <=, gen);
ASSERT3U(bt->bt_txg, <=, txg);
ASSERT3U(bt->bt_crtxg, ==, crtxg);
}
static ztest_block_tag_t *
ztest_bt_bonus(dmu_buf_t *db)
{
dmu_object_info_t doi;
ztest_block_tag_t *bt;
dmu_object_info_from_db(db, &doi);
ASSERT3U(doi.doi_bonus_size, <=, db->db_size);
ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt));
bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt));
return (bt);
}
/*
* ZIL logging ops
*/
#define lrz_type lr_mode
#define lrz_blocksize lr_uid
#define lrz_ibshift lr_gid
#define lrz_bonustype lr_rdev
#define lrz_bonuslen lr_crtime[1]
static void
ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr)
{
char *name = (void *)(lr + 1); /* name follows lr */
size_t namesize = strlen(name) + 1;
itx_t *itx;
if (zil_replaying(zd->zd_zilog, tx))
return;
itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize);
bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
sizeof (*lr) + namesize - sizeof (lr_t));
zil_itx_assign(zd->zd_zilog, itx, tx);
}
static void
ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object)
{
char *name = (void *)(lr + 1); /* name follows lr */
size_t namesize = strlen(name) + 1;
itx_t *itx;
if (zil_replaying(zd->zd_zilog, tx))
return;
itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize);
bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
sizeof (*lr) + namesize - sizeof (lr_t));
itx->itx_oid = object;
zil_itx_assign(zd->zd_zilog, itx, tx);
}
static void
ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
{
itx_t *itx;
itx_wr_state_t write_state = ztest_random(WR_NUM_STATES);
if (zil_replaying(zd->zd_zilog, tx))
return;
if (lr->lr_length > ZIL_MAX_LOG_DATA)
write_state = WR_INDIRECT;
itx = zil_itx_create(TX_WRITE,
sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0));
if (write_state == WR_COPIED &&
dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
write_state = WR_NEED_COPY;
}
itx->itx_private = zd;
itx->itx_wr_state = write_state;
itx->itx_sync = (ztest_random(8) == 0);
bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
sizeof (*lr) - sizeof (lr_t));
zil_itx_assign(zd->zd_zilog, itx, tx);
}
static void
ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr)
{
itx_t *itx;
if (zil_replaying(zd->zd_zilog, tx))
return;
itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
sizeof (*lr) - sizeof (lr_t));
itx->itx_sync = B_FALSE;
zil_itx_assign(zd->zd_zilog, itx, tx);
}
static void
ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr)
{
itx_t *itx;
if (zil_replaying(zd->zd_zilog, tx))
return;
itx = zil_itx_create(TX_SETATTR, sizeof (*lr));
bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
sizeof (*lr) - sizeof (lr_t));
itx->itx_sync = B_FALSE;
zil_itx_assign(zd->zd_zilog, itx, tx);
}
/*
* ZIL replay ops
*/
static int
ztest_replay_create(void *arg1, void *arg2, boolean_t byteswap)
{
ztest_ds_t *zd = arg1;
lr_create_t *lr = arg2;
char *name = (void *)(lr + 1); /* name follows lr */
objset_t *os = zd->zd_os;
ztest_block_tag_t *bbt;
dmu_buf_t *db;
dmu_tx_t *tx;
uint64_t txg;
int error = 0;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
ASSERT(lr->lr_doid == ZTEST_DIROBJ);
ASSERT(name[0] != '\0');
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name);
if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
} else {
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
}
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0)
return (ENOSPC);
ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid);
if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
if (lr->lr_foid == 0) {
lr->lr_foid = zap_create(os,
lr->lrz_type, lr->lrz_bonustype,
lr->lrz_bonuslen, tx);
} else {
error = zap_create_claim(os, lr->lr_foid,
lr->lrz_type, lr->lrz_bonustype,
lr->lrz_bonuslen, tx);
}
} else {
if (lr->lr_foid == 0) {
lr->lr_foid = dmu_object_alloc(os,
lr->lrz_type, 0, lr->lrz_bonustype,
lr->lrz_bonuslen, tx);
} else {
error = dmu_object_claim(os, lr->lr_foid,
lr->lrz_type, 0, lr->lrz_bonustype,
lr->lrz_bonuslen, tx);
}
}
if (error) {
ASSERT3U(error, ==, EEXIST);
ASSERT(zd->zd_zilog->zl_replay);
dmu_tx_commit(tx);
return (error);
}
ASSERT(lr->lr_foid != 0);
if (lr->lrz_type != DMU_OT_ZAP_OTHER)
VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid,
lr->lrz_blocksize, lr->lrz_ibshift, tx));
VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
bbt = ztest_bt_bonus(db);
dmu_buf_will_dirty(db, tx);
ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg);
dmu_buf_rele(db, FTAG);
VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
&lr->lr_foid, tx));
(void) ztest_log_create(zd, tx, lr);
dmu_tx_commit(tx);
return (0);
}
static int
ztest_replay_remove(void *arg1, void *arg2, boolean_t byteswap)
{
ztest_ds_t *zd = arg1;
lr_remove_t *lr = arg2;
char *name = (void *)(lr + 1); /* name follows lr */
objset_t *os = zd->zd_os;
dmu_object_info_t doi;
dmu_tx_t *tx;
uint64_t object, txg;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
ASSERT(lr->lr_doid == ZTEST_DIROBJ);
ASSERT(name[0] != '\0');
VERIFY3U(0, ==,
zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object));
ASSERT(object != 0);
ztest_object_lock(zd, object, RL_WRITER);
VERIFY3U(0, ==, dmu_object_info(os, object, &doi));
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name);
dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
ztest_object_unlock(zd, object);
return (ENOSPC);
}
if (doi.doi_type == DMU_OT_ZAP_OTHER) {
VERIFY3U(0, ==, zap_destroy(os, object, tx));
} else {
VERIFY3U(0, ==, dmu_object_free(os, object, tx));
}
VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx));
(void) ztest_log_remove(zd, tx, lr, object);
dmu_tx_commit(tx);
ztest_object_unlock(zd, object);
return (0);
}
static int
ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
{
ztest_ds_t *zd = arg1;
lr_write_t *lr = arg2;
objset_t *os = zd->zd_os;
void *data = lr + 1; /* data follows lr */
uint64_t offset, length;
ztest_block_tag_t *bt = data;
ztest_block_tag_t *bbt;
uint64_t gen, txg, lrtxg, crtxg;
dmu_object_info_t doi;
dmu_tx_t *tx;
dmu_buf_t *db;
arc_buf_t *abuf = NULL;
rl_t *rl;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
offset = lr->lr_offset;
length = lr->lr_length;
/* If it's a dmu_sync() block, write the whole block */
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
if (length < blocksize) {
offset -= offset % blocksize;
length = blocksize;
}
}
if (bt->bt_magic == BSWAP_64(BT_MAGIC))
byteswap_uint64_array(bt, sizeof (*bt));
if (bt->bt_magic != BT_MAGIC)
bt = NULL;
ztest_object_lock(zd, lr->lr_foid, RL_READER);
rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
dmu_object_info_from_db(db, &doi);
bbt = ztest_bt_bonus(db);
ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
gen = bbt->bt_gen;
crtxg = bbt->bt_crtxg;
lrtxg = lr->lr_common.lrc_txg;
tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, lr->lr_foid, offset, length);
if (ztest_random(8) == 0 && length == doi.doi_data_block_size &&
P2PHASE(offset, length) == 0)
abuf = dmu_request_arcbuf(db, length);
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
if (abuf != NULL)
dmu_return_arcbuf(abuf);
dmu_buf_rele(db, FTAG);
ztest_range_unlock(rl);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
if (bt != NULL) {
/*
* Usually, verify the old data before writing new data --
* but not always, because we also want to verify correct
* behavior when the data was not recently read into cache.
*/
ASSERT(offset % doi.doi_data_block_size == 0);
if (ztest_random(4) != 0) {
int prefetch = ztest_random(2) ?
DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
ztest_block_tag_t rbt;
VERIFY(dmu_read(os, lr->lr_foid, offset,
sizeof (rbt), &rbt, prefetch) == 0);
if (rbt.bt_magic == BT_MAGIC) {
ztest_bt_verify(&rbt, os, lr->lr_foid,
offset, gen, txg, crtxg);
}
}
/*
* Writes can appear to be newer than the bonus buffer because
* the ztest_get_data() callback does a dmu_read() of the
* open-context data, which may be different than the data
* as it was when the write was generated.
*/
if (zd->zd_zilog->zl_replay) {
ztest_bt_verify(bt, os, lr->lr_foid, offset,
MAX(gen, bt->bt_gen), MAX(txg, lrtxg),
bt->bt_crtxg);
}
/*
* Set the bt's gen/txg to the bonus buffer's gen/txg
* so that all of the usual ASSERTs will work.
*/
ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg);
}
if (abuf == NULL) {
dmu_write(os, lr->lr_foid, offset, length, data, tx);
} else {
bcopy(data, abuf->b_data, length);
dmu_assign_arcbuf(db, offset, abuf, tx);
}
(void) ztest_log_write(zd, tx, lr);
dmu_buf_rele(db, FTAG);
dmu_tx_commit(tx);
ztest_range_unlock(rl);
ztest_object_unlock(zd, lr->lr_foid);
return (0);
}
static int
ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
{
ztest_ds_t *zd = arg1;
lr_truncate_t *lr = arg2;
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
rl_t *rl;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
ztest_object_lock(zd, lr->lr_foid, RL_READER);
rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
RL_WRITER);
tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length);
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
ztest_range_unlock(rl);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
lr->lr_length, tx) == 0);
(void) ztest_log_truncate(zd, tx, lr);
dmu_tx_commit(tx);
ztest_range_unlock(rl);
ztest_object_unlock(zd, lr->lr_foid);
return (0);
}
static int
ztest_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
{
ztest_ds_t *zd = arg1;
lr_setattr_t *lr = arg2;
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
dmu_buf_t *db;
ztest_block_tag_t *bbt;
uint64_t txg, lrtxg, crtxg;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
ztest_object_lock(zd, lr->lr_foid, RL_WRITER);
VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, lr->lr_foid);
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
dmu_buf_rele(db, FTAG);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
bbt = ztest_bt_bonus(db);
ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
crtxg = bbt->bt_crtxg;
lrtxg = lr->lr_common.lrc_txg;
if (zd->zd_zilog->zl_replay) {
ASSERT(lr->lr_size != 0);
ASSERT(lr->lr_mode != 0);
ASSERT(lrtxg != 0);
} else {
/*
* Randomly change the size and increment the generation.
*/
lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) *
sizeof (*bbt);
lr->lr_mode = bbt->bt_gen + 1;
ASSERT(lrtxg == 0);
}
/*
* Verify that the current bonus buffer is not newer than our txg.
*/
ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode,
MAX(txg, lrtxg), crtxg);
dmu_buf_will_dirty(db, tx);
ASSERT3U(lr->lr_size, >=, sizeof (*bbt));
ASSERT3U(lr->lr_size, <=, db->db_size);
VERIFY0(dmu_set_bonus(db, lr->lr_size, tx));
bbt = ztest_bt_bonus(db);
ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);
dmu_buf_rele(db, FTAG);
(void) ztest_log_setattr(zd, tx, lr);
dmu_tx_commit(tx);
ztest_object_unlock(zd, lr->lr_foid);
return (0);
}
zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* 0 no such transaction type */
ztest_replay_create, /* TX_CREATE */
NULL, /* TX_MKDIR */
NULL, /* TX_MKXATTR */
NULL, /* TX_SYMLINK */
ztest_replay_remove, /* TX_REMOVE */
NULL, /* TX_RMDIR */
NULL, /* TX_LINK */
NULL, /* TX_RENAME */
ztest_replay_write, /* TX_WRITE */
ztest_replay_truncate, /* TX_TRUNCATE */
ztest_replay_setattr, /* TX_SETATTR */
NULL, /* TX_ACL */
NULL, /* TX_CREATE_ACL */
NULL, /* TX_CREATE_ATTR */
NULL, /* TX_CREATE_ACL_ATTR */
NULL, /* TX_MKDIR_ACL */
NULL, /* TX_MKDIR_ATTR */
NULL, /* TX_MKDIR_ACL_ATTR */
NULL, /* TX_WRITE2 */
};
/*
* ZIL get_data callbacks
*/
/* ARGSUSED */
static void
ztest_get_done(zgd_t *zgd, int error)
{
ztest_ds_t *zd = zgd->zgd_private;
uint64_t object = zgd->zgd_rl->rl_object;
if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);
ztest_range_unlock(zgd->zgd_rl);
ztest_object_unlock(zd, object);
umem_free(zgd, sizeof (*zgd));
}
static int
ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
zio_t *zio)
{
ztest_ds_t *zd = arg;
objset_t *os = zd->zd_os;
uint64_t object = lr->lr_foid;
uint64_t offset = lr->lr_offset;
uint64_t size = lr->lr_length;
uint64_t txg = lr->lr_common.lrc_txg;
uint64_t crtxg;
dmu_object_info_t doi;
dmu_buf_t *db;
zgd_t *zgd;
int error;
ASSERT3P(lwb, !=, NULL);
ASSERT3P(zio, !=, NULL);
ASSERT3U(size, !=, 0);
ztest_object_lock(zd, object, RL_READER);
error = dmu_bonus_hold(os, object, FTAG, &db);
if (error) {
ztest_object_unlock(zd, object);
return (error);
}
crtxg = ztest_bt_bonus(db)->bt_crtxg;
if (crtxg == 0 || crtxg > txg) {
dmu_buf_rele(db, FTAG);
ztest_object_unlock(zd, object);
return (ENOENT);
}
dmu_object_info_from_db(db, &doi);
dmu_buf_rele(db, FTAG);
db = NULL;
zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
zgd->zgd_lwb = lwb;
zgd->zgd_private = zd;
if (buf != NULL) { /* immediate write */
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
RL_READER);
error = dmu_read(os, object, offset, size, buf,
DMU_READ_NO_PREFETCH);
ASSERT(error == 0);
} else {
size = doi.doi_data_block_size;
if (ISP2(size)) {
offset = P2ALIGN(offset, size);
} else {
ASSERT(offset < size);
offset = 0;
}
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
RL_READER);
error = dmu_buf_hold(os, object, offset, zgd, &db,
DMU_READ_NO_PREFETCH);
if (error == 0) {
blkptr_t *bp = &lr->lr_blkptr;
zgd->zgd_db = db;
zgd->zgd_bp = bp;
ASSERT(db->db_offset == offset);
ASSERT(db->db_size == size);
error = dmu_sync(zio, lr->lr_common.lrc_txg,
ztest_get_done, zgd);
if (error == 0)
return (0);
}
}
ztest_get_done(zgd, error);
return (error);
}
static void *
ztest_lr_alloc(size_t lrsize, char *name)
{
char *lr;
size_t namesize = name ? strlen(name) + 1 : 0;
lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL);
if (name)
bcopy(name, lr + lrsize, namesize);
return (lr);
}
void
ztest_lr_free(void *lr, size_t lrsize, char *name)
{
size_t namesize = name ? strlen(name) + 1 : 0;
umem_free(lr, lrsize + namesize);
}
/*
* Lookup a bunch of objects. Returns the number of objects not found.
*/
static int
ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
{
int missing = 0;
int error;
ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
for (int i = 0; i < count; i++, od++) {
od->od_object = 0;
error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
sizeof (uint64_t), 1, &od->od_object);
if (error) {
ASSERT(error == ENOENT);
ASSERT(od->od_object == 0);
missing++;
} else {
dmu_buf_t *db;
ztest_block_tag_t *bbt;
dmu_object_info_t doi;
ASSERT(od->od_object != 0);
ASSERT(missing == 0); /* there should be no gaps */
ztest_object_lock(zd, od->od_object, RL_READER);
VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
od->od_object, FTAG, &db));
dmu_object_info_from_db(db, &doi);
bbt = ztest_bt_bonus(db);
ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
od->od_type = doi.doi_type;
od->od_blocksize = doi.doi_data_block_size;
od->od_gen = bbt->bt_gen;
dmu_buf_rele(db, FTAG);
ztest_object_unlock(zd, od->od_object);
}
}
return (missing);
}
static int
ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
{
int missing = 0;
ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
for (int i = 0; i < count; i++, od++) {
if (missing) {
od->od_object = 0;
missing++;
continue;
}
lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
lr->lr_doid = od->od_dir;
lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */
lr->lrz_type = od->od_crtype;
lr->lrz_blocksize = od->od_crblocksize;
lr->lrz_ibshift = ztest_random_ibshift();
lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
lr->lrz_bonuslen = dmu_bonus_max();
lr->lr_gen = od->od_crgen;
lr->lr_crtime[0] = time(NULL);
if (ztest_replay_create(zd, lr, B_FALSE) != 0) {
ASSERT(missing == 0);
od->od_object = 0;
missing++;
} else {
od->od_object = lr->lr_foid;
od->od_type = od->od_crtype;
od->od_blocksize = od->od_crblocksize;
od->od_gen = od->od_crgen;
ASSERT(od->od_object != 0);
}
ztest_lr_free(lr, sizeof (*lr), od->od_name);
}
return (missing);
}
static int
ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
{
int missing = 0;
int error;
ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
od += count - 1;
for (int i = count - 1; i >= 0; i--, od--) {
if (missing) {
missing++;
continue;
}
/*
* No object was found.
*/
if (od->od_object == 0)
continue;
lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
lr->lr_doid = od->od_dir;
if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) {
ASSERT3U(error, ==, ENOSPC);
missing++;
} else {
od->od_object = 0;
}
ztest_lr_free(lr, sizeof (*lr), od->od_name);
}
return (missing);
}
static int
ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size,
void *data)
{
lr_write_t *lr;
int error;
lr = ztest_lr_alloc(sizeof (*lr) + size, NULL);
lr->lr_foid = object;
lr->lr_offset = offset;
lr->lr_length = size;
lr->lr_blkoff = 0;
BP_ZERO(&lr->lr_blkptr);
bcopy(data, lr + 1, size);
error = ztest_replay_write(zd, lr, B_FALSE);
ztest_lr_free(lr, sizeof (*lr) + size, NULL);
return (error);
}
static int
ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
{
lr_truncate_t *lr;
int error;
lr = ztest_lr_alloc(sizeof (*lr), NULL);
lr->lr_foid = object;
lr->lr_offset = offset;
lr->lr_length = size;
error = ztest_replay_truncate(zd, lr, B_FALSE);
ztest_lr_free(lr, sizeof (*lr), NULL);
return (error);
}
static int
ztest_setattr(ztest_ds_t *zd, uint64_t object)
{
lr_setattr_t *lr;
int error;
lr = ztest_lr_alloc(sizeof (*lr), NULL);
lr->lr_foid = object;
lr->lr_size = 0;
lr->lr_mode = 0;
error = ztest_replay_setattr(zd, lr, B_FALSE);
ztest_lr_free(lr, sizeof (*lr), NULL);
return (error);
}
static void
ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
{
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
rl_t *rl;
txg_wait_synced(dmu_objset_pool(os), 0);
ztest_object_lock(zd, object, RL_READER);
rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);
tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, object, offset, size);
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg != 0) {
dmu_prealloc(os, object, offset, size, tx);
dmu_tx_commit(tx);
txg_wait_synced(dmu_objset_pool(os), txg);
} else {
(void) dmu_free_long_range(os, object, offset, size);
}
ztest_range_unlock(rl);
ztest_object_unlock(zd, object);
}
static void
ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
{
int err;
ztest_block_tag_t wbt;
dmu_object_info_t doi;
enum ztest_io_type io_type;
uint64_t blocksize;
void *data;
VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
blocksize = doi.doi_data_block_size;
data = umem_alloc(blocksize, UMEM_NOFAIL);
/*
* Pick an i/o type at random, biased toward writing block tags.
*/
io_type = ztest_random(ZTEST_IO_TYPES);
if (ztest_random(2) == 0)
io_type = ZTEST_IO_WRITE_TAG;
rw_enter(&zd->zd_zilog_lock, RW_READER);
switch (io_type) {
case ZTEST_IO_WRITE_TAG:
ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
(void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
break;
case ZTEST_IO_WRITE_PATTERN:
(void) memset(data, 'a' + (object + offset) % 5, blocksize);
if (ztest_random(2) == 0) {
/*
* Induce fletcher2 collisions to ensure that
* zio_ddt_collision() detects and resolves them
* when using fletcher2-verify for deduplication.
*/
((uint64_t *)data)[0] ^= 1ULL << 63;
((uint64_t *)data)[4] ^= 1ULL << 63;
}
(void) ztest_write(zd, object, offset, blocksize, data);
break;
case ZTEST_IO_WRITE_ZEROES:
bzero(data, blocksize);
(void) ztest_write(zd, object, offset, blocksize, data);
break;
case ZTEST_IO_TRUNCATE:
(void) ztest_truncate(zd, object, offset, blocksize);
break;
case ZTEST_IO_SETATTR:
(void) ztest_setattr(zd, object);
break;
case ZTEST_IO_REWRITE:
rw_enter(&ztest_name_lock, RW_READER);
err = ztest_dsl_prop_set_uint64(zd->zd_name,
ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa),
B_FALSE);
VERIFY(err == 0 || err == ENOSPC);
err = ztest_dsl_prop_set_uint64(zd->zd_name,
ZFS_PROP_COMPRESSION,
ztest_random_dsl_prop(ZFS_PROP_COMPRESSION),
B_FALSE);
VERIFY(err == 0 || err == ENOSPC);
rw_exit(&ztest_name_lock);
VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data,
DMU_READ_NO_PREFETCH));
(void) ztest_write(zd, object, offset, blocksize, data);
break;
}
rw_exit(&zd->zd_zilog_lock);
umem_free(data, blocksize);
}
/*
* Initialize an object description template.
*/
static void
ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
{
od->od_dir = ZTEST_DIROBJ;
od->od_object = 0;
od->od_crtype = type;
od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
od->od_crgen = gen;
od->od_type = DMU_OT_NONE;
od->od_blocksize = 0;
od->od_gen = 0;
(void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]",
tag, (int64_t)id, index);
}
/*
* Lookup or create the objects for a test using the od template.
* If the objects do not all exist, or if 'remove' is specified,
* remove any existing objects and create new ones. Otherwise,
* use the existing objects.
*/
static int
ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
{
int count = size / sizeof (*od);
int rv = 0;
mutex_enter(&zd->zd_dirobj_lock);
if ((ztest_lookup(zd, od, count) != 0 || remove) &&
(ztest_remove(zd, od, count) != 0 ||
ztest_create(zd, od, count) != 0))
rv = -1;
zd->zd_od = od;
mutex_exit(&zd->zd_dirobj_lock);
return (rv);
}
/* ARGSUSED */
void
ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
{
zilog_t *zilog = zd->zd_zilog;
rw_enter(&zd->zd_zilog_lock, RW_READER);
zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
/*
* Remember the committed values in zd, which is in parent/child
* shared memory. If we die, the next iteration of ztest_run()
* will verify that the log really does contain this record.
*/
mutex_enter(&zilog->zl_lock);
ASSERT(zd->zd_shared != NULL);
ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq);
zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
mutex_exit(&zilog->zl_lock);
rw_exit(&zd->zd_zilog_lock);
}
/*
* This function is designed to simulate the operations that occur during a
* mount/unmount operation. We hold the dataset across these operations in an
* attempt to expose any implicit assumptions about ZIL management.
*/
/* ARGSUSED */
void
ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
{
objset_t *os = zd->zd_os;
/*
* We grab the zd_dirobj_lock to ensure that no other thread is
* updating the zil (i.e. adding in-memory log records) and the
* zd_zilog_lock to block any I/O.
*/
mutex_enter(&zd->zd_dirobj_lock);
rw_enter(&zd->zd_zilog_lock, RW_WRITER);
/* zfsvfs_teardown() */
zil_close(zd->zd_zilog);
/* zfsvfs_setup() */
VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
zil_replay(os, zd, ztest_replay_vector);
rw_exit(&zd->zd_zilog_lock);
mutex_exit(&zd->zd_dirobj_lock);
}
/*
* Verify that we can't destroy an active pool, create an existing pool,
* or create a pool with a bad vdev spec.
*/
/* ARGSUSED */
void
ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
{
ztest_shared_opts_t *zo = &ztest_opts;
spa_t *spa;
nvlist_t *nvroot;
/*
* Attempt to create using a bad file.
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(ENOENT, ==,
spa_create("ztest_bad_file", nvroot, NULL, NULL));
nvlist_free(nvroot);
/*
* Attempt to create using a bad mirror.
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
VERIFY3U(ENOENT, ==,
spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
nvlist_free(nvroot);
/*
* Attempt to create an existing pool. It shouldn't matter
* what's in the nvroot; we should fail with EEXIST.
*/
rw_enter(&ztest_name_lock, RW_READER);
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
nvlist_free(nvroot);
VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
spa_close(spa, FTAG);
rw_exit(&ztest_name_lock);
}
/* ARGSUSED */
void
ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
{
spa_t *spa;
uint64_t initial_version = SPA_VERSION_INITIAL;
uint64_t version, newversion;
nvlist_t *nvroot, *props;
char *name;
mutex_enter(&ztest_vdev_lock);
name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
/*
* Clean up from previous runs.
*/
(void) spa_destroy(name);
nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
/*
* If we're configuring a RAIDZ device then make sure that the
* the initial version is capable of supporting that feature.
*/
switch (ztest_opts.zo_raidz_parity) {
case 0:
case 1:
initial_version = SPA_VERSION_INITIAL;
break;
case 2:
initial_version = SPA_VERSION_RAIDZ2;
break;
case 3:
initial_version = SPA_VERSION_RAIDZ3;
break;
}
/*
* Create a pool with a spa version that can be upgraded. Pick
* a value between initial_version and SPA_VERSION_BEFORE_FEATURES.
*/
do {
version = ztest_random_spa_version(initial_version);
} while (version > SPA_VERSION_BEFORE_FEATURES);
props = fnvlist_alloc();
fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
VERIFY0(spa_create(name, nvroot, props, NULL));
fnvlist_free(nvroot);
fnvlist_free(props);
VERIFY0(spa_open(name, &spa, FTAG));
VERIFY3U(spa_version(spa), ==, version);
newversion = ztest_random_spa_version(version + 1);
if (ztest_opts.zo_verbose >= 4) {
(void) printf("upgrading spa version from %llu to %llu\n",
(u_longlong_t)version, (u_longlong_t)newversion);
}
spa_upgrade(spa, newversion);
VERIFY3U(spa_version(spa), >, version);
VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config,
zpool_prop_to_name(ZPOOL_PROP_VERSION)));
spa_close(spa, FTAG);
strfree(name);
mutex_exit(&ztest_vdev_lock);
}
static void
ztest_spa_checkpoint(spa_t *spa)
{
ASSERT(MUTEX_HELD(&ztest_checkpoint_lock));
int error = spa_checkpoint(spa->spa_name);
switch (error) {
case 0:
case ZFS_ERR_DEVRM_IN_PROGRESS:
case ZFS_ERR_DISCARDING_CHECKPOINT:
case ZFS_ERR_CHECKPOINT_EXISTS:
break;
case ENOSPC:
ztest_record_enospc(FTAG);
break;
default:
fatal(0, "spa_checkpoint(%s) = %d", spa->spa_name, error);
}
}
static void
ztest_spa_discard_checkpoint(spa_t *spa)
{
ASSERT(MUTEX_HELD(&ztest_checkpoint_lock));
int error = spa_checkpoint_discard(spa->spa_name);
switch (error) {
case 0:
case ZFS_ERR_DISCARDING_CHECKPOINT:
case ZFS_ERR_NO_CHECKPOINT:
break;
default:
fatal(0, "spa_discard_checkpoint(%s) = %d",
spa->spa_name, error);
}
}
/* ARGSUSED */
void
ztest_spa_checkpoint_create_discard(ztest_ds_t *zd, uint64_t id)
{
spa_t *spa = ztest_spa;
mutex_enter(&ztest_checkpoint_lock);
if (ztest_random(2) == 0) {
ztest_spa_checkpoint(spa);
} else {
ztest_spa_discard_checkpoint(spa);
}
mutex_exit(&ztest_checkpoint_lock);
}
static vdev_t *
vdev_lookup_by_path(vdev_t *vd, const char *path)
{
vdev_t *mvd;
if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
return (vd);
for (int c = 0; c < vd->vdev_children; c++)
if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
NULL)
return (mvd);
return (NULL);
}
/*
* Find the first available hole which can be used as a top-level.
*/
int
find_vdev_hole(spa_t *spa)
{
vdev_t *rvd = spa->spa_root_vdev;
int c;
ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV);
for (c = 0; c < rvd->vdev_children; c++) {
vdev_t *cvd = rvd->vdev_child[c];
if (cvd->vdev_ishole)
break;
}
return (c);
}
/*
* Verify that vdev_add() works as expected.
*/
/* ARGSUSED */
void
ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
{
ztest_shared_t *zs = ztest_shared;
spa_t *spa = ztest_spa;
uint64_t leaves;
uint64_t guid;
nvlist_t *nvroot;
int error;
mutex_enter(&ztest_vdev_lock);
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
/*
* If we have slogs then remove them 1/4 of the time.
*/
if (spa_has_slogs(spa) && ztest_random(4) == 0) {
/*
* Grab the guid from the head of the log class rotor.
*/
guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid;
spa_config_exit(spa, SCL_VDEV, FTAG);
/*
* We have to grab the zs_name_lock as writer to
* prevent a race between removing a slog (dmu_objset_find)
* and destroying a dataset. Removing the slog will
* grab a reference on the dataset which may cause
* dmu_objset_destroy() to fail with EBUSY thus
* leaving the dataset in an inconsistent state.
*/
rw_enter(&ztest_name_lock, RW_WRITER);
error = spa_vdev_remove(spa, guid, B_FALSE);
rw_exit(&ztest_name_lock);
switch (error) {
case 0:
case EEXIST:
case ZFS_ERR_CHECKPOINT_EXISTS:
case ZFS_ERR_DISCARDING_CHECKPOINT:
break;
default:
fatal(0, "spa_vdev_remove() = %d", error);
}
} else {
spa_config_exit(spa, SCL_VDEV, FTAG);
/*
* Make 1/4 of the devices be log devices.
*/
nvroot = make_vdev_root(NULL, NULL, NULL,
ztest_opts.zo_vdev_size, 0,
ztest_random(4) == 0, ztest_opts.zo_raidz,
zs->zs_mirrors, 1);
error = spa_vdev_add(spa, nvroot);
nvlist_free(nvroot);
switch (error) {
case 0:
break;
case ENOSPC:
ztest_record_enospc("spa_vdev_add");
break;
default:
fatal(0, "spa_vdev_add() = %d", error);
}
}
mutex_exit(&ztest_vdev_lock);
}
/*
* Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
*/
/* ARGSUSED */
void
ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
{
ztest_shared_t *zs = ztest_shared;
spa_t *spa = ztest_spa;
vdev_t *rvd = spa->spa_root_vdev;
spa_aux_vdev_t *sav;
char *aux;
uint64_t guid = 0;
int error;
if (ztest_random(2) == 0) {
sav = &spa->spa_spares;
aux = ZPOOL_CONFIG_SPARES;
} else {
sav = &spa->spa_l2cache;
aux = ZPOOL_CONFIG_L2CACHE;
}
mutex_enter(&ztest_vdev_lock);
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
if (sav->sav_count != 0 && ztest_random(4) == 0) {
/*
* Pick a random device to remove.
*/
guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
} else {
/*
* Find an unused device we can add.
*/
zs->zs_vdev_aux = 0;
for (;;) {
char path[MAXPATHLEN];
int c;
(void) snprintf(path, sizeof (path), ztest_aux_template,
ztest_opts.zo_dir, ztest_opts.zo_pool, aux,
zs->zs_vdev_aux);
for (c = 0; c < sav->sav_count; c++)
if (strcmp(sav->sav_vdevs[c]->vdev_path,
path) == 0)
break;
if (c == sav->sav_count &&
vdev_lookup_by_path(rvd, path) == NULL)
break;
zs->zs_vdev_aux++;
}
}
spa_config_exit(spa, SCL_VDEV, FTAG);
if (guid == 0) {
/*
* Add a new device.
*/
nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
(ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
error = spa_vdev_add(spa, nvroot);
switch (error) {