This is the next patch series for scrub userland tools. Change log v1->v2: - commands now reachable as "btrfs scrub ..." instead of "btrfs filesystem scrub ..." - ability to scrub a single device instead of a whole file system - superfluous command line options removed - resume is now a separate command ("scrub resume") instead of "scrub start -r" - read-only mode (which inherited the -r option immediately, sorry for that) - up to date progress numbers with "btrfs scrub status" while scrub is running - effective locking to protect against multiple scrubs on a filesystem - man page entry for scrub added Jan Schmidt (5): commands added scrub ioctls added check_mounted_where scrub userland implementation scrub added to manpage Makefile | 4 +- btrfs.c | 18 +- btrfs_cmds.c | 3 +- btrfs_cmds.h | 5 + ctree.h | 2 +- ioctl.h | 60 +++- man/btrfs.8.in | 66 +++- scrub.c | 1568 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ utils.c | 29 +- utils.h | 2 + 10 files changed, 1743 insertions(+), 14 deletions(-) create mode 100644 scrub.c -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
- scrub commands added - open_file_or_dir no longer static (needed by scrub.c) Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> --- Makefile | 4 ++-- btrfs.c | 18 +++++++++++++++++- btrfs_cmds.c | 3 ++- btrfs_cmds.h | 5 +++++ 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 6e6f6c6..6630887 100644 --- a/Makefile +++ b/Makefile @@ -37,8 +37,8 @@ all: version $(progs) manpages version: bash version.sh -btrfs: $(objects) btrfs.o btrfs_cmds.o - gcc $(CFLAGS) -o btrfs btrfs.o btrfs_cmds.o \ +btrfs: $(objects) btrfs.o btrfs_cmds.o scrub.o + gcc -lpthread $(CFLAGS) -o btrfs btrfs.o btrfs_cmds.o scrub.o \ $(objects) $(LDFLAGS) $(LIBS) btrfsctl: $(objects) btrfsctl.o diff --git a/btrfs.c b/btrfs.c index 46314cf..2dfa0b7 100644 --- a/btrfs.c +++ b/btrfs.c @@ -89,12 +89,28 @@ static struct Command commands[] = { }, { do_df_filesystem, 1, "filesystem df", "<path>\n" - "Show space usage information for a mount point\n." + "Show space usage information for a mount point." }, { do_balance, 1, "filesystem balance", "<path>\n" "Balance the chunks across the device." }, + { do_scrub_start, -1, + "scrub start", "[-Bdqr] <path>|<device>\n" + "Start a new scrub." + }, + { do_scrub_cancel, 1, + "scrub cancel", "<path>|<device>\n" + "Cancel a running scrub." + }, + { do_scrub_resume, -1, + "scrub resume", "[-Bdqr] <path>|<device>\n" + "Resume previously canceled or interrupted scrub." + }, + { do_scrub_status, -1, + "scrub status", "[-d] <path>|<device>\n" + "Show status of running or finished scrub." + }, { do_scan, 999, "device scan", "[<device> [<device>..]\n" "Scan all device for or the passed device for a btrfs\n" diff --git a/btrfs_cmds.c b/btrfs_cmds.c index 8031c58..39f84a8 100644 --- a/btrfs_cmds.c +++ b/btrfs_cmds.c @@ -90,7 +90,7 @@ static int test_isdir(char *path) } -static int open_file_or_dir(const char *fname) +int open_file_or_dir(const char *fname) { int ret; struct stat st; @@ -776,6 +776,7 @@ int do_balance(int argc, char **argv) } return 0; } + int do_remove_volume(int nargs, char **args) { diff --git a/btrfs_cmds.h b/btrfs_cmds.h index 7bde191..a879288 100644 --- a/btrfs_cmds.h +++ b/btrfs_cmds.h @@ -23,6 +23,10 @@ int do_defrag(int argc, char **argv); int do_show_filesystem(int nargs, char **argv); int do_add_volume(int nargs, char **args); int do_balance(int nargs, char **argv); +int do_scrub_start(int nargs, char **argv); +int do_scrub_status(int argc, char **argv); +int do_scrub_resume(int argc, char **argv); +int do_scrub_cancel(int nargs, char **argv); int do_remove_volume(int nargs, char **args); int do_scan(int nargs, char **argv); int do_resize(int nargs, char **argv); @@ -32,3 +36,4 @@ int list_subvols(int fd); int do_df_filesystem(int nargs, char **argv); int find_updated_files(int fd, u64 root_id, u64 oldest_gen); int do_find_newer(int argc, char **argv); +int open_file_or_dir(const char *fname); -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
- scrub structs added - ioctls for scrub - BTRFS_FSID_SIZE moved Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> --- ctree.h | 2 +- ioctl.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/ctree.h b/ctree.h index b79e238..577365e 100644 --- a/ctree.h +++ b/ctree.h @@ -24,6 +24,7 @@ #include "radix-tree.h" #include "extent-cache.h" #include "extent_io.h" +#include "ioctl.h" struct btrfs_root; struct btrfs_trans_handle; @@ -250,7 +251,6 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) sizeof(struct btrfs_stripe) * (num_stripes - 1); } -#define BTRFS_FSID_SIZE 16 #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) diff --git a/ioctl.h b/ioctl.h index 776d7a9..f70a22c 100644 --- a/ioctl.h +++ b/ioctl.h @@ -23,13 +23,62 @@ #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 -#define BTRFS_PATH_NAME_MAX 4087 +/* this should be 4k */ +#define BTRFS_PATH_NAME_MAX 4087 struct btrfs_ioctl_vol_args { __s64 fd; char name[BTRFS_PATH_NAME_MAX + 1]; }; +#define BTRFS_FSID_SIZE 16 +#define BTRFS_UUID_SIZE 16 + +struct btrfs_scrub_progress { + __u64 data_extents_scrubbed; + __u64 tree_extents_scrubbed; + __u64 data_bytes_scrubbed; + __u64 tree_bytes_scrubbed; + __u64 read_errors; + __u64 csum_errors; + __u64 verify_errors; + __u64 no_csum; + __u64 csum_discards; + __u64 super_errors; + __u64 malloc_errors; + __u64 uncorrectable_errors; + __u64 corrected_errors; + __u64 last_physical; +}; + +#define BTRFS_SCRUB_READONLY 1 +struct btrfs_ioctl_scrub_args { + __u64 devid; /* in */ + __u64 start; /* in */ + __u64 end; /* in */ + __u64 flags; /* in */ + struct btrfs_scrub_progress progress; /* out */ + /* pad to 1k */ + __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; +}; + +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 +struct btrfs_ioctl_dev_info_args { + __u64 devid; /* in/out */ + __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ + __u64 bytes_used; /* out */ + __u64 total_bytes; /* out */ + __u64 unused[379]; /* pad to 4k */ + __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ +}; + +struct btrfs_ioctl_fs_info_args { + __u64 max_id; /* out */ + __u64 num_devices; /* out */ + __u8 fsid[BTRFS_FSID_SIZE]; /* out */ + __u64 reserved[124]; /* pad to 1k */ +}; + struct btrfs_ioctl_search_key { /* which root are we searching. 0 is the tree of tree roots */ __u64 tree_id; @@ -169,4 +218,13 @@ struct btrfs_ioctl_space_args { #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ struct btrfs_ioctl_space_args) +#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) +#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ + struct btrfs_ioctl_dev_info_args) +#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ + struct btrfs_ioctl_fs_info_args) #endif -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
new version of check_mounted() returning more information gathered while searching. check_mounted() is now a wrapper for check_mounted_where(). the new version is needed by scrub.c Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> --- utils.c | 29 ++++++++++++++++++++++------- utils.h | 2 ++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/utils.c b/utils.c index fd894f3..bea36ce 100644 --- a/utils.c +++ b/utils.c @@ -749,13 +749,8 @@ int blk_file_in_dev_list(struct btrfs_fs_devices* fs_devices, const char* file) */ int check_mounted(const char* file) { - int ret; int fd; - u64 total_devs = 1; - int is_btrfs; - struct btrfs_fs_devices* fs_devices_mnt = NULL; - FILE *f; - struct mntent *mnt; + int ret; fd = open(file, O_RDONLY); if (fd < 0) { @@ -763,11 +758,26 @@ int check_mounted(const char* file) return -errno; } + ret = check_mounted_where(fd, file, NULL, 0, NULL); + close(fd); + + return ret; +} + +int check_mounted_where(int fd, const char* file, char *where, int size, + struct btrfs_fs_devices **fs_dev_ret) +{ + int ret; + u64 total_devs = 1; + int is_btrfs; + struct btrfs_fs_devices* fs_devices_mnt = NULL; + FILE *f; + struct mntent *mnt; + /* scan the initial device */ ret = btrfs_scan_one_device(fd, file, &fs_devices_mnt, &total_devs, BTRFS_SUPER_INFO_OFFSET); is_btrfs = (ret >= 0); - close(fd); /* scan other devices */ if (is_btrfs && total_devs > 1) { @@ -803,6 +813,11 @@ int check_mounted(const char* file) } /* Did we find an entry in mnt table? */ + if (mnt && size && where) + strncpy(where, mnt->mnt_dir, size); + if (fs_dev_ret) + *fs_dev_ret = fs_devices_mnt; + ret = (mnt != NULL); out_mntloop_err: diff --git a/utils.h b/utils.h index 9dce5b0..d2c9626 100644 --- a/utils.h +++ b/utils.h @@ -37,6 +37,8 @@ int btrfs_scan_for_fsid(struct btrfs_fs_devices *fs_devices, u64 total_devs, void btrfs_register_one_device(char *fname); int btrfs_scan_one_dir(char *dirname, int run_ioctl); int check_mounted(const char *devicename); +int check_mounted_where(int fd, const char* file, char *where, int size, + struct btrfs_fs_devices **fs_devices_mnt); int btrfs_device_already_in_root(struct btrfs_root *root, int fd, int super_offset); char *pretty_sizes(u64 size); -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> --- scrub.c | 1568 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 1568 insertions(+), 0 deletions(-) diff --git a/scrub.c b/scrub.c new file mode 100644 index 0000000..22052ed --- /dev/null +++ b/scrub.c @@ -0,0 +1,1568 @@ + +#include <sys/ioctl.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <poll.h> +#include <sys/file.h> +#include <uuid/uuid.h> +#include <fcntl.h> +#include <unistd.h> +#include <pthread.h> +#include <ctype.h> +#include <signal.h> +#include <stdarg.h> + +#include "ctree.h" +#include "ioctl.h" +#include "btrfs_cmds.h" +#include "utils.h" +#include "volumes.h" +#include "disk-io.h" + +#define SCRUB_DATA_FILE "/var/btrfs/scrub.status" +#define SCRUB_PROGRESS_SOCKET_PATH "/var/btrfs/scrub.progress" +#define SCRUB_FILE_VERSION_PREFIX "scrub status:" +#define SCRUB_FILE_VERSION "1" + +struct scrub_stats { + time_t t_start; + time_t t_resumed; + u64 duration; + u64 finished; + u64 canceled; +}; + +struct scrub_progress { + struct btrfs_ioctl_scrub_args scrub_args; + int fd; + int ret; + int skip; + struct scrub_stats stats; + struct scrub_file_record *resumed; + int ioctl_errno; + pthread_mutex_t progress_mutex; +}; + +struct scrub_file_record { + u8 fsid[BTRFS_FSID_SIZE]; + u64 devid; + struct scrub_stats stats; + struct btrfs_scrub_progress p; +}; + +struct scrub_progress_cycle { + int fdmnt; + int prg_fd; + int do_record; + struct btrfs_ioctl_fs_info_args *fi; + struct scrub_progress *progress; + struct scrub_progress *shared_progress; + pthread_mutex_t *write_mutex; +}; + +struct scrub_fs_stat { + struct btrfs_scrub_progress p; + struct scrub_stats s; + int i; +}; + +static void print_scrub_full(struct btrfs_scrub_progress *sp) +{ + printf("\tdata_extents_scrubbed: %lld\n", sp->data_extents_scrubbed); + printf("\ttree_extents_scrubbed: %lld\n", sp->tree_extents_scrubbed); + printf("\tdata_bytes_scrubbed: %lld\n", sp->data_bytes_scrubbed); + printf("\ttree_bytes_scrubbed: %lld\n", sp->tree_bytes_scrubbed); + printf("\tread_errors: %lld\n", sp->read_errors); + printf("\tcsum_errors: %lld\n", sp->csum_errors); + printf("\tverify_errors: %lld\n", sp->verify_errors); + printf("\tno_csum: %lld\n", sp->no_csum); + printf("\tcsum_discards: %lld\n", sp->csum_discards); + printf("\tsuper_errors: %lld\n", sp->super_errors); + printf("\tmalloc_errors: %lld\n", sp->malloc_errors); + printf("\tuncorrectable_errors: %lld\n", sp->uncorrectable_errors); + printf("\tcorrected_errors: %lld\n", sp->corrected_errors); + printf("\tlast_physical: %lld\n", sp->last_physical); +} + +#define err(test, ...) do { \ + if (test) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +#define PRINT_SCRUB_ERROR(test, desc) do { \ + if (test) \ + printf(" %s=%llu", desc, test); \ +} while (0) +static void print_scrub_summary(struct btrfs_scrub_progress *p) +{ + u64 err_cnt; + u64 err_cnt2; + + err_cnt = p->read_errors + + p->csum_errors + + p->verify_errors + + p->csum_discards + + p->super_errors + + p->malloc_errors; + + err_cnt2 = p->corrected_errors + p->uncorrectable_errors; + + printf("\ttotal bytes scrubbed: %s with %llu errors\n", + pretty_sizes(p->data_bytes_scrubbed + p->tree_bytes_scrubbed), + max(err_cnt, err_cnt2)); + if (err_cnt || err_cnt2) { + printf("\terror details:"); + PRINT_SCRUB_ERROR(p->read_errors, "read"); + PRINT_SCRUB_ERROR(p->super_errors, "super"); + PRINT_SCRUB_ERROR(p->malloc_errors, "malloc"); + PRINT_SCRUB_ERROR(p->verify_errors, "verify"); + PRINT_SCRUB_ERROR(p->csum_errors, "csum"); + PRINT_SCRUB_ERROR(p->csum_discards, "csum-discards"); + printf("\n"); + printf("\tcorrected errors: %llu, uncorrectable errors: %llu\n", + p->corrected_errors, p->uncorrectable_errors); + } +} + +#define _SCRUB_FS_STAT(p, name, fs_stat) fs_stat->p.name += p->name +#define _SCRUB_FS_STAT_MIN(ss, name, fs_stat) \ +do { \ + if (fs_stat->s.name > ss->name) { \ + fs_stat->s.name = ss->name; \ + } \ +} while (0) +#define _SCRUB_FS_STAT_ZMIN(ss, name, fs_stat) \ +do { \ + if (!fs_stat->s.name || fs_stat->s.name > ss->name) { \ + fs_stat->s.name = ss->name; \ + } \ +} while (0) +#define _SCRUB_FS_STAT_MAX(ss, name, fs_stat) \ +do { \ + if (!fs_stat->s.name || fs_stat->s.name < ss->name) { \ + fs_stat->s.name = ss->name; \ + } \ +} while (0) +static void add_to_fs_stat(struct btrfs_scrub_progress *p, + struct scrub_stats *ss, + struct scrub_fs_stat *fs_stat) +{ + _SCRUB_FS_STAT(p, data_extents_scrubbed, fs_stat); + _SCRUB_FS_STAT(p, tree_extents_scrubbed, fs_stat); + _SCRUB_FS_STAT(p, data_bytes_scrubbed, fs_stat); + _SCRUB_FS_STAT(p, tree_bytes_scrubbed, fs_stat); + _SCRUB_FS_STAT(p, read_errors, fs_stat); + _SCRUB_FS_STAT(p, csum_errors, fs_stat); + _SCRUB_FS_STAT(p, verify_errors, fs_stat); + _SCRUB_FS_STAT(p, no_csum, fs_stat); + _SCRUB_FS_STAT(p, csum_discards, fs_stat); + _SCRUB_FS_STAT(p, super_errors, fs_stat); + _SCRUB_FS_STAT(p, malloc_errors, fs_stat); + _SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat); + _SCRUB_FS_STAT(p, corrected_errors, fs_stat); + _SCRUB_FS_STAT(p, last_physical, fs_stat); + _SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat); + _SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat); + _SCRUB_FS_STAT_MAX(ss, duration, fs_stat); + _SCRUB_FS_STAT_MAX(ss, canceled, fs_stat); + _SCRUB_FS_STAT_MIN(ss, finished, fs_stat); +} + +static void init_fs_stat(struct scrub_fs_stat *fs_stat) +{ + memset(fs_stat, 0, sizeof(*fs_stat)); + fs_stat->s.finished = 2; +} + +static void _print_scrub_ss(struct scrub_stats *ss) +{ + char t[BTRFS_PATH_NAME_MAX+1]; + struct tm tm; + + if (!ss || !ss->t_start) { + printf("\tno stats available\n"); + return; + } + if (ss->t_resumed) { + localtime_r(&ss->t_resumed, &tm); + strftime(t, sizeof(t), "%c", &tm); + printf("\tscrub resumed at %s", t); + } else { + localtime_r(&ss->t_start, &tm); + strftime(t, sizeof(t), "%c", &tm); + printf("\tscrub started at %s", t); + } + if (ss->finished && !ss->canceled) { + printf(" and finished after %llu seconds\n", + ss->duration); + } else if (ss->canceled) { + printf(" and was aborted after %llu seconds\n", + ss->duration); + } else { + printf(", running for %llu seconds\n", ss->duration); + } +} + +static void print_scrub_dev(struct btrfs_ioctl_dev_info_args *di, + struct btrfs_scrub_progress *p, int raw, + const char *append, struct scrub_stats *ss) +{ + printf("scrub device %s (id %llu) %s\n", di->path, di->devid, + append ? append : ""); + + _print_scrub_ss(ss); + + if (p) { + if (raw) + print_scrub_full(p); + else + print_scrub_summary(p); + } +} + +static void print_fs_stat(struct scrub_fs_stat *fs_stat, int raw) +{ + _print_scrub_ss(&fs_stat->s); + + if (raw) + print_scrub_full(&fs_stat->p); + else + print_scrub_summary(&fs_stat->p); +} + +static void free_history(struct scrub_file_record **last_scrubs) +{ + struct scrub_file_record **l = last_scrubs; + if (!l) + return; + while (*l) + free(*l++); + free(last_scrubs); +} + +static int cancel_fd = -1; +static void scrub_sigint_record_progress(int signal) +{ + ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL); +} + +static int scrub_handle_sigint_parent(void) +{ + struct sigaction sa = { + .sa_handler = SIG_IGN, + .sa_flags = SA_RESTART, + }; + + return sigaction(SIGINT, &sa, NULL); +} + +static int scrub_handle_sigint_child(int fd) +{ + struct sigaction sa = { + .sa_handler = fd == -1 ? SIG_DFL : scrub_sigint_record_progress, + }; + + cancel_fd = fd; + return sigaction(SIGINT, &sa, NULL); +} + +static int _scrub_datafile(const char *fn_base, const char *fn_local, + const char *fn_tmp, char *datafile, int max) +{ + int ret; + + strncpy(datafile, fn_base, max); + ret = strlen(datafile); + + if (ret + 1 >= max) + return -EOVERFLOW; + + datafile[ret] = ''.''; + strncpy(datafile+ret+1, fn_local, max-ret-1); + ret = strlen(datafile); + + if (ret + 1 >= max) + return -EOVERFLOW; + + if (fn_tmp) { + datafile[ret] = ''_''; + strncpy(datafile+ret+1, fn_tmp, max-ret-1); + ret = strlen(datafile); + + if (ret >= max) + return -EOVERFLOW; + } + + return 0; +} + +static int _scrub_open_file(const char *datafile, int m) +{ + int fd; + int ret; + + fd = open(datafile, m, 0600); + if (fd < 0) + return -errno; + + ret = flock(fd, LOCK_EX|LOCK_NB); + if (ret) { + ret = errno; + close(fd); + return -ret; + } + + return fd; +} + +static int scrub_open_file_r(const char *fn_base, const char *fn_local) +{ + int ret; + char datafile[BTRFS_PATH_NAME_MAX+1]; + ret = _scrub_datafile(fn_base, fn_local, NULL, + datafile, sizeof(datafile)); + if (ret < 0) + return ret; + return _scrub_open_file(datafile, O_RDONLY); +} + +static int scrub_open_file_w(const char *fn_base, const char *fn_local, + const char *tmp) +{ + int ret; + char datafile[BTRFS_PATH_NAME_MAX+1]; + ret = _scrub_datafile(fn_base, fn_local, tmp, + datafile, sizeof(datafile)); + if (ret < 0) + return ret; + return _scrub_open_file(datafile, O_WRONLY|O_CREAT); +} + +static int scrub_rename_file(const char *fn_base, const char *fn_local, + const char *tmp) +{ + int ret; + char datafile_old[BTRFS_PATH_NAME_MAX+1]; + char datafile_new[BTRFS_PATH_NAME_MAX+1]; + ret = _scrub_datafile(fn_base, fn_local, tmp, + datafile_old, sizeof(datafile_old)); + if (ret < 0) + return ret; + ret = _scrub_datafile(fn_base, fn_local, NULL, + datafile_new, sizeof(datafile_new)); + if (ret < 0) + return ret; + ret = rename(datafile_old, datafile_new); + return ret ? -errno : 0; +} + +#define _SCRUB_KVREAD(i, name, avail, l, dest) \ + _scrub_kvread(i, sizeof(#name), avail, l, #name, dest.name) +#define _SCRUB_KVREAD_STATS(i, name, avail, l, dest) \ + _scrub_kvread(i, sizeof(#name), avail, l, #name, dest->stats.name) +/* + * returns 0 if the key did not match (nothing was read) + * 1 if the key did match (success) + * -1 if the key did match and an error occured + */ +static int _scrub_kvread(int *i, int len, int avail, const char *buf, + const char *key, u64 *dest) +{ + int j; + + if (*i+len+1 < avail && strncmp(&buf[*i], key, len-1) == 0) { + *i += len-1; + if (buf[*i] != '':'') { + return -1; + } + *i += 1; + for (j=0; isdigit(buf[*i+j]) && *i+j < avail; ++j) + ; + if (*i+j >= avail) + return -1; + *dest = atoll(&buf[*i]); + *i += j; + return 1; + } + + return 0; +} + +#define _SCRUB_ILLEGAL do { \ + if (report_errors) { \ + fprintf(stderr, "WARNING: illegal data in line %d pos " \ + "%d state %d (near \"%.*s\") at %s:%d\n", \ + lineno, i, state, 20 > avail ? avail : 20, l+i, \ + __FILE__, __LINE__); \ + } \ + goto skip; \ +} while (0) +static struct scrub_file_record **scrub_read_file(int fd, int report_errors) +{ + int avail = 0; + int old_avail = 0; + char l[512]; + int state = 0; + int curr = -1; + int i = 0; + int j; + int ret; + int eof = 0; + int lineno = 0; + u64 version; + char empty_uuid[BTRFS_FSID_SIZE] = {0}; + struct scrub_file_record **p = NULL; + + if (fd < 0) + return ERR_PTR(-EINVAL); + +again: + old_avail = avail-i; + BUG_ON(old_avail < 0); + if (old_avail) + memmove(l, l+i, old_avail); + avail = read(fd, l+old_avail, sizeof(l)-old_avail); + if (avail == 0) { + eof = 1; + } + if (avail + old_avail == 0) { + if (curr >= 0 && + memcmp(p[curr]->fsid, empty_uuid, BTRFS_FSID_SIZE) == 0) { + p[curr] = NULL; + } else if (curr == -1) { + p = ERR_PTR(-ENODATA); + } + return p; + } + if (avail == -1) + return ERR_PTR(-errno); + avail += old_avail; + + i = 0; + while (i < avail) { + switch (state) { + case 0: /* start if file */ + ret = _scrub_kvread(&i, + sizeof(SCRUB_FILE_VERSION_PREFIX)-1, avail, l, + SCRUB_FILE_VERSION_PREFIX, &version); + if (ret != 1) + _SCRUB_ILLEGAL; + if (version != atoll(SCRUB_FILE_VERSION)) + return ERR_PTR(-ENOTSUP); + state = 6; + continue; + case 1: /* start of line, alloc */ + if (!eof && !memchr(l+i, ''\n'', avail-i)) + goto again; + ++lineno; + if (curr > -1 && memcmp(p[curr]->fsid, empty_uuid, + BTRFS_FSID_SIZE) == 0) { + state = 2; + continue; + } + ++curr; + p = realloc(p, (curr+2)*sizeof(*p)); + if (p) + p[curr] = malloc(sizeof(**p)); + if (!p || !p[curr]) + return ERR_PTR(-errno); + memset(p[curr], 0, sizeof(**p)); + p[curr+1] = NULL; + ++state; + case 2: /* start of line, skip space */ + while (isspace(l[i]) && i<avail) { + if (l[i] == ''\n'') + ++lineno; + ++i; + } + if (i >= avail || (!eof && !memchr(l+i, ''\n'', avail-i))) + goto again; + ++state; + case 3: /* read fsid */ + if (i == avail) + continue; + for (j=0; l[i+j] != '':'' && i+j < avail; ++j) + ; + if (i+j+1 >= avail) + _SCRUB_ILLEGAL; + if (j != 36) + _SCRUB_ILLEGAL; + l[i+j] = ''\0''; + ret = uuid_parse(l+i, p[curr]->fsid); + if (ret) + _SCRUB_ILLEGAL; + i += j + 1; + ++state; + case 4: /* read dev id */ + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) + ; + if (!j || i+j+1 >= avail) + _SCRUB_ILLEGAL; + p[curr]->devid = atoll(&l[i]); + i += j + 1; + ++state; + case 5: /* read key/value pair */ + ret = _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, tree_extents_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, data_bytes_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, tree_bytes_scrubbed, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, read_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, csum_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, verify_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, no_csum, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, csum_discards, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, super_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, malloc_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, uncorrectable_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, corrected_errors, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, last_physical, avail, + l, &p[curr]->p) || + _SCRUB_KVREAD(&i, finished, avail, + l, &p[curr]->stats) || + _SCRUB_KVREAD(&i, t_start, avail, + l, (u64*)&p[curr]->stats) || + _SCRUB_KVREAD(&i, t_resumed, avail, + l, (u64*)&p[curr]->stats) || + _SCRUB_KVREAD(&i, duration, avail, + l, (u64*)&p[curr]->stats) || + _SCRUB_KVREAD(&i, canceled, avail, + l, &p[curr]->stats); + if (ret != 1) + _SCRUB_ILLEGAL; + ++state; + case 6: /* after number */ + if (l[i] == ''|'') { + state = 5; + } else if (l[i] == ''\n'') { + state = 1; + } else { + _SCRUB_ILLEGAL; + } + ++i; + continue; + case 99: /* skip rest of line */ +skip: + state = 99; + do { + ++i; + if (l[i-1] == ''\n'') { + state = 1; + break; + } + } while (i < avail); + continue; + } + BUG(); + } + goto again; +} +#undef _SCRUB_ILLEGAL + +static int _scrub_write_buf(int fd, const void *data, int len) +{ + int ret; + ret = write(fd, data, len); + return ret - len; +} + +static int _scrub_writev(int fd, char *buf, int max, const char *fmt, ...) + __attribute__ ((format (printf, 4, 5))); +static int _scrub_writev(int fd, char *buf, int max, const char *fmt, ...) +{ + int ret; + va_list args; + + va_start(args, fmt); + ret = vsnprintf(buf, max, fmt, args); + va_end(args); + if (ret >= max) + return ret - max; + return _scrub_write_buf(fd, buf, ret); +} + +#define _SCRUB_SUM(dest, data, name) dest->scrub_args.progress.name = \ + data->resumed->p.name + data->scrub_args.progress.name +static struct scrub_progress *_scrub_resumed_stats(struct scrub_progress *data, + struct scrub_progress *dest) +{ + if (!data->resumed || data->skip) + return data; + + _SCRUB_SUM(dest, data, data_extents_scrubbed); + _SCRUB_SUM(dest, data, tree_extents_scrubbed); + _SCRUB_SUM(dest, data, data_bytes_scrubbed); + _SCRUB_SUM(dest, data, tree_bytes_scrubbed); + _SCRUB_SUM(dest, data, read_errors); + _SCRUB_SUM(dest, data, csum_errors); + _SCRUB_SUM(dest, data, verify_errors); + _SCRUB_SUM(dest, data, no_csum); + _SCRUB_SUM(dest, data, csum_discards); + _SCRUB_SUM(dest, data, super_errors); + _SCRUB_SUM(dest, data, malloc_errors); + _SCRUB_SUM(dest, data, uncorrectable_errors); + _SCRUB_SUM(dest, data, corrected_errors); + _SCRUB_SUM(dest, data, last_physical); + dest->stats.canceled = data->stats.canceled; + dest->stats.finished = data->stats.finished; + dest->stats.t_resumed = data->stats.t_start; + dest->stats.t_start = data->resumed->stats.t_start; + dest->stats.duration = data->resumed->stats.duration + + data->stats.duration; + dest->scrub_args.devid = data->scrub_args.devid; + return dest; +} + +#define _SCRUB_KVWRITE(fd, buf, name, use) \ + _scrub_kvwrite(fd, buf, sizeof(buf), #name, \ + use->scrub_args.progress.name) +#define _SCRUB_KVWRITE_STATS(fd, buf, name, use) \ + _scrub_kvwrite(fd, buf, sizeof(buf), #name, \ + use->stats.name) +static int _scrub_kvwrite(int fd, char *buf, int max, + const char *key, u64 val) +{ + return _scrub_writev(fd, buf, max, "|%s:%lld", key, val); +} + +static int scrub_write_file(int fd, const char *fsid, + struct scrub_progress* data, int n) +{ + int ret = 0; + int i; + char buf[1024]; + struct scrub_progress local; + struct scrub_progress *use; + + if (n < 1) { + return -EINVAL; + } + + ret = _scrub_write_buf(fd, SCRUB_FILE_VERSION_PREFIX SCRUB_FILE_VERSION + "\n", sizeof(SCRUB_FILE_VERSION_PREFIX)-1 + + sizeof(SCRUB_FILE_VERSION)-1 + 1); + if (ret) + return -EOVERFLOW; + + for (i=0; i<n; ++i) { + use = _scrub_resumed_stats(&data[i], &local); + if (_scrub_write_buf(fd, fsid, strlen(fsid)) || + _scrub_write_buf(fd, ":", 1) || + _scrub_writev(fd, buf, sizeof(buf), "%lld", + use->scrub_args.devid) || + _scrub_write_buf(fd, buf, ret) || + _SCRUB_KVWRITE(fd, buf, data_extents_scrubbed, use) || + _SCRUB_KVWRITE(fd, buf, tree_extents_scrubbed, use) || + _SCRUB_KVWRITE(fd, buf, data_bytes_scrubbed, use) || + _SCRUB_KVWRITE(fd, buf, tree_bytes_scrubbed, use) || + _SCRUB_KVWRITE(fd, buf, read_errors, use) || + _SCRUB_KVWRITE(fd, buf, csum_errors, use) || + _SCRUB_KVWRITE(fd, buf, verify_errors, use) || + _SCRUB_KVWRITE(fd, buf, no_csum, use) || + _SCRUB_KVWRITE(fd, buf, csum_discards, use) || + _SCRUB_KVWRITE(fd, buf, super_errors, use) || + _SCRUB_KVWRITE(fd, buf, malloc_errors, use) || + _SCRUB_KVWRITE(fd, buf, uncorrectable_errors, use) || + _SCRUB_KVWRITE(fd, buf, corrected_errors, use) || + _SCRUB_KVWRITE(fd, buf, last_physical, use) || + _SCRUB_KVWRITE_STATS(fd, buf, t_start, use) || + _SCRUB_KVWRITE_STATS(fd, buf, t_resumed, use) || + _SCRUB_KVWRITE_STATS(fd, buf, duration, use) || + _SCRUB_KVWRITE_STATS(fd, buf, canceled, use) || + _SCRUB_KVWRITE_STATS(fd, buf, finished, use) || + _scrub_write_buf(fd, "\n", 1)) { + return -EOVERFLOW; + } + } + + return 0; +} +#undef _SCRUB_KVWRITE + +static int scrub_write_progress(pthread_mutex_t *m, const char *fsid, + struct scrub_progress* data, int n) +{ + int ret; + int fd; + int old; + + ret = pthread_mutex_lock(m); + if (ret) { + ret = -errno; + goto out; + } + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); + + fd = scrub_open_file_w(SCRUB_DATA_FILE, fsid, "tmp"); + if (fd < 0) { + ret = fd; + goto out; + } + ret = scrub_write_file(fd, fsid, data, n); + if (ret) + goto out; + ret = scrub_rename_file(SCRUB_DATA_FILE, fsid, "tmp"); + if (ret) + goto out; + ret = close(fd); + if (ret) { + ret = -errno; + goto out; + } + +out: + if (ret) { + pthread_mutex_unlock(m); + } else { + ret = pthread_mutex_unlock(m); + if (ret) + ret = -errno; + } + + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old); + + return ret; +} + +static void *scrub_one_dev(void *ctx) +{ + struct scrub_progress *sp = ctx; + int ret; + struct timeval tv; + + sp->stats.canceled = 0; + sp->stats.duration = 0; + sp->stats.finished = 0; + + ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args); + gettimeofday(&tv, NULL); + sp->ret = ret; + sp->stats.duration = tv.tv_sec - sp->stats.t_start; + sp->stats.canceled = !!ret; + sp->ioctl_errno = errno; + ret = pthread_mutex_lock(&sp->progress_mutex); + if (ret) + return ERR_PTR(-errno); + sp->stats.finished = 1; + ret = pthread_mutex_unlock(&sp->progress_mutex); + if (ret) + return ERR_PTR(-errno); + + + return NULL; +} + +static void *progress_one_dev(void *ctx) +{ + struct scrub_progress *sp = ctx; + + sp->ret = ioctl(sp->fd, BTRFS_IOC_SCRUB_PROGRESS, &sp->scrub_args); + sp->ioctl_errno = errno; + + return NULL; +} + +static void *scrub_progress_cycle(void *ctx) +{ + int ret; + int i; + char fsid[37]; + struct scrub_progress *sp; + struct scrub_progress *sp_last; + struct scrub_progress *sp_shared; + struct timeval tv; + struct scrub_progress_cycle *spc = ctx; + int ndev = spc->fi->num_devices; + int this = 1; + int last = 0; + int peer_fd = -1; + struct pollfd accept_poll_fd = { + .fd = spc->prg_fd, + .events = POLLIN, + .revents = 0, + }; + struct pollfd write_poll_fd = { + .events = POLLOUT, + .revents = 0, + }; + struct sockaddr_un peer; + socklen_t peer_size = sizeof(peer); + + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &ret); + uuid_unparse(spc->fi->fsid, fsid); + + for (i=0; i<ndev; ++i) { + sp = &spc->progress[i]; + sp_last = &spc->progress[i+ndev]; + sp_shared = &spc->shared_progress[i]; + sp->scrub_args.devid = sp_last->scrub_args.devid + sp_shared->scrub_args.devid; + sp->fd = sp_last->fd = spc->fdmnt; + sp->stats.t_start = sp_last->stats.t_start + sp_shared->stats.t_start; + sp->resumed = sp_last->resumed = sp_shared->resumed; + sp->skip = sp_last->skip = sp_shared->skip; + sp->stats.finished = sp_last->stats.finished + sp_shared->stats.finished; + } + + while (1) { + ret = poll(&accept_poll_fd, 1, 5*1000); + if (ret == -1) + return ERR_PTR(-errno); + if (ret) + peer_fd = accept(spc->prg_fd, (struct sockaddr *)&peer, + &peer_size); + gettimeofday(&tv, NULL); + this = (this+1)%2; + last = (last+1)%2; + for (i=0; i<ndev; ++i) { + sp = &spc->progress[this*ndev+i]; + sp_last = &spc->progress[last*ndev+i]; + sp_shared = &spc->shared_progress[i]; + if (sp->stats.finished) { + continue; + } + progress_one_dev(sp); + sp->stats.duration = tv.tv_sec - sp->stats.t_start; + if (!sp->ret) + continue; + if (sp->ioctl_errno != ENOTCONN && + sp->ioctl_errno != ENODEV) + return ERR_PTR(-sp->ioctl_errno); + /* + * scrub finished or device removed, check the + * finished flag. if unset, just use the last + * result we got for the current write and go + * on. flag should be set on next cycle, then. + */ + ret = pthread_mutex_lock(&sp_shared->progress_mutex); + if (ret) + return ERR_PTR(-errno); + if (!sp_shared->stats.finished) { + ret = pthread_mutex_unlock( + &sp_shared->progress_mutex); + if (ret) + return ERR_PTR(-errno); + memcpy(sp, sp_last, sizeof(*sp)); + continue; + } + ret = pthread_mutex_unlock(&sp_shared->progress_mutex); + if (ret) + return ERR_PTR(-errno); + memcpy(sp, sp_shared, sizeof(*sp)); + memcpy(sp_last, sp_shared, sizeof(*sp)); + } + if (peer_fd != -1) { + write_poll_fd.fd = peer_fd; + ret = poll(&write_poll_fd, 1, 0); + if (ret == -1) + return ERR_PTR(-errno); + if (ret) { + ret = scrub_write_file( + peer_fd, fsid, + &spc->progress[this*ndev], ndev); + if (ret) + return ERR_PTR(ret); + } + close(peer_fd); + peer_fd = -1; + } + if (!spc->do_record) + continue; + ret = scrub_write_progress(spc->write_mutex, fsid, + &spc->progress[this*ndev], ndev); + if (ret) + return ERR_PTR(ret); + } +} + +static struct scrub_file_record *last_dev_scrub( + struct scrub_file_record *const *const past_scrubs, u64 devid) +{ + int i; + + if (!past_scrubs || IS_ERR(past_scrubs)) + return NULL; + + for (i=0; past_scrubs[i]; ++i) + if (past_scrubs[i]->devid == devid) + return past_scrubs[i]; + + return NULL; +} + +static int scrub_device_info(int fd, u64 devid, + struct btrfs_ioctl_dev_info_args *di_args) +{ + int ret; + + di_args->devid = devid; + memset(&di_args->uuid, ''\0'', sizeof(di_args->uuid)); + + ret = ioctl(fd, BTRFS_IOC_DEV_INFO, di_args); + return ret ? -errno : 0; +} + +static int scrub_fs_info(int fd, char *path, + struct btrfs_ioctl_fs_info_args *fi_args, + struct btrfs_ioctl_dev_info_args **di_ret) +{ + int ret = 0; + int ndevs = 0; + int i = 1; + struct btrfs_fs_devices* fs_devices_mnt = NULL; + struct btrfs_ioctl_dev_info_args *di_args; + char mp[BTRFS_PATH_NAME_MAX+1]; + + memset(fi_args, 0, sizeof(*fi_args)); + + ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); + if (ret && errno == EINVAL) { + /* path is no mounted btrfs. try if it''s a device */ + ret = check_mounted_where(fd, path, mp, sizeof(mp), + &fs_devices_mnt); + if (!ret) + return -EINVAL; + fi_args->num_devices = 1; + fi_args->max_id = fs_devices_mnt->latest_devid; + i = fs_devices_mnt->latest_devid; + memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE); + close(fd); + fd = open_file_or_dir(mp); + if (fd < 0) + return -errno; + } else if (ret) { + return -errno; + } + + if (!fi_args->num_devices) + return 0; + + di_args = *di_ret = malloc(fi_args->num_devices*sizeof(*di_args)); + if (!di_args) + return -errno; + + for (; i<=fi_args->max_id; ++i) { + BUG_ON(ndevs >= fi_args->num_devices); + ret = scrub_device_info(fd, i, &di_args[ndevs]); + if (ret == -ENODEV) + continue; + if (ret) + return ret; + ++ndevs; + } + + BUG_ON(ndevs == 0); + + return 0; +} + +int mkdir_p(char *path) +{ + int i; + int ret; + + for (i=1; i<strlen(path); ++i) { + if (path[i] != ''/'') + continue; + path[i] = ''\0''; + ret = mkdir(path, 0777); + if (ret && errno != EEXIST) + return 1; + path[i] = ''/''; + } + + return 0; +} + +static int scrub_start(int argc, char **argv, int resume) +{ + int fdmnt; + int prg_fd = -1; + int fdres = -1; + int ret; + pid_t pid; + int c; + int i; + int err = 0; + int print_raw = 0; + char *path; + int do_background = 1; + int do_wait = 0; + int do_print = 0; + int do_quiet = 0; + int do_record = 1; + int readonly = 0; + int do_stats_per_dev = 0; + int n_start = 0; + int n_skip = 0; + int n_resume = 0; + struct btrfs_ioctl_fs_info_args fi_args; + struct btrfs_ioctl_dev_info_args *di_args = NULL; + struct scrub_progress *sp = NULL; + struct scrub_fs_stat fs_stat; + struct timeval tv; + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + pthread_t *t_devs = NULL; + pthread_t t_prog; + pthread_attr_t t_attr; + struct scrub_file_record **past_scrubs = NULL; + struct scrub_file_record *last_scrub = NULL; + char *datafile = strdup(SCRUB_DATA_FILE); + char fsid[37]; + char sock_path[BTRFS_PATH_NAME_MAX+1] = ""; + struct scrub_progress_cycle spc; + pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER; + void *terr; + u64 devid; + + optind = 1; + while ((c = getopt(argc, argv, "BdqrR")) != -1) { + switch(c) { + case ''B'': + do_background = 0; + do_wait = 1; + do_print = 1; + break; + case ''d'': + do_stats_per_dev = 1; + break; + case ''q'': + do_quiet = 1; + break; + case ''r'': + readonly = 1; + break; + case ''R'': + print_raw = 1; + break; + case ''?'': + default: + fprintf(stderr, "ERROR: scrub args invalid.\n" + " -B do not background (implies -W)\n" + " -d stats per device (-B only)\n" + " -q quiet\n" + " -r read only mode\n"); + return 1; + } + } + + /* try to catch most error cases before forking */ + + spc.progress = NULL; + if (do_quiet && do_print) + do_print = 0; + + if (mkdir_p(datafile)) { + err(!do_quiet, "WARNING: cannot create scrub data " + "file, mkdir %s failed: %s. Status recording " + "disabled\n", datafile, strerror(errno)); + do_record = 0; + } + + path = argv[optind]; + + fdmnt = open_file_or_dir(path); + if (fdmnt < 0) { + err(!do_quiet, "ERROR: can''t access ''%s''\n", path); + return 12; + } + + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); + if (ret) { + err(!do_quiet, "ERROR: getting dev info for scrub failed: " + "%s\n", strerror(-ret)); + err = 1; + goto out; + } + if (!fi_args.num_devices) { + err(!do_quiet, "ERROR: no devices found\n"); + err = 1; + goto out; + } + + uuid_unparse(fi_args.fsid, fsid); + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); + if (fdres < 0 && fdres != -ENOENT) { + err(!do_quiet, "WARNING: failed to open status file: " + "%s\n", strerror(-fdres)); + } else if (fdres >= 0) { + past_scrubs = scrub_read_file(fdres, !do_quiet); + if (IS_ERR(past_scrubs)) + err(!do_quiet, "WARNING: failed to read status file: " + "%s\n", strerror(-PTR_ERR(past_scrubs))); + close(fdres); + } + + t_devs = malloc(fi_args.num_devices*sizeof(*t_devs)); + sp = calloc(1, fi_args.num_devices*sizeof(*sp)); + spc.progress = calloc(1, fi_args.num_devices*2*sizeof(*spc.progress)); + + if (!t_devs || !sp || !spc.progress) { + err(!do_quiet, "ERROR: scrub failed: %s", strerror(errno)); + err = 1; + goto out; + } + + ret = pthread_attr_init(&t_attr); + if (ret) { + err(!do_quiet, "ERROR: pthread_attr_init failed: %s\n", + strerror(ret)); + err = 1; + goto out; + } + + for (i = 0; i < fi_args.num_devices; ++i) { + devid = di_args[i].devid; + ret = pthread_mutex_init(&sp[i].progress_mutex, NULL); + if (ret) { + err(!do_quiet, "ERROR: pthread_mutex_init failed: " + "%s\n", strerror(ret)); + err = 1; + goto out; + } + last_scrub = last_dev_scrub(past_scrubs, devid); + sp[i].scrub_args.devid = devid; + sp[i].fd = fdmnt; + if (resume && last_scrub && (last_scrub->stats.canceled || + !last_scrub->stats.finished)) { + ++n_resume; + sp[i].scrub_args.start = last_scrub->p.last_physical; + sp[i].resumed = last_scrub; + } else if (resume) { + ++n_skip; + sp[i].skip = 1; + sp[i].resumed = last_scrub; + continue; + } else { + ++n_start; + sp[i].scrub_args.start = 0ll; + sp[i].resumed = NULL; + } + sp[i].skip = 0; + sp[i].scrub_args.end = (u64)-1ll; + sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0; + } + + if (!n_start && !n_resume) { + if (!do_quiet) + printf("scrub: nothing to resume for %s, fsid %s\n", + path, fsid); + err = 0; + goto out; + } + + ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0); + while (ret != -1) { + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, + NULL, sock_path, sizeof(sock_path)); + /* ignore EOVERFLOW, as strncpy follows anyway */ + strncpy(addr.sun_path, sock_path, + sizeof(addr.sun_path)-1); + ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret != -1 || errno != EADDRINUSE) + break; + ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); + if (!ret || errno != ECONNREFUSED) { + fprintf(stderr, "ERROR: scrub already running\n"); + close(prg_fd); + goto out; + } + ret = unlink(sock_path); + } + if (ret != -1) { + ret = listen(prg_fd, 100); + } + if (ret == -1) { + err(!do_quiet, "WARNING: failed to open the progress status " + "socket at %s: %s. Progress cannot be queried\n", + sock_path[0] ? sock_path : SCRUB_PROGRESS_SOCKET_PATH, + strerror(errno)); + if (prg_fd != -1) { + close(prg_fd); + prg_fd = -1; + if (sock_path[0]) + unlink(sock_path); + } + } + + if (do_record) { + /* write all-zero progress file for a start */ + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, + fi_args.num_devices); + if (ret) { + err(!do_quiet, "WARNING: failed to write the progress " + "status file: %s. Status recording disabled\n", + strerror(-ret)); + do_record = 0; + } + } + + if (do_background) { + pid = fork(); + if (pid == -1) { + err(!do_quiet, "ERROR: cannot scrub, fork failed: " + "%s\n", strerror(errno)); + err = 1; + goto out; + } + + if (pid) { + int stat; + scrub_handle_sigint_parent(); + if (!do_quiet) + printf("scrub %s on %s, fsid %s (pid=%d)\n", + n_start ? "started" : "resumed", + path, fsid, pid); + if (!do_wait) { + err = 0; + goto out; + } + ret = wait(&stat); + if (ret != pid) { + err(!do_quiet, "ERROR: wait failed: (ret=%d) " + "%s\n", ret, strerror(errno)); + err = 1; + goto out; + } + if (!WIFEXITED(stat) || WEXITSTATUS(stat)) { + err(!do_quiet, "ERROR: scrub process failed\n"); + err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1; + goto out; + } + err = 0; + goto out; + } + } + + scrub_handle_sigint_child(fdmnt); + + for (i = 0; i < fi_args.num_devices; ++i) { + if (sp[i].skip) { + sp[i].scrub_args.progress = sp[i].resumed->p; + sp[i].stats = sp[i].resumed->stats; + sp[i].ret = 0; + sp[i].stats.finished = 1; + continue; + } + devid = di_args[i].devid; + gettimeofday(&tv, NULL); + sp[i].stats.t_start = tv.tv_sec; + ret = pthread_create(&t_devs[i], &t_attr, scrub_one_dev,&sp[i]); + if (ret) { + if (do_print) + fprintf(stderr, "ERROR: creating " + "scrub_one_dev[%llu] thread failed: " + "%s\n", devid, strerror(ret)); + err = 1; + goto out; + } + } + + spc.fdmnt = fdmnt; + spc.prg_fd = prg_fd; + spc.do_record = do_record; + spc.write_mutex = &spc_write_mutex; + spc.shared_progress = sp; + spc.fi = &fi_args; + pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc); + + err = 0; + for (i = 0; i < fi_args.num_devices; ++i) { + if (sp[i].skip) + continue; + devid = di_args[i].devid; + ret = pthread_join(t_devs[i], NULL); + if (ret) { + if (do_print) + fprintf(stderr, "ERROR: pthread_join failed " + "for scrub_one_dev[%llu]: %s\n", devid, + strerror(ret)); + err++; + continue; + } + if (sp[i].ret && sp[i].ioctl_errno == ENODEV) { + if (do_print) + fprintf(stderr, "WARNING: device %lld not " + "present\n", devid); + continue; + } + if (sp[i].ret && sp[i].ioctl_errno == ECANCELED) { + err++; + } else if (sp[i].ret) { + if (do_print) + fprintf(stderr, "ERROR: scrubbing %s failed " + "for device id %lld (%s)\n", path, + devid, strerror(sp[i].ioctl_errno)); + err++; + continue; + } + } + + if (do_print) { + const char *append = "done"; + if (!do_stats_per_dev) + init_fs_stat(&fs_stat); + for (i = 0; i < fi_args.num_devices; ++i) { + if (do_stats_per_dev) { + print_scrub_dev(&di_args[i], + &sp[i].scrub_args.progress, + print_raw, + sp[i].ret ? "canceled" : "done", + &sp[i].stats); + } else { + if (sp[i].ret) + append = "canceled"; + add_to_fs_stat(&sp[i].scrub_args.progress, + &sp[i].stats, &fs_stat); + } + } + if (!do_stats_per_dev) { + printf("scrub %s for %s\n", append, fsid); + print_fs_stat(&fs_stat, print_raw); + } + } + + pthread_cancel(t_prog); + ret = pthread_join(t_prog, &terr); + if (do_print && terr && terr != PTHREAD_CANCELED) { + fprintf(stderr, "ERROR: recording progress " + "failed: %s\n", strerror(-PTR_ERR(terr))); + } + + if (do_record) { + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, + fi_args.num_devices); + if (ret && do_print) { + fprintf(stderr, "ERROR: failed to record the result: " + "%s\n", strerror(-ret)); + } + } + + scrub_handle_sigint_child(-1); + +out: + free_history(past_scrubs); + free(di_args); + free(t_devs); + free(sp); + free(spc.progress); + if (prg_fd > -1) { + close(prg_fd); + if (sock_path[0]) + unlink(sock_path); + } + close(fdmnt); + + return !!err; +} + +int do_scrub_start(int argc, char **argv) +{ + return scrub_start(argc, argv, 0); +} + +int do_scrub_resume(int argc, char **argv) +{ + return scrub_start(argc, argv, 1); +} + +int do_scrub_cancel(int argc, char **argv) +{ + char *path = argv[1]; + int ret; + int fdmnt; + int err; + char mp[BTRFS_PATH_NAME_MAX+1]; + struct btrfs_fs_devices* fs_devices_mnt = NULL; + + fdmnt = open_file_or_dir(path); + if (fdmnt < 0) { + fprintf(stderr, "ERROR: scrub cancel failed\n"); + return 12; + } + +again: + ret = ioctl(fdmnt, BTRFS_IOC_SCRUB_CANCEL, NULL); + err = errno; + close(fdmnt); + + if (ret && err == EINVAL) { + /* path is no mounted btrfs. try if it''s a device */ + ret = check_mounted_where(fdmnt, path, mp, sizeof(mp), + &fs_devices_mnt); + close(fdmnt); + if (ret) { + fdmnt = open_file_or_dir(mp); + if (fdmnt >= 0) { + path = mp; + goto again; + } + } + } + + if (ret) { + fprintf(stderr, "ERROR: scrub cancel failed on %s: %s\n", path, + err == ENOTCONN ? "not running" : strerror(errno)); + return 1; + } + + printf("scrub cancelled\n"); + + return 0; +} + +int do_scrub_status(int argc, char **argv) +{ + + char *path; + struct btrfs_ioctl_fs_info_args fi_args; + struct btrfs_ioctl_dev_info_args *di_args = NULL; + struct scrub_file_record **past_scrubs = NULL; + struct scrub_file_record *last_scrub; + struct scrub_fs_stat fs_stat; + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + int ret; + int fdmnt; + int i; + optind = 1; + int print_raw = 0; + int do_stats_per_dev = 0; + char c; + char fsid[37]; + int fdres = -1; + int err = 0; + + while ((c = getopt(argc, argv, "dR")) != -1) { + switch(c) { + case ''d'': + do_stats_per_dev = 1; + break; + case ''R'': + print_raw = 1; + break; + case ''?'': + default: + fprintf(stderr, "ERROR: scrub status args invalid.\n" + " -d stats per device\n"); + return 1; + } + } + + path = argv[optind]; + + fdmnt = open_file_or_dir(path); + if (fdmnt < 0) { + fprintf(stderr, "ERROR: can''t access to ''%s''\n", path); + return 12; + } + + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); + if (ret) { + fprintf(stderr, "ERROR: getting dev info for scrub failed: " + "%s\n", strerror(-ret)); + err = 1; + goto out; + } + if (!fi_args.num_devices) { + fprintf(stderr, "ERROR: no devices found\n"); + err = 1; + goto out; + } + + uuid_unparse(fi_args.fsid, fsid); + + fdres = socket(AF_UNIX, SOCK_STREAM, 0); + if (fdres == -1) { + fprintf(stderr, "ERROR: failed to create socket to " + "receive progress information: %s\n", + strerror(errno)); + err = 1; + goto out; + } + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, + NULL, addr.sun_path, sizeof(addr.sun_path)-1); + /* ignore EOVERFLOW, just use shorter name and hope for the best */ + ret = connect(fdres, (struct sockaddr *)&addr, sizeof(addr)); + if (ret == -1) { + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); + if (fdres < 0 && fdres != -ENOENT) { + fprintf(stderr, "WARNING: failed to open status file: " + "%s\n", strerror(-fdres)); + err = 1; + goto out; + } + } + + if (fdres >= 0) { + past_scrubs = scrub_read_file(fdres, 1); + if (IS_ERR(past_scrubs)) + fprintf(stderr, "WARNING: failed to read status: %s\n", + strerror(-PTR_ERR(past_scrubs))); + } + + printf("scrub status for %s\n", fsid); + + /* + * TODO: rather communicate with scrub process instead of + * dumping the file stats for instant results + */ + if (do_stats_per_dev) { + for (i = 0; i < fi_args.num_devices; ++i) { + last_scrub = last_dev_scrub(past_scrubs, + di_args[i].devid); + if (!last_scrub) { + print_scrub_dev(&di_args[i], NULL, print_raw, + NULL, NULL); + continue; + } + print_scrub_dev(&di_args[i], &last_scrub->p, print_raw, + last_scrub->stats.finished ? + "history" : "status", + &last_scrub->stats); + } + } else { + init_fs_stat(&fs_stat); + for (i = 0; i < fi_args.num_devices; ++i) { + last_scrub = last_dev_scrub(past_scrubs, + di_args[i].devid); + if (!last_scrub) + continue; + add_to_fs_stat(&last_scrub->p, &last_scrub->stats, + &fs_stat); + } + print_fs_stat(&fs_stat, print_raw); + } + +out: + free_history(past_scrubs); + free(di_args); + close(fdmnt); + if (fdres > -1) + close(fdres); + + return err; +} -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> --- man/btrfs.8.in | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 65 insertions(+), 1 deletions(-) diff --git a/man/btrfs.8.in b/man/btrfs.8.in index 26ef982..35aa44c 100644 --- a/man/btrfs.8.in +++ b/man/btrfs.8.in @@ -29,7 +29,15 @@ btrfs \- control a btrfs filesystem .PP \fBbtrfs\fP \fBdevice add\fP\fI <dev> [<dev>..] <path> \fP .PP -\fBbtrfs\fP \fBdevice delete\fP\fI <dev> [<dev>..] <path> \fP] +\fBbtrfs\fP \fBdevice delete\fP\fI <dev> [<dev>..] <path> \fP +.PP +\fBbtrfs\fP \fBscrub start\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP} +.PP +\fBbtrfs\fP \fBscrub cancel\fP {\fI<path>\fP|\fI<device>\fP} +.PP +\fBbtrfs\fP \fBscrub resume\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP} +.PP +\fBbtrfs\fP \fBscrub status\fP [-d] {\fI<path>\fP|\fI<device>\fP} .PP \fBbtrfs\fP \fBhelp|\-\-help|\-h \fP\fI\fP @@ -154,6 +162,62 @@ Add device(s) to the filesystem identified by \fI<path>\fR. \fBdevice delete\fR\fI <dev> [<dev>..] <path>\fR Remove device(s) from a filesystem identified by \fI<path>\fR. +.TP + +\fBscrub start\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP} +Start a scrub on all devices of the filesystem identified by \fI<path>\fR or on +a single \fI<device>\fR. Without options, scrub is started as a background +process. Progress can be obtained with the \fBscrub status\fR command. Scrubbing +involves reading all data from all disks and verifying checksums. Errors are +corrected along the way if possible. +.RS + +\fIOptions\fR +.IP -B 5 +Do not background and print scrub statistics when finished. +.IP -d 5 +Print separate statistics for each device of the filesystem (-B only). +.IP -q 5 +Quiet. Omit error messages and statistics. +.IP -r 5 +Read only mode. Do not attempt to correct anything. +.IP -u 5 +Scrub unused space as well. (NOT IMPLEMENTED) +.RE +.TP + +\fBscrub cancel\fP {\fI<path>\fP|\fI<device>\fP} +If a scrub is running on the filesystem identified by \fI<path>\fR, cancel it. +Progress is saved in the scrub progress file and scrubbing can be resumed later +using the \fBscrub resume\fR command. +If a \fI<device>\fR is given, the corresponding filesystem is found and +\fBscrub cancel\fP behaves as if it was called on that filesystem. +.TP + +\fBscrub resume\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP} +Resume a canceled or interrupted scrub cycle on the filesystem identified by +\fI<path>\fR or on a given \fI<device>\fR. Does not start a new scrub if the +last scrub finished successfully. +.RS + +\fIOptions\fR +.TP +see \fBscrub start\fP. +.RE +.TP + +\fBscrub status\fP [-d] {\fI<path>\fP|\fI<device>\fP} +Show status of a running scrub for the filesystem identified by \fI<path>\fR or +for the specified \fI<device>\fR. +If no scrub is running, show statistics of the last finished or canceled scrub +for that filesystem or device. +.RS + +\fIOptions\fR +.IP -d 5 +Print separate statistics for each device of the filesystem. +.RE + .PP .SH EXIT STATUS -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Yes, this is over three months after the initial posting, but since nobody else has looked at it yet, and the patch is in my integration stack... I''ve not reviewed the whole thing -- just the "scrub start" code so far. I''ve removed the bits I''ve not checked from the file below. On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: No commit message at all?> Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> > --- > scrub.c | 1568 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 1568 insertions(+), 0 deletions(-)This is quite big to review in one lump... Is it possible to split the patch into functional sections? (Add shared infrastructure, then each of the four functions separately, maybe?)> diff --git a/scrub.c b/scrub.c > new file mode 100644 > index 0000000..22052ed > --- /dev/null > +++ b/scrub.c > @@ -0,0 +1,1568 @@It seems to be conventional to put a GPL notice at the top of source files... :)> + > +#include <sys/ioctl.h> > +#include <sys/wait.h> > +#include <sys/stat.h> > +#include <sys/types.h> > +#include <sys/socket.h> > +#include <sys/un.h> > +#include <poll.h> > +#include <sys/file.h> > +#include <uuid/uuid.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <pthread.h> > +#include <ctype.h> > +#include <signal.h> > +#include <stdarg.h> > + > +#include "ctree.h" > +#include "ioctl.h" > +#include "btrfs_cmds.h" > +#include "utils.h" > +#include "volumes.h" > +#include "disk-io.h" > + > +#define SCRUB_DATA_FILE "/var/btrfs/scrub.status" > +#define SCRUB_PROGRESS_SOCKET_PATH "/var/btrfs/scrub.progress"I''d suggest /var/lib/btrfs/[...] instead. Putting it in the top level of /var seems a bit presumptuous (and contravenes the FHS).> +#define SCRUB_FILE_VERSION_PREFIX "scrub status:"I''d drop the : from this, since there''s no colons used in other key names elsewhere (in scrub_read_file() and _scrub_kvread()), and the correction for the colon at [1] is confusing.> +#define SCRUB_FILE_VERSION "1" > + > +struct scrub_stats { > + time_t t_start; > + time_t t_resumed; > + u64 duration; > + u64 finished; > + u64 canceled; > +}; > + > +struct scrub_progress { > + struct btrfs_ioctl_scrub_args scrub_args; > + int fd; > + int ret; > + int skip; > + struct scrub_stats stats; > + struct scrub_file_record *resumed; > + int ioctl_errno; > + pthread_mutex_t progress_mutex; > +}; > + > +struct scrub_file_record { > + u8 fsid[BTRFS_FSID_SIZE]; > + u64 devid; > + struct scrub_stats stats; > + struct btrfs_scrub_progress p; > +}; > + > +struct scrub_progress_cycle { > + int fdmnt; > + int prg_fd; > + int do_record; > + struct btrfs_ioctl_fs_info_args *fi; > + struct scrub_progress *progress; > + struct scrub_progress *shared_progress; > + pthread_mutex_t *write_mutex; > +}; > + > +struct scrub_fs_stat { > + struct btrfs_scrub_progress p; > + struct scrub_stats s; > + int i; > +}; > + > +static void print_scrub_full(struct btrfs_scrub_progress *sp) > +{ > + printf("\tdata_extents_scrubbed: %lld\n", sp->data_extents_scrubbed); > + printf("\ttree_extents_scrubbed: %lld\n", sp->tree_extents_scrubbed); > + printf("\tdata_bytes_scrubbed: %lld\n", sp->data_bytes_scrubbed); > + printf("\ttree_bytes_scrubbed: %lld\n", sp->tree_bytes_scrubbed); > + printf("\tread_errors: %lld\n", sp->read_errors); > + printf("\tcsum_errors: %lld\n", sp->csum_errors); > + printf("\tverify_errors: %lld\n", sp->verify_errors); > + printf("\tno_csum: %lld\n", sp->no_csum); > + printf("\tcsum_discards: %lld\n", sp->csum_discards); > + printf("\tsuper_errors: %lld\n", sp->super_errors); > + printf("\tmalloc_errors: %lld\n", sp->malloc_errors); > + printf("\tuncorrectable_errors: %lld\n", sp->uncorrectable_errors); > + printf("\tcorrected_errors: %lld\n", sp->corrected_errors); > + printf("\tlast_physical: %lld\n", sp->last_physical); > +} > + > +#define err(test, ...) do { \ > + if (test) \ > + fprintf(stderr, __VA_ARGS__); \ > +} while (0) > + > +#define PRINT_SCRUB_ERROR(test, desc) do { \ > + if (test) \ > + printf(" %s=%llu", desc, test); \ > +} while (0)Extra line of space here, otherwise it looks like the function is part of the macro. (And in a number of other places throughout the file, too)> +static void print_scrub_summary(struct btrfs_scrub_progress *p) > +{ > + u64 err_cnt; > + u64 err_cnt2; > + > + err_cnt = p->read_errors + > + p->csum_errors + > + p->verify_errors + > + p->csum_discards + > + p->super_errors + > + p->malloc_errors; > + > + err_cnt2 = p->corrected_errors + p->uncorrectable_errors; > + > + printf("\ttotal bytes scrubbed: %s with %llu errors\n", > + pretty_sizes(p->data_bytes_scrubbed + p->tree_bytes_scrubbed), > + max(err_cnt, err_cnt2));Memory leak: pretty_sizes() mallocs space for its result.> + if (err_cnt || err_cnt2) { > + printf("\terror details:"); > + PRINT_SCRUB_ERROR(p->read_errors, "read"); > + PRINT_SCRUB_ERROR(p->super_errors, "super"); > + PRINT_SCRUB_ERROR(p->malloc_errors, "malloc"); > + PRINT_SCRUB_ERROR(p->verify_errors, "verify"); > + PRINT_SCRUB_ERROR(p->csum_errors, "csum"); > + PRINT_SCRUB_ERROR(p->csum_discards, "csum-discards"); > + printf("\n"); > + printf("\tcorrected errors: %llu, uncorrectable errors: %llu\n", > + p->corrected_errors, p->uncorrectable_errors); > + } > +} > + > +#define _SCRUB_FS_STAT(p, name, fs_stat) fs_stat->p.name += p->namecheckpatch.pl says: scrub.c:130: ERROR: Macros with complex values should be enclosed in parenthesis (and in general, checkpatch.pl is whinging about lots of whitespace/ indentation issues with this file)> +#define _SCRUB_FS_STAT_MIN(ss, name, fs_stat) \ > +do { \ > + if (fs_stat->s.name > ss->name) { \ > + fs_stat->s.name = ss->name; \ > + } \ > +} while (0) > +#define _SCRUB_FS_STAT_ZMIN(ss, name, fs_stat) \ > +do { \ > + if (!fs_stat->s.name || fs_stat->s.name > ss->name) { \ > + fs_stat->s.name = ss->name; \ > + } \ > +} while (0) > +#define _SCRUB_FS_STAT_MAX(ss, name, fs_stat) \Maybe use _SCRUB_FS_STAT_ZMAX to match the ZMIN usage above?> +do { \ > + if (!fs_stat->s.name || fs_stat->s.name < ss->name) { \ > + fs_stat->s.name = ss->name; \ > + } \ > +} while (0)Line of space needed here.> +static void add_to_fs_stat(struct btrfs_scrub_progress *p, > + struct scrub_stats *ss, > + struct scrub_fs_stat *fs_stat) > +{ > + _SCRUB_FS_STAT(p, data_extents_scrubbed, fs_stat); > + _SCRUB_FS_STAT(p, tree_extents_scrubbed, fs_stat); > + _SCRUB_FS_STAT(p, data_bytes_scrubbed, fs_stat); > + _SCRUB_FS_STAT(p, tree_bytes_scrubbed, fs_stat); > + _SCRUB_FS_STAT(p, read_errors, fs_stat); > + _SCRUB_FS_STAT(p, csum_errors, fs_stat); > + _SCRUB_FS_STAT(p, verify_errors, fs_stat); > + _SCRUB_FS_STAT(p, no_csum, fs_stat); > + _SCRUB_FS_STAT(p, csum_discards, fs_stat); > + _SCRUB_FS_STAT(p, super_errors, fs_stat); > + _SCRUB_FS_STAT(p, malloc_errors, fs_stat); > + _SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat); > + _SCRUB_FS_STAT(p, corrected_errors, fs_stat); > + _SCRUB_FS_STAT(p, last_physical, fs_stat); > + _SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat); > + _SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat); > + _SCRUB_FS_STAT_MAX(ss, duration, fs_stat); > + _SCRUB_FS_STAT_MAX(ss, canceled, fs_stat); > + _SCRUB_FS_STAT_MIN(ss, finished, fs_stat); > +} > + > +static void init_fs_stat(struct scrub_fs_stat *fs_stat) > +{ > + memset(fs_stat, 0, sizeof(*fs_stat)); > + fs_stat->s.finished = 2;What does 2 mean? ->s.finished seems to be a boolean everywhere except here. Can you turn this value into a more descriptive #define? Or just use 1?> +} > + > +static void _print_scrub_ss(struct scrub_stats *ss) > +{ > + char t[BTRFS_PATH_NAME_MAX+1];Since this string is used for storing formatted dates, there''s a little cognitive dissonance over it being long enough to store a btrfs path name... (Yeah, OK, it''s a convenient large value, but still a tad confusing to use it here)> + struct tm tm; > + > + if (!ss || !ss->t_start) { > + printf("\tno stats available\n"); > + return; > + } > + if (ss->t_resumed) { > + localtime_r(&ss->t_resumed, &tm); > + strftime(t, sizeof(t), "%c", &tm);strftime doesn''t append a terminating zero if the string is longer than the buffer it''s filling.> + printf("\tscrub resumed at %s", t); > + } else { > + localtime_r(&ss->t_start, &tm); > + strftime(t, sizeof(t), "%c", &tm); > + printf("\tscrub started at %s", t); > + } > + if (ss->finished && !ss->canceled) { > + printf(" and finished after %llu seconds\n", > + ss->duration); > + } else if (ss->canceled) { > + printf(" and was aborted after %llu seconds\n", > + ss->duration); > + } else { > + printf(", running for %llu seconds\n", ss->duration); > + } > +} > + > +static void print_scrub_dev(struct btrfs_ioctl_dev_info_args *di, > + struct btrfs_scrub_progress *p, int raw, > + const char *append, struct scrub_stats *ss) > +{ > + printf("scrub device %s (id %llu) %s\n", di->path, di->devid, > + append ? append : ""); > + > + _print_scrub_ss(ss); > + > + if (p) { > + if (raw) > + print_scrub_full(p); > + else > + print_scrub_summary(p); > + } > +} > + > +static void print_fs_stat(struct scrub_fs_stat *fs_stat, int raw) > +{ > + _print_scrub_ss(&fs_stat->s); > + > + if (raw) > + print_scrub_full(&fs_stat->p); > + else > + print_scrub_summary(&fs_stat->p); > +} > + > +static void free_history(struct scrub_file_record **last_scrubs) > +{ > + struct scrub_file_record **l = last_scrubs; > + if (!l) > + return; > + while (*l) > + free(*l++); > + free(last_scrubs); > +} > + > +static int cancel_fd = -1; > +static void scrub_sigint_record_progress(int signal)What does this function have to do with recording progress? Seems a bit of a misnomer to me. (Call it scrub_sigint_cancel_scrub, maybe?)> +{ > + ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL); > +} > + > +static int scrub_handle_sigint_parent(void) > +{ > + struct sigaction sa = { > + .sa_handler = SIG_IGN, > + .sa_flags = SA_RESTART, > + }; > + > + return sigaction(SIGINT, &sa, NULL); > +} > + > +static int scrub_handle_sigint_child(int fd) > +{ > + struct sigaction sa = { > + .sa_handler = fd == -1 ? SIG_DFL : scrub_sigint_record_progress, > + }; > + > + cancel_fd = fd; > + return sigaction(SIGINT, &sa, NULL); > +} > + > +static int _scrub_datafile(const char *fn_base, const char *fn_local, > + const char *fn_tmp, char *datafile, int max) > +{ > + int ret; > + > + strncpy(datafile, fn_base, max);You need to put a zero byte at datafile[max], otherwise it could be unterminated (if max <= strlen(fn_base)), and the strlen will then run off the end of the string.> + ret = strlen(datafile); > + > + if (ret + 1 >= max) > + return -EOVERFLOW;This will never happen (if you put the zero terminator in)> + datafile[ret] = ''.''; > + strncpy(datafile+ret+1, fn_local, max-ret-1);... and add a zero byte here, too (or use strncat)> + ret = strlen(datafile); > + > + if (ret + 1 >= max) > + return -EOVERFLOW;as above: won''t happen> + if (fn_tmp) { > + datafile[ret] = ''_''; > + strncpy(datafile+ret+1, fn_tmp, max-ret-1);... and add a zero byte here (or use strncat)> + ret = strlen(datafile); > + > + if (ret >= max) > + return -EOVERFLOW; > + } > + > + return 0; > +} > + > +static int _scrub_open_file(const char *datafile, int m)Just a niggle: Why the leading _ when other scrub-specific functions don''t have it? (There''s about a dozen other such symbols -- was there a criterion you used to decide which ones have _ and which don''t?)> +{ > + int fd; > + int ret; > + > + fd = open(datafile, m, 0600); > + if (fd < 0) > + return -errno; > + > + ret = flock(fd, LOCK_EX|LOCK_NB); > + if (ret) { > + ret = errno; > + close(fd); > + return -ret; > + } > + > + return fd; > +} > + > +static int scrub_open_file_r(const char *fn_base, const char *fn_local) > +{ > + int ret; > + char datafile[BTRFS_PATH_NAME_MAX+1]; > + ret = _scrub_datafile(fn_base, fn_local, NULL, > + datafile, sizeof(datafile)); > + if (ret < 0) > + return ret; > + return _scrub_open_file(datafile, O_RDONLY); > +} > + > +static int scrub_open_file_w(const char *fn_base, const char *fn_local, > + const char *tmp) > +{ > + int ret; > + char datafile[BTRFS_PATH_NAME_MAX+1]; > + ret = _scrub_datafile(fn_base, fn_local, tmp, > + datafile, sizeof(datafile)); > + if (ret < 0) > + return ret; > + return _scrub_open_file(datafile, O_WRONLY|O_CREAT); > +} > + > +static int scrub_rename_file(const char *fn_base, const char *fn_local, > + const char *tmp) > +{ > + int ret; > + char datafile_old[BTRFS_PATH_NAME_MAX+1]; > + char datafile_new[BTRFS_PATH_NAME_MAX+1]; > + ret = _scrub_datafile(fn_base, fn_local, tmp, > + datafile_old, sizeof(datafile_old)); > + if (ret < 0) > + return ret; > + ret = _scrub_datafile(fn_base, fn_local, NULL, > + datafile_new, sizeof(datafile_new)); > + if (ret < 0) > + return ret; > + ret = rename(datafile_old, datafile_new); > + return ret ? -errno : 0; > +} > + > +#define _SCRUB_KVREAD(i, name, avail, l, dest) \ > + _scrub_kvread(i, sizeof(#name), avail, l, #name, dest.name) > +#define _SCRUB_KVREAD_STATS(i, name, avail, l, dest) \ > + _scrub_kvread(i, sizeof(#name), avail, l, #name, dest->stats.name) > +/* > + * returns 0 if the key did not match (nothing was read) > + * 1 if the key did match (success) > + * -1 if the key did match and an error occured > + */ > +static int _scrub_kvread(int *i, int len, int avail, const char *buf, > + const char *key, u64 *dest) > +{ > + int j; > + > + if (*i+len+1 < avail && strncmp(&buf[*i], key, len-1) == 0) { > + *i += len-1; > + if (buf[*i] != '':'') { > + return -1; > + } > + *i += 1; > + for (j=0; isdigit(buf[*i+j]) && *i+j < avail; ++j) > + ; > + if (*i+j >= avail) > + return -1; > + *dest = atoll(&buf[*i]); > + *i += j; > + return 1; > + } > + > + return 0; > +} > + > +#define _SCRUB_ILLEGAL do { \I''d better call the police, then... :) I think you mean invalid (or unexpected, or unparsable), not illegal. (and likewise in the message, below, of course).> + if (report_errors) { \ > + fprintf(stderr, "WARNING: illegal data in line %d pos " \ > + "%d state %d (near \"%.*s\") at %s:%d\n", \ > + lineno, i, state, 20 > avail ? avail : 20, l+i, \ > + __FILE__, __LINE__); \ > + } \ > + goto skip; \ > +} while (0)Extra line of space here> +static struct scrub_file_record **scrub_read_file(int fd, int report_errors) > +{ > + int avail = 0; > + int old_avail = 0; > + char l[512]; > + int state = 0; > + int curr = -1; > + int i = 0; > + int j; > + int ret; > + int eof = 0; > + int lineno = 0; > + u64 version; > + char empty_uuid[BTRFS_FSID_SIZE] = {0}; > + struct scrub_file_record **p = NULL; > + > + if (fd < 0) > + return ERR_PTR(-EINVAL); > + > +again: > + old_avail = avail-i; > + BUG_ON(old_avail < 0); > + if (old_avail) > + memmove(l, l+i, old_avail); > + avail = read(fd, l+old_avail, sizeof(l)-old_avail); > + if (avail == 0) { > + eof = 1; > + } > + if (avail + old_avail == 0) { > + if (curr >= 0 && > + memcmp(p[curr]->fsid, empty_uuid, BTRFS_FSID_SIZE) == 0) { > + p[curr] = NULL; > + } else if (curr == -1) { > + p = ERR_PTR(-ENODATA); > + } > + return p; > + } > + if (avail == -1) > + return ERR_PTR(-errno);If avail == -1 (i.e. the read failed) and old_avail == 1 (i.e. there was only one character left in the buffer), then that triggers the code in the previous if statement (avail+old_avail == 0) as well.> + avail += old_avail; > + > + i = 0; > + while (i < avail) { > + switch (state) { > + case 0: /* start if file */of> + ret = _scrub_kvread(&i, > + sizeof(SCRUB_FILE_VERSION_PREFIX)-1, avail, l,[1] Drop the colon from SCRUB_FILE_VERSION_PREFIX and leave out the -1 here.> + SCRUB_FILE_VERSION_PREFIX, &version); > + if (ret != 1) > + _SCRUB_ILLEGAL; > + if (version != atoll(SCRUB_FILE_VERSION)) > + return ERR_PTR(-ENOTSUP); > + state = 6; > + continue; > + case 1: /* start of line, alloc */ > + if (!eof && !memchr(l+i, ''\n'', avail-i)) > + goto again;This will cause problems (an infinite loop), I think, if there''s an input line greater than 512 bytes in length -- you can''t find a line ending, so you go back to "again", read in zero bytes because you''ve not consumed anything in your buffer, and come back here to discover that there''s no line ending...> + ++lineno; > + if (curr > -1 && memcmp(p[curr]->fsid, empty_uuid, > + BTRFS_FSID_SIZE) == 0) { > + state = 2; > + continue; > + } > + ++curr; > + p = realloc(p, (curr+2)*sizeof(*p)); > + if (p) > + p[curr] = malloc(sizeof(**p)); > + if (!p || !p[curr]) > + return ERR_PTR(-errno); > + memset(p[curr], 0, sizeof(**p)); > + p[curr+1] = NULL; > + ++state;You probably need a comment here (and below, as appropriate) to indicate that the lack of a continue or break is intended, and not a bug.> + case 2: /* start of line, skip space */ > + while (isspace(l[i]) && i<avail) { > + if (l[i] == ''\n'') > + ++lineno; > + ++i; > + } > + if (i >= avail || (!eof && !memchr(l+i, ''\n'', avail-i))) > + goto again; > + ++state; > + case 3: /* read fsid */ > + if (i == avail) > + continue; > + for (j=0; l[i+j] != '':'' && i+j < avail; ++j) > + ; > + if (i+j+1 >= avail) > + _SCRUB_ILLEGAL;Possibly a comment needed here to indicate that state 1 guarantees a full line of text in the buffer, so hitting the end of the buffer here is a fatal error. (It took me a while to work out why this case was always an error).> + if (j != 36) > + _SCRUB_ILLEGAL; > + l[i+j] = ''\0''; > + ret = uuid_parse(l+i, p[curr]->fsid); > + if (ret) > + _SCRUB_ILLEGAL; > + i += j + 1; > + ++state; > + case 4: /* read dev id */ > + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) > + ; > + if (!j || i+j+1 >= avail)j == 0 is clearer than !j here, IMO> + _SCRUB_ILLEGAL; > + p[curr]->devid = atoll(&l[i]); > + i += j + 1;Is there any reason that you couldn''t just use strtoull here? We know that the string is terminated with a \n (by the guarantee of state 1), so strtoull will always finish within the buffer.> + ++state; > + case 5: /* read key/value pair */ > + ret = _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, tree_extents_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, data_bytes_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, tree_bytes_scrubbed, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, read_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, csum_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, verify_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, no_csum, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, csum_discards, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, super_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, malloc_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, uncorrectable_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, corrected_errors, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, last_physical, avail, > + l, &p[curr]->p) || > + _SCRUB_KVREAD(&i, finished, avail, > + l, &p[curr]->stats) || > + _SCRUB_KVREAD(&i, t_start, avail, > + l, (u64*)&p[curr]->stats) || > + _SCRUB_KVREAD(&i, t_resumed, avail, > + l, (u64*)&p[curr]->stats) || > + _SCRUB_KVREAD(&i, duration, avail, > + l, (u64*)&p[curr]->stats) || > + _SCRUB_KVREAD(&i, canceled, avail, > + l, &p[curr]->stats); > + if (ret != 1) > + _SCRUB_ILLEGAL;If there''s a syntax error in the parser (i.e. the matched key is not followed by a colon, or we run out of data), then _SCRUB_KVREAD returns -1, which is converted to 1 by the ||, and the error is dropped silently.> + ++state; > + case 6: /* after number */ > + if (l[i] == ''|'') { > + state = 5; > + } else if (l[i] == ''\n'') { > + state = 1; > + } else { > + _SCRUB_ILLEGAL; > + } > + ++i; > + continue; > + case 99: /* skip rest of line */ > +skip: > + state = 99; > + do { > + ++i; > + if (l[i-1] == ''\n'') { > + state = 1; > + break; > + } > + } while (i < avail); > + continue; > + } > + BUG(); > + } > + goto again; > +} > +#undef _SCRUB_ILLEGAL[...]> +static int scrub_write_file(int fd, const char *fsid, > + struct scrub_progress* data, int n) > +{ > + int ret = 0; > + int i; > + char buf[1024]; > + struct scrub_progress local; > + struct scrub_progress *use; > + > + if (n < 1) { > + return -EINVAL; > + } > + > + ret = _scrub_write_buf(fd, SCRUB_FILE_VERSION_PREFIX SCRUB_FILE_VERSION > + "\n", sizeof(SCRUB_FILE_VERSION_PREFIX)-1If you leave out the colon from SCRUB_FILE_VERSION_PREFIX, then you don''t need the -1 here. You can add a literal colon in the middle of the string concatenation between the prefix and the version number.> + + sizeof(SCRUB_FILE_VERSION)-1 + 1); > + if (ret) > + return -EOVERFLOW; > +[...]> +static struct scrub_file_record *last_dev_scrub( > + struct scrub_file_record *const *const past_scrubs, u64 devid) > +{ > + int i; > + > + if (!past_scrubs || IS_ERR(past_scrubs)) > + return NULL; > + > + for (i=0; past_scrubs[i]; ++i) > + if (past_scrubs[i]->devid == devid) > + return past_scrubs[i]; > + > + return NULL; > +} > + > +static int scrub_device_info(int fd, u64 devid, > + struct btrfs_ioctl_dev_info_args *di_args) > +{ > + int ret; > + > + di_args->devid = devid; > + memset(&di_args->uuid, ''\0'', sizeof(di_args->uuid)); > + > + ret = ioctl(fd, BTRFS_IOC_DEV_INFO, di_args); > + return ret ? -errno : 0; > +} > + > +static int scrub_fs_info(int fd, char *path, > + struct btrfs_ioctl_fs_info_args *fi_args, > + struct btrfs_ioctl_dev_info_args **di_ret) > +{ > + int ret = 0; > + int ndevs = 0; > + int i = 1; > + struct btrfs_fs_devices* fs_devices_mnt = NULL; > + struct btrfs_ioctl_dev_info_args *di_args; > + char mp[BTRFS_PATH_NAME_MAX+1]; > + > + memset(fi_args, 0, sizeof(*fi_args)); > + > + ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); > + if (ret && errno == EINVAL) { > + /* path is no mounted btrfs. try if it''s a device */ > + ret = check_mounted_where(fd, path, mp, sizeof(mp), > + &fs_devices_mnt); > + if (!ret) > + return -EINVAL; > + fi_args->num_devices = 1;Is this a valid assumption? What happens if I pass just one device of a multi-device FS to "btrfs scrub start"?> + fi_args->max_id = fs_devices_mnt->latest_devid; > + i = fs_devices_mnt->latest_devid; > + memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE); > + close(fd); > + fd = open_file_or_dir(mp); > + if (fd < 0) > + return -errno; > + } else if (ret) { > + return -errno; > + } > + > + if (!fi_args->num_devices) > + return 0; > + > + di_args = *di_ret = malloc(fi_args->num_devices*sizeof(*di_args)); > + if (!di_args) > + return -errno; > + > + for (; i<=fi_args->max_id; ++i) { > + BUG_ON(ndevs >= fi_args->num_devices); > + ret = scrub_device_info(fd, i, &di_args[ndevs]); > + if (ret == -ENODEV) > + continue; > + if (ret) > + return ret; > + ++ndevs; > + } > + > + BUG_ON(ndevs == 0); > + > + return 0; > +} > + > +int mkdir_p(char *path) > +{ > + int i; > + int ret; > + > + for (i=1; i<strlen(path); ++i) { > + if (path[i] != ''/'') > + continue; > + path[i] = ''\0''; > + ret = mkdir(path, 0777); > + if (ret && errno != EEXIST) > + return 1; > + path[i] = ''/''; > + } > + > + return 0; > +} > + > +static int scrub_start(int argc, char **argv, int resume) > +{ > + int fdmnt; > + int prg_fd = -1; > + int fdres = -1; > + int ret; > + pid_t pid; > + int c; > + int i; > + int err = 0;This clashes with the macro err(). OK, I know the compiler''s clever enough to disambiguate, but it leads to nastiness like [2], below.> + int print_raw = 0; > + char *path; > + int do_background = 1; > + int do_wait = 0; > + int do_print = 0; > + int do_quiet = 0; > + int do_record = 1; > + int readonly = 0; > + int do_stats_per_dev = 0; > + int n_start = 0; > + int n_skip = 0; > + int n_resume = 0; > + struct btrfs_ioctl_fs_info_args fi_args; > + struct btrfs_ioctl_dev_info_args *di_args = NULL; > + struct scrub_progress *sp = NULL; > + struct scrub_fs_stat fs_stat; > + struct timeval tv; > + struct sockaddr_un addr = { > + .sun_family = AF_UNIX, > + }; > + pthread_t *t_devs = NULL; > + pthread_t t_prog; > + pthread_attr_t t_attr; > + struct scrub_file_record **past_scrubs = NULL; > + struct scrub_file_record *last_scrub = NULL; > + char *datafile = strdup(SCRUB_DATA_FILE);This is never freed.> + char fsid[37];Magic number. is there a #define in libuuid for this value?> + char sock_path[BTRFS_PATH_NAME_MAX+1] = ""; > + struct scrub_progress_cycle spc; > + pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER; > + void *terr; > + u64 devid; > + > + optind = 1; > + while ((c = getopt(argc, argv, "BdqrR")) != -1) { > + switch(c) { > + case ''B'': > + do_background = 0; > + do_wait = 1; > + do_print = 1; > + break; > + case ''d'': > + do_stats_per_dev = 1; > + break; > + case ''q'': > + do_quiet = 1; > + break; > + case ''r'': > + readonly = 1; > + break; > + case ''R'': > + print_raw = 1; > + break; > + case ''?'': > + default: > + fprintf(stderr, "ERROR: scrub args invalid.\n" > + " -B do not background (implies -W)\n"What''s -W?> + " -d stats per device (-B only)\n" > + " -q quiet\n" > + " -r read only mode\n"); > + return 1; > + } > + } > + > + /* try to catch most error cases before forking */ > + > + spc.progress = NULL; > + if (do_quiet && do_print) > + do_print = 0; > + > + if (mkdir_p(datafile)) { > + err(!do_quiet, "WARNING: cannot create scrub data " > + "file, mkdir %s failed: %s. Status recording " > + "disabled\n", datafile, strerror(errno)); > + do_record = 0; > + } > + > + path = argv[optind];No bounds check: hrm@ruthven:btrfs-progs-unstable $ ./btrfs scrub start -B ERROR: can''t access ''(null)''> + fdmnt = open_file_or_dir(path); > + if (fdmnt < 0) { > + err(!do_quiet, "ERROR: can''t access ''%s''\n", path); > + return 12; > + } > + > + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); > + if (ret) { > + err(!do_quiet, "ERROR: getting dev info for scrub failed: " > + "%s\n", strerror(-ret)); > + err = 1; > + goto out; > + } > + if (!fi_args.num_devices) { > + err(!do_quiet, "ERROR: no devices found\n"); > + err = 1; > + goto out; > + } > + > + uuid_unparse(fi_args.fsid, fsid); > + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); > + if (fdres < 0 && fdres != -ENOENT) { > + err(!do_quiet, "WARNING: failed to open status file: " > + "%s\n", strerror(-fdres)); > + } else if (fdres >= 0) { > + past_scrubs = scrub_read_file(fdres, !do_quiet); > + if (IS_ERR(past_scrubs)) > + err(!do_quiet, "WARNING: failed to read status file: " > + "%s\n", strerror(-PTR_ERR(past_scrubs))); > + close(fdres); > + } > + > + t_devs = malloc(fi_args.num_devices*sizeof(*t_devs)); > + sp = calloc(1, fi_args.num_devices*sizeof(*sp));Shouldn''t that be calloc(fi_args.num_devices, sizeof(*sp)) ? (OK, it doesn''t make any particular difference, but it just seems odd to keep a dog and bark yourself).> + spc.progress = calloc(1, fi_args.num_devices*2*sizeof(*spc.progress));Woof! (And why do we need twice as many progress markers as devices?)> + if (!t_devs || !sp || !spc.progress) { > + err(!do_quiet, "ERROR: scrub failed: %s", strerror(errno)); > + err = 1;[2] Eugh. Calling what looks like a function, then assigning a value to it. Can you call the variable something else? (Or make the macro a more obvious macro: ERR() say?)> + goto out; > + } > + > + ret = pthread_attr_init(&t_attr); > + if (ret) { > + err(!do_quiet, "ERROR: pthread_attr_init failed: %s\n", > + strerror(ret)); > + err = 1; > + goto out; > + } > + > + for (i = 0; i < fi_args.num_devices; ++i) { > + devid = di_args[i].devid; > + ret = pthread_mutex_init(&sp[i].progress_mutex, NULL); > + if (ret) { > + err(!do_quiet, "ERROR: pthread_mutex_init failed: " > + "%s\n", strerror(ret)); > + err = 1; > + goto out; > + } > + last_scrub = last_dev_scrub(past_scrubs, devid); > + sp[i].scrub_args.devid = devid; > + sp[i].fd = fdmnt; > + if (resume && last_scrub && (last_scrub->stats.canceled || > + !last_scrub->stats.finished)) { > + ++n_resume; > + sp[i].scrub_args.start = last_scrub->p.last_physical; > + sp[i].resumed = last_scrub; > + } else if (resume) { > + ++n_skip; > + sp[i].skip = 1; > + sp[i].resumed = last_scrub; > + continue; > + } else { > + ++n_start; > + sp[i].scrub_args.start = 0ll; > + sp[i].resumed = NULL; > + } > + sp[i].skip = 0; > + sp[i].scrub_args.end = (u64)-1ll; > + sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0; > + } > + > + if (!n_start && !n_resume) { > + if (!do_quiet) > + printf("scrub: nothing to resume for %s, fsid %s\n", > + path, fsid); > + err = 0; > + goto out; > + } > + > + ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0); > + while (ret != -1) { > + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, > + NULL, sock_path, sizeof(sock_path)); > + /* ignore EOVERFLOW, as strncpy follows anyway */The name in sock_path could still be truncated on -EOVERFLOW, though. Is that always safe?> + strncpy(addr.sun_path, sock_path, > + sizeof(addr.sun_path)-1); > + ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); > + if (ret != -1 || errno != EADDRINUSE) > + break;If we failed to bind because the address was in use, is there much point in trying to connect to the socket here?> + ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); > + if (!ret || errno != ECONNREFUSED) { > + fprintf(stderr, "ERROR: scrub already running\n"); > + close(prg_fd); > + goto out; > + } > + ret = unlink(sock_path);Under the right (wrong) set of circumstances, isn''t this loop going to busy-wait?> + } > + if (ret != -1) { > + ret = listen(prg_fd, 100); > + } > + if (ret == -1) { > + err(!do_quiet, "WARNING: failed to open the progress status " > + "socket at %s: %s. Progress cannot be queried\n", > + sock_path[0] ? sock_path : SCRUB_PROGRESS_SOCKET_PATH, > + strerror(errno)); > + if (prg_fd != -1) { > + close(prg_fd); > + prg_fd = -1; > + if (sock_path[0]) > + unlink(sock_path); > + } > + } > + > + if (do_record) { > + /* write all-zero progress file for a start */ > + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, > + fi_args.num_devices);-HRM: Unchecked scrub_write_progress> + if (ret) { > + err(!do_quiet, "WARNING: failed to write the progress " > + "status file: %s. Status recording disabled\n", > + strerror(-ret)); > + do_record = 0; > + } > + } > + > + if (do_background) { > + pid = fork(); > + if (pid == -1) { > + err(!do_quiet, "ERROR: cannot scrub, fork failed: " > + "%s\n", strerror(errno)); > + err = 1; > + goto out; > + } > + > + if (pid) { > + int stat; > + scrub_handle_sigint_parent(); > + if (!do_quiet) > + printf("scrub %s on %s, fsid %s (pid=%d)\n", > + n_start ? "started" : "resumed", > + path, fsid, pid); > + if (!do_wait) { > + err = 0; > + goto out; > + } > + ret = wait(&stat); > + if (ret != pid) { > + err(!do_quiet, "ERROR: wait failed: (ret=%d) " > + "%s\n", ret, strerror(errno)); > + err = 1; > + goto out; > + } > + if (!WIFEXITED(stat) || WEXITSTATUS(stat)) { > + err(!do_quiet, "ERROR: scrub process failed\n"); > + err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1; > + goto out; > + } > + err = 0; > + goto out; > + } > + } > + > + scrub_handle_sigint_child(fdmnt); > + > + for (i = 0; i < fi_args.num_devices; ++i) { > + if (sp[i].skip) { > + sp[i].scrub_args.progress = sp[i].resumed->p; > + sp[i].stats = sp[i].resumed->stats; > + sp[i].ret = 0; > + sp[i].stats.finished = 1; > + continue; > + } > + devid = di_args[i].devid; > + gettimeofday(&tv, NULL); > + sp[i].stats.t_start = tv.tv_sec; > + ret = pthread_create(&t_devs[i], &t_attr, scrub_one_dev,&sp[i]);-HRM not checked scrub_one_dev()> + if (ret) { > + if (do_print) > + fprintf(stderr, "ERROR: creating " > + "scrub_one_dev[%llu] thread failed: " > + "%s\n", devid, strerror(ret)); > + err = 1; > + goto out; > + } > + } > + > + spc.fdmnt = fdmnt; > + spc.prg_fd = prg_fd; > + spc.do_record = do_record; > + spc.write_mutex = &spc_write_mutex; > + spc.shared_progress = sp; > + spc.fi = &fi_args; > + pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc); > +-HRM: Not checked: scrub_progress_cycle()> + err = 0; > + for (i = 0; i < fi_args.num_devices; ++i) { > + if (sp[i].skip) > + continue; > + devid = di_args[i].devid; > + ret = pthread_join(t_devs[i], NULL); > + if (ret) { > + if (do_print) > + fprintf(stderr, "ERROR: pthread_join failed " > + "for scrub_one_dev[%llu]: %s\n", devid, > + strerror(ret)); > + err++; > + continue; > + } > + if (sp[i].ret && sp[i].ioctl_errno == ENODEV) { > + if (do_print) > + fprintf(stderr, "WARNING: device %lld not " > + "present\n", devid); > + continue; > + } > + if (sp[i].ret && sp[i].ioctl_errno == ECANCELED) { > + err++; > + } else if (sp[i].ret) { > + if (do_print) > + fprintf(stderr, "ERROR: scrubbing %s failed " > + "for device id %lld (%s)\n", path, > + devid, strerror(sp[i].ioctl_errno)); > + err++; > + continue; > + } > + } > + > + if (do_print) { > + const char *append = "done"; > + if (!do_stats_per_dev) > + init_fs_stat(&fs_stat); > + for (i = 0; i < fi_args.num_devices; ++i) { > + if (do_stats_per_dev) { > + print_scrub_dev(&di_args[i], > + &sp[i].scrub_args.progress, > + print_raw, > + sp[i].ret ? "canceled" : "done", > + &sp[i].stats); > + } else { > + if (sp[i].ret) > + append = "canceled"; > + add_to_fs_stat(&sp[i].scrub_args.progress, > + &sp[i].stats, &fs_stat); > + } > + } > + if (!do_stats_per_dev) { > + printf("scrub %s for %s\n", append, fsid); > + print_fs_stat(&fs_stat, print_raw); > + } > + } > + > + pthread_cancel(t_prog); > + ret = pthread_join(t_prog, &terr);Does this need to happen before the output above? Is there a possible race between scrub_progress_cycle() and the stats gathering code here? (I''ve not looked at scrub_progress_cycle() in detail yet, so I don''t know).> + if (do_print && terr && terr != PTHREAD_CANCELED) { > + fprintf(stderr, "ERROR: recording progress " > + "failed: %s\n", strerror(-PTR_ERR(terr))); > + } > + > + if (do_record) { > + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, > + fi_args.num_devices);-HRM Not checked scrub_write_progress()> + if (ret && do_print) { > + fprintf(stderr, "ERROR: failed to record the result: " > + "%s\n", strerror(-ret)); > + } > + } > + > + scrub_handle_sigint_child(-1); > + > +out: > + free_history(past_scrubs); > + free(di_args); > + free(t_devs); > + free(sp); > + free(spc.progress); > + if (prg_fd > -1) { > + close(prg_fd); > + if (sock_path[0]) > + unlink(sock_path); > + } > + close(fdmnt); > + > + return !!err; > +} > + > +int do_scrub_start(int argc, char **argv) > +{ > + return scrub_start(argc, argv, 0); > +} > + > +int do_scrub_resume(int argc, char **argv) > +{ > + return scrub_start(argc, argv, 1); > +}[...] -- === Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk == PGP key: 515C238D from wwwkeys.eu.pgp.net or http://www.carfax.org.uk --- Welcome to Rivendell, Mr Anderson... ---
On Wed, Mar 30, 2011 at 06:53:09PM +0200, Jan Schmidt wrote:> - scrub commands added > - open_file_or_dir no longer static (needed by scrub.c) > > Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> > --- > Makefile | 4 ++-- > btrfs.c | 18 +++++++++++++++++- > btrfs_cmds.c | 3 ++- > btrfs_cmds.h | 5 +++++ > 4 files changed, 26 insertions(+), 4 deletions(-) > > diff --git a/Makefile b/Makefile > index 6e6f6c6..6630887 100644 > --- a/Makefile > +++ b/Makefile > @@ -37,8 +37,8 @@ all: version $(progs) manpages > version: > bash version.sh > > -btrfs: $(objects) btrfs.o btrfs_cmds.o > - gcc $(CFLAGS) -o btrfs btrfs.o btrfs_cmds.o \ > +btrfs: $(objects) btrfs.o btrfs_cmds.o scrub.o > + gcc -lpthread $(CFLAGS) -o btrfs btrfs.o btrfs_cmds.o scrub.o \ > $(objects) $(LDFLAGS) $(LIBS) > > btrfsctl: $(objects) btrfsctl.o > diff --git a/btrfs.c b/btrfs.c > index 46314cf..2dfa0b7 100644 > --- a/btrfs.c > +++ b/btrfs.c > @@ -89,12 +89,28 @@ static struct Command commands[] = { > }, > { do_df_filesystem, 1, > "filesystem df", "<path>\n" > - "Show space usage information for a mount point\n." > + "Show space usage information for a mount point." > }, > { do_balance, 1, > "filesystem balance", "<path>\n" > "Balance the chunks across the device." > }, > + { do_scrub_start, -1, > + "scrub start", "[-Bdqr] <path>|<device>\n" > + "Start a new scrub."Could you update this patch with detailed help for the various command-line options, now that we''ve got a field for it in struct Command? Hugo. -- === Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk == PGP key: 515C238D from wwwkeys.eu.pgp.net or http://www.carfax.org.uk --- There is no dark side to the Moon, really. As a matter of --- fact, it''s all dark.
On 10.07.2011 20:23, Hugo Mills wrote:> Yes, this is over three months after the initial posting, but since > nobody else has looked at it yet, and the patch is in my integration > stack...... thanks!> I''ve not reviewed the whole thing -- just the "scrub start" code so > far. I''ve removed the bits I''ve not checked from the file below.I rebased the old branch I found to your current integration branch and fixed up a most of what you mentioned. I''ll not send a new version out until after your complete review (or your statement that this is it or your statement that you would rather going on reviewing the revised version). Things I ripped out are accepted and corrected without resistance. Comments follow.> On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: > > No commit message at all?Didn''t know what to put there. Cover letter says it all. And as mentioned, this is the initial implementation.>> Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> >> --- >> scrub.c | 1568 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 1 files changed, 1568 insertions(+), 0 deletions(-) > > This is quite big to review in one lump... Is it possible to split > the patch into functional sections? (Add shared infrastructure, then > each of the four functions separately, maybe?)Thought about that, but it doesn''t make sense to me. It is the initial implementation. A lot of the code is shared, thus adding one lump and patching the patch with four small additional commits wouldn''t help much.>> diff --git a/scrub.c b/scrub.c >> new file mode 100644 >> index 0000000..22052ed >> --- /dev/null >> +++ b/scrub.c >> +#define SCRUB_DATA_FILE "/var/btrfs/scrub.status" >> +#define SCRUB_PROGRESS_SOCKET_PATH "/var/btrfs/scrub.progress" > > I''d suggest /var/lib/btrfs/[...] instead. Putting it in the top > level of /var seems a bit presumptuous (and contravenes the FHS).I wasn''t sure if I can expect /var/lib to be present anywhere btrfs could run. But I changed it to what you suggested.>> + printf("\ttotal bytes scrubbed: %s with %llu errors\n", >> + pretty_sizes(p->data_bytes_scrubbed + p->tree_bytes_scrubbed), >> + max(err_cnt, err_cnt2)); > > Memory leak: pretty_sizes() mallocs space for its result.Pah... In a user space function of a run-once utility right before it exits. But I fixed that one, just to please you :-)>> +static void init_fs_stat(struct scrub_fs_stat *fs_stat) >> +{ >> + memset(fs_stat, 0, sizeof(*fs_stat)); >> + fs_stat->s.finished = 2; > > What does 2 mean? ->s.finished seems to be a boolean everywhere > except here. Can you turn this value into a more descriptive #define? > Or just use 1?Good question. I guess I once wanted to distinguish really finished scrub runs from not-even-started ones. I changed it to 1 (which makes it much more likely we''ll need that distinction quite soon).>> +static int cancel_fd = -1; >> +static void scrub_sigint_record_progress(int signal) > > What does this function have to do with recording progress? Seems a > bit of a misnomer to me. (Call it scrub_sigint_cancel_scrub, maybe?)I added a comment and left the name unchanged.>> +{ >> + ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL); >> +} >> + >> +static int scrub_handle_sigint_parent(void) >> +{ >> + struct sigaction sa = { >> + .sa_handler = SIG_IGN, >> + .sa_flags = SA_RESTART, >> + }; >> + >> + return sigaction(SIGINT, &sa, NULL); >> +} >> + >> +static int scrub_handle_sigint_child(int fd) >> +{ >> + struct sigaction sa = { >> + .sa_handler = fd == -1 ? SIG_DFL : scrub_sigint_record_progress, >> + }; >> + >> + cancel_fd = fd; >> + return sigaction(SIGINT, &sa, NULL); >> +} >> + >> +static int _scrub_datafile(const char *fn_base, const char *fn_local, >> + const char *fn_tmp, char *datafile, int max) >> +{ >> + int ret; >> + >> + strncpy(datafile, fn_base, max); > > You need to put a zero byte at datafile[max], otherwise it could be > unterminated (if max <= strlen(fn_base)), and the strlen will then run > off the end of the string.Damn. strncpy is a mess. I want strlcpy. I Modified the code another way. I rather return an error than throwing away bytes and continue happily. strncpy third arg always - 1, thus we always have a 0 byte at the end of the buffer. I then compare strlen to the buffer size.>> + ret = strlen(datafile); >> + >> + if (ret + 1 >= max) >> + return -EOVERFLOW; > > This will never happen (if you put the zero terminator in) > >> + datafile[ret] = ''.''; >> + strncpy(datafile+ret+1, fn_local, max-ret-1); > > ... and add a zero byte here, too (or use strncat) > >> + ret = strlen(datafile); >> + >> + if (ret + 1 >= max) >> + return -EOVERFLOW; > > as above: won''t happen > >> + if (fn_tmp) { >> + datafile[ret] = ''_''; >> + strncpy(datafile+ret+1, fn_tmp, max-ret-1); > > ... and add a zero byte here (or use strncat) > >> + ret = strlen(datafile); >> + >> + if (ret >= max) >> + return -EOVERFLOW; >> + } >> + >> + return 0; >> +} >> + >> +static int _scrub_open_file(const char *datafile, int m) > > Just a niggle: Why the leading _ when other scrub-specific > functions don''t have it? (There''s about a dozen other such symbols -- > was there a criterion you used to decide which ones have _ and which > don''t?)Looking at the function names today, I think the criterion was "everything i/o based get''s a _", which is rubbish. I dropped all the ''_''s.>> +static struct scrub_file_record **scrub_read_file(int fd, int report_errors) >> +{ >> + int avail = 0; >> + int old_avail = 0; >> + char l[512]; >> + int state = 0; >> + int curr = -1; >> + int i = 0; >> + int j; >> + int ret; >> + int eof = 0; >> + int lineno = 0; >> + u64 version; >> + char empty_uuid[BTRFS_FSID_SIZE] = {0}; >> + struct scrub_file_record **p = NULL; >> + >> + if (fd < 0) >> + return ERR_PTR(-EINVAL); >> + >> +again: >> + old_avail = avail-i; >> + BUG_ON(old_avail < 0); >> + if (old_avail) >> + memmove(l, l+i, old_avail); >> + avail = read(fd, l+old_avail, sizeof(l)-old_avail); >> + if (avail == 0) { >> + eof = 1; >> + } >> + if (avail + old_avail == 0) { >> + if (curr >= 0 && >> + memcmp(p[curr]->fsid, empty_uuid, BTRFS_FSID_SIZE) == 0) { >> + p[curr] = NULL; >> + } else if (curr == -1) { >> + p = ERR_PTR(-ENODATA); >> + } >> + return p; >> + } >> + if (avail == -1) >> + return ERR_PTR(-errno); > > If avail == -1 (i.e. the read failed) and old_avail == 1 > (i.e. there was only one character left in the buffer), then that > triggers the code in the previous if statement (avail+old_avail == 0) > as well. > >> + avail += old_avail; >> + >> + i = 0; >> + while (i < avail) { >> + switch (state) { >> + case 0: /* start if file */ > of > >> + ret = _scrub_kvread(&i, >> + sizeof(SCRUB_FILE_VERSION_PREFIX)-1, avail, l, > > [1] Drop the colon from SCRUB_FILE_VERSION_PREFIX and leave out the > -1 here. > >> + SCRUB_FILE_VERSION_PREFIX, &version); >> + if (ret != 1) >> + _SCRUB_ILLEGAL; >> + if (version != atoll(SCRUB_FILE_VERSION)) >> + return ERR_PTR(-ENOTSUP); >> + state = 6; >> + continue; >> + case 1: /* start of line, alloc */ >> + if (!eof && !memchr(l+i, ''\n'', avail-i)) >> + goto again; > > This will cause problems (an infinite loop), I think, if there''s an > input line greater than 512 bytes in length -- you can''t find a line > ending, so you go back to "again", read in zero bytes because you''ve > not consumed anything in your buffer, and come back here to discover > that there''s no line ending...No, it will complain about a too long line. We go to "again", read 0 characters (which is fine, returns 0), then set the eof flag (which is kind of a bug) and stop processing. I.e., whenever we encounter a line longer than sizeof(l), we tell this fact to the user and stop processing for that file completely. I increased that buffer to 16k, which sounds big enough for quite some file format extensions.>> + ++lineno; >> + if (curr > -1 && memcmp(p[curr]->fsid, empty_uuid, >> + BTRFS_FSID_SIZE) == 0) { >> + state = 2; >> + continue; >> + } >> + ++curr; >> + p = realloc(p, (curr+2)*sizeof(*p)); >> + if (p) >> + p[curr] = malloc(sizeof(**p)); >> + if (!p || !p[curr]) >> + return ERR_PTR(-errno); >> + memset(p[curr], 0, sizeof(**p)); >> + p[curr+1] = NULL; >> + ++state; > > You probably need a comment here (and below, as appropriate) to > indicate that the lack of a continue or break is intended, and not a > bug. > >> + case 2: /* start of line, skip space */ >> + while (isspace(l[i]) && i<avail) { >> + if (l[i] == ''\n'') >> + ++lineno; >> + ++i; >> + } >> + if (i >= avail || (!eof && !memchr(l+i, ''\n'', avail-i))) >> + goto again; >> + ++state; >> + case 3: /* read fsid */ >> + if (i == avail) >> + continue; >> + for (j=0; l[i+j] != '':'' && i+j < avail; ++j) >> + ; >> + if (i+j+1 >= avail) >> + _SCRUB_ILLEGAL; > > Possibly a comment needed here to indicate that state 1 guarantees > a full line of text in the buffer, so hitting the end of the buffer > here is a fatal error. (It took me a while to work out why this case > was always an error).Comment added in "case 1", instead, because this holds true for each _SCRUB_INVALID (let''s call it invalid) in this block.>> + if (j != 36) >> + _SCRUB_ILLEGAL; >> + l[i+j] = ''\0''; >> + ret = uuid_parse(l+i, p[curr]->fsid); >> + if (ret) >> + _SCRUB_ILLEGAL; >> + i += j + 1; >> + ++state; >> + case 4: /* read dev id */ >> + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) >> + ; >> + if (!j || i+j+1 >= avail) > > j == 0 is clearer than !j here, IMO > >> + _SCRUB_ILLEGAL; >> + p[curr]->devid = atoll(&l[i]); >> + i += j + 1; > > Is there any reason that you couldn''t just use strtoull here? We > know that the string is terminated with a \n (by the guarantee of > state 1), so strtoull will always finish within the buffer.I just found it way easier to use atoll. We already know the first character really is a digit, so why bother with a more cumbersome function?>> + ++state; >> + case 5: /* read key/value pair */ >> + ret = _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, data_extents_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, tree_extents_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, data_bytes_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, tree_bytes_scrubbed, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, read_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, csum_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, verify_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, no_csum, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, csum_discards, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, super_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, malloc_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, uncorrectable_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, corrected_errors, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, last_physical, avail, >> + l, &p[curr]->p) || >> + _SCRUB_KVREAD(&i, finished, avail, >> + l, &p[curr]->stats) || >> + _SCRUB_KVREAD(&i, t_start, avail, >> + l, (u64*)&p[curr]->stats) || >> + _SCRUB_KVREAD(&i, t_resumed, avail, >> + l, (u64*)&p[curr]->stats) || >> + _SCRUB_KVREAD(&i, duration, avail, >> + l, (u64*)&p[curr]->stats) || >> + _SCRUB_KVREAD(&i, canceled, avail, >> + l, &p[curr]->stats); >> + if (ret != 1) >> + _SCRUB_ILLEGAL; > > If there''s a syntax error in the parser (i.e. the matched key is > not followed by a colon, or we run out of data), then _SCRUB_KVREAD > returns -1, which is converted to 1 by the ||, and the error is > dropped silently.Oops. Fixed by defining 0 to be the success return value for scrub_kvread (sounds better, anyway). The check then looks for ret != 0 instead.>> +static int scrub_fs_info(int fd, char *path, >> + struct btrfs_ioctl_fs_info_args *fi_args, >> + struct btrfs_ioctl_dev_info_args **di_ret) >> +{ >> + int ret = 0; >> + int ndevs = 0; >> + int i = 1; >> + struct btrfs_fs_devices* fs_devices_mnt = NULL; >> + struct btrfs_ioctl_dev_info_args *di_args; >> + char mp[BTRFS_PATH_NAME_MAX+1]; >> + >> + memset(fi_args, 0, sizeof(*fi_args)); >> + >> + ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); >> + if (ret && errno == EINVAL) { >> + /* path is no mounted btrfs. try if it''s a device */ >> + ret = check_mounted_where(fd, path, mp, sizeof(mp), >> + &fs_devices_mnt); >> + if (!ret) >> + return -EINVAL; >> + fi_args->num_devices = 1; > > Is this a valid assumption? What happens if I pass just one device > of a multi-device FS to "btrfs scrub start"?You tell scrub to scrub that one device, then. At least for raid1, this option makes sense.>> +static int scrub_start(int argc, char **argv, int resume) >> +{ >> + int fdmnt; >> + int prg_fd = -1; >> + int fdres = -1; >> + int ret; >> + pid_t pid; >> + int c; >> + int i; >> + int err = 0; > > This clashes with the macro err(). OK, I know the compiler''s clever > enough to disambiguate, but it leads to nastiness like [2], below.Okok. Macro is now ERR().>> + int print_raw = 0; >> + char *path; >> + int do_background = 1; >> + int do_wait = 0; >> + int do_print = 0; >> + int do_quiet = 0; >> + int do_record = 1; >> + int readonly = 0; >> + int do_stats_per_dev = 0; >> + int n_start = 0; >> + int n_skip = 0; >> + int n_resume = 0; >> + struct btrfs_ioctl_fs_info_args fi_args; >> + struct btrfs_ioctl_dev_info_args *di_args = NULL; >> + struct scrub_progress *sp = NULL; >> + struct scrub_fs_stat fs_stat; >> + struct timeval tv; >> + struct sockaddr_un addr = { >> + .sun_family = AF_UNIX, >> + }; >> + pthread_t *t_devs = NULL; >> + pthread_t t_prog; >> + pthread_attr_t t_attr; >> + struct scrub_file_record **past_scrubs = NULL; >> + struct scrub_file_record *last_scrub = NULL; >> + char *datafile = strdup(SCRUB_DATA_FILE); > > This is never freed. > >> + char fsid[37]; > > Magic number. is there a #define in libuuid for this value?At least the man page of uuid_parse clearly states uuids have 36 bytes plus a \0 in printf format. uuid/uuid.h doesn''t contain such a constant. But volumes.c, print-tree.c and others do it with 37, too.>> + char sock_path[BTRFS_PATH_NAME_MAX+1] = ""; >> + struct scrub_progress_cycle spc; >> + pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER; >> + void *terr; >> + u64 devid; >> + >> + optind = 1; >> + while ((c = getopt(argc, argv, "BdqrR")) != -1) { >> + switch(c) { >> + case ''B'': >> + do_background = 0; >> + do_wait = 1; >> + do_print = 1; >> + break; >> + case ''d'': >> + do_stats_per_dev = 1; >> + break; >> + case ''q'': >> + do_quiet = 1; >> + break; >> + case ''r'': >> + readonly = 1; >> + break; >> + case ''R'': >> + print_raw = 1; >> + break; >> + case ''?'': >> + default: >> + fprintf(stderr, "ERROR: scrub args invalid.\n" >> + " -B do not background (implies -W)\n" > > What''s -W?A development option that was removed before submitting v2 and is removed for v3 now from that comment above as well.>> + " -d stats per device (-B only)\n" >> + " -q quiet\n" >> + " -r read only mode\n"); >> + return 1; >> + } >> + } >> + >> + /* try to catch most error cases before forking */ >> + >> + spc.progress = NULL; >> + if (do_quiet && do_print) >> + do_print = 0; >> + >> + if (mkdir_p(datafile)) { >> + err(!do_quiet, "WARNING: cannot create scrub data " >> + "file, mkdir %s failed: %s. Status recording " >> + "disabled\n", datafile, strerror(errno)); >> + do_record = 0; >> + } >> + >> + path = argv[optind]; > > No bounds check: > > hrm@ruthven:btrfs-progs-unstable $ ./btrfs scrub start -B > ERROR: can''t access ''(null)'' > >> + fdmnt = open_file_or_dir(path); >> + if (fdmnt < 0) { >> + err(!do_quiet, "ERROR: can''t access ''%s''\n", path); >> + return 12; >> + } >> + >> + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); >> + if (ret) { >> + err(!do_quiet, "ERROR: getting dev info for scrub failed: " >> + "%s\n", strerror(-ret)); >> + err = 1; >> + goto out; >> + } >> + if (!fi_args.num_devices) { >> + err(!do_quiet, "ERROR: no devices found\n"); >> + err = 1; >> + goto out; >> + } >> + >> + uuid_unparse(fi_args.fsid, fsid); >> + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); >> + if (fdres < 0 && fdres != -ENOENT) { >> + err(!do_quiet, "WARNING: failed to open status file: " >> + "%s\n", strerror(-fdres)); >> + } else if (fdres >= 0) { >> + past_scrubs = scrub_read_file(fdres, !do_quiet); >> + if (IS_ERR(past_scrubs)) >> + err(!do_quiet, "WARNING: failed to read status file: " >> + "%s\n", strerror(-PTR_ERR(past_scrubs))); >> + close(fdres); >> + } >> + >> + t_devs = malloc(fi_args.num_devices*sizeof(*t_devs)); >> + sp = calloc(1, fi_args.num_devices*sizeof(*sp)); > > Shouldn''t that be calloc(fi_args.num_devices, sizeof(*sp)) ? (OK, > it doesn''t make any particular difference, but it just seems odd to > keep a dog and bark yourself).Sounds quite reasonable.>> + spc.progress = calloc(1, fi_args.num_devices*2*sizeof(*spc.progress)); > > Woof! (And why do we need twice as many progress markers as devices?)We need them for consistency reasons. Both are used alternately when progress is recorded to make sure we never write a half baked record.>> + if (!t_devs || !sp || !spc.progress) { >> + err(!do_quiet, "ERROR: scrub failed: %s", strerror(errno)); >> + err = 1; > > [2] Eugh. Calling what looks like a function, then assigning a > value to it. Can you call the variable something else? (Or make the > macro a more obvious macro: ERR() say?) > >> + goto out; >> + } >> + >> + ret = pthread_attr_init(&t_attr); >> + if (ret) { >> + err(!do_quiet, "ERROR: pthread_attr_init failed: %s\n", >> + strerror(ret)); >> + err = 1; >> + goto out; >> + } >> + >> + for (i = 0; i < fi_args.num_devices; ++i) { >> + devid = di_args[i].devid; >> + ret = pthread_mutex_init(&sp[i].progress_mutex, NULL); >> + if (ret) { >> + err(!do_quiet, "ERROR: pthread_mutex_init failed: " >> + "%s\n", strerror(ret)); >> + err = 1; >> + goto out; >> + } >> + last_scrub = last_dev_scrub(past_scrubs, devid); >> + sp[i].scrub_args.devid = devid; >> + sp[i].fd = fdmnt; >> + if (resume && last_scrub && (last_scrub->stats.canceled || >> + !last_scrub->stats.finished)) { >> + ++n_resume; >> + sp[i].scrub_args.start = last_scrub->p.last_physical; >> + sp[i].resumed = last_scrub; >> + } else if (resume) { >> + ++n_skip; >> + sp[i].skip = 1; >> + sp[i].resumed = last_scrub; >> + continue; >> + } else { >> + ++n_start; >> + sp[i].scrub_args.start = 0ll; >> + sp[i].resumed = NULL; >> + } >> + sp[i].skip = 0; >> + sp[i].scrub_args.end = (u64)-1ll; >> + sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0; >> + } >> + >> + if (!n_start && !n_resume) { >> + if (!do_quiet) >> + printf("scrub: nothing to resume for %s, fsid %s\n", >> + path, fsid); >> + err = 0; >> + goto out; >> + } >> + >> + ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0); >> + while (ret != -1) { >> + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, >> + NULL, sock_path, sizeof(sock_path)); >> + /* ignore EOVERFLOW, as strncpy follows anyway */ > > The name in sock_path could still be truncated on -EOVERFLOW, > though. Is that always safe?Yeah, no, that wasn''t nice. It''s changed now.>> + strncpy(addr.sun_path, sock_path, >> + sizeof(addr.sun_path)-1); >> + ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); >> + if (ret != -1 || errno != EADDRINUSE) >> + break; > > If we failed to bind because the address was in use, is there much > point in trying to connect to the socket here?Had to think about those lines for a moment, so I''ll add a comment. It''s code that cares for an orphan socket file: try to connect, if that''s working, scrub must be running, so error out. If it doesn''t, unlink the file and loop.>> + ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr)); >> + if (!ret || errno != ECONNREFUSED) { >> + fprintf(stderr, "ERROR: scrub already running\n"); >> + close(prg_fd); >> + goto out; >> + } >> + ret = unlink(sock_path); > > Under the right (wrong) set of circumstances, isn''t this loop going > to busy-wait?In general, the loop can''t execute more than twice. If you manage to recreate that socket file in exactly the same moment over and over again, that''s possible, yes. But you would have to do that externally on your own, this can''t happen if you only use btrfs scrub.>> + } >> + if (ret != -1) { >> + ret = listen(prg_fd, 100); >> + } >> + if (ret == -1) { >> + err(!do_quiet, "WARNING: failed to open the progress status " >> + "socket at %s: %s. Progress cannot be queried\n", >> + sock_path[0] ? sock_path : SCRUB_PROGRESS_SOCKET_PATH, >> + strerror(errno)); >> + if (prg_fd != -1) { >> + close(prg_fd); >> + prg_fd = -1; >> + if (sock_path[0]) >> + unlink(sock_path); >> + } >> + } >> + >> + if (do_record) { >> + /* write all-zero progress file for a start */ >> + ret = scrub_write_progress(&spc_write_mutex, fsid, sp, >> + fi_args.num_devices); > > -HRM: Unchecked scrub_write_progress > >> + if (ret) { >> + err(!do_quiet, "WARNING: failed to write the progress " >> + "status file: %s. Status recording disabled\n", >> + strerror(-ret)); >> + do_record = 0; >> + } >> + } >> + >> + if (do_background) { >> + pid = fork(); >> + if (pid == -1) { >> + err(!do_quiet, "ERROR: cannot scrub, fork failed: " >> + "%s\n", strerror(errno)); >> + err = 1; >> + goto out; >> + } >> + >> + if (pid) { >> + int stat; >> + scrub_handle_sigint_parent(); >> + if (!do_quiet) >> + printf("scrub %s on %s, fsid %s (pid=%d)\n", >> + n_start ? "started" : "resumed", >> + path, fsid, pid); >> + if (!do_wait) { >> + err = 0; >> + goto out; >> + } >> + ret = wait(&stat); >> + if (ret != pid) { >> + err(!do_quiet, "ERROR: wait failed: (ret=%d) " >> + "%s\n", ret, strerror(errno)); >> + err = 1; >> + goto out; >> + } >> + if (!WIFEXITED(stat) || WEXITSTATUS(stat)) { >> + err(!do_quiet, "ERROR: scrub process failed\n"); >> + err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1; >> + goto out; >> + } >> + err = 0; >> + goto out; >> + } >> + } >> + >> + scrub_handle_sigint_child(fdmnt); >> + >> + for (i = 0; i < fi_args.num_devices; ++i) { >> + if (sp[i].skip) { >> + sp[i].scrub_args.progress = sp[i].resumed->p; >> + sp[i].stats = sp[i].resumed->stats; >> + sp[i].ret = 0; >> + sp[i].stats.finished = 1; >> + continue; >> + } >> + devid = di_args[i].devid; >> + gettimeofday(&tv, NULL); >> + sp[i].stats.t_start = tv.tv_sec; >> + ret = pthread_create(&t_devs[i], &t_attr, scrub_one_dev,&sp[i]); > > -HRM not checked scrub_one_dev() > >> + if (ret) { >> + if (do_print) >> + fprintf(stderr, "ERROR: creating " >> + "scrub_one_dev[%llu] thread failed: " >> + "%s\n", devid, strerror(ret)); >> + err = 1; >> + goto out; >> + } >> + } >> + >> + spc.fdmnt = fdmnt; >> + spc.prg_fd = prg_fd; >> + spc.do_record = do_record; >> + spc.write_mutex = &spc_write_mutex; >> + spc.shared_progress = sp; >> + spc.fi = &fi_args; >> + pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc); >> + > > -HRM: Not checked: scrub_progress_cycle() > >> + err = 0; >> + for (i = 0; i < fi_args.num_devices; ++i) { >> + if (sp[i].skip) >> + continue; >> + devid = di_args[i].devid;HRM: mark1>> + ret = pthread_join(t_devs[i], NULL); >> + if (ret) { >> + if (do_print) >> + fprintf(stderr, "ERROR: pthread_join failed " >> + "for scrub_one_dev[%llu]: %s\n", devid, >> + strerror(ret)); >> + err++; >> + continue; >> + } >> + if (sp[i].ret && sp[i].ioctl_errno == ENODEV) { >> + if (do_print) >> + fprintf(stderr, "WARNING: device %lld not " >> + "present\n", devid); >> + continue; >> + } >> + if (sp[i].ret && sp[i].ioctl_errno == ECANCELED) { >> + err++; >> + } else if (sp[i].ret) { >> + if (do_print) >> + fprintf(stderr, "ERROR: scrubbing %s failed " >> + "for device id %lld (%s)\n", path, >> + devid, strerror(sp[i].ioctl_errno)); >> + err++; >> + continue; >> + } >> + } >> + >> + if (do_print) { >> + const char *append = "done"; >> + if (!do_stats_per_dev) >> + init_fs_stat(&fs_stat); >> + for (i = 0; i < fi_args.num_devices; ++i) { >> + if (do_stats_per_dev) { >> + print_scrub_dev(&di_args[i], >> + &sp[i].scrub_args.progress, >> + print_raw, >> + sp[i].ret ? "canceled" : "done", >> + &sp[i].stats); >> + } else { >> + if (sp[i].ret) >> + append = "canceled"; >> + add_to_fs_stat(&sp[i].scrub_args.progress, >> + &sp[i].stats, &fs_stat); >> + } >> + } >> + if (!do_stats_per_dev) { >> + printf("scrub %s for %s\n", append, fsid); >> + print_fs_stat(&fs_stat, print_raw); >> + } >> + } >> + >> + pthread_cancel(t_prog); >> + ret = pthread_join(t_prog, &terr); > > Does this need to happen before the output above? Is there aNo, it doesn''t. Look at the line below the "mark1" mark above. (What a sentence!) There we join all the worker threads and print their results. After succeeding, all workers must be done, so the progress thread can''t do anything useful anymore.> possible race between scrub_progress_cycle() and the stats gathering > code here? (I''ve not looked at scrub_progress_cycle() in detail yet, > so I don''t know).As I see it, no. In addition to the fact that there won''t come any fresh progress notification at this state, we have those progress fields twice to ensure consistency together with the progress_mutex taken by scrub_progress_cycle. -Jan -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
OK, here''s the remainder of my comments for this file. Not much for this bit -- just one comment about locking, a reminder, and an observation. On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: [...]> +static int _scrub_write_buf(int fd, const void *data, int len) > +{ > + int ret; > + ret = write(fd, data, len); > + return ret - len; > +} > + > +static int _scrub_writev(int fd, char *buf, int max, const char *fmt, ...) > + __attribute__ ((format (printf, 4, 5))); > +static int _scrub_writev(int fd, char *buf, int max, const char *fmt, ...) > +{ > + int ret; > + va_list args; > + > + va_start(args, fmt); > + ret = vsnprintf(buf, max, fmt, args); > + va_end(args); > + if (ret >= max) > + return ret - max; > + return _scrub_write_buf(fd, buf, ret); > +} > + > +#define _SCRUB_SUM(dest, data, name) dest->scrub_args.progress.name = \ > + data->resumed->p.name + data->scrub_args.progress.name > +static struct scrub_progress *_scrub_resumed_stats(struct scrub_progress *data, > + struct scrub_progress *dest) > +{ > + if (!data->resumed || data->skip) > + return data; > + > + _SCRUB_SUM(dest, data, data_extents_scrubbed); > + _SCRUB_SUM(dest, data, tree_extents_scrubbed); > + _SCRUB_SUM(dest, data, data_bytes_scrubbed); > + _SCRUB_SUM(dest, data, tree_bytes_scrubbed); > + _SCRUB_SUM(dest, data, read_errors); > + _SCRUB_SUM(dest, data, csum_errors); > + _SCRUB_SUM(dest, data, verify_errors); > + _SCRUB_SUM(dest, data, no_csum); > + _SCRUB_SUM(dest, data, csum_discards); > + _SCRUB_SUM(dest, data, super_errors); > + _SCRUB_SUM(dest, data, malloc_errors); > + _SCRUB_SUM(dest, data, uncorrectable_errors); > + _SCRUB_SUM(dest, data, corrected_errors); > + _SCRUB_SUM(dest, data, last_physical); > + dest->stats.canceled = data->stats.canceled; > + dest->stats.finished = data->stats.finished; > + dest->stats.t_resumed = data->stats.t_start; > + dest->stats.t_start = data->resumed->stats.t_start; > + dest->stats.duration = data->resumed->stats.duration + > + data->stats.duration; > + dest->scrub_args.devid = data->scrub_args.devid; > + return dest; > +} > + > +#define _SCRUB_KVWRITE(fd, buf, name, use) \ > + _scrub_kvwrite(fd, buf, sizeof(buf), #name, \ > + use->scrub_args.progress.name) > +#define _SCRUB_KVWRITE_STATS(fd, buf, name, use) \ > + _scrub_kvwrite(fd, buf, sizeof(buf), #name, \ > + use->stats.name) > +static int _scrub_kvwrite(int fd, char *buf, int max, > + const char *key, u64 val) > +{ > + return _scrub_writev(fd, buf, max, "|%s:%lld", key, val); > +} > + > +static int scrub_write_file(int fd, const char *fsid, > + struct scrub_progress* data, int n) > +{ > + int ret = 0; > + int i; > + char buf[1024]; > + struct scrub_progress local; > + struct scrub_progress *use; > + > + if (n < 1) { > + return -EINVAL; > + } > + > + ret = _scrub_write_buf(fd, SCRUB_FILE_VERSION_PREFIX SCRUB_FILE_VERSION > + "\n", sizeof(SCRUB_FILE_VERSION_PREFIX)-1 > + + sizeof(SCRUB_FILE_VERSION)-1 + 1); > + if (ret) > + return -EOVERFLOW; > + > + for (i=0; i<n; ++i) { > + use = _scrub_resumed_stats(&data[i], &local); > + if (_scrub_write_buf(fd, fsid, strlen(fsid)) || > + _scrub_write_buf(fd, ":", 1) || > + _scrub_writev(fd, buf, sizeof(buf), "%lld", > + use->scrub_args.devid) || > + _scrub_write_buf(fd, buf, ret) || > + _SCRUB_KVWRITE(fd, buf, data_extents_scrubbed, use) || > + _SCRUB_KVWRITE(fd, buf, tree_extents_scrubbed, use) || > + _SCRUB_KVWRITE(fd, buf, data_bytes_scrubbed, use) || > + _SCRUB_KVWRITE(fd, buf, tree_bytes_scrubbed, use) || > + _SCRUB_KVWRITE(fd, buf, read_errors, use) || > + _SCRUB_KVWRITE(fd, buf, csum_errors, use) || > + _SCRUB_KVWRITE(fd, buf, verify_errors, use) || > + _SCRUB_KVWRITE(fd, buf, no_csum, use) || > + _SCRUB_KVWRITE(fd, buf, csum_discards, use) || > + _SCRUB_KVWRITE(fd, buf, super_errors, use) || > + _SCRUB_KVWRITE(fd, buf, malloc_errors, use) || > + _SCRUB_KVWRITE(fd, buf, uncorrectable_errors, use) || > + _SCRUB_KVWRITE(fd, buf, corrected_errors, use) || > + _SCRUB_KVWRITE(fd, buf, last_physical, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, t_start, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, t_resumed, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, duration, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, canceled, use) || > + _SCRUB_KVWRITE_STATS(fd, buf, finished, use) || > + _scrub_write_buf(fd, "\n", 1)) { > + return -EOVERFLOW; > + } > + } > + > + return 0; > +} > +#undef _SCRUB_KVWRITE > + > +static int scrub_write_progress(pthread_mutex_t *m, const char *fsid, > + struct scrub_progress* data, int n) > +{ > + int ret; > + int fd; > + int old; > + > + ret = pthread_mutex_lock(m); > + if (ret) { > + ret = -errno; > + goto out; > + } > + > + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old);Probably not massively important, but you don''t check for the return value of this call or its counterpart at the end of this function.> + fd = scrub_open_file_w(SCRUB_DATA_FILE, fsid, "tmp"); > + if (fd < 0) { > + ret = fd; > + goto out; > + } > + ret = scrub_write_file(fd, fsid, data, n); > + if (ret) > + goto out;This leaks the file handle fd.> + ret = scrub_rename_file(SCRUB_DATA_FILE, fsid, "tmp"); > + if (ret) > + goto out;As does this.> + ret = close(fd); > + if (ret) { > + ret = -errno; > + goto out; > + } > + > +out: > + if (ret) { > + pthread_mutex_unlock(m); > + } else { > + ret = pthread_mutex_unlock(m); > + if (ret) > + ret = -errno; > + } > + > + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old); > + > + return ret; > +} > + > +static void *scrub_one_dev(void *ctx) > +{ > + struct scrub_progress *sp = ctx; > + int ret; > + struct timeval tv; > + > + sp->stats.canceled = 0; > + sp->stats.duration = 0; > + sp->stats.finished = 0; > + > + ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args); > + gettimeofday(&tv, NULL); > + sp->ret = ret; > + sp->stats.duration = tv.tv_sec - sp->stats.t_start; > + sp->stats.canceled = !!ret; > + sp->ioctl_errno = errno; > + ret = pthread_mutex_lock(&sp->progress_mutex); > + if (ret) > + return ERR_PTR(-errno); > + sp->stats.finished = 1; > + ret = pthread_mutex_unlock(&sp->progress_mutex);If you downgrade .finished to a plain int, rather than a u64, then is this locking actually be needed? You have one place where the lock is held to write a single value (which is atomic for an int, IIRC), and you have another place where you hold the lock to read and compare it. I don''t see any problem with removing the lock for that.> + if (ret) > + return ERR_PTR(-errno); > + > + > + return NULL; > +} > + > +static void *progress_one_dev(void *ctx) > +{ > + struct scrub_progress *sp = ctx; > + > + sp->ret = ioctl(sp->fd, BTRFS_IOC_SCRUB_PROGRESS, &sp->scrub_args); > + sp->ioctl_errno = errno; > + > + return NULL; > +} > + > +static void *scrub_progress_cycle(void *ctx) > +{ > + int ret; > + int i; > + char fsid[37]; > + struct scrub_progress *sp; > + struct scrub_progress *sp_last; > + struct scrub_progress *sp_shared; > + struct timeval tv; > + struct scrub_progress_cycle *spc = ctx; > + int ndev = spc->fi->num_devices; > + int this = 1; > + int last = 0; > + int peer_fd = -1; > + struct pollfd accept_poll_fd = { > + .fd = spc->prg_fd, > + .events = POLLIN, > + .revents = 0, > + }; > + struct pollfd write_poll_fd = { > + .events = POLLOUT, > + .revents = 0, > + }; > + struct sockaddr_un peer; > + socklen_t peer_size = sizeof(peer); > + > + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &ret); > + uuid_unparse(spc->fi->fsid, fsid); > + > + for (i=0; i<ndev; ++i) { > + sp = &spc->progress[i]; > + sp_last = &spc->progress[i+ndev]; > + sp_shared = &spc->shared_progress[i]; > + sp->scrub_args.devid = sp_last->scrub_args.devid > + sp_shared->scrub_args.devid; > + sp->fd = sp_last->fd = spc->fdmnt; > + sp->stats.t_start = sp_last->stats.t_start > + sp_shared->stats.t_start; > + sp->resumed = sp_last->resumed = sp_shared->resumed; > + sp->skip = sp_last->skip = sp_shared->skip; > + sp->stats.finished = sp_last->stats.finished > + sp_shared->stats.finished; > + } > + > + while (1) { > + ret = poll(&accept_poll_fd, 1, 5*1000); > + if (ret == -1) > + return ERR_PTR(-errno); > + if (ret) > + peer_fd = accept(spc->prg_fd, (struct sockaddr *)&peer, > + &peer_size); > + gettimeofday(&tv, NULL); > + this = (this+1)%2; > + last = (last+1)%2; > + for (i=0; i<ndev; ++i) { > + sp = &spc->progress[this*ndev+i]; > + sp_last = &spc->progress[last*ndev+i]; > + sp_shared = &spc->shared_progress[i]; > + if (sp->stats.finished) { > + continue; > + } > + progress_one_dev(sp); > + sp->stats.duration = tv.tv_sec - sp->stats.t_start; > + if (!sp->ret) > + continue; > + if (sp->ioctl_errno != ENOTCONN && > + sp->ioctl_errno != ENODEV) > + return ERR_PTR(-sp->ioctl_errno); > + /* > + * scrub finished or device removed, check the > + * finished flag. if unset, just use the last > + * result we got for the current write and go > + * on. flag should be set on next cycle, then. > + */ > + ret = pthread_mutex_lock(&sp_shared->progress_mutex); > + if (ret) > + return ERR_PTR(-errno); > + if (!sp_shared->stats.finished) { > + ret = pthread_mutex_unlock( > + &sp_shared->progress_mutex); > + if (ret) > + return ERR_PTR(-errno); > + memcpy(sp, sp_last, sizeof(*sp)); > + continue; > + } > + ret = pthread_mutex_unlock(&sp_shared->progress_mutex); > + if (ret) > + return ERR_PTR(-errno); > + memcpy(sp, sp_shared, sizeof(*sp)); > + memcpy(sp_last, sp_shared, sizeof(*sp)); > + } > + if (peer_fd != -1) { > + write_poll_fd.fd = peer_fd; > + ret = poll(&write_poll_fd, 1, 0); > + if (ret == -1) > + return ERR_PTR(-errno); > + if (ret) { > + ret = scrub_write_file( > + peer_fd, fsid, > + &spc->progress[this*ndev], ndev); > + if (ret) > + return ERR_PTR(ret); > + } > + close(peer_fd); > + peer_fd = -1; > + } > + if (!spc->do_record) > + continue; > + ret = scrub_write_progress(spc->write_mutex, fsid, > + &spc->progress[this*ndev], ndev); > + if (ret) > + return ERR_PTR(ret); > + } > +}[...]> +int do_scrub_cancel(int argc, char **argv) > +{ > + char *path = argv[1]; > + int ret; > + int fdmnt; > + int err; > + char mp[BTRFS_PATH_NAME_MAX+1]; > + struct btrfs_fs_devices* fs_devices_mnt = NULL; > + > + fdmnt = open_file_or_dir(path); > + if (fdmnt < 0) { > + fprintf(stderr, "ERROR: scrub cancel failed\n"); > + return 12; > + } > + > +again: > + ret = ioctl(fdmnt, BTRFS_IOC_SCRUB_CANCEL, NULL); > + err = errno; > + close(fdmnt); > + > + if (ret && err == EINVAL) { > + /* path is no mounted btrfs. try if it''s a device */ > + ret = check_mounted_where(fdmnt, path, mp, sizeof(mp), > + &fs_devices_mnt); > + close(fdmnt); > + if (ret) { > + fdmnt = open_file_or_dir(mp); > + if (fdmnt >= 0) { > + path = mp; > + goto again; > + } > + } > + } > + > + if (ret) { > + fprintf(stderr, "ERROR: scrub cancel failed on %s: %s\n", path, > + err == ENOTCONN ? "not running" : strerror(errno)); > + return 1; > + } > + > + printf("scrub cancelled\n"); > + > + return 0; > +} > + > +int do_scrub_status(int argc, char **argv) > +{ > + > + char *path; > + struct btrfs_ioctl_fs_info_args fi_args; > + struct btrfs_ioctl_dev_info_args *di_args = NULL; > + struct scrub_file_record **past_scrubs = NULL; > + struct scrub_file_record *last_scrub; > + struct scrub_fs_stat fs_stat; > + struct sockaddr_un addr = { > + .sun_family = AF_UNIX, > + }; > + int ret; > + int fdmnt; > + int i; > + optind = 1; > + int print_raw = 0; > + int do_stats_per_dev = 0; > + char c; > + char fsid[37]; > + int fdres = -1; > + int err = 0; > + > + while ((c = getopt(argc, argv, "dR")) != -1) { > + switch(c) { > + case ''d'': > + do_stats_per_dev = 1; > + break; > + case ''R'': > + print_raw = 1; > + break; > + case ''?'': > + default: > + fprintf(stderr, "ERROR: scrub status args invalid.\n" > + " -d stats per device\n"); > + return 1; > + } > + } > + > + path = argv[optind]; > + > + fdmnt = open_file_or_dir(path); > + if (fdmnt < 0) { > + fprintf(stderr, "ERROR: can''t access to ''%s''\n", path); > + return 12; > + } > + > + ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); > + if (ret) { > + fprintf(stderr, "ERROR: getting dev info for scrub failed: " > + "%s\n", strerror(-ret)); > + err = 1; > + goto out; > + } > + if (!fi_args.num_devices) { > + fprintf(stderr, "ERROR: no devices found\n"); > + err = 1; > + goto out; > + } > + > + uuid_unparse(fi_args.fsid, fsid); > + > + fdres = socket(AF_UNIX, SOCK_STREAM, 0); > + if (fdres == -1) { > + fprintf(stderr, "ERROR: failed to create socket to " > + "receive progress information: %s\n", > + strerror(errno)); > + err = 1; > + goto out; > + } > + _scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, > + NULL, addr.sun_path, sizeof(addr.sun_path)-1); > + /* ignore EOVERFLOW, just use shorter name and hope for the best */Same comment as in the previous mail about ignoring EOVERFLOW in this code...> + ret = connect(fdres, (struct sockaddr *)&addr, sizeof(addr)); > + if (ret == -1) { > + fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); > + if (fdres < 0 && fdres != -ENOENT) { > + fprintf(stderr, "WARNING: failed to open status file: " > + "%s\n", strerror(-fdres)); > + err = 1; > + goto out; > + } > + } > + > + if (fdres >= 0) { > + past_scrubs = scrub_read_file(fdres, 1); > + if (IS_ERR(past_scrubs)) > + fprintf(stderr, "WARNING: failed to read status: %s\n", > + strerror(-PTR_ERR(past_scrubs))); > + } > + > + printf("scrub status for %s\n", fsid); > + > + /* > + * TODO: rather communicate with scrub process instead of > + * dumping the file stats for instant results > + */ > + if (do_stats_per_dev) { > + for (i = 0; i < fi_args.num_devices; ++i) { > + last_scrub = last_dev_scrub(past_scrubs, > + di_args[i].devid); > + if (!last_scrub) { > + print_scrub_dev(&di_args[i], NULL, print_raw, > + NULL, NULL); > + continue; > + } > + print_scrub_dev(&di_args[i], &last_scrub->p, print_raw, > + last_scrub->stats.finished ? > + "history" : "status", > + &last_scrub->stats); > + } > + } else { > + init_fs_stat(&fs_stat); > + for (i = 0; i < fi_args.num_devices; ++i) { > + last_scrub = last_dev_scrub(past_scrubs, > + di_args[i].devid); > + if (!last_scrub) > + continue; > + add_to_fs_stat(&last_scrub->p, &last_scrub->stats, > + &fs_stat); > + } > + print_fs_stat(&fs_stat, print_raw); > + } > + > +out: > + free_history(past_scrubs); > + free(di_args); > + close(fdmnt); > + if (fdres > -1) > + close(fdres); > + > + return err; > +}Hugo. -- === Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk == PGP key: 515C238D from wwwkeys.eu.pgp.net or http://www.carfax.org.uk --- There is no dark side to the Moon, really. As a matter of --- fact, it''s all dark.
On Mon, Jul 11, 2011 at 04:29:24PM +0200, Jan Schmidt wrote:> On 10.07.2011 20:23, Hugo Mills wrote: > > Yes, this is over three months after the initial posting, but since > > nobody else has looked at it yet, and the patch is in my integration > > stack... > > ... thanks! > > > I''ve not reviewed the whole thing -- just the "scrub start" code so > > far. I''ve removed the bits I''ve not checked from the file below. > > I rebased the old branch I found to your current integration branch and > fixed up a most of what you mentioned. I''ll not send a new version out > until after your complete review (or your statement that this is it or > your statement that you would rather going on reviewing the revised > version).Thanks. The other half has just gone out (with few comments).> Things I ripped out are accepted and corrected without resistance. > Comments follow.Only a couple of rejoinders below.> > On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote:[...]> >> + case 4: /* read dev id */ > >> + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) > >> + ; > >> + if (!j || i+j+1 >= avail) > > > > j == 0 is clearer than !j here, IMO > > > >> + _SCRUB_ILLEGAL; > >> + p[curr]->devid = atoll(&l[i]); > >> + i += j + 1; > > > > Is there any reason that you couldn''t just use strtoull here? We > > know that the string is terminated with a \n (by the guarantee of > > state 1), so strtoull will always finish within the buffer. > > I just found it way easier to use atoll. We already know the first > character really is a digit, so why bother with a more cumbersome function?Ah, my mistake for not being clearer, I think: I was talking about the for loop at the head of the state 4 code as well. That only exists in order to find the end of the number read in by atoll, and strtoull would do that for you. [...]> >> + char fsid[37]; > > > > Magic number. is there a #define in libuuid for this value? > > At least the man page of uuid_parse clearly states uuids have 36 bytes > plus a \0 in printf format. uuid/uuid.h doesn''t contain such a constant. > But volumes.c, print-tree.c and others do it with 37, too.OK, if that''s conventional (and not defined in uuid.h), then go with the magic number. Hugo. -- === Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk == PGP key: 515C238D from wwwkeys.eu.pgp.net or http://www.carfax.org.uk --- "I will not be pushed, filed, stamped, indexed, briefed, --- debriefed or numbered. My life is my own."
On 11.07.2011 22:45, Hugo Mills wrote:> OK, here''s the remainder of my comments for this file. Not much for > this bit -- just one comment about locking, a reminder, and an > observation.Again, I ripped out the bits I simply corrected. My comments below.> [...] > >> +static int scrub_write_progress(pthread_mutex_t *m, const char *fsid, >> + struct scrub_progress* data, int n) >> +{ >> + int ret; >> + int fd; >> + int old; >> + >> + ret = pthread_mutex_lock(m); >> + if (ret) { >> + ret = -errno; >> + goto out; >> + } >> + >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); > > Probably not massively important, but you don''t check for the > return value of this call or its counterpart at the end of this > function.pthread_* return values where wrong throughout the code. Good that you pointed at this one. It''s all fixed now.> [...] > >> +static void *scrub_one_dev(void *ctx) >> +{ >> + struct scrub_progress *sp = ctx; >> + int ret; >> + struct timeval tv; >> + >> + sp->stats.canceled = 0; >> + sp->stats.duration = 0; >> + sp->stats.finished = 0; >> + >> + ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args); >> + gettimeofday(&tv, NULL); >> + sp->ret = ret; >> + sp->stats.duration = tv.tv_sec - sp->stats.t_start; >> + sp->stats.canceled = !!ret; >> + sp->ioctl_errno = errno; >> + ret = pthread_mutex_lock(&sp->progress_mutex); >> + if (ret) >> + return ERR_PTR(-errno); >> + sp->stats.finished = 1; >> + ret = pthread_mutex_unlock(&sp->progress_mutex); > > If you downgrade .finished to a plain int, rather than a u64, then > is this locking actually be needed? You have one place where the lock > is held to write a single value (which is atomic for an int, IIRC), > and you have another place where you hold the lock to read and compare > it. I don''t see any problem with removing the lock for that.Conclusion first: I want to stick with the mutex. My reasoning: - this is by no means time critical code - the mutexes do the memory barriers required for synchronization - using int instead of u64 would complicate the kvread macros Thanks, -Jan -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 11.07.2011 22:57, Hugo Mills wrote:> On Mon, Jul 11, 2011 at 04:29:24PM +0200, Jan Schmidt wrote: >> On 10.07.2011 20:23, Hugo Mills wrote: >>> Yes, this is over three months after the initial posting, but since >>> nobody else has looked at it yet, and the patch is in my integration >>> stack... >> >> ... thanks! >> >>> I''ve not reviewed the whole thing -- just the "scrub start" code so >>> far. I''ve removed the bits I''ve not checked from the file below. >> >> I rebased the old branch I found to your current integration branch and >> fixed up a most of what you mentioned. I''ll not send a new version out >> until after your complete review (or your statement that this is it or >> your statement that you would rather going on reviewing the revised >> version). > > Thanks. The other half has just gone out (with few comments).I''m through now, but I''ll wait another day for you to protest on my latest comments before sending the new version.>> Things I ripped out are accepted and corrected without resistance. >> Comments follow. > > Only a couple of rejoinders below. > >>> On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: > [...] > >>>> + case 4: /* read dev id */ >>>> + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) >>>> + ; >>>> + if (!j || i+j+1 >= avail) >>> >>> j == 0 is clearer than !j here, IMO >>> >>>> + _SCRUB_ILLEGAL; >>>> + p[curr]->devid = atoll(&l[i]); >>>> + i += j + 1; >>> >>> Is there any reason that you couldn''t just use strtoull here? We >>> know that the string is terminated with a \n (by the guarantee of >>> state 1), so strtoull will always finish within the buffer. >> >> I just found it way easier to use atoll. We already know the first >> character really is a digit, so why bother with a more cumbersome function? > > Ah, my mistake for not being clearer, I think: I was talking about > the for loop at the head of the state 4 code as well. That only exists > in order to find the end of the number read in by atoll, and strtoull > would do that for you.Alright, now I see your point. However, with strtoull I would have to calculate with character pointers, whereas anything else uses direct character access with i and j here. And I don''t need the fancy bits of strtoull, either. So I''d like to stick with atoll.> [...] > >>>> + char fsid[37]; >>> >>> Magic number. is there a #define in libuuid for this value? >> >> At least the man page of uuid_parse clearly states uuids have 36 bytes >> plus a \0 in printf format. uuid/uuid.h doesn''t contain such a constant. >> But volumes.c, print-tree.c and others do it with 37, too. > > OK, if that''s conventional (and not defined in uuid.h), then go > with the magic number. > > Hugo. >Thanks, -Jan -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 12, 2011 at 10:49:59AM +0200, Jan Schmidt wrote:> On 11.07.2011 22:57, Hugo Mills wrote: > > On Mon, Jul 11, 2011 at 04:29:24PM +0200, Jan Schmidt wrote: > >> On 10.07.2011 20:23, Hugo Mills wrote: > >>> Yes, this is over three months after the initial posting, but since > >>> nobody else has looked at it yet, and the patch is in my integration > >>> stack... > >> > >> ... thanks! > >> > >>> I''ve not reviewed the whole thing -- just the "scrub start" code so > >>> far. I''ve removed the bits I''ve not checked from the file below. > >> > >> I rebased the old branch I found to your current integration branch and > >> fixed up a most of what you mentioned. I''ll not send a new version out > >> until after your complete review (or your statement that this is it or > >> your statement that you would rather going on reviewing the revised > >> version). > > > > Thanks. The other half has just gone out (with few comments). > > I''m through now, but I''ll wait another day for you to protest on my > latest comments before sending the new version. > > >> Things I ripped out are accepted and corrected without resistance. > >> Comments follow. > > > > Only a couple of rejoinders below. > > > >>> On Wed, Mar 30, 2011 at 06:53:12PM +0200, Jan Schmidt wrote: > > [...] > > > >>>> + case 4: /* read dev id */ > >>>> + for (j=0; isdigit(l[i+j]) && i+j < avail; ++j) > >>>> + ; > >>>> + if (!j || i+j+1 >= avail) > >>> > >>> j == 0 is clearer than !j here, IMO > >>> > >>>> + _SCRUB_ILLEGAL; > >>>> + p[curr]->devid = atoll(&l[i]); > >>>> + i += j + 1; > >>> > >>> Is there any reason that you couldn''t just use strtoull here? We > >>> know that the string is terminated with a \n (by the guarantee of > >>> state 1), so strtoull will always finish within the buffer. > >> > >> I just found it way easier to use atoll. We already know the first > >> character really is a digit, so why bother with a more cumbersome function? > > > > Ah, my mistake for not being clearer, I think: I was talking about > > the for loop at the head of the state 4 code as well. That only exists > > in order to find the end of the number read in by atoll, and strtoull > > would do that for you. > > Alright, now I see your point. However, with strtoull I would have to > calculate with character pointers, whereas anything else uses direct > character access with i and j here. And I don''t need the fancy bits of > strtoull, either. So I''d like to stick with atoll.OK, it''s not something I feel massively strongly about. Stick with atoll, then. Hugo. -- === Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk == PGP key: 515C238D from wwwkeys.eu.pgp.net or http://www.carfax.org.uk --- "He''s a nutcase, you know. There''s no getting away from it -- --- he''ll end up with a knighthood"