This is far from complete and in particular it *doesn''t* take the necessary locks at all i.e. please don''t use this yet outside of testing. This patch is more of a request for help with regards to the locking. I think there is also a bug in the mainline tree. When taking snapshots of snapshots I''m not sure we''re adding the proper references and back references in the root_tree. This code doesn''t immediately panic the system and seems to delete a tree succesfully in simple tests. The filesystem even fscks correctly afterwards! Thanks! --- fs/btrfs/ioctl.c | 325 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.h | 3 + 2 files changed, 328 insertions(+), 0 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 988fdc8..6e09873 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -550,6 +550,329 @@ out: return ret; } +static noinline int remove_directory_entries(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *dir, + struct inode *inode, char *name, int name_len) +{ + struct btrfs_path *path; + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_key found_key; + int ret; + unsigned long found_name; + struct extent_buffer *leaf; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, name_len, -1); + + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto err; + } + + if (!di) { + printk(KERN_INFO "btrfs: weird no dentry\n"); + ret = -ENOENT; + goto err; + } + + ret = btrfs_delete_one_dir_name(trans, root, path, di); + if (ret) { + printk(KERN_INFO "btrfs: cannot delete name\n"); + goto err; + } + + btrfs_release_path(root, path); + + /* now the tough part... the index.. we can''t rely on the inode + * ref b/c the inode ref lives in a different tree... just fall + * back to search */ + key.objectid = dir->i_ino; + key.offset = (u64) -1; + key.type = BTRFS_DIR_INDEX_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + + if (ret < 0) { + printk(KERN_INFO "btrfs: problem with initial dir index search\n"); + goto err; + } + + BUG_ON(ret == 0); + + ret = 0; + di = NULL; + + while (!ret) { + ret = btrfs_previous_item(root, path, key.objectid, + BTRFS_DIR_INDEX_KEY); + + if (ret < 0) { + printk(KERN_INFO "btrfs: problem scanning backwards for index\n"); + goto err; + } + + if (ret) + break; + + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + leaf = path->nodes[0]; + + BUG_ON(found_key.objectid != key.objectid); + + di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); + + if (btrfs_dir_name_len(leaf, di) == name_len) { + found_name = (unsigned long) (di + 1); + if (!memcmp_extent_buffer(leaf, name, found_name, name_len)) + break; + } + + di = NULL; + } + + /* di will be non-null if we found a match */ + if (di != NULL) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + } else { + printk(KERN_INFO "btrfs: hmm no dir index\n"); + } + + btrfs_i_size_write(dir, dir->i_size - name_len * 2); + inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; + btrfs_update_inode(trans, root, dir); + dir->i_sb->s_dirt = 1; +err: + btrfs_free_path(path); +out: + return ret; +} + +static noinline int delete_root_references(struct btrfs_root *dir_root, + struct btrfs_root *target_root) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + int ret; + int refs; + struct btrfs_path *path; + struct btrfs_root *root; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + root = dir_root->fs_info->tree_root; + + trans = btrfs_start_transaction(root, 1); + + key.objectid = dir_root->root_key.objectid; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = target_root->root_key.objectid; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + + if (ret < 0) { + printk(KERN_INFO "btrfs: problem find root refs\n"); + goto err; + } + + if (!ret) + btrfs_del_item(trans, root, path); + else + printk(KERN_INFO "btrfs: hmm no root refs\n"); + + btrfs_release_path(root, path); + + key.objectid = target_root->root_key.objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = dir_root->root_key.objectid; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + + if (ret < 0) { + printk(KERN_INFO "btrfs: problem find root backrefs\n"); + goto err; + } + + if (!ret) + btrfs_del_item(trans, root, path); + else + printk(KERN_INFO "btrfs: hmm no root backrefs\n"); + + btrfs_release_path(root, path); + + refs = btrfs_root_refs(&target_root->root_item); + btrfs_set_root_refs(&target_root->root_item, refs - 1); + + ret = btrfs_update_root(trans, root, &target_root->root_key, + &target_root->root_item); + + if (ret) { + printk(KERN_INFO "btrfs: problem copying root into btree\n"); + goto err; + } + + ret = btrfs_commit_transaction(trans, root); + + if (ret) { + printk(KERN_INFO "btrfs: problem committing transaction\n"); + goto err; + } + +err: + btrfs_free_path(path); +out: + return ret; +} + +static noinline int btrfs_ioctl_snap_destroy(struct file *file, + void __user *arg) +{ + struct inode *inode; + struct dentry *dir = fdentry(file); + struct dentry *dentry; + struct btrfs_root *root = BTRFS_I(dir->d_inode)->root; + struct btrfs_root *target_root; + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_trans_handle *trans; + int namelen; + int ret = 0; + int refs; + + if (root->fs_info->sb->s_flags & MS_RDONLY) { + ret = -EROFS; + goto out_final; + } + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) { + ret = -ENOMEM; + goto out_final; + } + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + + vol_args->name[BTRFS_PATH_NAME_MAX] = ''\0''; + namelen = strlen(vol_args->name); + if (strchr(vol_args->name, ''/'')) { + ret = -EINVAL; + goto out; + } + + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + + dentry = lookup_one_len(vol_args->name, dir, namelen); + if (IS_ERR(dentry)) { + mutex_unlock(&dir->d_inode->i_mutex); + goto out; + } + + inode = dentry->d_inode; + + mutex_unlock(&dir->d_inode->i_mutex); + + /* (1) locks needed? */ + if (!(inode->i_mode & S_IFDIR)) { + printk(KERN_INFO "btrfs: not directory\n"); + ret = -EINVAL; + goto out_dput; + } + + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { + printk(KERN_INFO "btrfs: not snapshot\n"); + ret = -EINVAL; + goto out_dput; + } + + /* (2) lock out submounts? */ + if (have_submounts(dentry)) { + printk(KERN_INFO "btrfs: Snapshot has submounts\n"); + ret = -EBUSY; + goto out_dput; + } + + /* (3) lock sb... lock dcache lock btrfs? */ + shrink_dcache_parent(dentry); + + if (!list_empty(&dentry->d_subdirs)) { + printk(KERN_INFO "btrfs: leftover dentries\n"); + ret = -EBUSY; + goto out_dput; + } + + ret = btrfs_check_free_space(root, 1, 1); + if (ret) { + printk(KERN_INFO "btrfs: not enough space to delete snapshot\n"); + goto out_dput; + } + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir->d_inode); + + ret = remove_directory_entries(trans, root, dir->d_inode, + inode, vol_args->name, namelen); + + if (ret) { + printk(KERN_INFO "btrfs: problem removing dentries\n"); + goto out_dput; + } + + ret = btrfs_commit_transaction(trans, root); + + if (ret) { + printk(KERN_INFO "btrfs: problem committing transaction\n"); + goto out_dput; + } + + target_root = BTRFS_I(inode)->root; + + ret = delete_root_references(root, target_root); + + if (ret) { + printk(KERN_INFO "btrfs: problem deleting root references\n"); + goto out_dput; + } + + refs = btrfs_root_refs(&target_root->root_item); + + if (!refs) { + ret = btrfs_add_dead_root(target_root, root); + + if (ret) { + printk(KERN_INFO "btrfs: problem adding dead root\n"); + goto out_dput; + } + + radix_tree_delete(&target_root->fs_info->fs_roots_radix, + (unsigned long)target_root->root_key.objectid); + + /* leak root? */ + } + +out_dput: + dput(dentry); +out: + kfree(vol_args); +out_final: + return ret; +} + static noinline int btrfs_ioctl_snap_create(struct file *file, void __user *arg, int subvol) { @@ -1102,6 +1425,8 @@ long btrfs_ioctl(struct file *file, unsigned int switch (cmd) { case BTRFS_IOC_SNAP_CREATE: return btrfs_ioctl_snap_create(file, argp, 0); + case BTRFS_IOC_SNAP_DESTROY: + return btrfs_ioctl_snap_destroy(file, argp); case BTRFS_IOC_SUBVOL_CREATE: return btrfs_ioctl_snap_create(file, argp, 1); case BTRFS_IOC_DEFRAG: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b320b10..587d021 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -66,4 +66,7 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) + #endif -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2009-02-09 at 16:07 -0800, Aaron Straus wrote:> This is far from complete and in particular it *doesn''t* take the necessary > locks at all i.e. please don''t use this yet outside of testing. This patch is > more of a request for help with regards to the locking. >Sorry to say that I won''t have time to read through this in the morning, but this is a pretty hefty task for a first patch. Thanks for taking it on and posting this work! -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Feb 09 08:09 PM, Chris Mason wrote:> Sorry to say that I won''t have time to read through this in the morning, > but this is a pretty hefty task for a first patch. Thanks for taking it > on and posting this work!No worries... I just wanted to learn more about btrfs... so it seemed like a good project, especially if nobody else is working on it. I don''t think my patch actually works correctly, but I''m hoping someone who understands both the VFS and btrfs can fix it up or at least use it as a starting point. -- Quick question about the root_refs for you or anyone else on the list. Let''s say we have: base/s1 where base is a subvolume and s1 is a snapshot. So base has a root_ref to s1 and s1 has a back_ref to base in the tree root. Now let''s say we make a snapshot of base so we have: base/s1 base.snap/s1 So we have s1, the same tree root referenced in two separate snapshots. Now should s1 should now have two back_refs, one to base and one to base.snap? Also base.snap show have a root_ref to s1? That doesn''t seem to happen with the current snapshotting code, unless I''m missing something... I''m happy to try to add a patch if that is indeed what should happen... Regards, =a -- ==================Aaron Straus aaron@merfinllc.com -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Aaron Straus <aaron <at> merfinllc.com> writes: <-cut-> Just tried and: [20555.327056] btrfs: use compression [20592.252323] BUG: unable to handle kernel NULL pointer dereference at 00000073 [20592.252335] IP: [<f8d185c6>] :btrfs:btrfs_ioctl_snap_destroy+0xeb/0x24a [20592.252385] *pde = 00000000 [20592.252393] Oops: 0000 [#1] SMP [20592.252399] Modules linked in: btrfs nls_utf8 cifs nls_base vmnet vmci vmmon nvidia(P) agpgart ppdev lp rfcomm l2cap bluetooth vboxnetflt vboxdrv video output ac battery powernow_k8 cpufreq_userspace cpufreq_stats cpufreq_powersave cpufreq_ondemand freq_table cpufreq_conservative kvm_amd kvm dm_snapshot dm_mirror dm_log dm_mod ipv6 fuse zlib_inflate zlib_deflate libcrc32c firewire_sbp2 loop rtc_cmos rtc_core rtc_lib snd_pcsp k8temp snd_hda_intel snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd soundcore snd_page_alloc parport_pc parport button i2c_nforce2 i2c_core evdev joydev ext3 jbd mbcache raid1 md_mod sd_mod crc_t10dif ide_disk usb_storage usbhid sata_nv hid ff_memless ata_generic libata scsi_mod dock floppy forcedeth firewire_ohci firewire_core crc_itu_t ide_pci_generic amd74xx ide_core ehci_hcd ohci_hcd usbcore thermal processor fan thermal_sys [last unloaded: btrfs] [20592.252521] [20592.252527] Pid: 6247, comm: btrfsctl Tainted: P (2.6.27-1-686 #1) [20592.252534] EIP: 0060:[<f8d185c6>] EFLAGS: 00210202 CPU: 0 [20592.252573] EIP is at btrfs_ioctl_snap_destroy+0xeb/0x24a [btrfs] [20592.252578] EAX: 00000000 EBX: bfc53cc8 ECX: 00000000 EDX: 00000000 [20592.252584] ESI: ffffffea EDI: f59bce7c EBP: f480ac00 ESP: f2e0dec4 [20592.252589] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [20592.252595] Process btrfsctl (pid: 6247, ti=f2e0c000 task=dde44620 task.ti=f2e0c000) [20592.252599] Stack: e9b91000 e9b91008 00000006 00000000 e685b744 f43a03c0 bfc53cc8 f480ac00 [20592.252613] 5000940f f8d187f4 00000000 c1000000 f4829668 f5666b40 f480ac00 f4928b7c [20592.252626] 000000f6 00000000 00000000 003b9380 f4829680 00000000 b7eb5300 f4829668 [20592.252639] Call Trace: [20592.252643] [<f8d187f4>] btrfs_ioctl+0xcf/0x897 [btrfs] [20592.252683] [<c0168c12>] __vma_link+0x3f/0x4a [20592.252693] [<c01693d4>] vma_link+0x4a/0xc5 [20592.252701] [<f8d18725>] btrfs_ioctl+0x0/0x897 [btrfs] [20592.252738] [<c01825b0>] vfs_ioctl+0x1c/0x5d [20592.252747] [<c0182830>] do_vfs_ioctl+0x23f/0x24d [20592.252755] [<c018287f>] sys_ioctl+0x41/0x5a [20592.252763] [<c01037f7>] sysenter_do_call+0x12/0x2f [20592.252771] ======================[20592.252774] Code: 47 0c 31 f6 83 c0 78 e8 1e a1 5a c7 e9 67 01 00 00 8b 54 24 10 8b 52 0c 89 54 24 0c 8b 47 0c 83 c0 78 e8 03 a1 5a c7 8b 44 24 0c <f6> 40 73 40 75 07 68 70 20 d2 f8 eb 12 8b 54 24 0c 81 7a 20 00 [20592.252839] EIP: [<f8d185c6>] btrfs_ioctl_snap_destroy+0xeb/0x24a [btrfs] SS:ESP 0068:f2e0dec4 [20592.252881] ---[ end trace f1de5e7cc818c994 ]--- (END) -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi, Thanks for the traceback, I''ll look into it tonight and try to figure out what went wrong. There are definitely problems with the patch (other than the complete lack of locking and memory leaks). :) One of the biggest issues with this snapshot deletion code is that it relies on the snapshot refs and backrefs being accurate. I''m not sure that''s the case with the current snapshot creation code. I want to make sure my understanding of the root_refs and root_backrefs is accurate before doing any more work on the patch though. Regards, =a On Feb 10 07:44 PM, Michele Petrazzo wrote:> Aaron Straus <aaron <at> merfinllc.com> writes: > > <-cut-> > > Just tried and: > > [20555.327056] btrfs: use compression > [20592.252323] BUG: unable to handle kernel NULL pointer dereference at 00000073 > [20592.252335] IP: [<f8d185c6>] :btrfs:btrfs_ioctl_snap_destroy+0xeb/0x24a > [20592.252385] *pde = 00000000 > [20592.252393] Oops: 0000 [#1] SMP > [20592.252399] Modules linked in: btrfs nls_utf8 cifs nls_base vmnet vmci vmmon > nvidia(P) agpgart ppdev lp rfcomm l2cap bluetooth vboxnetflt vboxdrv video > output ac battery powernow_k8 cpufreq_userspace cpufreq_stats cpufreq_powersave > cpufreq_ondemand freq_table cpufreq_conservative kvm_amd kvm dm_snapshot > dm_mirror dm_log dm_mod ipv6 fuse zlib_inflate zlib_deflate libcrc32c > firewire_sbp2 loop rtc_cmos rtc_core rtc_lib snd_pcsp k8temp snd_hda_intel > snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd soundcore snd_page_alloc > parport_pc parport button i2c_nforce2 i2c_core evdev joydev ext3 jbd mbcache > raid1 md_mod sd_mod crc_t10dif ide_disk usb_storage usbhid sata_nv hid > ff_memless ata_generic libata scsi_mod dock floppy forcedeth firewire_ohci > firewire_core crc_itu_t ide_pci_generic amd74xx ide_core ehci_hcd ohci_hcd > usbcore thermal processor fan thermal_sys [last unloaded: btrfs] > [20592.252521] > [20592.252527] Pid: 6247, comm: btrfsctl Tainted: P (2.6.27-1-686 #1) > [20592.252534] EIP: 0060:[<f8d185c6>] EFLAGS: 00210202 CPU: 0 > [20592.252573] EIP is at btrfs_ioctl_snap_destroy+0xeb/0x24a [btrfs] > [20592.252578] EAX: 00000000 EBX: bfc53cc8 ECX: 00000000 EDX: 00000000 > [20592.252584] ESI: ffffffea EDI: f59bce7c EBP: f480ac00 ESP: f2e0dec4 > [20592.252589] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 > [20592.252595] Process btrfsctl (pid: 6247, ti=f2e0c000 task=dde44620 > task.ti=f2e0c000) > [20592.252599] Stack: e9b91000 e9b91008 00000006 00000000 e685b744 f43a03c0 > bfc53cc8 f480ac00 > [20592.252613] 5000940f f8d187f4 00000000 c1000000 f4829668 f5666b40 > f480ac00 f4928b7c > [20592.252626] 000000f6 00000000 00000000 003b9380 f4829680 00000000 > b7eb5300 f4829668 > [20592.252639] Call Trace: > [20592.252643] [<f8d187f4>] btrfs_ioctl+0xcf/0x897 [btrfs] > [20592.252683] [<c0168c12>] __vma_link+0x3f/0x4a > [20592.252693] [<c01693d4>] vma_link+0x4a/0xc5 > [20592.252701] [<f8d18725>] btrfs_ioctl+0x0/0x897 [btrfs] > [20592.252738] [<c01825b0>] vfs_ioctl+0x1c/0x5d > [20592.252747] [<c0182830>] do_vfs_ioctl+0x23f/0x24d > [20592.252755] [<c018287f>] sys_ioctl+0x41/0x5a > [20592.252763] [<c01037f7>] sysenter_do_call+0x12/0x2f > [20592.252771] ======================> [20592.252774] Code: 47 0c 31 f6 83 c0 78 e8 1e a1 5a c7 e9 67 01 00 00 8b 54 24 > 10 8b 52 0c 89 54 24 0c 8b 47 0c 83 c0 78 e8 03 a1 5a c7 8b 44 24 0c <f6> 40 73 > 40 75 07 68 70 20 d2 f8 eb 12 8b 54 24 0c 81 7a 20 00 > [20592.252839] EIP: [<f8d185c6>] btrfs_ioctl_snap_destroy+0xeb/0x24a [btrfs] > SS:ESP 0068:f2e0dec4 > [20592.252881] ---[ end trace f1de5e7cc818c994 ]--- > (END)-- ==================Aaron Straus aaron@merfinllc.com -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Feb 10 11:57 AM, Aaron Straus wrote:> Thanks for the traceback, I''ll look into it tonight and try to figure > out what went wrong.Hi, Actually that''s an easy one. That''s a bad way of saying file not found i.e. you tried to delete a snapshot that wasn''t there. Please apply the following patch on top of the previous one. Thanks! =a -- ==================Aaron Straus aaron@merfinllc.com --- diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6e09873..496e8d6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -787,6 +787,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, mutex_unlock(&dir->d_inode->i_mutex); + if (!inode) { + printk(KERN_INFO "btrfs: snapshot not found\n"); + ret = -ENOENT; + goto out_dput; + } + /* (1) locks needed? */ if (!(inode->i_mode & S_IFDIR)) { printk(KERN_INFO "btrfs: not directory\n"); -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html