Btrfs: use rcu to protect device->name

Al pointed out that we can just toss out the old name on a device and add a
new one arbitrarily, so anybody who uses device->name in printk could
possibly use free'd memory.  Instead of adding locking around all of this he
suggested doing it with RCU, so I've introduced a struct rcu_string that
does just that and have gone through and protected all accesses to
device->name that aren't under the uuid_mutex with rcu_read_lock().  This
protects us and I will use it for dealing with removing the device that we
used to mount the file system in a later patch.  Thanks,

Reviewed-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Josef Bacik <josef@redhat.com>
This commit is contained in:
Josef Bacik
2012-06-04 14:03:51 -04:00
committed by Chris Mason
parent 17ca04aff7
commit 606686eeac
8 changed files with 162 additions and 64 deletions

View File

@@ -35,6 +35,7 @@
#include "volumes.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "rcu-string.h"
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
device = list_entry(fs_devices->devices.next,
struct btrfs_device, dev_list);
list_del(&device->dev_list);
kfree(device->name);
rcu_string_free(device->name);
kfree(device);
}
kfree(fs_devices);
@@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,
{
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices;
struct rcu_string *name;
u64 found_transid = btrfs_super_generation(disk_super);
char *name;
fs_devices = find_fsid(disk_super->fsid);
if (!fs_devices) {
@@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,
memcpy(device->uuid, disk_super->dev_item.uuid,
BTRFS_UUID_SIZE);
spin_lock_init(&device->io_lock);
device->name = kstrdup(path, GFP_NOFS);
if (!device->name) {
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
kfree(device);
return -ENOMEM;
}
rcu_assign_pointer(device->name, name);
INIT_LIST_HEAD(&device->dev_alloc_list);
/* init readahead state */
@@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,
device->fs_devices = fs_devices;
fs_devices->num_devices++;
} else if (!device->name || strcmp(device->name, path)) {
name = kstrdup(path, GFP_NOFS);
} else if (!device->name || strcmp(device->name->str, path)) {
name = rcu_string_strdup(path, GFP_NOFS);
if (!name)
return -ENOMEM;
kfree(device->name);
device->name = name;
rcu_string_free(device->name);
rcu_assign_pointer(device->name, name);
if (device->missing) {
fs_devices->missing_devices--;
device->missing = 0;
@@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
/* We have held the volume lock, it is safe to get the devices. */
list_for_each_entry(orig_dev, &orig->devices, dev_list) {
struct rcu_string *name;
device = kzalloc(sizeof(*device), GFP_NOFS);
if (!device)
goto error;
device->name = kstrdup(orig_dev->name, GFP_NOFS);
if (!device->name) {
/*
* This is ok to do without rcu read locked because we hold the
* uuid mutex so nothing we touch in here is going to disappear.
*/
name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
if (!name) {
kfree(device);
goto error;
}
rcu_assign_pointer(device->name, name);
device->devid = orig_dev->devid;
device->work.func = pending_bios_fn;
@@ -491,7 +501,7 @@ again:
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
kfree(device->name);
rcu_string_free(device->name);
kfree(device);
}
@@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)
if (device->bdev)
blkdev_put(device->bdev, device->mode);
kfree(device->name);
rcu_string_free(device->name);
kfree(device);
}
@@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
struct btrfs_device *new_device;
struct rcu_string *name;
if (device->bdev)
fs_devices->open_devices--;
@@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
BUG_ON(!new_device); /* -ENOMEM */
memcpy(new_device, device, sizeof(*new_device));
new_device->name = kstrdup(device->name, GFP_NOFS);
BUG_ON(device->name && !new_device->name); /* -ENOMEM */
/* Safe because we are under uuid_mutex */
name = rcu_string_strdup(device->name->str, GFP_NOFS);
BUG_ON(device->name && !name); /* -ENOMEM */
rcu_assign_pointer(new_device->name, name);
new_device->bdev = NULL;
new_device->writeable = 0;
new_device->in_fs_metadata = 0;
@@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
if (!device->name)
continue;
bdev = blkdev_get_by_path(device->name, flags, holder);
bdev = blkdev_get_by_path(device->name->str, flags, holder);
if (IS_ERR(bdev)) {
printk(KERN_INFO "open %s failed\n", device->name);
printk(KERN_INFO "open %s failed\n", device->name->str);
goto error;
}
filemap_write_and_wait(bdev->bd_inode->i_mapping);
@@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
struct block_device *bdev;
struct list_head *devices;
struct super_block *sb = root->fs_info->sb;
struct rcu_string *name;
u64 total_bytes;
int seeding_dev = 0;
int ret = 0;
@@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
goto error;
}
device->name = kstrdup(device_path, GFP_NOFS);
if (!device->name) {
name = rcu_string_strdup(device_path, GFP_NOFS);
if (!name) {
kfree(device);
ret = -ENOMEM;
goto error;
}
rcu_assign_pointer(device->name, name);
ret = find_next_devid(root, &device->devid);
if (ret) {
kfree(device->name);
rcu_string_free(device->name);
kfree(device);
goto error;
}
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
kfree(device->name);
rcu_string_free(device->name);
kfree(device);
ret = PTR_ERR(trans);
goto error;
@@ -1796,7 +1812,7 @@ error_trans:
unlock_chunks(root);
btrfs_abort_transaction(trans, root, ret);
btrfs_end_transaction(trans, root);
kfree(device->name);
rcu_string_free(device->name);
kfree(device);
error:
blkdev_put(bdev, FMODE_EXCL);
@@ -4204,10 +4220,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
dev = bbio->stripes[dev_nr].dev;
if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
#ifdef DEBUG
struct rcu_string *name;
rcu_read_lock();
name = rcu_dereference(dev->name);
pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
"(%s id %llu), size=%u\n", rw,
(u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
dev->name, dev->devid, bio->bi_size);
name->str, dev->devid, bio->bi_size);
rcu_read_unlock();
#endif
bio->bi_bdev = dev->bdev;
if (async_submit)
schedule_bio(root, dev, rw, bio);
@@ -4694,8 +4717,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
key.offset = device->devid;
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
device->name, (unsigned long long)device->devid);
printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
rcu_str_deref(device->name),
(unsigned long long)device->devid);
__btrfs_reset_dev_stats(device);
device->dev_stats_valid = 1;
btrfs_release_path(path);
@@ -4747,8 +4771,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
BUG_ON(!path);
ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
if (ret < 0) {
printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
ret, device->name);
printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
ret, rcu_str_deref(device->name));
goto out;
}
@@ -4757,8 +4781,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
/* need to delete old one and insert a new one */
ret = btrfs_del_item(trans, dev_root, path);
if (ret != 0) {
printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
device->name, ret);
printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
rcu_str_deref(device->name), ret);
goto out;
}
ret = 1;
@@ -4770,8 +4794,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, dev_root, path,
&key, sizeof(*ptr));
if (ret < 0) {
printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
device->name, ret);
printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
rcu_str_deref(device->name), ret);
goto out;
}
}
@@ -4823,9 +4847,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
{
if (!dev->dev_stats_valid)
return;
printk_ratelimited(KERN_ERR
printk_ratelimited_in_rcu(KERN_ERR
"btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
dev->name,
rcu_str_deref(dev->name),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
@@ -4837,8 +4861,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
{
printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
dev->name,
printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
rcu_str_deref(dev->name),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),