Skip to content
Snippets Groups Projects
  • Yu Kuai's avatar
    077a4033
    block: don't allow a disk link holder to itself · 077a4033
    Yu Kuai authored
    
    After creating a dm device, the user can reload that dm device with a
    table that points at itself, and an endless recursion is triggered
    because dm keeps looking itself up.
    
    Test procedures:
    
    1) dmsetup create test --table "xxx sda", assume dm-0 is created
    2) dmsetup suspend test
    3) dmsetup reload test --table "xxx dm-0"
    4) dmsetup resume test
    
    Test result:
    
    BUG: TASK stack guard page was hit at 00000000736a261f (stack is 000000008d12c88d..00000000c8dd82d5)
    stack guard page: 0000 [#1] PREEMPT SMP
    CPU: 29 PID: 946 Comm: systemd-udevd Not tainted 6.1.0-rc3-next-20221101-00006-g17640ca3b0ee #1295
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
    RIP: 0010:dm_prepare_ioctl+0xf/0x1e0
    Code: da 48 83 05 4a 7c 99 0b 01 41 89 c4 eb cd e8 b8 1f 40 00 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 48 83 05 a1 5a 99 0b 01 <41> 56 49 89 d6 41 55 4c 8d af 90 02 00 00 9
    RSP: 0018:ffffc90002090000 EFLAGS: 00010206
    RAX: ffff8881049d6800 RBX: ffff88817e589000 RCX: 0000000000000000
    RDX: ffffc90002090010 RSI: ffffc9000209001c RDI: ffff88817e589000
    RBP: 00000000484a101d R08: 0000000000000000 R09: 0000000000000007
    R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000005331
    R13: 0000000000005331 R14: 0000000000000000 R15: 0000000000000000
    FS:  00007fddf9609200(0000) GS:ffff889fbfd40000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: ffffc9000208fff8 CR3: 0000000179043000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Call Trace:
     <TASK>
     dm_blk_ioctl+0x50/0x1c0
     ? dm_prepare_ioctl+0xe0/0x1e0
     dm_blk_ioctl+0x88/0x1c0
     dm_blk_ioctl+0x88/0x1c0
     ......(a lot of same lines)
     dm_blk_ioctl+0x88/0x1c0
     dm_blk_ioctl+0x88/0x1c0
     blkdev_ioctl+0x184/0x3e0
     __x64_sys_ioctl+0xa3/0x110
     do_syscall_64+0x35/0x80
     entry_SYSCALL_64_after_hwframe+0x63/0xcd
    RIP: 0033:0x7fddf7306577
    Code: b3 66 90 48 8b 05 11 89 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 88 8
    RSP: 002b:00007ffd0b2ec318 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
    RAX: ffffffffffffffda RBX: 00005634ef478320 RCX: 00007fddf7306577
    RDX: 0000000000000000 RSI: 0000000000005331 RDI: 0000000000000007
    RBP: 0000000000000007 R08: 00005634ef4843e0 R09: 0000000000000080
    R10: 00007fddf75cfb38 R11: 0000000000000246 R12: 00000000030d4000
    R13: 0000000000000000 R14: 0000000000000000 R15: 00005634ef48b800
     </TASK>
    Modules linked in:
    ---[ end trace 0000000000000000 ]---
    RIP: 0010:dm_prepare_ioctl+0xf/0x1e0
    Code: da 48 83 05 4a 7c 99 0b 01 41 89 c4 eb cd e8 b8 1f 40 00 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 48 83 05 a1 5a 99 0b 01 <41> 56 49 89 d6 41 55 4c 8d af 90 02 00 00 9
    RSP: 0018:ffffc90002090000 EFLAGS: 00010206
    RAX: ffff8881049d6800 RBX: ffff88817e589000 RCX: 0000000000000000
    RDX: ffffc90002090010 RSI: ffffc9000209001c RDI: ffff88817e589000
    RBP: 00000000484a101d R08: 0000000000000000 R09: 0000000000000007
    R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000005331
    R13: 0000000000005331 R14: 0000000000000000 R15: 0000000000000000
    FS:  00007fddf9609200(0000) GS:ffff889fbfd40000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: ffffc9000208fff8 CR3: 0000000179043000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Kernel panic - not syncing: Fatal exception in interrupt
    Kernel Offset: disabled
    ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
    
    Fix the problem by forbidding a disk to create link to itself.
    
    Signed-off-by: Yu Kuai <yukuai3@huawei.com>
    Reviewed-by: Christoph Hellwig <hch@lst.de>
    Link: https://lore.kernel.org/r/20221115141054.1051801-11-yukuai1@huaweicloud.com
    
    
    Signed-off-by: Jens Axboe <axboe@kernel.dk>
    077a4033
    History
    block: don't allow a disk link holder to itself
    Yu Kuai authored
    
    After creating a dm device, the user can reload that dm device with a
    table that points at itself, and an endless recursion is triggered
    because dm keeps looking itself up.
    
    Test procedures:
    
    1) dmsetup create test --table "xxx sda", assume dm-0 is created
    2) dmsetup suspend test
    3) dmsetup reload test --table "xxx dm-0"
    4) dmsetup resume test
    
    Test result:
    
    BUG: TASK stack guard page was hit at 00000000736a261f (stack is 000000008d12c88d..00000000c8dd82d5)
    stack guard page: 0000 [#1] PREEMPT SMP
    CPU: 29 PID: 946 Comm: systemd-udevd Not tainted 6.1.0-rc3-next-20221101-00006-g17640ca3b0ee #1295
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
    RIP: 0010:dm_prepare_ioctl+0xf/0x1e0
    Code: da 48 83 05 4a 7c 99 0b 01 41 89 c4 eb cd e8 b8 1f 40 00 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 48 83 05 a1 5a 99 0b 01 <41> 56 49 89 d6 41 55 4c 8d af 90 02 00 00 9
    RSP: 0018:ffffc90002090000 EFLAGS: 00010206
    RAX: ffff8881049d6800 RBX: ffff88817e589000 RCX: 0000000000000000
    RDX: ffffc90002090010 RSI: ffffc9000209001c RDI: ffff88817e589000
    RBP: 00000000484a101d R08: 0000000000000000 R09: 0000000000000007
    R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000005331
    R13: 0000000000005331 R14: 0000000000000000 R15: 0000000000000000
    FS:  00007fddf9609200(0000) GS:ffff889fbfd40000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: ffffc9000208fff8 CR3: 0000000179043000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Call Trace:
     <TASK>
     dm_blk_ioctl+0x50/0x1c0
     ? dm_prepare_ioctl+0xe0/0x1e0
     dm_blk_ioctl+0x88/0x1c0
     dm_blk_ioctl+0x88/0x1c0
     ......(a lot of same lines)
     dm_blk_ioctl+0x88/0x1c0
     dm_blk_ioctl+0x88/0x1c0
     blkdev_ioctl+0x184/0x3e0
     __x64_sys_ioctl+0xa3/0x110
     do_syscall_64+0x35/0x80
     entry_SYSCALL_64_after_hwframe+0x63/0xcd
    RIP: 0033:0x7fddf7306577
    Code: b3 66 90 48 8b 05 11 89 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 88 8
    RSP: 002b:00007ffd0b2ec318 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
    RAX: ffffffffffffffda RBX: 00005634ef478320 RCX: 00007fddf7306577
    RDX: 0000000000000000 RSI: 0000000000005331 RDI: 0000000000000007
    RBP: 0000000000000007 R08: 00005634ef4843e0 R09: 0000000000000080
    R10: 00007fddf75cfb38 R11: 0000000000000246 R12: 00000000030d4000
    R13: 0000000000000000 R14: 0000000000000000 R15: 00005634ef48b800
     </TASK>
    Modules linked in:
    ---[ end trace 0000000000000000 ]---
    RIP: 0010:dm_prepare_ioctl+0xf/0x1e0
    Code: da 48 83 05 4a 7c 99 0b 01 41 89 c4 eb cd e8 b8 1f 40 00 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 48 83 05 a1 5a 99 0b 01 <41> 56 49 89 d6 41 55 4c 8d af 90 02 00 00 9
    RSP: 0018:ffffc90002090000 EFLAGS: 00010206
    RAX: ffff8881049d6800 RBX: ffff88817e589000 RCX: 0000000000000000
    RDX: ffffc90002090010 RSI: ffffc9000209001c RDI: ffff88817e589000
    RBP: 00000000484a101d R08: 0000000000000000 R09: 0000000000000007
    R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000005331
    R13: 0000000000005331 R14: 0000000000000000 R15: 0000000000000000
    FS:  00007fddf9609200(0000) GS:ffff889fbfd40000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: ffffc9000208fff8 CR3: 0000000179043000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Kernel panic - not syncing: Fatal exception in interrupt
    Kernel Offset: disabled
    ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
    
    Fix the problem by forbidding a disk to create link to itself.
    
    Signed-off-by: Yu Kuai <yukuai3@huawei.com>
    Reviewed-by: Christoph Hellwig <hch@lst.de>
    Link: https://lore.kernel.org/r/20221115141054.1051801-11-yukuai1@huaweicloud.com
    
    
    Signed-off-by: Jens Axboe <axboe@kernel.dk>
holder.c 3.81 KiB
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/blkdev.h>
#include <linux/slab.h>

/*
 * Bookkeeping record for one slave block device linked to a holder disk.
 *
 * Records are allocated by bd_link_disk_holder() and freed by
 * bd_unlink_disk_holder() once the last link request has been undone.
 */
struct bd_holder_disk {
	/* Entry in the holder disk's ->slave_bdevs list. */
	struct list_head	list;
	/*
	 * The slave bdev's ->bd_holder_dir; also the key used to look the
	 * record up.  A kobject reference is held on it while the record
	 * exists.
	 */
	struct kobject		*holder_dir;
	/* Number of outstanding link requests for this bdev/disk pair. */
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &disk->slave_bdevs, list)
		if (holder->holder_dir == bdev->bd_holder_dir)
			return holder;
	return NULL;
}

/*
 * Create a sysfs link inside @from that points at @to and carries @to's
 * kobject name.  Returns the result of sysfs_create_link().
 */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	return sysfs_create_link(from, to, link_name);
}

/*
 * Remove the sysfs link in @from that is named after @to; the counterpart
 * of add_symlink().
 */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	sysfs_remove_link(from, link_name);
}

/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * Two sysfs symlinks are created:
 *
 * - one in the "slaves" directory of the holder @disk, pointing at @bdev
 * - one in the "holders" directory of @bdev, pointing at the holder @disk
 *
 * For instance, when /dev/dm-0 maps to /dev/sda and the disk of dm-0 is
 * handed to bd_link_disk_holder(), the result is:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * Linking the same @bdev to the same @disk again only bumps a reference
 * count; each call must be balanced by bd_unlink_disk_holder().
 *
 * The caller must have claimed @bdev beforehand and must keep both @bdev
 * and @disk valid while the symlinks are created and for as long as they
 * exist.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success.  -EINVAL if @disk has no slave directory or if @bdev
 * belongs to @disk itself, -ENODEV if the disk of @bdev is no longer live,
 * -ENOMEM if the bookkeeping record cannot be allocated, or the error
 * returned by sysfs when creating a symlink fails.
 */
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *entry;
	struct gendisk *slave_disk;
	int err;

	if (WARN_ON_ONCE(!disk->slave_dir))
		return -EINVAL;

	/* Refuse to make a disk the holder of a bdev that lives on it. */
	slave_disk = bdev->bd_disk;
	if (slave_disk == disk)
		return -EINVAL;

	/*
	 * Take our own reference on the holder directory while the slave
	 * disk is still live: del_gendisk drops the initial reference to
	 * bd_holder_dir, and cleanup must remain possible past that point.
	 */
	mutex_lock(&slave_disk->open_mutex);
	if (!disk_live(slave_disk)) {
		mutex_unlock(&slave_disk->open_mutex);
		return -ENODEV;
	}
	kobject_get(bdev->bd_holder_dir);
	mutex_unlock(&slave_disk->open_mutex);

	mutex_lock(&disk->open_mutex);
	WARN_ON_ONCE(!bdev->bd_holder);

	entry = bd_find_holder_disk(bdev, disk);
	if (entry) {
		/*
		 * Already linked: the existing record owns a reference on
		 * the holder directory, so give ours back and just count
		 * the additional user.
		 */
		kobject_put(bdev->bd_holder_dir);
		entry->refcnt++;
		mutex_unlock(&disk->open_mutex);
		return 0;
	}

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry) {
		err = -ENOMEM;
		goto err_unlock;
	}

	INIT_LIST_HEAD(&entry->list);
	entry->refcnt = 1;
	entry->holder_dir = bdev->bd_holder_dir;

	err = add_symlink(disk->slave_dir, bdev_kobj(bdev));
	if (err)
		goto err_free_entry;

	err = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
	if (err)
		goto err_del_slave_link;

	/* The record now owns the holder directory reference taken above. */
	list_add(&entry->list, &disk->slave_bdevs);
	mutex_unlock(&disk->open_mutex);
	return 0;

err_del_slave_link:
	del_symlink(disk->slave_dir, bdev_kobj(bdev));
err_free_entry:
	kfree(entry);
err_unlock:
	mutex_unlock(&disk->open_mutex);
	/* Every path reaching this point failed; drop our reference. */
	kobject_put(bdev->bd_holder_dir);
	return err;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * Drops one link reference for the @bdev/@disk pair.  When the last
 * reference goes away, both symlinks are removed, the reference on the
 * holder directory is released and the bookkeeping record is freed.
 * Warns once if @disk has no slave directory or if no link between @bdev
 * and @disk is recorded.
 *
 * CONTEXT:
 * Might sleep.
 */
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *entry;

	if (WARN_ON_ONCE(!disk->slave_dir))
		return;

	mutex_lock(&disk->open_mutex);

	entry = bd_find_holder_disk(bdev, disk);
	if (WARN_ON_ONCE(entry == NULL))
		goto out_unlock;

	/* Other link requests for this pair are still outstanding. */
	if (--entry->refcnt)
		goto out_unlock;

	del_symlink(disk->slave_dir, bdev_kobj(bdev));
	del_symlink(entry->holder_dir, &disk_to_dev(disk)->kobj);
	kobject_put(entry->holder_dir);
	list_del_init(&entry->list);
	kfree(entry);

out_unlock:
	mutex_unlock(&disk->open_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);