Commit 4934b640 authored by Yu Kuai's avatar Yu Kuai Committed by Song Liu
Browse files

md: fix 'delete_mutex' deadlock



Commit 3ce94ce5 ("md: fix duplicate filename for rdev") introduce a
new lock 'delete_mutex', and trigger a new deadlock:

t1: remove rdev			t2: sysfs writer

rdev_attr_store			rdev_attr_store
 mddev_lock
 state_store
 md_kick_rdev_from_array
  lock delete_mutex
  list_add mddev->deleting
  unlock delete_mutex
 mddev_unlock
				 mddev_lock
				 ...
  lock delete_mutex
  kobject_del
  // wait for sysfs writers to be done
				 mddev_unlock
				 lock delete_mutex
				 // wait for delete_mutex, deadlock

'delete_mutex' is used to protect the list 'mddev->deleting', turns out
that this list can be protected by 'reconfig_mutex' directly, and this
lock can be removed.

Fix this problem by removing the lock, and use 'reconfig_mutex' to
protect the list. mddev_unlock() will move this list to a local list to
be handled after 'reconfig_mutex' is dropped.

Fixes: 3ce94ce5 ("md: fix duplicate filename for rdev")
Signed-off-by: default avatarYu Kuai <yukuai3@huawei.com>
Signed-off-by: default avatarSong Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230621142933.1395629-1-yukuai1@huaweicloud.com
parent a1d76719
Loading
Loading
Loading
Loading
+9 −19
Original line number Diff line number Diff line
@@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev)
{
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->delete_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
@@ -749,26 +748,15 @@ static void mddev_free(struct mddev *mddev)

static const struct attribute_group md_redundancy_group;

static void md_free_rdev(struct mddev *mddev)
void mddev_unlock(struct mddev *mddev)
{
	struct md_rdev *rdev;
	struct md_rdev *tmp;
	LIST_HEAD(delete);

	mutex_lock(&mddev->delete_mutex);
	if (list_empty(&mddev->deleting))
		goto out;
	if (!list_empty(&mddev->deleting))
		list_splice_init(&mddev->deleting, &delete);

	list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
		list_del_init(&rdev->same_set);
		kobject_del(&rdev->kobj);
		export_rdev(rdev, mddev);
	}
out:
	mutex_unlock(&mddev->delete_mutex);
}

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
@@ -808,7 +796,11 @@ void mddev_unlock(struct mddev *mddev)
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	md_free_rdev(mddev);
	list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
		list_del_init(&rdev->same_set);
		kobject_del(&rdev->kobj);
		export_rdev(rdev, mddev);
	}

	md_wakeup_thread(mddev->thread);
	wake_up(&mddev->sb_wait);
@@ -2488,9 +2480,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
	 * reconfig_mutex is held, hence it can't be called under
	 * reconfig_mutex and it's delayed to mddev_unlock().
	 */
	mutex_lock(&mddev->delete_mutex);
	list_add(&rdev->same_set, &mddev->deleting);
	mutex_unlock(&mddev->delete_mutex);
}

static void export_array(struct mddev *mddev)
+1 −3
Original line number Diff line number Diff line
@@ -531,11 +531,9 @@ struct mddev {

	/*
	 * Temporarily store rdev that will be finally removed when
	 * reconfig_mutex is unlocked.
	 * reconfig_mutex is unlocked, protected by reconfig_mutex.
	 */
	struct list_head		deleting;
	/* Protect the deleting list */
	struct mutex			delete_mutex;

	bool	has_superblocks:1;
	bool	fail_last_dev:1;