Commit fc7cbcd4 authored by David Sterba
Browse files

Revert "btrfs: turn fs_roots_radix in btrfs_fs_info into an XArray"

This reverts commit 48b36a60.

Revert the xarray conversion: there's a potential sleep-inside-spinlock
problem [1] because calling xa_insert() under the spinlock can trigger a
GFP_NOFS allocation. The radix tree used the preloading mechanism to avoid
sleeping, but this is not available in xarray.

Conversion from spin lock to mutex is possible, but at the time of rc6 it
is riskier than a clean revert.

[1] https://lore.kernel.org/linux-btrfs/cover.1657097693.git.fdmanana@suse.com/



Reported-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent b3a3b025
Loading
Loading
Loading
Loading
+3 −5
Original line number Original line Diff line number Diff line
@@ -675,9 +675,8 @@ struct btrfs_fs_info {
	rwlock_t global_root_lock;
	rwlock_t global_root_lock;
	struct rb_root global_root_tree;
	struct rb_root global_root_tree;


	/* The xarray that holds all the FS roots */
	spinlock_t fs_roots_radix_lock;
	spinlock_t fs_roots_lock;
	struct radix_tree_root fs_roots_radix;
	struct xarray fs_roots;


	/* block group cache stuff */
	/* block group cache stuff */
	rwlock_t block_group_cache_lock;
	rwlock_t block_group_cache_lock;
@@ -1119,8 +1118,7 @@ enum {
	 */
	 */
	BTRFS_ROOT_SHAREABLE,
	BTRFS_ROOT_SHAREABLE,
	BTRFS_ROOT_TRACK_DIRTY,
	BTRFS_ROOT_TRACK_DIRTY,
	/* The root is tracked in fs_info::fs_roots */
	BTRFS_ROOT_IN_RADIX,
	BTRFS_ROOT_REGISTERED,
	BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
	BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
	BTRFS_ROOT_DEFRAG_RUNNING,
	BTRFS_ROOT_DEFRAG_RUNNING,
	BTRFS_ROOT_FORCE_COW,
	BTRFS_ROOT_FORCE_COW,
+97 −76
Original line number Original line Diff line number Diff line
@@ -5,6 +5,7 @@


#include <linux/fs.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/writeback.h>
#include <linux/workqueue.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/kthread.h>
@@ -1210,9 +1211,9 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
	btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
	btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
#ifdef CONFIG_BTRFS_DEBUG
#ifdef CONFIG_BTRFS_DEBUG
	INIT_LIST_HEAD(&root->leak_list);
	INIT_LIST_HEAD(&root->leak_list);
	spin_lock(&fs_info->fs_roots_lock);
	spin_lock(&fs_info->fs_roots_radix_lock);
	list_add_tail(&root->leak_list, &fs_info->allocated_roots);
	list_add_tail(&root->leak_list, &fs_info->allocated_roots);
	spin_unlock(&fs_info->fs_roots_lock);
	spin_unlock(&fs_info->fs_roots_radix_lock);
#endif
#endif
}
}


@@ -1659,11 +1660,12 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
{
{
	struct btrfs_root *root;
	struct btrfs_root *root;


	spin_lock(&fs_info->fs_roots_lock);
	spin_lock(&fs_info->fs_roots_radix_lock);
	root = xa_load(&fs_info->fs_roots, (unsigned long)root_id);
	root = radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)root_id);
	if (root)
	if (root)
		root = btrfs_grab_root(root);
		root = btrfs_grab_root(root);
	spin_unlock(&fs_info->fs_roots_lock);
	spin_unlock(&fs_info->fs_roots_radix_lock);
	return root;
	return root;
}
}


@@ -1705,14 +1707,20 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
{
{
	int ret;
	int ret;


	spin_lock(&fs_info->fs_roots_lock);
	ret = radix_tree_preload(GFP_NOFS);
	ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid,
	if (ret)
			root, GFP_NOFS);
		return ret;

	spin_lock(&fs_info->fs_roots_radix_lock);
	ret = radix_tree_insert(&fs_info->fs_roots_radix,
				(unsigned long)root->root_key.objectid,
				root);
	if (ret == 0) {
	if (ret == 0) {
		btrfs_grab_root(root);
		btrfs_grab_root(root);
		set_bit(BTRFS_ROOT_REGISTERED, &root->state);
		set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
	}
	}
	spin_unlock(&fs_info->fs_roots_lock);
	spin_unlock(&fs_info->fs_roots_radix_lock);
	radix_tree_preload_end();


	return ret;
	return ret;
}
}
@@ -2342,9 +2350,9 @@ void btrfs_put_root(struct btrfs_root *root)
		btrfs_drew_lock_destroy(&root->snapshot_lock);
		btrfs_drew_lock_destroy(&root->snapshot_lock);
		free_root_extent_buffers(root);
		free_root_extent_buffers(root);
#ifdef CONFIG_BTRFS_DEBUG
#ifdef CONFIG_BTRFS_DEBUG
		spin_lock(&root->fs_info->fs_roots_lock);
		spin_lock(&root->fs_info->fs_roots_radix_lock);
		list_del_init(&root->leak_list);
		list_del_init(&root->leak_list);
		spin_unlock(&root->fs_info->fs_roots_lock);
		spin_unlock(&root->fs_info->fs_roots_radix_lock);
#endif
#endif
		kfree(root);
		kfree(root);
	}
	}
@@ -2352,21 +2360,28 @@ void btrfs_put_root(struct btrfs_root *root)


void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
{
{
	struct btrfs_root *root;
	int ret;
	unsigned long index = 0;
	struct btrfs_root *gang[8];
	int i;


	while (!list_empty(&fs_info->dead_roots)) {
	while (!list_empty(&fs_info->dead_roots)) {
		root = list_entry(fs_info->dead_roots.next,
		gang[0] = list_entry(fs_info->dead_roots.next,
				     struct btrfs_root, root_list);
				     struct btrfs_root, root_list);
		list_del(&root->root_list);
		list_del(&gang[0]->root_list);


		if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
		if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
			btrfs_drop_and_free_fs_root(fs_info, root);
			btrfs_drop_and_free_fs_root(fs_info, gang[0]);
		btrfs_put_root(root);
		btrfs_put_root(gang[0]);
	}
	}


	xa_for_each(&fs_info->fs_roots, index, root) {
	while (1) {
		btrfs_drop_and_free_fs_root(fs_info, root);
		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					     (void **)gang, 0,
					     ARRAY_SIZE(gang));
		if (!ret)
			break;
		for (i = 0; i < ret; i++)
			btrfs_drop_and_free_fs_root(fs_info, gang[i]);
	}
	}
}
}


@@ -3134,7 +3149,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)


void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{
{
	xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC);
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
	xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
	xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
	INIT_LIST_HEAD(&fs_info->trans_list);
	INIT_LIST_HEAD(&fs_info->trans_list);
	INIT_LIST_HEAD(&fs_info->dead_roots);
	INIT_LIST_HEAD(&fs_info->dead_roots);
@@ -3143,7 +3158,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
	INIT_LIST_HEAD(&fs_info->caching_block_groups);
	INIT_LIST_HEAD(&fs_info->caching_block_groups);
	spin_lock_init(&fs_info->delalloc_root_lock);
	spin_lock_init(&fs_info->delalloc_root_lock);
	spin_lock_init(&fs_info->trans_lock);
	spin_lock_init(&fs_info->trans_lock);
	spin_lock_init(&fs_info->fs_roots_lock);
	spin_lock_init(&fs_info->fs_roots_radix_lock);
	spin_lock_init(&fs_info->delayed_iput_lock);
	spin_lock_init(&fs_info->delayed_iput_lock);
	spin_lock_init(&fs_info->defrag_inodes_lock);
	spin_lock_init(&fs_info->defrag_inodes_lock);
	spin_lock_init(&fs_info->super_lock);
	spin_lock_init(&fs_info->super_lock);
@@ -3374,7 +3389,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
	/*
	/*
	 * btrfs_find_orphan_roots() is responsible for finding all the dead
	 * btrfs_find_orphan_roots() is responsible for finding all the dead
	 * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
	 * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
	 * them into the fs_info->fs_roots. This must be done before
	 * them into the fs_info->fs_roots_radix tree. This must be done before
	 * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
	 * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
	 * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
	 * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
	 * item before the root's tree is deleted - this means that if we unmount
	 * item before the root's tree is deleted - this means that if we unmount
@@ -4498,11 +4513,12 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
{
{
	bool drop_ref = false;
	bool drop_ref = false;


	spin_lock(&fs_info->fs_roots_lock);
	spin_lock(&fs_info->fs_roots_radix_lock);
	xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid);
	radix_tree_delete(&fs_info->fs_roots_radix,
	if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state))
			  (unsigned long)root->root_key.objectid);
	if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
		drop_ref = true;
		drop_ref = true;
	spin_unlock(&fs_info->fs_roots_lock);
	spin_unlock(&fs_info->fs_roots_radix_lock);


	if (BTRFS_FS_ERROR(fs_info)) {
	if (BTRFS_FS_ERROR(fs_info)) {
		ASSERT(root->log_root == NULL);
		ASSERT(root->log_root == NULL);
@@ -4518,48 +4534,50 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,


int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
{
	struct btrfs_root *roots[8];
	u64 root_objectid = 0;
	unsigned long index = 0;
	struct btrfs_root *gang[8];
	int i;
	int i = 0;
	int err = 0;
	int err = 0;
	int grabbed;
	unsigned int ret = 0;


	while (1) {
	while (1) {
		struct btrfs_root *root;
		spin_lock(&fs_info->fs_roots_radix_lock);

		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
		spin_lock(&fs_info->fs_roots_lock);
					     (void **)gang, root_objectid,
		if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) {
					     ARRAY_SIZE(gang));
			spin_unlock(&fs_info->fs_roots_lock);
		if (!ret) {
			return err;
			spin_unlock(&fs_info->fs_roots_radix_lock);
			break;
		}
		}
		root_objectid = gang[ret - 1]->root_key.objectid + 1;


		grabbed = 0;
		for (i = 0; i < ret; i++) {
		xa_for_each_start(&fs_info->fs_roots, index, root, index) {
			/* Avoid to grab roots in dead_roots */
			/* Avoid grabbing roots in dead_roots */
			if (btrfs_root_refs(&gang[i]->root_item) == 0) {
			if (btrfs_root_refs(&root->root_item) > 0)
				gang[i] = NULL;
				roots[grabbed++] = btrfs_grab_root(root);
				continue;
			if (grabbed >= ARRAY_SIZE(roots))
			}
				break;
			/* grab all the search result for later use */
			gang[i] = btrfs_grab_root(gang[i]);
		}
		}
		spin_unlock(&fs_info->fs_roots_lock);
		spin_unlock(&fs_info->fs_roots_radix_lock);


		for (i = 0; i < grabbed; i++) {
		for (i = 0; i < ret; i++) {
			if (!roots[i])
			if (!gang[i])
				continue;
				continue;
			index = roots[i]->root_key.objectid;
			root_objectid = gang[i]->root_key.objectid;
			err = btrfs_orphan_cleanup(roots[i]);
			err = btrfs_orphan_cleanup(gang[i]);
			if (err)
			if (err)
				goto out;
				break;
			btrfs_put_root(roots[i]);
			btrfs_put_root(gang[i]);
		}
		}
		index++;
		root_objectid++;
	}
	}


out:
	/* release the uncleaned roots due to error */
	/* Release the roots that remain uncleaned due to error */
	for (; i < ret; i++) {
	for (; i < grabbed; i++) {
		if (gang[i])
		if (roots[i])
			btrfs_put_root(gang[i]);
			btrfs_put_root(roots[i]);
	}
	}
	return err;
	return err;
}
}
@@ -4878,28 +4896,31 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)


static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
{
{
	unsigned long index = 0;
	struct btrfs_root *gang[8];
	int grabbed = 0;
	u64 root_objectid = 0;
	struct btrfs_root *roots[8];
	int ret;

	spin_lock(&fs_info->fs_roots_radix_lock);
	while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					     (void **)gang, root_objectid,
					     ARRAY_SIZE(gang))) != 0) {
		int i;


	spin_lock(&fs_info->fs_roots_lock);
		for (i = 0; i < ret; i++)
	while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index,
			gang[i] = btrfs_grab_root(gang[i]);
				     ULONG_MAX, 8, XA_PRESENT))) {
		spin_unlock(&fs_info->fs_roots_radix_lock);
		for (int i = 0; i < grabbed; i++)
			roots[i] = btrfs_grab_root(roots[i]);
		spin_unlock(&fs_info->fs_roots_lock);


		for (int i = 0; i < grabbed; i++) {
		for (i = 0; i < ret; i++) {
			if (!roots[i])
			if (!gang[i])
				continue;
				continue;
			index = roots[i]->root_key.objectid;
			root_objectid = gang[i]->root_key.objectid;
			btrfs_free_log(NULL, roots[i]);
			btrfs_free_log(NULL, gang[i]);
			btrfs_put_root(roots[i]);
			btrfs_put_root(gang[i]);
		}
		}
		index++;
		root_objectid++;
		spin_lock(&fs_info->fs_roots_lock);
		spin_lock(&fs_info->fs_roots_radix_lock);
	}
	}
	spin_unlock(&fs_info->fs_roots_lock);
	spin_unlock(&fs_info->fs_roots_radix_lock);
	btrfs_free_log_root_tree(NULL, fs_info);
	btrfs_free_log_root_tree(NULL, fs_info);
}
}


+1 −1
Original line number Original line Diff line number Diff line
@@ -5829,7 +5829,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
	btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
	btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
	btrfs_qgroup_free_meta_all_pertrans(root);
	btrfs_qgroup_free_meta_all_pertrans(root);


	if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
	if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
		btrfs_add_dropped_root(trans, root);
		btrfs_add_dropped_root(trans, root);
	else
	else
		btrfs_put_root(root);
		btrfs_put_root(root);
+6 −7
Original line number Original line Diff line number Diff line
@@ -3578,7 +3578,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
	u64 last_objectid = 0;
	u64 last_objectid = 0;
	int ret = 0, nr_unlink = 0;
	int ret = 0, nr_unlink = 0;


	/* Bail out if the cleanup is already running. */
	if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
	if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
		return 0;
		return 0;


@@ -3661,17 +3660,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
			 *
			 *
			 * btrfs_find_orphan_roots() ran before us, which has
			 * btrfs_find_orphan_roots() ran before us, which has
			 * found all deleted roots and loaded them into
			 * found all deleted roots and loaded them into
			 * fs_info->fs_roots. So here we can find if an
			 * fs_info->fs_roots_radix. So here we can find if an
			 * orphan item corresponds to a deleted root by looking
			 * orphan item corresponds to a deleted root by looking
			 * up the root from that xarray.
			 * up the root from that radix tree.
			 */
			 */


			spin_lock(&fs_info->fs_roots_lock);
			spin_lock(&fs_info->fs_roots_radix_lock);
			dead_root = xa_load(&fs_info->fs_roots,
			dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
							 (unsigned long)found_key.objectid);
							 (unsigned long)found_key.objectid);
			if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
			if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
				is_dead_root = 1;
				is_dead_root = 1;
			spin_unlock(&fs_info->fs_roots_lock);
			spin_unlock(&fs_info->fs_roots_radix_lock);


			if (is_dead_root) {
			if (is_dead_root) {
				/* prevent this orphan from being found again */
				/* prevent this orphan from being found again */
+1 −1
Original line number Original line Diff line number Diff line
@@ -186,7 +186,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
	if (!root)
	if (!root)
		return;
		return;
	/* Will be freed by btrfs_free_fs_roots */
	/* Will be freed by btrfs_free_fs_roots */
	if (WARN_ON(test_bit(BTRFS_ROOT_REGISTERED, &root->state)))
	if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
		return;
		return;
	btrfs_global_root_delete(root);
	btrfs_global_root_delete(root);
	btrfs_put_root(root);
	btrfs_put_root(root);
Loading