Commit 85a90500 authored by Linus Torvalds
Browse files

Merge tag 'io_uring-5.14-2021-08-07' of git://git.kernel.dk/linux-block

Pull io_uring from Jens Axboe:
 "A few io-wq related fixes:

   - Fix potential nr_worker race and missing max_workers check from one
     path (Hao)

   - Fix race between worker exiting and new work queue (me)"

* tag 'io_uring-5.14-2021-08-07' of git://git.kernel.dk/linux-block:
  io-wq: fix lack of acct->nr_workers < acct->max_workers judgement
  io-wq: fix no lock protection of acct->nr_worker
  io-wq: fix race between worker exiting and activating free worker
parents 6bbf5914 21698274
Loading
Loading
Loading
Loading
+45 −26
Original line number Diff line number Diff line
@@ -130,6 +130,7 @@ struct io_cb_cancel_data {
};

static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
static void io_wqe_dec_running(struct io_worker *worker);

static bool io_worker_get(struct io_worker *worker)
{
@@ -168,26 +169,21 @@ static void io_worker_exit(struct io_worker *worker)
{
	struct io_wqe *wqe = worker->wqe;
	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
	unsigned flags;

	if (refcount_dec_and_test(&worker->ref))
		complete(&worker->ref_done);
	wait_for_completion(&worker->ref_done);

	preempt_disable();
	current->flags &= ~PF_IO_WORKER;
	flags = worker->flags;
	worker->flags = 0;
	if (flags & IO_WORKER_F_RUNNING)
		atomic_dec(&acct->nr_running);
	worker->flags = 0;
	preempt_enable();

	raw_spin_lock_irq(&wqe->lock);
	if (flags & IO_WORKER_F_FREE)
	if (worker->flags & IO_WORKER_F_FREE)
		hlist_nulls_del_rcu(&worker->nulls_node);
	list_del_rcu(&worker->all_list);
	acct->nr_workers--;
	preempt_disable();
	io_wqe_dec_running(worker);
	worker->flags = 0;
	current->flags &= ~PF_IO_WORKER;
	preempt_enable();
	raw_spin_unlock_irq(&wqe->lock);

	kfree_rcu(worker, rcu);
@@ -214,16 +210,20 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
	struct hlist_nulls_node *n;
	struct io_worker *worker;

	n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
	if (is_a_nulls(n))
		return false;

	worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
	if (io_worker_get(worker)) {
		wake_up_process(worker->task);
	/*
	 * Iterate free_list and see if we can find an idle worker to
	 * activate. If a given worker is on the free_list but in the process
	 * of exiting, keep trying.
	 */
	hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
		if (!io_worker_get(worker))
			continue;
		if (wake_up_process(worker->task)) {
			io_worker_release(worker);
			return true;
		}
		io_worker_release(worker);
	}

	return false;
}
@@ -247,9 +247,18 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
	ret = io_wqe_activate_free_worker(wqe);
	rcu_read_unlock();

	if (!ret && acct->nr_workers < acct->max_workers) {
	if (!ret) {
		bool do_create = false;

		raw_spin_lock_irq(&wqe->lock);
		if (acct->nr_workers < acct->max_workers) {
			atomic_inc(&acct->nr_running);
			atomic_inc(&wqe->wq->worker_refs);
			acct->nr_workers++;
			do_create = true;
		}
		raw_spin_unlock_irq(&wqe->lock);
		if (do_create)
			create_io_worker(wqe->wq, wqe, acct->index);
	}
}
@@ -271,9 +280,17 @@ static void create_worker_cb(struct callback_head *cb)
{
	struct create_worker_data *cwd;
	struct io_wq *wq;
	struct io_wqe *wqe;
	struct io_wqe_acct *acct;

	cwd = container_of(cb, struct create_worker_data, work);
	wq = cwd->wqe->wq;
	wqe = cwd->wqe;
	wq = wqe->wq;
	acct = &wqe->acct[cwd->index];
	raw_spin_lock_irq(&wqe->lock);
	if (acct->nr_workers < acct->max_workers)
		acct->nr_workers++;
	raw_spin_unlock_irq(&wqe->lock);
	create_io_worker(wq, cwd->wqe, cwd->index);
	kfree(cwd);
}
@@ -635,6 +652,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
		kfree(worker);
fail:
		atomic_dec(&acct->nr_running);
		raw_spin_lock_irq(&wqe->lock);
		acct->nr_workers--;
		raw_spin_unlock_irq(&wqe->lock);
		io_worker_ref_put(wq);
		return;
	}
@@ -650,9 +670,8 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
	worker->flags |= IO_WORKER_F_FREE;
	if (index == IO_WQ_ACCT_BOUND)
		worker->flags |= IO_WORKER_F_BOUND;
	if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
	if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND))
		worker->flags |= IO_WORKER_F_FIXED;
	acct->nr_workers++;
	raw_spin_unlock_irq(&wqe->lock);
	wake_up_new_task(tsk);
}