Commit c0ec4ffc authored by Linus Torvalds
Browse files

Merge tag 'io_uring-5.11-2021-01-29' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "We got the cancelation story sorted now, so for all intents and
  purposes, this should be it for 5.11 outside of any potential little
  fixes that may come in. This contains:

   - task_work task state fixes (Hao, Pavel)

   - Cancelation fixes (me, Pavel)

   - Fix for an inflight req patch in this release (Pavel)

   - Fix for a lock deadlock issue (Pavel)"

* tag 'io_uring-5.11-2021-01-29' of git://git.kernel.dk/linux-block:
  io_uring: reinforce cancel on flush during exit
  io_uring: fix sqo ownership false positive warning
  io_uring: fix list corruption for splice file_get
  io_uring: fix flush cqring overflow list while TASK_INTERRUPTIBLE
  io_uring: fix wqe->lock/completion_lock deadlock
  io_uring: fix cancellation taking mutex while TASK_UNINTERRUPTIBLE
  io_uring: fix __io_uring_files_cancel() with TASK_UNINTERRUPTIBLE
  io_uring: only call io_cqring_ev_posted() if events were posted
  io_uring: if we see flush on exit, cancel related tasks
parents 8ef24c20 3a7efd1a
Loading
Loading
Loading
Loading
+53 −42
Original line number Diff line number Diff line
@@ -1026,6 +1026,7 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
			     const struct iovec *fast_iov,
			     struct iov_iter *iter, bool force);
static void io_req_drop_files(struct io_kiocb *req);
static void io_req_task_queue(struct io_kiocb *req);

static struct kmem_cache *req_cachep;

@@ -1069,8 +1070,12 @@ static bool io_match_task(struct io_kiocb *head,
{
	struct io_kiocb *req;

	if (task && head->task != task)
	if (task && head->task != task) {
		/* in terms of cancelation, always match if req task is dead */
		if (head->task->flags & PF_EXITING)
			return true;
		return false;
	}
	if (!files)
		return true;

@@ -1630,18 +1635,11 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
	do {
		struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
						struct io_defer_entry, list);
		struct io_kiocb *link;

		if (req_need_defer(de->req, de->seq))
			break;
		list_del_init(&de->list);
		/* punt-init is done before queueing for defer */
		link = __io_queue_async_work(de->req);
		if (link) {
			__io_queue_linked_timeout(link);
			/* drop submission reference */
			io_put_req_deferred(link, 1);
		}
		io_req_task_queue(de->req);
		kfree(de);
	} while (!list_empty(&ctx->defer_list));
}
@@ -1775,12 +1773,13 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
	struct io_kiocb *req, *tmp;
	struct io_uring_cqe *cqe;
	unsigned long flags;
	bool all_flushed;
	bool all_flushed, posted;
	LIST_HEAD(list);

	if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries)
		return false;

	posted = false;
	spin_lock_irqsave(&ctx->completion_lock, flags);
	list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) {
		if (!io_match_task(req, tsk, files))
@@ -1800,6 +1799,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
			WRITE_ONCE(ctx->rings->cq_overflow,
				   ctx->cached_cq_overflow);
		}
		posted = true;
	}

	all_flushed = list_empty(&ctx->cq_overflow_list);
@@ -1809,8 +1809,10 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
	}

	if (posted)
		io_commit_cqring(ctx);
	spin_unlock_irqrestore(&ctx->completion_lock, flags);
	if (posted)
		io_cqring_ev_posted(ctx);

	while (!list_empty(&list)) {
@@ -6458,7 +6460,8 @@ static struct file *io_file_get(struct io_submit_state *state,
		file = __io_file_get(state, fd);
	}

	if (file && file->f_op == &io_uring_fops) {
	if (file && file->f_op == &io_uring_fops &&
	    !(req->flags & REQ_F_INFLIGHT)) {
		io_req_init_async(req);
		req->flags |= REQ_F_INFLIGHT;

@@ -7266,14 +7269,18 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
						TASK_INTERRUPTIBLE);
		/* make sure we run task_work before checking for signals */
		ret = io_run_task_work_sig();
		if (ret > 0)
		if (ret > 0) {
			finish_wait(&ctx->wait, &iowq.wq);
			continue;
		}
		else if (ret < 0)
			break;
		if (io_should_wake(&iowq))
			break;
		if (test_bit(0, &ctx->cq_check_overflow))
		if (test_bit(0, &ctx->cq_check_overflow)) {
			finish_wait(&ctx->wait, &iowq.wq);
			continue;
		}
		if (uts) {
			timeout = schedule_timeout(timeout);
			if (timeout == 0) {
@@ -8865,30 +8872,31 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
	}
}

static void io_uring_cancel_files(struct io_ring_ctx *ctx,
static int io_uring_count_inflight(struct io_ring_ctx *ctx,
				   struct task_struct *task,
				   struct files_struct *files)
{
	while (!list_empty_careful(&ctx->inflight_list)) {
		struct io_task_cancel cancel = { .task = task, .files = files };
	struct io_kiocb *req;
		DEFINE_WAIT(wait);
		bool found = false;
	int cnt = 0;

	spin_lock_irq(&ctx->inflight_lock);
		list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
			if (!io_match_task(req, task, files))
				continue;
			found = true;
			break;
		}
		if (found)
			prepare_to_wait(&task->io_uring->wait, &wait,
					TASK_UNINTERRUPTIBLE);
	list_for_each_entry(req, &ctx->inflight_list, inflight_entry)
		cnt += io_match_task(req, task, files);
	spin_unlock_irq(&ctx->inflight_lock);
	return cnt;
}

static void io_uring_cancel_files(struct io_ring_ctx *ctx,
				  struct task_struct *task,
				  struct files_struct *files)
{
	while (!list_empty_careful(&ctx->inflight_list)) {
		struct io_task_cancel cancel = { .task = task, .files = files };
		DEFINE_WAIT(wait);
		int inflight;

		/* We need to keep going until we don't find a matching req */
		if (!found)
		inflight = io_uring_count_inflight(ctx, task, files);
		if (!inflight)
			break;

		io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
@@ -8897,6 +8905,10 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
		io_cqring_overflow_flush(ctx, true, task, files);
		/* cancellations _may_ trigger task work */
		io_run_task_work();

		prepare_to_wait(&task->io_uring->wait, &wait,
				TASK_UNINTERRUPTIBLE);
		if (inflight == io_uring_count_inflight(ctx, task, files))
			schedule();
		finish_wait(&task->io_uring->wait, &wait);
	}
@@ -8955,8 +8967,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
	struct task_struct *task = current;

	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
		/* for SQPOLL only sqo_task has task notes */
		WARN_ON_ONCE(ctx->sqo_task != current);
		io_disable_sqo_submit(ctx);
		task = ctx->sq_data->thread;
		atomic_inc(&task->io_uring->in_idle);
@@ -8966,10 +8976,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
	io_cancel_defer_files(ctx, task, files);
	io_cqring_overflow_flush(ctx, true, task, files);

	io_uring_cancel_files(ctx, task, files);
	if (!files)
		__io_uring_cancel_task_requests(ctx, task);
	else
		io_uring_cancel_files(ctx, task, files);

	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
		atomic_dec(&task->io_uring->in_idle);
@@ -9116,16 +9125,15 @@ void __io_uring_task_cancel(void)
		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);

		/*
		 * If we've seen completions, retry. This avoids a race where
		 * a completion comes in before we did prepare_to_wait().
		 * If we've seen completions, retry without waiting. This
		 * avoids a race where a completion comes in before we did
		 * prepare_to_wait().
		 */
		if (inflight != tctx_inflight(tctx))
			continue;
		if (inflight == tctx_inflight(tctx))
			schedule();
		finish_wait(&tctx->wait, &wait);
	} while (1);

	finish_wait(&tctx->wait, &wait);
	atomic_dec(&tctx->in_idle);

	io_uring_remove_task_files(tctx);
@@ -9136,6 +9144,9 @@ static int io_uring_flush(struct file *file, void *data)
	struct io_uring_task *tctx = current->io_uring;
	struct io_ring_ctx *ctx = file->private_data;

	if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
		io_uring_cancel_task_requests(ctx, NULL);

	if (!tctx)
		return 0;