Commit cce5fe5e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-6.3/io_uring-2023-02-16' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:

 - Cleanup series making the async prep and handling of
   REQ_F_FORCE_ASYNC easier to follow and verify (Dylan)

 - Enable specifying specific flags for OP_MSG_RING (Breno)

 - Enable use of KASAN with the internal request cache (Breno)

 - Split the opcode definition structs into a hot and cold part (Breno)

 - OP_MSG_RING fixes (Pavel, me)

 - Fix an issue with IOPOLL cancelation and PREEMPT_NONE (me)

 - Handle TIF_NOTIFY_RESUME for the io-wq threads that never return to
   userspace (me)

 - Add support for using io_uring_register() with a registered ring fd
   (Josh)

 - Improve handling of poll on the ring fd (Pavel)

 - Series improving the task_work handling (Pavel)

 - Misc cleanups, fixes, improvements (Dmitrii, Quanfa, Richard, Pavel,
   me)

* tag 'for-6.3/io_uring-2023-02-16' of git://git.kernel.dk/linux: (51 commits)
  io_uring: Support calling io_uring_register with a registered ring fd
  io_uring,audit: don't log IORING_OP_MADVISE
  io_uring: mark task TASK_RUNNING before handling resume/task work
  io_uring: always go async for unsupported open flags
  io_uring: always go async for unsupported fadvise flags
  io_uring: for requests that require async, force it
  io_uring: if a linked request has REQ_F_FORCE_ASYNC then run it async
  io_uring: add reschedule point to handle_tw_list()
  io_uring: add a conditional reschedule to the IOPOLL cancelation loop
  io_uring: return normal tw run linking optimisation
  io_uring: refactor tctx_task_work
  io_uring: refactor io_put_task helpers
  io_uring: refactor req allocation
  io_uring: improve io_get_sqe
  io_uring: kill outdated comment about overflow flush
  io_uring: use user visible tail in io_uring_poll()
  io_uring: pass in io_issue_def to io_assign_file()
  io_uring: Enable KASAN for request cache
  io_uring: handle TIF_NOTIFY_RESUME when checking for task_work
  io_uring/msg-ring: ensure flags passing works for task_work completions
  ...
parents eca3a04f 7d3fd88d
Loading
Loading
Loading
Loading
+12 −9
Original line number Diff line number Diff line
@@ -195,21 +195,23 @@ struct io_alloc_cache {
struct io_ring_ctx {
	/* const or read-mostly hot data */
	struct {
		struct percpu_ref	refs;

		struct io_rings		*rings;
		unsigned int		flags;
		enum task_work_notify_mode	notify_method;
		unsigned int		compat: 1;
		unsigned int		drain_next: 1;
		unsigned int		restricted: 1;
		unsigned int		off_timeout_used: 1;
		unsigned int		drain_active: 1;
		unsigned int		drain_disabled: 1;
		unsigned int		has_evfd: 1;
		unsigned int		syscall_iopoll: 1;
		/* all CQEs should be posted only by the submitter task */
		unsigned int		task_complete: 1;
		unsigned int		syscall_iopoll: 1;
		unsigned int		poll_activated: 1;
		unsigned int		drain_disabled: 1;
		unsigned int		compat: 1;

		enum task_work_notify_mode	notify_method;
		struct io_rings			*rings;
		struct task_struct		*submitter_task;
		struct percpu_ref		refs;
	} ____cacheline_aligned_in_smp;

	/* submission data */
@@ -293,6 +295,7 @@ struct io_ring_ctx {
		spinlock_t		completion_lock;

		bool			poll_multi_queue;
		bool			cq_waiting;

		/*
		 * ->iopoll_list is protected by the ctx->uring_lock for
@@ -318,9 +321,8 @@ struct io_ring_ctx {
	} ____cacheline_aligned_in_smp;

	/* Keep this last, we don't need it for the fast path */

	struct wait_queue_head		poll_wq;
	struct io_restriction		restrictions;
	struct task_struct		*submitter_task;

	/* slow path rsrc auxiliary data, used by update/register */
	struct io_rsrc_node		*rsrc_backup_node;
@@ -357,6 +359,7 @@ struct io_ring_ctx {
	u32				iowq_limits[2];
	bool				iowq_limits_set;

	struct callback_head		poll_wq_task_work;
	struct list_head		defer_list;
	unsigned			sq_thread_idle;
	/* protected by ->completion_lock */
+7 −1
Original line number Diff line number Diff line
@@ -347,6 +347,8 @@ enum {
 *				applicable for IORING_MSG_DATA, obviously.
 */
#define IORING_MSG_RING_CQE_SKIP	(1U << 0)
/* Pass through the flags from sqe->file_index to cqe->flags */
#define IORING_MSG_RING_FLAGS_PASS	(1U << 1)

/*
 * IO completion data structure (Completion Queue Entry)
@@ -470,6 +472,7 @@ struct io_uring_params {
#define IORING_FEAT_RSRC_TAGS		(1U << 10)
#define IORING_FEAT_CQE_SKIP		(1U << 11)
#define IORING_FEAT_LINKED_FILE		(1U << 12)
#define IORING_FEAT_REG_REG_RING	(1U << 13)

/*
 * io_uring_register(2) opcodes and arguments
@@ -517,7 +520,10 @@ enum {
	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,

	/* this goes last */
	IORING_REGISTER_LAST
	IORING_REGISTER_LAST,

	/* flag added to the opcode to use a registered ring fd */
	IORING_REGISTER_USE_REGISTERED_RING	= 1U << 31
};

/* io-wq worker categories */
+17 −12
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	ma->addr = READ_ONCE(sqe->addr);
	ma->len = READ_ONCE(sqe->len);
	ma->advice = READ_ONCE(sqe->fadvise_advice);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
#else
	return -EOPNOTSUPP;
@@ -51,8 +52,7 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
	struct io_madvise *ma = io_kiocb_to_cmd(req, struct io_madvise);
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
	io_req_set_res(req, ret, 0);
@@ -62,6 +62,18 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
#endif
}

/*
 * Tell whether an fadvise request has to be forced async, based solely on
 * the advice value stored in @fa.
 *
 * Returns false for POSIX_FADV_NORMAL, POSIX_FADV_RANDOM and
 * POSIX_FADV_SEQUENTIAL; returns true for every other advice value.
 */
static bool io_fadvise_force_async(struct io_fadvise *fa)
{
	const bool inline_ok = fa->advice == POSIX_FADV_NORMAL ||
			       fa->advice == POSIX_FADV_RANDOM ||
			       fa->advice == POSIX_FADV_SEQUENTIAL;

	return !inline_ok;
}

int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_fadvise *fa = io_kiocb_to_cmd(req, struct io_fadvise);
@@ -72,6 +84,8 @@ int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	fa->offset = READ_ONCE(sqe->off);
	fa->len = READ_ONCE(sqe->len);
	fa->advice = READ_ONCE(sqe->fadvise_advice);
	if (io_fadvise_force_async(fa))
		req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

@@ -80,16 +94,7 @@ int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
	struct io_fadvise *fa = io_kiocb_to_cmd(req, struct io_fadvise);
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK) {
		switch (fa->advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
			break;
		default:
			return -EAGAIN;
		}
	}
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK && io_fadvise_force_async(fa));

	ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
	if (ret < 0)
+10 −10
Original line number Diff line number Diff line
@@ -74,6 +74,7 @@ int io_renameat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	}

	req->flags |= REQ_F_NEED_CLEANUP;
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

@@ -82,8 +83,7 @@ int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
	struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename);
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd,
				ren->newpath, ren->flags);
@@ -123,6 +123,7 @@ int io_unlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
		return PTR_ERR(un->filename);

	req->flags |= REQ_F_NEED_CLEANUP;
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

@@ -131,8 +132,7 @@ int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
	struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink);
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	if (un->flags & AT_REMOVEDIR)
		ret = do_rmdir(un->dfd, un->filename);
@@ -170,6 +170,7 @@ int io_mkdirat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
		return PTR_ERR(mkd->filename);

	req->flags |= REQ_F_NEED_CLEANUP;
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

@@ -178,8 +179,7 @@ int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
	struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir);
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);

@@ -220,6 +220,7 @@ int io_symlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	}

	req->flags |= REQ_F_NEED_CLEANUP;
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

@@ -228,8 +229,7 @@ int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
	struct io_link *sl = io_kiocb_to_cmd(req, struct io_link);
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);

@@ -265,6 +265,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	}

	req->flags |= REQ_F_NEED_CLEANUP;
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

@@ -273,8 +274,7 @@ int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
	struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link);
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;
	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd,
				lnk->newpath, lnk->flags);
+311 −163

File changed.

Preview size limit exceeded, changes collapsed.

Loading