Commit 725737e7 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'statx-dioalign-for-linus' of...

Merge tag 'statx-dioalign-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux

Pull STATX_DIOALIGN support from Eric Biggers:
 "Make statx() support reporting direct I/O (DIO) alignment information.

  This provides a generic interface for userspace programs to determine
  whether a file supports DIO, and if so with what alignment
  restrictions. Specifically, STATX_DIOALIGN works on block devices, and
  on regular files when their containing filesystem has implemented
  support.

  An interface like this has been requested for years, since the
  conditions for when DIO is supported in Linux have gotten increasingly
  complex over time. Today, DIO support and alignment requirements can
  be affected by various filesystem features such as multi-device
  support, data journalling, inline data, encryption, verity,
  compression, checkpoint disabling, log-structured mode, etc.

  Further complicating things, Linux v6.0 relaxed the traditional rule
  of DIO needing to be aligned to the block device's logical block size;
  now user buffers (but not file offsets) only need to be aligned to the
  DMA alignment.

  The approach of uplifting the XFS specific ioctl XFS_IOC_DIOINFO was
  discarded in favor of creating a clean new interface with statx().

  For more information, see the individual commits and the man page
  update[1]"

Link: https://lore.kernel.org/r/20220722074229.148925-1-ebiggers@kernel.org [1]

* tag 'statx-dioalign-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  xfs: support STATX_DIOALIGN
  f2fs: support STATX_DIOALIGN
  f2fs: simplify f2fs_force_buffered_io()
  f2fs: move f2fs_force_buffered_io() into file.c
  ext4: support STATX_DIOALIGN
  fscrypt: change fscrypt_dio_supported() to prepare for STATX_DIOALIGN
  vfs: support STATX_DIOALIGN on block devices
  statx: add direct I/O alignment information
parents 5779aa2d 61a223df
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/namei.h>
#include <linux/part_stat.h>
#include <linux/uaccess.h>
#include <linux/stat.h>
#include "../fs/internal.h"
#include "blk.h"

@@ -1069,3 +1070,25 @@ void sync_bdevs(bool wait)
	spin_unlock(&blockdev_superblock->s_inode_list_lock);
	iput(old_inode);
}

/*
 * Handle STATX_DIOALIGN for block devices.
 *
 * Note that the inode passed to this is the inode of a block device node file,
 * not the block device's internal inode.  Therefore it is *not* valid to use
 * I_BDEV() here; the block device has to be looked up by i_rdev instead.
 */
void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
{
	struct block_device *bdev;

	bdev = blkdev_get_no_open(inode->i_rdev);
	if (!bdev)
		return;

	stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
	stat->dio_offset_align = bdev_logical_block_size(bdev);
	stat->result_mask |= STATX_DIOALIGN;

	blkdev_put_no_open(bdev);
}
+24 −25
Original line number Diff line number Diff line
@@ -396,46 +396,45 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio,
EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh);

/**
 * fscrypt_dio_supported() - check whether a DIO (direct I/O) request is
 *			     supported as far as encryption is concerned
 * @iocb: the file and position the I/O is targeting
 * @iter: the I/O data segment(s)
 * fscrypt_dio_supported() - check whether DIO (direct I/O) is supported on an
 *			     inode, as far as encryption is concerned
 * @inode: the inode in question
 *
 * Return: %true if there are no encryption constraints that prevent DIO from
 *	   being supported; %false if DIO is unsupported.  (Note that in the
 *	   %true case, the filesystem might have other, non-encryption-related
 *	   constraints that prevent DIO from actually being supported.)
 *	   constraints that prevent DIO from actually being supported.  Also, on
 *	   encrypted files the filesystem is still responsible for only allowing
 *	   DIO when requests are filesystem-block-aligned.)
 */
bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
bool fscrypt_dio_supported(struct inode *inode)
{
	const struct inode *inode = file_inode(iocb->ki_filp);
	const unsigned int blocksize = i_blocksize(inode);
	int err;

	/* If the file is unencrypted, no veto from us. */
	if (!fscrypt_needs_contents_encryption(inode))
		return true;

	/* We only support DIO with inline crypto, not fs-layer crypto. */
	if (!fscrypt_inode_uses_inline_crypto(inode))
		return false;

	/*
	 * Since the granularity of encryption is filesystem blocks, the file
	 * position and total I/O length must be aligned to the filesystem block
	 * size -- not just to the block device's logical block size as is
	 * traditionally the case for DIO on many filesystems.
	 * We only support DIO with inline crypto, not fs-layer crypto.
	 *
	 * We require that the user-provided memory buffers be filesystem block
	 * aligned too.  It is simpler to have a single alignment value required
	 * for all properties of the I/O, as is normally the case for DIO.
	 * Also, allowing less aligned buffers would imply that data units could
	 * cross bvecs, which would greatly complicate the I/O stack, which
	 * assumes that bios can be split at any bvec boundary.
	 * To determine whether the inode is using inline crypto, we have to set
	 * up the key if it wasn't already done.  This is because in the current
	 * design of fscrypt, the decision of whether to use inline crypto or
	 * not isn't made until the inode's encryption key is being set up.  In
	 * the DIO read/write case, the key will always be set up already, since
	 * the file will be open.  But in the case of statx(), the key might not
	 * be set up yet, as the file might not have been opened yet.
	 */
	err = fscrypt_require_key(inode);
	if (err) {
		/*
		 * Key unavailable or couldn't be set up.  This edge case isn't
		 * worth worrying about; just report that DIO is unsupported.
		 */
	if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
		return false;

	return true;
	}
	return fscrypt_inode_uses_inline_crypto(inode);
}
EXPORT_SYMBOL_GPL(fscrypt_dio_supported);

+1 −0
Original line number Diff line number Diff line
@@ -2977,6 +2977,7 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
extern int  ext4_write_inode(struct inode *, struct writeback_control *);
extern int  ext4_setattr(struct user_namespace *, struct dentry *,
			 struct iattr *);
extern u32  ext4_dio_alignment(struct inode *inode);
extern int  ext4_getattr(struct user_namespace *, const struct path *,
			 struct kstat *, u32, unsigned int);
extern void ext4_evict_inode(struct inode *);
+26 −11
Original line number Diff line number Diff line
@@ -36,19 +36,34 @@
#include "acl.h"
#include "truncate.h"

static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
/*
 * Returns %true if the given DIO request should be attempted with DIO, or
 * %false if it should fall back to buffered I/O.
 *
 * DIO isn't well specified; when it's unsupported (either due to the request
 * being misaligned, or due to the file not supporting DIO at all), filesystems
 * either fall back to buffered I/O or return EINVAL.  For files that don't use
 * any special features like encryption or verity, ext4 has traditionally
 * returned EINVAL for misaligned DIO.  iomap_dio_rw() uses this convention too.
 * In this case, we should attempt the DIO, *not* fall back to buffered I/O.
 *
 * In contrast, in cases where DIO is unsupported due to ext4 features, ext4
 * traditionally falls back to buffered I/O.
 *
 * This function implements the traditional ext4 behavior in all these cases.
 */
static bool ext4_should_use_dio(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	u32 dio_align = ext4_dio_alignment(inode);

	if (!fscrypt_dio_supported(iocb, iter))
		return false;
	if (fsverity_active(inode))
		return false;
	if (ext4_should_journal_data(inode))
		return false;
	if (ext4_has_inline_data(inode))
	if (dio_align == 0)
		return false;

	if (dio_align == 1)
		return true;

	return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align);
}

static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -63,7 +78,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
		inode_lock_shared(inode);
	}

	if (!ext4_dio_supported(iocb, to)) {
	if (!ext4_should_use_dio(iocb, to)) {
		inode_unlock_shared(inode);
		/*
		 * Fallback to buffered I/O if the operation being performed on
@@ -511,7 +526,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
	}

	/* Fallback to buffered I/O if the inode does not support direct I/O. */
	if (!ext4_dio_supported(iocb, from)) {
	if (!ext4_should_use_dio(iocb, from)) {
		if (ilock_shared)
			inode_unlock_shared(inode);
		else
+37 −0
Original line number Diff line number Diff line
@@ -5550,6 +5550,22 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
	return error;
}

u32 ext4_dio_alignment(struct inode *inode)
{
	if (fsverity_active(inode))
		return 0;
	if (ext4_should_journal_data(inode))
		return 0;
	if (ext4_has_inline_data(inode))
		return 0;
	if (IS_ENCRYPTED(inode)) {
		if (!fscrypt_dio_supported(inode))
			return 0;
		return i_blocksize(inode);
	}
	return 1; /* use the iomap defaults */
}

int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
		 struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
@@ -5565,6 +5581,27 @@ int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
		stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
	}

	/*
	 * Return the DIO alignment restrictions if requested.  We only return
	 * this information when requested, since on encrypted files it might
	 * take a fair bit of work to get if the file wasn't opened recently.
	 */
	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
		u32 dio_align = ext4_dio_alignment(inode);

		stat->result_mask |= STATX_DIOALIGN;
		if (dio_align == 1) {
			struct block_device *bdev = inode->i_sb->s_bdev;

			/* iomap defaults */
			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
			stat->dio_offset_align = bdev_logical_block_size(bdev);
		} else {
			stat->dio_mem_align = dio_align;
			stat->dio_offset_align = dio_align;
		}
	}

	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
	if (flags & EXT4_APPEND_FL)
		stat->attributes |= STATX_ATTR_APPEND;
Loading