xfs: split the unaligned DIO write code out (caa89dbc) · Commits · jan.koester / Linux

fs/xfs/xfs_file.c

+85 −85

Original line number	Original line	Diff line number	Diff line
	@@ -499,117 +499,117 @@ static const struct iomap_dio_ops xfs_dio_write_ops = {
	};		};

	/*		/*
	* xfs_file_dio_write - handle direct IO writes		* Handle block aligned direct I/O writes
	*
	* Lock the inode appropriately to prepare for and issue a direct IO write.
	* By separating it from the buffered write path we remove all the tricky to
	* follow locking changes and looping.
	*
	* If there are cached pages or we're extending the file, we need IOLOCK_EXCL
	* until we're sure the bytes at the new EOF have been zeroed and/or the cached
	* pages are flushed out.
	*
	* In most cases the direct IO writes will be done holding IOLOCK_SHARED
	* allowing them to be done in parallel with reads and other direct IO writes.
	* However, if the IO is not aligned to filesystem blocks, the direct IO layer
	* needs to do sub-block zeroing and that requires serialisation against other
	* direct IOs to the same block. In this case we need to serialise the
	* submission of the unaligned IOs so that we don't get racing block zeroing in
	* the dio layer. To avoid the problem with aio, we also need to wait for
	* outstanding IOs to complete so that unwritten extent conversion is completed
	* before we try to map the overlapping block. This is currently implemented by
	* hitting it with a big hammer (i.e. inode_dio_wait()).
	*
	* Returns with locks held indicated by @iolock and errors indicated by
	* negative return values.
	*/		*/
	STATIC ssize_t		static noinline ssize_t
	xfs_file_dio_write(		xfs_file_dio_write_aligned(
			struct xfs_inode *ip,
	struct kiocb *iocb,		struct kiocb *iocb,
	struct iov_iter *from)		struct iov_iter *from)
	{		{
	struct file *file = iocb->ki_filp;		int iolock = XFS_IOLOCK_SHARED;
	struct address_space *mapping = file->f_mapping;		ssize_t ret;
	struct inode *inode = mapping->host;
	struct xfs_inode *ip = XFS_I(inode);
	struct xfs_mount *mp = ip->i_mount;
	ssize_t ret = 0;
	int unaligned_io = 0;
	int iolock;
	size_t count = iov_iter_count(from);
	struct xfs_buftarg *target = xfs_inode_buftarg(ip);

	/* DIO must be aligned to device logical sector size */		ret = xfs_ilock_iocb(iocb, iolock);
	if ((iocb->ki_pos \| count) & target->bt_logical_sectormask)		if (ret)
	return -EINVAL;		return ret;
			ret = xfs_file_write_checks(iocb, from, &iolock);
			if (ret)
			goto out_unlock;

	/*		/*
	* Don't take the exclusive iolock here unless the I/O is unaligned to		* We don't need to hold the IOLOCK exclusively across the IO, so demote
	* the file system block size. We don't need to consider the EOF		* the iolock back to shared if we had to take the exclusive lock in
	* extension case here because xfs_file_write_checks() will relock		* xfs_file_write_checks() for other reasons.
	* the inode as necessary for EOF zeroing cases and fill out the new
	* inode size as appropriate.
	*/		*/
	if ((iocb->ki_pos & mp->m_blockmask) \|\|		if (iolock == XFS_IOLOCK_EXCL) {
	((iocb->ki_pos + count) & mp->m_blockmask)) {		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
	unaligned_io = 1;		iolock = XFS_IOLOCK_SHARED;
			}
			trace_xfs_file_direct_write(iocb, from);
			ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
			&xfs_dio_write_ops, 0);
			out_unlock:
			if (iolock)
			xfs_iunlock(ip, iolock);
			return ret;
			}

	/*		/*
	* We can't properly handle unaligned direct I/O to reflink		* Handle block unaligned direct I/O writes
	* files yet, as we can't unshare a partial block.		*
			* In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
			* them to be done in parallel with reads and other direct I/O writes. However,
			* if the I/O is not aligned to filesystem blocks, the direct I/O layer may need
			* to do sub-block zeroing and that requires serialisation against other direct
			* I/O to the same block. In this case we need to serialise the submission of
			* the unaligned I/O so that we don't get racing block zeroing in the dio layer.
			*
			* This means that unaligned dio writes always block. There is no "nowait" fast
			* path in this code - if IOCB_NOWAIT is set we simply return -EAGAIN up front
			* and we don't have to worry about that anymore.
	*/		*/
	if (xfs_is_cow_inode(ip)) {		static noinline ssize_t
	trace_xfs_reflink_bounce_dio_write(iocb, from);		xfs_file_dio_write_unaligned(
	return -ENOTBLK;		struct xfs_inode *ip,
	}		struct kiocb *iocb,
	iolock = XFS_IOLOCK_EXCL;		struct iov_iter *from)
	} else {		{
	iolock = XFS_IOLOCK_SHARED;		int iolock = XFS_IOLOCK_EXCL;
	}		ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
	/* unaligned dio always waits, bail */		/* unaligned dio always waits, bail */
	if (unaligned_io)		if (iocb->ki_flags & IOCB_NOWAIT)
	return -EAGAIN;
	if (!xfs_ilock_nowait(ip, iolock))
	return -EAGAIN;		return -EAGAIN;
	} else {
	xfs_ilock(ip, iolock);		xfs_ilock(ip, iolock);

			/*
			* We can't properly handle unaligned direct I/O to reflink files yet,
			* as we can't unshare a partial block.
			*/
			if (xfs_is_cow_inode(ip)) {
			trace_xfs_reflink_bounce_dio_write(iocb, from);
			ret = -ENOTBLK;
			goto out_unlock;
	}		}

	ret = xfs_file_write_checks(iocb, from, &iolock);		ret = xfs_file_write_checks(iocb, from, &iolock);
	if (ret)		if (ret)
	goto out;		goto out_unlock;
	count = iov_iter_count(from);

	/*		/*
	* If we are doing unaligned IO, we can't allow any other overlapping IO		* If we are doing unaligned I/O, this must be the only I/O in-flight.
	* in-flight at the same time or we risk data corruption. Wait for all		* Otherwise we risk data corruption due to unwritten extent conversions
	* other IO to drain before we submit. If the IO is aligned, demote the		* from the AIO end_io handler. Wait for all other I/O to drain first.
	* iolock if we had to take the exclusive lock in
	* xfs_file_write_checks() for other reasons.
	*/		*/
	if (unaligned_io) {		inode_dio_wait(VFS_I(ip));
	inode_dio_wait(inode);
	} else if (iolock == XFS_IOLOCK_EXCL) {
	xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
	iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(iocb, from);		trace_xfs_file_direct_write(iocb, from);
	/*
	* If unaligned, this is the only IO in-flight. Wait on it before we
	* release the iolock to prevent subsequent overlapping IO.
	*/
	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,		ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
	&xfs_dio_write_ops,		&xfs_dio_write_ops, IOMAP_DIO_FORCE_WAIT);
	unaligned_io ? IOMAP_DIO_FORCE_WAIT : 0);		out_unlock:
	out:
	if (iolock)		if (iolock)
	xfs_iunlock(ip, iolock);		xfs_iunlock(ip, iolock);
	return ret;		return ret;
	}		}

			/*
			* Entry point for direct I/O writes: validate alignment, then dispatch.
			*
			* The inode is looked up from iocb->ki_filp. If the write position or the
			* byte count has any bit set under the buffer target's logical sector mask,
			* the write is rejected with -EINVAL. Otherwise, if the position or the
			* count has any bit set under the mount's block mask, the write is handed to
			* xfs_file_dio_write_unaligned(); all remaining writes are handed to
			* xfs_file_dio_write_aligned(). The helper's return value is passed back
			* to the caller unchanged.
			*/
			static ssize_t
			xfs_file_dio_write(
			struct kiocb *iocb,
			struct iov_iter *from)
			{
			struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
			struct xfs_buftarg *target = xfs_inode_buftarg(ip);
			size_t count = iov_iter_count(from);

			/* direct I/O must be aligned to device logical sector size */
			if ((iocb->ki_pos \| count) & target->bt_logical_sectormask)
			return -EINVAL;
			/* OR-ing pos and count tests both against the block mask in one go */
			if ((iocb->ki_pos \| count) & ip->i_mount->m_blockmask)
			return xfs_file_dio_write_unaligned(ip, iocb, from);
			return xfs_file_dio_write_aligned(ip, iocb, from);
			}

	static noinline ssize_t		static noinline ssize_t
	xfs_file_dax_write(		xfs_file_dax_write(
	struct kiocb *iocb,		struct kiocb *iocb,