Commit 7ac5360c authored by Christoph Hellwig, committed by Dan Williams
Browse files

dax: remove the copy_from_iter and copy_to_iter methods



These methods indirect the actual DAX read/write path.  In the end pmem
uses magic flush and mc safe variants and fuse and dcssblk use plain ones
while device mapper simply redirects to the underlying device.

Add set_dax_nocache() and set_dax_nomc() APIs to control which copy
routines are used.  This removes an indirect call from the read/write
fast path as well as a lot of boilerplate code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Vivek Goyal <vgoyal@redhat.com> [virtiofs]
Link: https://lore.kernel.org/r/20211215084508.435401-5-hch@lst.de


Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 30c6828a
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -1330,6 +1330,8 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
		goto err_alloc_dax;
		goto err_alloc_dax;
	}
	}
	set_dax_synchronous(dax_dev);
	set_dax_synchronous(dax_dev);
	set_dax_nocache(dax_dev);
	set_dax_nomc(dax_dev);


	/* a device_dax instance is dead while the driver is not attached */
	/* a device_dax instance is dead while the driver is not attached */
	kill_dax(dax_dev);
	kill_dax(dax_dev);
+32 −4
Original line number Original line Diff line number Diff line
@@ -105,6 +105,10 @@ enum dax_device_flags {
	DAXDEV_WRITE_CACHE,
	DAXDEV_WRITE_CACHE,
	/* flag to check if device supports synchronous flush */
	/* flag to check if device supports synchronous flush */
	DAXDEV_SYNC,
	DAXDEV_SYNC,
	/* do not leave the caches dirty after writes */
	DAXDEV_NOCACHE,
	/* handle CPU fetch exceptions during reads */
	DAXDEV_NOMC,
};
};


/**
/**
@@ -146,9 +150,15 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
	if (!dax_alive(dax_dev))
	if (!dax_alive(dax_dev))
		return 0;
		return 0;


	return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i);
	/*
	 * The userspace address for the memory copy has already been validated
	 * via access_ok() in vfs_write, so use the 'no check' version to bypass
	 * the HARDENED_USERCOPY overhead.
	 */
	if (test_bit(DAXDEV_NOCACHE, &dax_dev->flags))
		return _copy_from_iter_flushcache(addr, bytes, i);
	return _copy_from_iter(addr, bytes, i);
}
}
EXPORT_SYMBOL_GPL(dax_copy_from_iter);


size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i)
		size_t bytes, struct iov_iter *i)
@@ -156,9 +166,15 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
	if (!dax_alive(dax_dev))
	if (!dax_alive(dax_dev))
		return 0;
		return 0;


	return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i);
	/*
	 * The userspace address for the memory copy has already been validated
	 * via access_ok() in vfs_read, so use the 'no check' version to bypass
	 * the HARDENED_USERCOPY overhead.
	 */
	if (test_bit(DAXDEV_NOMC, &dax_dev->flags))
		return _copy_mc_to_iter(addr, bytes, i);
	return _copy_to_iter(addr, bytes, i);
}
}
EXPORT_SYMBOL_GPL(dax_copy_to_iter);


int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
			size_t nr_pages)
			size_t nr_pages)
@@ -220,6 +236,18 @@ void set_dax_synchronous(struct dax_device *dax_dev)
}
}
EXPORT_SYMBOL_GPL(set_dax_synchronous);
EXPORT_SYMBOL_GPL(set_dax_synchronous);


/**
 * set_dax_nocache() - set the DAXDEV_NOCACHE flag on a DAX device
 * @dax_dev: device whose flags word is updated
 *
 * Sets the DAXDEV_NOCACHE bit in @dax_dev->flags.  dax_copy_from_iter()
 * tests this bit and, when it is set, copies with
 * _copy_from_iter_flushcache() instead of _copy_from_iter().
 */
void set_dax_nocache(struct dax_device *dax_dev)
{
	set_bit(DAXDEV_NOCACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nocache);

/**
 * set_dax_nomc() - set the DAXDEV_NOMC flag on a DAX device
 * @dax_dev: device whose flags word is updated
 *
 * Sets the DAXDEV_NOMC bit in @dax_dev->flags.  dax_copy_to_iter() tests
 * this bit and, when it is set, copies with _copy_mc_to_iter() instead of
 * _copy_to_iter().
 */
void set_dax_nomc(struct dax_device *dax_dev)
{
	set_bit(DAXDEV_NOMC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nomc);

bool dax_alive(struct dax_device *dax_dev)
bool dax_alive(struct dax_device *dax_dev)
{
{
	lockdep_assert_held(&dax_srcu);
	lockdep_assert_held(&dax_srcu);
+0 −20
Original line number Original line Diff line number Diff line
@@ -180,22 +180,6 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}
}


/*
 * dm-linear .dax_copy_from_iter hook: look up the dax_device for @ti and
 * forward the copy to dax_copy_from_iter().
 *
 * linear_dax_pgoff() is handed &pgoff, so it may rewrite the page offset
 * before it is passed on (its body is not part of this hunk).
 *
 * Returns whatever dax_copy_from_iter() returns.
 */
static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);

	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

/*
 * dm-linear .dax_copy_to_iter hook: look up the dax_device for @ti and
 * forward the copy to dax_copy_to_iter().
 *
 * linear_dax_pgoff() is handed &pgoff, so it may rewrite the page offset
 * before it is passed on (its body is not part of this hunk).
 *
 * Returns whatever dax_copy_to_iter() returns.
 */
static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);

	return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
				      size_t nr_pages)
				      size_t nr_pages)
{
{
@@ -206,8 +190,6 @@ static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,


#else
#else
#define linear_dax_direct_access NULL
#define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL
#define linear_dax_copy_to_iter NULL
#define linear_dax_zero_page_range NULL
#define linear_dax_zero_page_range NULL
#endif
#endif


@@ -225,8 +207,6 @@ static struct target_type linear_target = {
	.prepare_ioctl = linear_prepare_ioctl,
	.prepare_ioctl = linear_prepare_ioctl,
	.iterate_devices = linear_iterate_devices,
	.iterate_devices = linear_iterate_devices,
	.direct_access = linear_dax_direct_access,
	.direct_access = linear_dax_direct_access,
	.dax_copy_from_iter = linear_dax_copy_from_iter,
	.dax_copy_to_iter = linear_dax_copy_to_iter,
	.dax_zero_page_range = linear_dax_zero_page_range,
	.dax_zero_page_range = linear_dax_zero_page_range,
};
};


+0 −80
Original line number Original line Diff line number Diff line
@@ -902,51 +902,6 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
}
}


#if IS_ENABLED(CONFIG_FS_DAX)
#if IS_ENABLED(CONFIG_FS_DAX)
/*
 * Queue a copy of a DAX write for the log-writes kthread.
 *
 * @lc:     log-writes target context
 * @sector: sector the write is aimed at; converted with
 *          bio_to_dev_sectors() before being stored in the block
 * @bytes:  number of bytes to capture from @i
 * @i:      iterator holding the data; reverted by @bytes after the copy so
 *          the caller can consume it again
 *
 * Allocates a pending_block plus a @bytes-sized data buffer, copies the
 * iterator contents into it, appends the block to lc->unflushed_blocks
 * under lc->blocks_lock and wakes lc->log_kthread.
 *
 * Returns 0 on success or when @bytes is 0, -ENOMEM if either allocation
 * fails, -EIO if copy_from_iter() returns 0.
 *
 * NOTE(review): only a zero return from copy_from_iter() is treated as an
 * error here; a short (non-zero) copy is not distinguished from a full one
 * and the iterator is still reverted by the full @bytes.
 */
static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
		   struct iov_iter *i)
{
	struct pending_block *block;

	/* nothing to log for an empty write */
	if (!bytes)
		return 0;

	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
	if (!block) {
		DMERR("Error allocating dax pending block");
		return -ENOMEM;
	}

	block->data = kzalloc(bytes, GFP_KERNEL);
	if (!block->data) {
		DMERR("Error allocating dax data space");
		kfree(block);
		return -ENOMEM;
	}

	/* write data provided via the iterator */
	if (!copy_from_iter(block->data, bytes, i)) {
		DMERR("Error copying dax data");
		kfree(block->data);
		kfree(block);
		return -EIO;
	}

	/* rewind the iterator so that the block driver can use it */
	iov_iter_revert(i, bytes);

	block->datalen = bytes;
	block->sector = bio_to_dev_sectors(lc, sector);
	/* round the length up to lc->sectorsize, then shift by lc->sectorshift */
	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;

	/* publish the block on the unflushed list and wake the log thread */
	atomic_inc(&lc->pending_blocks);
	spin_lock_irq(&lc->blocks_lock);
	list_add_tail(&block->list, &lc->unflushed_blocks);
	spin_unlock_irq(&lc->blocks_lock);
	wake_up_process(lc->log_kthread);

	return 0;
}

static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
		pgoff_t *pgoff)
		pgoff_t *pgoff)
{
{
@@ -964,37 +919,6 @@ static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}
}


/*
 * dm-log-writes .dax_copy_from_iter hook: optionally record the write via
 * log_dax(), then forward the copy to dax_copy_from_iter().
 *
 * Returns 0 if logging is enabled and log_dax() fails (the copy is then not
 * attempted); otherwise returns whatever dax_copy_from_iter() returns.
 */
static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
					    pgoff_t pgoff, void *addr, size_t bytes,
					    struct iov_iter *i)
{
	struct log_writes_c *lc = ti->private;
	/*
	 * sector is computed from the caller's pgoff before
	 * log_writes_dax_pgoff() is given &pgoff and a chance to change it.
	 */
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
	int err;

	/* Don't bother doing anything if logging has been disabled */
	if (!lc->logging_enabled)
		goto dax_copy;

	err = log_dax(lc, sector, bytes, i);
	if (err) {
		DMWARN("Error %d logging DAX write", err);
		/* report zero bytes copied rather than writing unlogged data */
		return 0;
	}
dax_copy:
	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

/*
 * dm-log-writes .dax_copy_to_iter hook: look up the dax_device for @ti and
 * forward the copy to dax_copy_to_iter().  Nothing is logged on this path.
 *
 * log_writes_dax_pgoff() is handed &pgoff, so it may rewrite the page
 * offset before it is passed on.
 *
 * Returns whatever dax_copy_to_iter() returns.
 */
static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
					  pgoff_t pgoff, void *addr, size_t bytes,
					  struct iov_iter *i)
{
	struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);

	return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
					  size_t nr_pages)
					  size_t nr_pages)
{
{
@@ -1005,8 +929,6 @@ static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,


#else
#else
#define log_writes_dax_direct_access NULL
#define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
#define log_writes_dax_copy_to_iter NULL
#define log_writes_dax_zero_page_range NULL
#define log_writes_dax_zero_page_range NULL
#endif
#endif


@@ -1024,8 +946,6 @@ static struct target_type log_writes_target = {
	.iterate_devices = log_writes_iterate_devices,
	.iterate_devices = log_writes_iterate_devices,
	.io_hints = log_writes_io_hints,
	.io_hints = log_writes_io_hints,
	.direct_access = log_writes_dax_direct_access,
	.direct_access = log_writes_dax_direct_access,
	.dax_copy_from_iter = log_writes_dax_copy_from_iter,
	.dax_copy_to_iter = log_writes_dax_copy_to_iter,
	.dax_zero_page_range = log_writes_dax_zero_page_range,
	.dax_zero_page_range = log_writes_dax_zero_page_range,
};
};


+0 −20
Original line number Original line Diff line number Diff line
@@ -324,22 +324,6 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}
}


/*
 * dm-stripe .dax_copy_from_iter hook: look up the dax_device for @ti and
 * forward the copy to dax_copy_from_iter().
 *
 * stripe_dax_pgoff() is handed &pgoff, so it may rewrite the page offset
 * before it is passed on (its body is not part of this hunk).
 *
 * Returns whatever dax_copy_from_iter() returns.
 */
static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

/*
 * dm-stripe .dax_copy_to_iter hook: look up the dax_device for @ti and
 * forward the copy to dax_copy_to_iter().
 *
 * stripe_dax_pgoff() is handed &pgoff, so it may rewrite the page offset
 * before it is passed on (its body is not part of this hunk).
 *
 * Returns whatever dax_copy_to_iter() returns.
 */
static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

	return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
				      size_t nr_pages)
				      size_t nr_pages)
{
{
@@ -350,8 +334,6 @@ static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,


#else
#else
#define stripe_dax_direct_access NULL
#define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
#define stripe_dax_copy_to_iter NULL
#define stripe_dax_zero_page_range NULL
#define stripe_dax_zero_page_range NULL
#endif
#endif


@@ -488,8 +470,6 @@ static struct target_type stripe_target = {
	.iterate_devices = stripe_iterate_devices,
	.iterate_devices = stripe_iterate_devices,
	.io_hints = stripe_io_hints,
	.io_hints = stripe_io_hints,
	.direct_access = stripe_dax_direct_access,
	.direct_access = stripe_dax_direct_access,
	.dax_copy_from_iter = stripe_dax_copy_from_iter,
	.dax_copy_to_iter = stripe_dax_copy_to_iter,
	.dax_zero_page_range = stripe_dax_zero_page_range,
	.dax_zero_page_range = stripe_dax_zero_page_range,
};
};


Loading