Commit 9dc00b25 authored by Barry Song, committed by Christoph Hellwig
Browse files

dma-mapping: benchmark: pretend DMA is transmitting



In a real DMA mapping use case, after dma_map is done, data will be
transmitted. Thus, in a multi-threaded user scenario, IOMMU contention
should not be that severe. For example, if users enable multiple
threads to send network packets through a 1G/10G/100Gbps NIC, usually
the steps will be: map -> transmission -> unmap.  The transmission delay
reduces the contention on the IOMMU.

Here a delay is added to simulate the transmission between map and unmap
so that the tested result can be more accurate for TX and simple RX.
A typical TX transmission for a NIC would be: map -> TX -> unmap,
since the socket buffers come from the OS. A simple RX model, e.g. a disk
driver, is also map -> RX -> unmap, but the real RX model in a NIC could be
more complicated, considering that packets can arrive spontaneously and many
drivers use a pool of pre-mapped buffers. This is on the TBD list.

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 9f5f8ec5
Loading
Loading
Loading
Loading
+11 −1
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
#define DMA_MAP_MAX_THREADS	1024
#define DMA_MAP_MAX_SECONDS	300
#define DMA_MAP_MAX_TRANS_DELAY	(10 * NSEC_PER_MSEC)

#define DMA_MAP_BIDIRECTIONAL	0
#define DMA_MAP_TO_DEVICE	1
@@ -36,7 +37,8 @@ struct map_benchmark {
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u8 expansion[84];	/* For future use */
	__u32 dma_trans_ns; /* time for DMA transmission in ns */
	__u8 expansion[80];	/* For future use */
};

struct map_benchmark_data {
@@ -87,6 +89,9 @@ static int map_benchmark_thread(void *data)
		map_etime = ktime_get();
		map_delta = ktime_sub(map_etime, map_stime);

		/* Pretend DMA is transmitting */
		ndelay(map->bparam.dma_trans_ns);

		unmap_stime = ktime_get();
		dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
		unmap_etime = ktime_get();
@@ -218,6 +223,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
			return -EINVAL;
		}

		if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
			pr_err("invalid transmission delay\n");
			return -EINVAL;
		}

		if (map->bparam.node != NUMA_NO_NODE &&
		    !node_possible(map->bparam.node)) {
			pr_err("invalid numa node\n");
+18 −3
Original line number Diff line number Diff line
@@ -12,9 +12,12 @@
#include <sys/mman.h>
#include <linux/types.h>

#define NSEC_PER_MSEC	1000000L

#define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
#define DMA_MAP_MAX_THREADS	1024
#define DMA_MAP_MAX_SECONDS     300
#define DMA_MAP_MAX_TRANS_DELAY	(10 * NSEC_PER_MSEC)

#define DMA_MAP_BIDIRECTIONAL	0
#define DMA_MAP_TO_DEVICE	1
@@ -36,7 +39,8 @@ struct map_benchmark {
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u8 expansion[84];	/* For future use */
	__u32 dma_trans_ns; /* time for DMA transmission in ns */
	__u8 expansion[80];	/* For future use */
};

int main(int argc, char **argv)
@@ -46,12 +50,12 @@ int main(int argc, char **argv)
	/* default single thread, run 20 seconds on NUMA_NO_NODE */
	int threads = 1, seconds = 20, node = -1;
	/* default dma mask 32bit, bidirectional DMA */
	int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
	int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;

	int cmd = DMA_MAP_BENCHMARK;
	char *p;

	while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
	while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
		switch (opt) {
		case 't':
			threads = atoi(optarg);
@@ -68,6 +72,9 @@ int main(int argc, char **argv)
		case 'd':
			dir = atoi(optarg);
			break;
		case 'x':
			xdelay = atoi(optarg);
			break;
		default:
			return -1;
		}
@@ -85,6 +92,12 @@ int main(int argc, char **argv)
		exit(1);
	}

	if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
		fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
			DMA_MAP_MAX_TRANS_DELAY);
		exit(1);
	}

	/* suppose the minimum DMA zone is 1MB in the world */
	if (bits < 20 || bits > 64) {
		fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
@@ -109,6 +122,8 @@ int main(int argc, char **argv)
	map.node = node;
	map.dma_bits = bits;
	map.dma_dir = dir;
	map.dma_trans_ns = xdelay;

	if (ioctl(fd, cmd, &map)) {
		perror("ioctl");
		exit(1);