Commit b3ae4755 authored by Mike Marshall
Browse files

Orangefs: implement .write_iter



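Until now, orangefs_devreq_write_iter has just been a wrapper for
the old-fashioned orangefs_devreq_writev: Linux would call
.write_iter with "struct kiocb *iocb" and "struct iov_iter *iter",
and .write_iter would just do:

        return orangefs_devreq_writev(iocb->ki_filp,
                                      iter->iov,
                                      iter->nr_segs,
                                      &iocb->ki_pos);

This commit folds that logic into .write_iter itself, which now
pulls the head, the downcall and the optional trailer straight out
of the iov_iter with copy_from_iter.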

Signed-off-by: Mike Marshall <hubcap@omnibond.com>
parent 85096169
Loading
Loading
Loading
Loading
+199 −263
Original line number Diff line number Diff line
@@ -245,202 +245,156 @@ static ssize_t orangefs_devreq_read(struct file *file,
}

/*
 * Function for writev() callers into the device. Readdir related
 * operations have an extra iovec containing info about objects
 * contained in directories.
 * Function for writev() callers into the device.
 *
 * Userspace should have written:
 *  - __u32 version
 *  - __u32 magic
 *  - __u64 tag
 *  - struct orangefs_downcall_s
 *  - trailer buffer (in the case of READDIR operations)
 */
static ssize_t orangefs_devreq_writev(struct file *file,
				   const struct iovec *iov,
				   size_t count,
				   loff_t *offset)
static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
				      struct iov_iter *iter)
{
	ssize_t ret;
	struct orangefs_kernel_op_s *op = NULL;
	void *buffer = NULL;
	void *ptr = NULL;
	unsigned long i = 0;
	int num_remaining = MAX_DEV_REQ_DOWNSIZE;
	int ret = 0;
	/* num elements in iovec without trailer */
	int notrailer_count = 4;
	/*
	 * If there's a trailer, its iov index will be equal to
	 * notrailer_count.
	 */
	int trailer_index = notrailer_count;
	int payload_size = 0;
	int returned_downcall_size = 0;
	__s32 magic = 0;
	__s32 proto_ver = 0;
	__u64 tag = 0;
	ssize_t total_returned_size = 0;
	struct {
		__u32 version;
		__u32 magic;
		__u64 tag;
	} head;
	int total = ret = iov_iter_count(iter);
	int n;
	int downcall_size = sizeof(struct orangefs_downcall_s);
	int head_size = sizeof(head);

	gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n",
		     __func__,
		     total,
		     ret);

	/*
	 * There will always be at least notrailer_count iovecs, and
	 * when there's a trailer, one more than notrailer_count. Check
	 * count's sanity.
	 */
	if (count != notrailer_count && count != (notrailer_count + 1)) {
		gossip_err("%s: count:%zu: notrailer_count :%d:\n",
        if (total < MAX_DEV_REQ_DOWNSIZE) {
		gossip_err("%s: total:%d: must be at least:%lu:\n",
			   __func__,
			count,
			notrailer_count);
		return -EPROTO;
			   total,
			   MAX_DEV_REQ_DOWNSIZE);
		ret = -EFAULT;
		goto out;
	}
     

	/* Copy the non-trailer iovec data into a device request buffer. */
	buffer = dev_req_alloc();
	if (!buffer) {
		gossip_err("%s: dev_req_alloc failed.\n", __func__);
		return -ENOMEM;
	}
	ptr = buffer;
	for (i = 0; i < notrailer_count; i++) {
		if (iov[i].iov_len > num_remaining) {
			gossip_err
			    ("writev error: Freeing buffer and returning\n");
			dev_req_release(buffer);
			return -EMSGSIZE;
		}
		ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len);
		if (ret) {
			gossip_err("Failed to copy data from user space\n");
			dev_req_release(buffer);
			return -EIO;
		}
		num_remaining -= iov[i].iov_len;
		ptr += iov[i].iov_len;
		payload_size += iov[i].iov_len;
	n = copy_from_iter(&head, head_size, iter);
	if (n < head_size) {
		gossip_err("%s: failed to copy head.\n", __func__);
		ret = -EFAULT;
		goto out;
	}
	total_returned_size = payload_size;

	/* these elements are currently 8 byte aligned (8 bytes for (version +
	 * magic) 8 bytes for tag).  If you add another element, either
	 * make it 8 bytes big, or use get_unaligned when assigning.
	 */
	ptr = buffer;
	proto_ver = *((__s32 *) ptr); /* unused */
	ptr += sizeof(__s32);

	magic = *((__s32 *) ptr);
	ptr += sizeof(__s32);

	tag = *((__u64 *) ptr);
	ptr += sizeof(__u64);
	if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) {
		gossip_err("%s: userspace claims version"
			   "%d, minimum version required: %d.\n",
			   __func__,
			   head.version,
			   ORANGEFS_MINIMUM_USERSPACE_VERSION);
		ret = -EPROTO;
		goto out;
	}

	if (magic != ORANGEFS_DEVREQ_MAGIC) {
	if (head.magic != ORANGEFS_DEVREQ_MAGIC) {
		gossip_err("Error: Device magic number does not match.\n");
		dev_req_release(buffer);
		return -EPROTO;
		ret = -EPROTO;
		goto out;
	}

	op = orangefs_devreq_remove_op(tag);
	if (op) {
		/* Increase ref count! */
		get_op(op);

		/* calculate the size of the returned downcall. */
		returned_downcall_size =
			payload_size - (2 * sizeof(__s32) + sizeof(__u64));

		/* copy the passed in downcall into the op */
		if (returned_downcall_size ==
			sizeof(struct orangefs_downcall_s)) {
			memcpy(&op->downcall,
			       ptr,
			       sizeof(struct orangefs_downcall_s));
		} else {
			gossip_err("%s: returned downcall size:%d: \n",
				   __func__,
				   returned_downcall_size);
			dev_req_release(buffer);
			put_op(op);
			return -EMSGSIZE;
	op = orangefs_devreq_remove_op(head.tag);
	if (!op) {
		gossip_err("WARNING: No one's waiting for tag %llu\n",
			   llu(head.tag));
		goto out;
	}

		/* Don't tolerate an unexpected trailer iovec. */
		if ((op->downcall.trailer_size == 0) &&
		    (count != notrailer_count)) {
			gossip_err("%s: unexpected trailer iovec.\n",
				   __func__);
			dev_req_release(buffer);
	get_op(op); /* increase ref count. */

	n = copy_from_iter(&op->downcall, downcall_size, iter);
	if (n != downcall_size) {
		gossip_err("%s: failed to copy downcall.\n", __func__);
		put_op(op);
			return -EPROTO;
		ret = -EFAULT;
		goto out;
	}

		/* Don't consider the trailer if there's a bad status. */
		if (op->downcall.status != 0)
			goto no_trailer;

		/* get the trailer if there is one. */
		if (op->downcall.trailer_size == 0)
			goto no_trailer;

		gossip_debug(GOSSIP_DEV_DEBUG,
			     "%s: op->downcall.trailer_size %lld\n",
			     __func__,
			     op->downcall.trailer_size);
	if (op->downcall.status)
		goto wakeup;

	/*
		 * Bail if we think there should be a trailer, but
		 * there's no iovec for it.
	 * We've successfully peeled off the head and the downcall. 
	 * Something has gone awry if total doesn't equal the
	 * sum of head_size, downcall_size and trailer_size.
	 */
		if (count != (notrailer_count + 1)) {
			gossip_err("%s: trailer_size:%lld: count:%zu:\n",
	if ((head_size + downcall_size + op->downcall.trailer_size) != total) {
		gossip_err("%s: funky write, head_size:%d"
			   ": downcall_size:%d: trailer_size:%lld"
			   ": total size:%d:\n",
			   __func__,
			   head_size,
			   downcall_size,
			   op->downcall.trailer_size,
				   count);
			dev_req_release(buffer);
			   total);
		put_op(op);
			return -EPROTO;
		ret = -EFAULT;
		goto out;
	}

		/* Verify that trailer_size is accurate. */
		if (op->downcall.trailer_size != iov[trailer_index].iov_len) {
			gossip_err("%s: trailer_size:%lld: != iov_len:%zd:\n",
	/* Only READDIR operations should have trailers. */
	if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) &&
	    (op->downcall.trailer_size != 0)) {
		gossip_err("%s: %x operation with trailer.",
			   __func__,
				   op->downcall.trailer_size,
				   iov[trailer_index].iov_len);
			dev_req_release(buffer);
			   op->downcall.type);
		put_op(op);
		ret = -EFAULT;
		goto out;
	}

	/* READDIR operations should always have trailers. */
	if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) &&
	    (op->downcall.trailer_size == 0)) {
		gossip_err("%s: %x operation with no trailer.",
			   __func__,
			   op->downcall.type);
		put_op(op);
			return -EMSGSIZE;
		ret = -EFAULT;
		goto out;
	}

		total_returned_size += iov[trailer_index].iov_len;
	if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
		goto wakeup;

		/*
		 * Allocate a buffer, copy the trailer bytes into it and
		 * attach it to the downcall.
		 */
		op->downcall.trailer_buf = vmalloc(iov[trailer_index].iov_len);
		if (op->downcall.trailer_buf != NULL) {
			gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n",
				     op->downcall.trailer_buf);
			ret = copy_from_user(op->downcall.trailer_buf,
					     iov[trailer_index].iov_base,
					     iov[trailer_index].iov_len);
			if (ret) {
				gossip_err("%s: Failed to copy trailer.\n",
	op->downcall.trailer_buf =
		vmalloc(op->downcall.trailer_size);
	if (op->downcall.trailer_buf == NULL) {
		gossip_err("%s: failed trailer vmalloc.\n",
			   __func__);
				dev_req_release(buffer);
				gossip_debug(GOSSIP_DEV_DEBUG,
					     "vfree: %p\n",
					     op->downcall.trailer_buf);
				vfree(op->downcall.trailer_buf);
				op->downcall.trailer_buf = NULL;
		put_op(op);
				return -EIO;
		ret = -ENOMEM;
		goto out;
	}
		} else {
			gossip_err("writev: could not vmalloc for trailer!\n");
			dev_req_release(buffer);
	memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
	n = copy_from_iter(op->downcall.trailer_buf,
			   op->downcall.trailer_size,
			   iter);
	if (n != op->downcall.trailer_size) {
		gossip_err("%s: failed to copy trailer.\n", __func__);
		vfree(op->downcall.trailer_buf);
		put_op(op);
			return -ENOMEM;
		ret = -EFAULT;
		goto out;
	}

no_trailer:
wakeup:

		/* if this operation is an I/O operation we need to wait
	/*
	 * If this operation is an I/O operation we need to wait
	 * for all data to be copied before we can return to avoid
	 * buffer corruption and races that can pull the buffers
	 * out from under us.
@@ -450,7 +404,7 @@ static ssize_t orangefs_devreq_writev(struct file *file,
	 * application reading/writing this device to return until
	 * the buffers are done being used.
	 */
		if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) {
	if (op->downcall.type == ORANGEFS_VFS_OP_FILE_IO) {
		int timed_out = 0;
		DEFINE_WAIT(wait_entry);

@@ -509,7 +463,6 @@ static ssize_t orangefs_devreq_writev(struct file *file,
		if (!timed_out)
			op_release(op);
	} else {

		/*
		 * tell the vfs op waiting on a waitqueue that
		 * this op is done
@@ -524,25 +477,8 @@ static ssize_t orangefs_devreq_writev(struct file *file,
		 */
		wake_up_interruptible(&op->waitq);
	}
	} else {
		/* ignore downcalls that we're not interested in */
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "WARNING: No one's waiting for tag %llu\n",
			     llu(tag));
	}
	/* put_op? */
	dev_req_release(buffer);

	return total_returned_size;
}

static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
				      struct iov_iter *iter)
{
	return orangefs_devreq_writev(iocb->ki_filp,
				   iter->iov,
				   iter->nr_segs,
				   &iocb->ki_pos);
out:
	return ret;
}

/* Returns whether any FS are still pending remounted */