Commit 0b671301 authored by Trond Myklebust's avatar Trond Myklebust
Browse files

NFS: Fix asynchronous read error handling



We must always call ->read_done() before we truncate the page data, or
decide to flag an error. The reasons are that
	in NFSv2, ->read_done() is where the eof flag gets set.
	in NFSv3/v4 ->read_done() handles EJUKEBOX-type errors, and
		  v4 state recovery.

However, we need to mark the pages as uptodate before we deal with short
read errors, since we may need to modify the nfs_read_data arguments.

We therefore split the current nfs_readpage_result() into two parts:
nfs_readpage_result(), which calls ->read_done() etc, and
nfs_readpage_retry(), which subsequently handles short reads.

Note: Removing the code that retries in case of a short read also fixes a
bug in nfs_direct_read_result(), which used to return a corrupted number of
bytes.

Signed-off-by: default avatarTrond Myklebust <Trond.Myklebust@netapp.com>
parent 46b9f8e1
Loading
Loading
Loading
Loading
+69 −71
Original line number Diff line number Diff line
@@ -460,6 +460,55 @@ nfs_pagein_list(struct list_head *head, int rpages)
	return error;
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	int status;

	dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
		nfs_mark_for_revalidate(data->inode);
	}
	spin_lock(&data->inode->i_lock);
	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
	spin_unlock(&data->inode->i_lock);
	return 0;
}

static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	if (resp->eof || resp->count == argp->count)
		return 0;

	/* This is a short read! */
	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0)
		return 0;

	/* Yes, so retry the read at the end of the data */
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call(task);
	return -EAGAIN;
}

/*
 * Handle a read reply that fills part of a page.
 */
@@ -469,12 +518,16 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;
 
	if (likely(task->tk_status >= 0))
		nfs_readpage_truncate_uninitialised_page(data);
	else
		SetPageError(page);
	if (nfs_readpage_result(task, data) != 0)
		return;

	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	if (unlikely(task->tk_status < 0))
		SetPageError(page);
	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
@@ -502,25 +555,13 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
	count += base;
	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
		SetPageUptodate(*pages);
	if (count != 0)
	if (count == 0)
		return;
	/* Was this a short read? */
	if (data->res.eof || data->res.count == data->args.count)
		SetPageUptodate(*pages);
}

static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
{
	unsigned int count = data->args.count;
	unsigned int base = data->args.pgbase;
	struct page **pages;

	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	count += base;
	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
		SetPageError(*pages);
	if (count != 0)
		SetPageError(*pages);
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
@@ -529,19 +570,20 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	/*
	 * Note: nfs_readpage_result may change the values of
	 * Note: nfs_readpage_retry may change the values of
	 * data->args. In the multi-page case, we therefore need
	 * to ensure that we call the next nfs_readpage_set_page_uptodate()
	 * first in the multi-page case.
	 * to ensure that we call nfs_readpage_set_pages_uptodate()
	 * first.
	 */
	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		nfs_readpage_set_pages_uptodate(data);
	} else
		nfs_readpage_set_pages_error(data);
	if (nfs_readpage_result(task, data) != 0)
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);

@@ -555,50 +597,6 @@ static const struct rpc_call_ops nfs_read_full_ops = {
	.rpc_release = nfs_readdata_release,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;
	int status;

	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
		task->tk_pid, task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);

	if (task->tk_status < 0) {
		if (task->tk_status == -ESTALE) {
			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
			nfs_mark_for_revalidate(data->inode);
		}
	} else if (resp->count < argp->count && !resp->eof) {
		/* This is a short read! */
		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Yes, so retry the read at the end of the data */
			argp->offset += resp->count;
			argp->pgbase += resp->count;
			argp->count -= resp->count;
			rpc_restart_call(task);
			return -EAGAIN;
		}
		task->tk_status = -EIO;
	}
	spin_lock(&data->inode->i_lock);
	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
	spin_unlock(&data->inode->i_lock);
	return 0;
}

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case: