Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
2005-04-16 15:20:36 -07:00
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the Linux nfs filesystem routines.
+#
+
+obj-$(CONFIG_NFS_FS) += nfs.o
+
+nfs-y 			:= dir.o file.o inode.o nfs2xdr.o pagelist.o \
+			   proc.o read.o symlink.o unlink.o write.o
+nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
+nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
+			   delegation.o idmap.o \
+			   callback.o callback_xdr.o callback_proc.o
+nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
+nfs-objs		:= $(nfs-y)
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -0,0 +1,187 @@
+/*
+ * linux/fs/nfs/callback.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback handling
+ */
+
+#include <linux/config.h>
+#include <linux/completion.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+struct nfs_callback_data {
+	unsigned int users;
+	struct svc_serv *serv;
+	pid_t pid;
+	struct completion started;
+	struct completion stopped;
+};
+
+static struct nfs_callback_data nfs_callback_info;
+static DECLARE_MUTEX(nfs_callback_sema);
+static struct svc_program nfs4_callback_program;
+
+unsigned short nfs_callback_tcpport;
+
+/*
+ * This is the callback kernel thread.
+ */
+static void nfs_callback_svc(struct svc_rqst *rqstp)
+{
+	struct svc_serv *serv = rqstp->rq_server;
+	int err;
+
+	__module_get(THIS_MODULE);
+	lock_kernel();
+
+	nfs_callback_info.pid = current->pid;
+	daemonize("nfsv4-svc");
+	/* Process request with signals blocked, but allow SIGKILL.  */
+	allow_signal(SIGKILL);
+
+	complete(&nfs_callback_info.started);
+
+	while (nfs_callback_info.users != 0 || !signalled()) {
+		/*
+		 * Listen for a request on the socket
+		 */
+		err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT);
+		if (err == -EAGAIN || err == -EINTR)
+			continue;
+		if (err < 0) {
+			printk(KERN_WARNING
+					"%s: terminating on error %d\n",
+					__FUNCTION__, -err);
+			break;
+		}
+		dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
+				NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
+		svc_process(serv, rqstp);
+	}
+
+	nfs_callback_info.pid = 0;
+	complete(&nfs_callback_info.stopped);
+	unlock_kernel();
+	module_put_and_exit(0);
+}
+
+/*
+ * Bring up the server process if it is not already up.
+ */
+int nfs_callback_up(void)
+{
+	struct svc_serv *serv;
+	struct svc_sock *svsk;
+	int ret = 0;
+
+	lock_kernel();
+	down(&nfs_callback_sema);
+	if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
+		goto out;
+	init_completion(&nfs_callback_info.started);
+	init_completion(&nfs_callback_info.stopped);
+	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE);
+	ret = -ENOMEM;
+	if (!serv)
+		goto out_err;
+	/* FIXME: We don't want to register this socket with the portmapper */
+	ret = svc_makesock(serv, IPPROTO_TCP, 0);
+	if (ret < 0)
+		goto out_destroy;
+	if (!list_empty(&serv->sv_permsocks)) {
+		svsk = list_entry(serv->sv_permsocks.next,
+				struct svc_sock, sk_list);
+		nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
+		dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
+	} else
+		BUG();
+	ret = svc_create_thread(nfs_callback_svc, serv);
+	if (ret < 0)
+		goto out_destroy;
+	nfs_callback_info.serv = serv;
+	wait_for_completion(&nfs_callback_info.started);
+out:
+	up(&nfs_callback_sema);
+	unlock_kernel();
+	return ret;
+out_destroy:
+	svc_destroy(serv);
+out_err:
+	nfs_callback_info.users--;
+	goto out;
+}
+
+/*
+ * Kill the server process if it is not already up.
+ */
+int nfs_callback_down(void)
+{
+	int ret = 0;
+
+	lock_kernel();
+	down(&nfs_callback_sema);
+	if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
+		goto out;
+	kill_proc(nfs_callback_info.pid, SIGKILL, 1);
+	wait_for_completion(&nfs_callback_info.stopped);
+out:
+	up(&nfs_callback_sema);
+	unlock_kernel();
+	return ret;
+}
+
+static int nfs_callback_authenticate(struct svc_rqst *rqstp)
+{
+	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
+	struct nfs4_client *clp;
+
+	/* Don't talk to strangers */
+	clp = nfs4_find_client(addr);
+	if (clp == NULL)
+		return SVC_DROP;
+	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
+	nfs4_put_client(clp);
+	switch (rqstp->rq_authop->flavour) {
+		case RPC_AUTH_NULL:
+			if (rqstp->rq_proc != CB_NULL)
+				return SVC_DENIED;
+			break;
+		case RPC_AUTH_UNIX:
+			break;
+		case RPC_AUTH_GSS:
+			/* FIXME: RPCSEC_GSS handling? */
+		default:
+			return SVC_DENIED;
+	}
+	return SVC_OK;
+}
+
+/*
+ * Define NFS4 callback program
+ */
+extern struct svc_version nfs4_callback_version1;
+
+static struct svc_version *nfs4_callback_version[] = {
+	[1] = &nfs4_callback_version1,
+};
+
+static struct svc_stat nfs4_callback_stats;
+
+static struct svc_program nfs4_callback_program = {
+	.pg_prog = NFS4_CALLBACK,			/* RPC service number */
+	.pg_nvers = ARRAY_SIZE(nfs4_callback_version),	/* Number of entries */
+	.pg_vers = nfs4_callback_version,		/* version table */
+	.pg_name = "NFSv4 callback",			/* service name */
+	.pg_class = "nfs",				/* authentication class */
+	.pg_stats = &nfs4_callback_stats,
+	.pg_authenticate = nfs_callback_authenticate,
+};
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -0,0 +1,70 @@
+/*
+ * linux/fs/nfs/callback.h
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback definitions
+ */
+#ifndef __LINUX_FS_NFS_CALLBACK_H
+#define __LINUX_FS_NFS_CALLBACK_H
+
+#define NFS4_CALLBACK 0x40000000
+#define NFS4_CALLBACK_XDRSIZE 2048
+#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
+
+enum nfs4_callback_procnum {
+	CB_NULL = 0,
+	CB_COMPOUND = 1,
+};
+
+enum nfs4_callback_opnum {
+	OP_CB_GETATTR = 3,
+	OP_CB_RECALL  = 4,
+	OP_CB_ILLEGAL = 10044,
+};
+
+struct cb_compound_hdr_arg {
+	int taglen;
+	const char *tag;
+	unsigned int callback_ident;
+	unsigned nops;
+};
+
+struct cb_compound_hdr_res {
+	uint32_t *status;
+	int taglen;
+	const char *tag;
+	uint32_t *nops;
+};
+
+struct cb_getattrargs {
+	struct sockaddr_in *addr;
+	struct nfs_fh fh;
+	uint32_t bitmap[2];
+};
+
+struct cb_getattrres {
+	uint32_t status;
+	uint32_t bitmap[2];
+	uint64_t size;
+	uint64_t change_attr;
+	struct timespec ctime;
+	struct timespec mtime;
+};
+
+struct cb_recallargs {
+	struct sockaddr_in *addr;
+	struct nfs_fh fh;
+	nfs4_stateid stateid;
+	uint32_t truncate;
+};
+
+extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+
+extern int nfs_callback_up(void);
+extern int nfs_callback_down(void);
+
+extern unsigned short nfs_callback_tcpport;
+
+#endif /* __LINUX_FS_NFS_CALLBACK_H */
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -0,0 +1,85 @@
+/*
+ * linux/fs/nfs/callback_proc.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback procedures
+ */
+#include <linux/config.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+#include "delegation.h"
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+ 
+unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+{
+	struct nfs4_client *clp;
+	struct nfs_delegation *delegation;
+	struct nfs_inode *nfsi;
+	struct inode *inode;
+	
+	res->bitmap[0] = res->bitmap[1] = 0;
+	res->status = htonl(NFS4ERR_BADHANDLE);
+	clp = nfs4_find_client(&args->addr->sin_addr);
+	if (clp == NULL)
+		goto out;
+	inode = nfs_delegation_find_inode(clp, &args->fh);
+	if (inode == NULL)
+		goto out_putclient;
+	nfsi = NFS_I(inode);
+	down_read(&nfsi->rwsem);
+	delegation = nfsi->delegation;
+	if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
+		goto out_iput;
+	res->size = i_size_read(inode);
+	res->change_attr = NFS_CHANGE_ATTR(inode);
+	res->ctime = inode->i_ctime;
+	res->mtime = inode->i_mtime;
+	res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
+		args->bitmap[0];
+	res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
+		args->bitmap[1];
+	res->status = 0;
+out_iput:
+	up_read(&nfsi->rwsem);
+	iput(inode);
+out_putclient:
+	nfs4_put_client(clp);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
+	return res->status;
+}
+
+unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+{
+	struct nfs4_client *clp;
+	struct inode *inode;
+	unsigned res;
+	
+	res = htonl(NFS4ERR_BADHANDLE);
+	clp = nfs4_find_client(&args->addr->sin_addr);
+	if (clp == NULL)
+		goto out;
+	inode = nfs_delegation_find_inode(clp, &args->fh);
+	if (inode == NULL)
+		goto out_putclient;
+	/* Set up a helper thread to actually return the delegation */
+	switch(nfs_async_inode_return_delegation(inode, &args->stateid)) {
+		case 0:
+			res = 0;
+			break;
+		case -ENOENT:
+			res = htonl(NFS4ERR_BAD_STATEID);
+			break;
+		default:
+			res = htonl(NFS4ERR_RESOURCE);
+	}
+	iput(inode);
+out_putclient:
+	nfs4_put_client(clp);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
+	return res;
+}
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -0,0 +1,481 @@
+/*
+ * linux/fs/nfs/callback_xdr.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback encode/decode procedures
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+
+#define CB_OP_TAGLEN_MAXSZ	(512)
+#define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
+#define CB_OP_GETATTR_BITMAP_MAXSZ	(4)
+#define CB_OP_GETATTR_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+				CB_OP_GETATTR_BITMAP_MAXSZ + \
+				2 + 2 + 3 + 3)
+#define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+typedef unsigned (*callback_process_op_t)(void *, void *);
+typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
+typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
+
+
+struct callback_op {
+	callback_process_op_t process_op;
+	callback_decode_arg_t decode_args;
+	callback_encode_res_t encode_res;
+	long res_maxsize;
+};
+
+static struct callback_op callback_ops[];
+
+static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return htonl(NFS4_OK);
+}
+
+static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	return xdr_argsize_check(rqstp, p);
+}
+
+static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	return xdr_ressize_check(rqstp, p);
+}
+
+static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
+{
+	uint32_t *p;
+
+	p = xdr_inline_decode(xdr, nbytes);
+	if (unlikely(p == NULL))
+		printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n");
+	return p;
+}
+
+static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*len = ntohl(*p);
+
+	if (*len != 0) {
+		p = read_buf(xdr, *len);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*str = (const char *)p;
+	} else
+		*str = NULL;
+
+	return 0;
+}
+
+static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	fh->size = ntohl(*p);
+	if (fh->size > NFS4_FHSIZE)
+		return htonl(NFS4ERR_BADHANDLE);
+	p = read_buf(xdr, fh->size);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	memcpy(&fh->data[0], p, fh->size);
+	memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size);
+	return 0;
+}
+
+static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+	uint32_t *p;
+	unsigned int attrlen;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	attrlen = ntohl(*p);
+	p = read_buf(xdr, attrlen << 2);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	if (likely(attrlen > 0))
+		bitmap[0] = ntohl(*p++);
+	if (attrlen > 1)
+		bitmap[1] = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 16);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	memcpy(stateid->data, p, 16);
+	return 0;
+}
+
+static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
+{
+	uint32_t *p;
+	unsigned int minor_version;
+	unsigned status;
+
+	status = decode_string(xdr, &hdr->taglen, &hdr->tag);
+	if (unlikely(status != 0))
+		return status;
+	/* We do not like overly long tags! */
+	if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) {
+		printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
+				__FUNCTION__, hdr->taglen);
+		return htonl(NFS4ERR_RESOURCE);
+	}
+	p = read_buf(xdr, 12);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	minor_version = ntohl(*p++);
+	/* Check minor version is zero. */
+	if (minor_version != 0) {
+		printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n",
+				__FUNCTION__, minor_version);
+		return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+	}
+	hdr->callback_ident = ntohl(*p++);
+	hdr->nops = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
+{
+	uint32_t *p;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*op = ntohl(*p);
+	return 0;
+}
+
+static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+{
+	unsigned status;
+
+	status = decode_fh(xdr, &args->fh);
+	if (unlikely(status != 0))
+		goto out;
+	args->addr = &rqstp->rq_addr;
+	status = decode_bitmap(xdr, args->bitmap);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+{
+	uint32_t *p;
+	unsigned status;
+
+	args->addr = &rqstp->rq_addr;
+	status = decode_stateid(xdr, &args->stateid);
+	if (unlikely(status != 0))
+		goto out;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL)) {
+		status = htonl(NFS4ERR_RESOURCE);
+		goto out;
+	}
+	args->truncate = ntohl(*p);
+	status = decode_fh(xdr, &args->fh);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return 0;
+}
+
+static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 4 + len);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	xdr_encode_opaque(p, str, len);
+	return 0;
+}
+
+#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
+#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
+static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
+{
+	uint32_t bm[2];
+	uint32_t *p;
+
+	bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0);
+	bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1);
+	if (bm[1] != 0) {
+		p = xdr_reserve_space(xdr, 16);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(2);
+		*p++ = bm[0];
+		*p++ = bm[1];
+	} else if (bm[0] != 0) {
+		p = xdr_reserve_space(xdr, 12);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(1);
+		*p++ = bm[0];
+	} else {
+		p = xdr_reserve_space(xdr, 8);
+		if (unlikely(p == NULL))
+			return htonl(NFS4ERR_RESOURCE);
+		*p++ = htonl(0);
+	}
+	*savep = p;
+	return 0;
+}
+
+static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
+{
+	uint32_t *p;
+
+	if (!(bitmap[0] & FATTR4_WORD0_CHANGE))
+		return 0;
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, change);
+	return 0;
+}
+
+static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
+{
+	uint32_t *p;
+
+	if (!(bitmap[0] & FATTR4_WORD0_SIZE))
+		return 0;
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, size);
+	return 0;
+}
+
+static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 12);
+	if (unlikely(p == 0))
+		return htonl(NFS4ERR_RESOURCE);
+	p = xdr_encode_hyper(p, time->tv_sec);
+	*p = htonl(time->tv_nsec);
+	return 0;
+}
+
+static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+{
+	if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
+		return 0;
+	return encode_attr_time(xdr,time);
+}
+
+static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+{
+	if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
+		return 0;
+	return encode_attr_time(xdr,time);
+}
+
+static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
+{
+	unsigned status;
+
+	hdr->status = xdr_reserve_space(xdr, 4);
+	if (unlikely(hdr->status == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	status = encode_string(xdr, hdr->taglen, hdr->tag);
+	if (unlikely(status != 0))
+		return status;
+	hdr->nops = xdr_reserve_space(xdr, 4);
+	if (unlikely(hdr->nops == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	return 0;
+}
+
+static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
+{
+	uint32_t *p;
+	
+	p = xdr_reserve_space(xdr, 8);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*p++ = htonl(op);
+	*p = res;
+	return 0;
+}
+
+static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
+{
+	uint32_t *savep;
+	unsigned status = res->status;
+	
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_bitmap(xdr, res->bitmap, &savep);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_change(xdr, res->bitmap, res->change_attr);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_size(xdr, res->bitmap, res->size);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
+	if (unlikely(status != 0))
+		goto out;
+	status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
+	*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+static unsigned process_op(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr_in, void *argp,
+		struct xdr_stream *xdr_out, void *resp)
+{
+	struct callback_op *op;
+	unsigned int op_nr;
+	unsigned int status = 0;
+	long maxlen;
+	unsigned res;
+
+	dprintk("%s: start\n", __FUNCTION__);
+	status = decode_op_hdr(xdr_in, &op_nr);
+	if (unlikely(status != 0)) {
+		op_nr = OP_CB_ILLEGAL;
+		op = &callback_ops[0];
+	} else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
+		op_nr = OP_CB_ILLEGAL;
+		op = &callback_ops[0];
+		status = htonl(NFS4ERR_OP_ILLEGAL);
+	} else
+		op = &callback_ops[op_nr];
+
+	maxlen = xdr_out->end - xdr_out->p;
+	if (maxlen > 0 && maxlen < PAGE_SIZE) {
+		if (likely(status == 0 && op->decode_args != NULL))
+			status = op->decode_args(rqstp, xdr_in, argp);
+		if (likely(status == 0 && op->process_op != NULL))
+			status = op->process_op(argp, resp);
+	} else
+		status = htonl(NFS4ERR_RESOURCE);
+
+	res = encode_op_hdr(xdr_out, op_nr, status);
+	if (status == 0)
+		status = res;
+	if (op->encode_res != NULL && status == 0)
+		status = op->encode_res(rqstp, xdr_out, resp);
+	dprintk("%s: done, status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
+ * Decode, process and encode a COMPOUND
+ */
+static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	struct cb_compound_hdr_arg hdr_arg;
+	struct cb_compound_hdr_res hdr_res;
+	struct xdr_stream xdr_in, xdr_out;
+	uint32_t *p;
+	unsigned int status;
+	unsigned int nops = 1;
+
+	dprintk("%s: start\n", __FUNCTION__);
+
+	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+
+	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
+	rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
+	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+
+	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
+	hdr_res.taglen = hdr_arg.taglen;
+	hdr_res.tag = hdr_arg.tag;
+	encode_compound_hdr_res(&xdr_out, &hdr_res);
+
+	for (;;) {
+		status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp);
+		if (status != 0)
+			break;
+		if (nops == hdr_arg.nops)
+			break;
+		nops++;
+	}
+	*hdr_res.status = status;
+	*hdr_res.nops = htonl(nops);
+	dprintk("%s: done, status = %u\n", __FUNCTION__, status);
+	return rpc_success;
+}
+
+/*
+ * Define NFS4 callback COMPOUND ops.
+ */
+static struct callback_op callback_ops[] = {
+	[0] = {
+		.res_maxsize = CB_OP_HDR_RES_MAXSZ,
+	},
+	[OP_CB_GETATTR] = {
+		.process_op = (callback_process_op_t)nfs4_callback_getattr,
+		.decode_args = (callback_decode_arg_t)decode_getattr_args,
+		.encode_res = (callback_encode_res_t)encode_getattr_res,
+		.res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
+	},
+	[OP_CB_RECALL] = {
+		.process_op = (callback_process_op_t)nfs4_callback_recall,
+		.decode_args = (callback_decode_arg_t)decode_recall_args,
+		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+	}
+};
+
+/*
+ * Define NFS4 callback procedures
+ */
+static struct svc_procedure nfs4_callback_procedures1[] = {
+	[CB_NULL] = {
+		.pc_func = nfs4_callback_null,
+		.pc_decode = (kxdrproc_t)nfs4_decode_void,
+		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_xdrressize = 1,
+	},
+	[CB_COMPOUND] = {
+		.pc_func = nfs4_callback_compound,
+		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_argsize = 256,
+		.pc_ressize = 256,
+		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
+	}
+};
+
+struct svc_version nfs4_callback_version1 = {
+	.vs_vers = 1,
+	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
+	.vs_proc = nfs4_callback_procedures1,
+	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
+	.vs_dispatch = NULL,
+};
+
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -0,0 +1,342 @@
+/*
+ * linux/fs/nfs/delegation.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFS file delegation management
+ *
+ */
+#include <linux/config.h>
+#include <linux/completion.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "delegation.h"
+
+static struct nfs_delegation *nfs_alloc_delegation(void)
+{
+	return (struct nfs_delegation *)kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL);
+}
+
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+	if (delegation->cred)
+		put_rpccred(delegation->cred);
+	kfree(delegation);
+}
+
+static void nfs_delegation_claim_opens(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state;
+
+again:
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(ctx, &nfsi->open_files, list) {
+		state = ctx->state;
+		if (state == NULL)
+			continue;
+		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+			continue;
+		get_nfs_open_context(ctx);
+		spin_unlock(&inode->i_lock);
+		if (nfs4_open_delegation_recall(ctx->dentry, state) < 0)
+			return;
+		put_nfs_open_context(ctx);
+		goto again;
+	}
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Set up a delegation on an inode
+ */
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+
+	if (delegation == NULL)
+		return;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	put_rpccred(cred);
+	delegation->cred = get_rpccred(cred);
+	delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
+	NFS_I(inode)->delegation_state = delegation->type;
+	smp_wmb();
+}
+
+/*
+ * Set up a delegation on an inode
+ */
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int status = 0;
+
+	delegation = nfs_alloc_delegation();
+	if (delegation == NULL)
+		return -ENOMEM;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	delegation->cred = get_rpccred(cred);
+	delegation->inode = inode;
+
+	spin_lock(&clp->cl_lock);
+	if (nfsi->delegation == NULL) {
+		list_add(&delegation->super_list, &clp->cl_delegations);
+		nfsi->delegation = delegation;
+		nfsi->delegation_state = delegation->type;
+		delegation = NULL;
+	} else {
+		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
+					sizeof(delegation->stateid)) != 0 ||
+				delegation->type != nfsi->delegation->type) {
+			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
+					__FUNCTION__, NIPQUAD(clp->cl_addr));
+			status = -EIO;
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	if (delegation != NULL)
+		kfree(delegation);
+	return status;
+}
+
+static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
+{
+	int res = 0;
+
+	__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
+	nfs_free_delegation(delegation);
+	return res;
+}
+
+/* Sync all data to disk upon delegation return */
+static void nfs_msync_inode(struct inode *inode)
+{
+	filemap_fdatawrite(inode->i_mapping);
+	nfs_wb_all(inode);
+	filemap_fdatawait(inode->i_mapping);
+}
+
+/*
+ * Basic procedure for returning a delegation to the server
+ */
+int nfs_inode_return_delegation(struct inode *inode)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int res = 0;
+
+	nfs_msync_inode(inode);
+	down_read(&clp->cl_sem);
+	/* Guard against new delegated open calls */
+	down_write(&nfsi->rwsem);
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL) {
+		list_del_init(&delegation->super_list);
+		nfsi->delegation = NULL;
+		nfsi->delegation_state = 0;
+	}
+	spin_unlock(&clp->cl_lock);
+	nfs_delegation_claim_opens(inode);
+	up_write(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	nfs_msync_inode(inode);
+
+	if (delegation != NULL)
+		res = nfs_do_return_delegation(inode, delegation);
+	return res;
+}
+
+/*
+ * Return all delegations associated to a super block
+ */
+void nfs_return_all_delegations(struct super_block *sb)
+{
+	struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_delegation *delegation;
+	struct inode *inode;
+
+	if (clp == NULL)
+		return;
+restart:
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		if (delegation->inode->i_sb != sb)
+			continue;
+		inode = igrab(delegation->inode);
+		if (inode == NULL)
+			continue;
+		spin_unlock(&clp->cl_lock);
+		nfs_inode_return_delegation(inode);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
+ */
+void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation;
+	struct inode *inode;
+
+	if (clp == NULL)
+		return;
+restart:
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		inode = igrab(delegation->inode);
+		if (inode == NULL)
+			continue;
+		spin_unlock(&clp->cl_lock);
+		nfs_inode_return_delegation(inode);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&clp->cl_lock);
+}
+
+struct recall_threadargs {
+	struct inode *inode;
+	struct nfs4_client *clp;
+	const nfs4_stateid *stateid;
+
+	struct completion started;
+	int result;
+};
+
+static int recall_thread(void *data)
+{
+	struct recall_threadargs *args = (struct recall_threadargs *)data;
+	struct inode *inode = igrab(args->inode);
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+
+	daemonize("nfsv4-delegreturn");
+
+	nfs_msync_inode(inode);
+	down_read(&clp->cl_sem);
+	down_write(&nfsi->rwsem);
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL && memcmp(delegation->stateid.data,
+				args->stateid->data,
+				sizeof(delegation->stateid.data)) == 0) {
+		list_del_init(&delegation->super_list);
+		nfsi->delegation = NULL;
+		nfsi->delegation_state = 0;
+		args->result = 0;
+	} else {
+		delegation = NULL;
+		args->result = -ENOENT;
+	}
+	spin_unlock(&clp->cl_lock);
+	complete(&args->started);
+	nfs_delegation_claim_opens(inode);
+	up_write(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	nfs_msync_inode(inode);
+
+	if (delegation != NULL)
+		nfs_do_return_delegation(inode, delegation);
+	iput(inode);
+	module_put_and_exit(0);
+}
+
+/*
+ * Asynchronous delegation recall!
+ */
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+{
+	struct recall_threadargs data = {
+		.inode = inode,
+		.stateid = stateid,
+	};
+	int status;
+
+	init_completion(&data.started);
+	__module_get(THIS_MODULE);
+	status = kernel_thread(recall_thread, &data, CLONE_KERNEL);
+	if (status < 0)
+		goto out_module_put;
+	wait_for_completion(&data.started);
+	return data.result;
+out_module_put:
+	module_put(THIS_MODULE);
+	return status;
+}
+
+/*
+ * Retrieve the inode associated with a delegation
+ */
+struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+{
+	struct nfs_delegation *delegation;
+	struct inode *res = NULL;
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+		if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
+			res = igrab(delegation->inode);
+			break;
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	return res;
+}
+
+/*
+ * Mark all delegations as needing to be reclaimed
+ */
+void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation;
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(delegation, &clp->cl_delegations, super_list)
+		delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Reap all unclaimed delegations after reboot recovery is done
+ */
+void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+{
+	struct nfs_delegation *delegation, *n;
+	LIST_HEAD(head);
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+		if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
+			continue;
+		list_move(&delegation->super_list, &head);
+		NFS_I(delegation->inode)->delegation = NULL;
+		NFS_I(delegation->inode)->delegation_state = 0;
+	}
+	spin_unlock(&clp->cl_lock);
+	while(!list_empty(&head)) {
+		delegation = list_entry(head.next, struct nfs_delegation, super_list);
+		list_del(&delegation->super_list);
+		nfs_free_delegation(delegation);
+	}
+}
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -0,0 +1,57 @@
+/*
+ * linux/fs/nfs/delegation.h
+ *
+ * Copyright (c) Trond Myklebust
+ *
+ * Definitions pertaining to NFS delegated files
+ */
+#ifndef FS_NFS_DELEGATION_H
+#define FS_NFS_DELEGATION_H
+
+#if defined(CONFIG_NFS_V4)
+/*
+ * NFSv4 delegation
+ */
+struct nfs_delegation {
+	struct list_head super_list;
+	struct rpc_cred *cred;
+	struct inode *inode;
+	nfs4_stateid stateid;
+	int type;
+#define NFS_DELEGATION_NEED_RECLAIM 1
+	long flags;
+	loff_t maxsize;
+};
+
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+int nfs_inode_return_delegation(struct inode *inode);
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+
+struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+void nfs_return_all_delegations(struct super_block *sb);
+void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+
+void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+
+/* NFSv4 delegation-related procedures */
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
+int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+
+static inline int nfs_have_delegation(struct inode *inode, int flags)
+{
+	flags &= FMODE_READ|FMODE_WRITE;
+	smp_rmb();
+	if ((NFS_I(inode)->delegation_state & flags) == flags)
+		return 1;
+	return 0;
+}
+#else
+static inline int nfs_have_delegation(struct inode *inode, int flags)
+{
+	return 0;
+}
+#endif
+
+#endif
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -0,0 +1,808 @@
+/*
+ * linux/fs/nfs/direct.c
+ *
+ * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
+ *
+ * High-performance uncached I/O for the Linux NFS client
+ *
+ * There are important applications whose performance or correctness
+ * depends on uncached access to file data.  Database clusters
+ * (multiple copies of the same instance running on separate hosts) 
+ * implement their own cache coherency protocol that subsumes file
+ * system cache protocols.  Applications that process datasets 
+ * considerably larger than the client's memory do not always benefit 
+ * from a local cache.  A streaming video server, for instance, has no 
+ * need to cache the contents of a file.
+ *
+ * When an application requests uncached I/O, all read and write requests
+ * are made directly to the server; data stored or fetched via these
+ * requests is not cached in the Linux page cache.  The client does not
+ * correct unaligned requests from applications.  All requested bytes are
+ * held on permanent storage before a direct write system call returns to
+ * an application.
+ *
+ * Solaris implements an uncached I/O facility called directio() that
+ * is used for backups and sequential I/O to very large files.  Solaris
+ * also supports uncaching whole NFS partitions with "-o forcedirectio,"
+ * an undocumented mount option.
+ *
+ * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
+ * help from Andrew Morton.
+ *
+ * 18 Dec 2001	Initial implementation for 2.4  --cel
+ * 08 Jul 2002	Version for 2.4.19, with bug fixes --trondmy
+ * 08 Jun 2003	Port to 2.5 APIs  --cel
+ * 31 Mar 2004	Handle direct I/O without VFS support  --cel
+ * 15 Sep 2004	Parallel async reads  --cel
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/smp_lock.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/kref.h>
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#define NFSDBG_FACILITY		NFSDBG_VFS
+#define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
+
+static kmem_cache_t *nfs_direct_cachep;
+
+/*
+ * This represents a set of asynchronous requests that we're waiting on
+ */
+struct nfs_direct_req {
+	struct kref		kref;		/* release manager */
+	struct list_head	list;		/* nfs_read_data structs */
+	wait_queue_head_t	wait;		/* wait for i/o completion */
+	struct page **		pages;		/* pages in our buffer */
+	unsigned int		npages;		/* count of pages */
+	atomic_t		complete,	/* i/os we're waiting for */
+				count,		/* bytes actually processed */
+				error;		/* any reported error */
+};
+
+
+/**
+ * nfs_get_user_pages - find and set up pages underlying user's buffer
+ * rw: direction (read or write)
+ * user_addr: starting address of this segment of user's buffer
+ * count: size of this segment
+ * @pages: returned array of page struct pointers underlying user's buffer
+ */
+static inline int
+nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
+		struct page ***pages)
+{
+	int result = -ENOMEM;
+	unsigned long page_count;
+	size_t array_size;
+
+	/* set an arbitrary limit to prevent type overflow */
+	/* XXX: this can probably be as large as INT_MAX */
+	if (size > MAX_DIRECTIO_SIZE) {
+		*pages = NULL;
+		return -EFBIG;
+	}
+
+	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	page_count -= user_addr >> PAGE_SHIFT;
+
+	array_size = (page_count * sizeof(struct page *));
+	*pages = kmalloc(array_size, GFP_KERNEL);
+	if (*pages) {
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					page_count, (rw == READ), 0,
+					*pages, NULL);
+		up_read(&current->mm->mmap_sem);
+	}
+	return result;
+}
+
+/**
+ * nfs_free_user_pages - tear down page struct array
+ * @pages: array of page struct pointers underlying target buffer
+ * @npages: number of pages in the array
+ * @do_dirty: dirty the pages as we release them
+ */
+static void
+nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+	int i;
+	for (i = 0; i < npages; i++) {
+		if (do_dirty)
+			set_page_dirty_lock(pages[i]);
+		page_cache_release(pages[i]);
+	}
+	kfree(pages);
+}
+
+/**
+ * nfs_direct_req_release - release  nfs_direct_req structure for direct read
+ * @kref: kref object embedded in an nfs_direct_req structure
+ *
+ */
+static void nfs_direct_req_release(struct kref *kref)
+{
+	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+	kmem_cache_free(nfs_direct_cachep, dreq);
+}
+
+/**
+ * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
+ * @count: count of bytes for the read request
+ * @rsize: local rsize setting
+ *
+ * Note we also set the number of requests we have in the dreq when we are
+ * done.  This prevents races with I/O completion so we will always wait
+ * until all requests have been dispatched and completed.
+ */
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+{
+	struct list_head *list;
+	struct nfs_direct_req *dreq;
+	unsigned int reads = 0;
+
+	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	if (!dreq)
+		return NULL;
+
+	kref_init(&dreq->kref);
+	init_waitqueue_head(&dreq->wait);
+	INIT_LIST_HEAD(&dreq->list);
+	atomic_set(&dreq->count, 0);
+	atomic_set(&dreq->error, 0);
+
+	list = &dreq->list;
+	for(;;) {
+		struct nfs_read_data *data = nfs_readdata_alloc();
+
+		if (unlikely(!data)) {
+			while (!list_empty(list)) {
+				data = list_entry(list->next,
+						  struct nfs_read_data, pages);
+				list_del(&data->pages);
+				nfs_readdata_free(data);
+			}
+			kref_put(&dreq->kref, nfs_direct_req_release);
+			return NULL;
+		}
+
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, list);
+
+		data->req = (struct nfs_page *) dreq;
+		reads++;
+		if (nbytes <= rsize)
+			break;
+		nbytes -= rsize;
+	}
+	kref_get(&dreq->kref);
+	atomic_set(&dreq->complete, reads);
+	return dreq;
+}
+
+/**
+ * nfs_direct_read_result - handle a read reply for a direct read request
+ * @data: address of NFS READ operation control block
+ * @status: status of this NFS READ operation
+ *
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
+ */
+static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+{
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	if (likely(status >= 0))
+		atomic_add(data->res.count, &dreq->count);
+	else
+		atomic_set(&dreq->error, status);
+
+	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
+		nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+		wake_up(&dreq->wait);
+		kref_put(&dreq->kref, nfs_direct_req_release);
+	}
+}
+
+/**
+ * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
+ * @dreq: address of nfs_direct_req struct for this request
+ * @inode: target inode
+ * @ctx: target file open context
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ *
+ * For each nfs_read_data struct that was allocated on the list, dispatch
+ * an NFS READ operation
+ */
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
+		struct inode *inode, struct nfs_open_context *ctx,
+		unsigned long user_addr, size_t count, loff_t file_offset)
+{
+	struct list_head *list = &dreq->list;
+	struct page **pages = dreq->pages;
+	unsigned int curpage, pgbase;
+	unsigned int rsize = NFS_SERVER(inode)->rsize;
+
+	curpage = 0;
+	pgbase = user_addr & ~PAGE_MASK;
+	do {
+		struct nfs_read_data *data;
+		unsigned int bytes;
+
+		bytes = rsize;
+		if (count < rsize)
+			bytes = count;
+
+		data = list_entry(list->next, struct nfs_read_data, pages);
+		list_del_init(&data->pages);
+
+		data->inode = inode;
+		data->cred = ctx->cred;
+		data->args.fh = NFS_FH(inode);
+		data->args.context = ctx;
+		data->args.offset = file_offset;
+		data->args.pgbase = pgbase;
+		data->args.pages = &pages[curpage];
+		data->args.count = bytes;
+		data->res.fattr = &data->fattr;
+		data->res.eof = 0;
+		data->res.count = bytes;
+
+		NFS_PROTO(inode)->read_setup(data);
+
+		data->task.tk_cookie = (unsigned long) inode;
+		data->task.tk_calldata = data;
+		data->task.tk_release = nfs_readdata_release;
+		data->complete = nfs_direct_read_result;
+
+		lock_kernel();
+		rpc_execute(&data->task);
+		unlock_kernel();
+
+		dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+				data->task.tk_pid,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				bytes,
+				(unsigned long long)data->args.offset);
+
+		file_offset += bytes;
+		pgbase += bytes;
+		curpage += pgbase >> PAGE_SHIFT;
+		pgbase &= ~PAGE_MASK;
+
+		count -= bytes;
+	} while (count != 0);
+}
+
+/**
+ * nfs_direct_read_wait - wait for I/O completion for direct reads
+ * @dreq: request on which we are to wait
+ * @intr: whether or not this wait can be interrupted
+ *
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
+{
+	int result = 0;
+
+	if (intr) {
+		result = wait_event_interruptible(dreq->wait,
+					(atomic_read(&dreq->complete) == 0));
+	} else {
+		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
+	}
+
+	if (!result)
+		result = atomic_read(&dreq->error);
+	if (!result)
+		result = atomic_read(&dreq->count);
+
+	kref_put(&dreq->kref, nfs_direct_req_release);
+	return (ssize_t) result;
+}
+
+/**
+ * nfs_direct_read_seg - Read in one iov segment.  Generate separate
+ *                        read RPCs for each "rsize" bytes.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ * @pages: array of addresses of page structs defining user's buffer
+ * @nr_pages: number of pages in the array
+ *
+ */
+static ssize_t nfs_direct_read_seg(struct inode *inode,
+		struct nfs_open_context *ctx, unsigned long user_addr,
+		size_t count, loff_t file_offset, struct page **pages,
+		unsigned int nr_pages)
+{
+	ssize_t result;
+	sigset_t oldset;
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_direct_req *dreq;
+
+	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+	if (!dreq)
+		return -ENOMEM;
+
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
+				 file_offset);
+	result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+	rpc_clnt_sigunmask(clnt, &oldset);
+
+	return result;
+}
+
+/**
+ * nfs_direct_read - For each iov segment, map the user's buffer
+ *                   then generate read RPCs.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @iov: array of vectors that define I/O buffer
+ * file_offset: offset in file to begin the operation
+ * nr_segs: size of iovec array
+ *
+ * We've already pushed out any non-direct writes so that this read
+ * will see them when we read from the server.
+ */
+static ssize_t
+nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
+		const struct iovec *iov, loff_t file_offset,
+		unsigned long nr_segs)
+{
+	ssize_t tot_bytes = 0;
+	unsigned long seg = 0;
+
+	while ((seg < nr_segs) && (tot_bytes >= 0)) {
+		ssize_t result;
+		int page_count;
+		struct page **pages;
+		const struct iovec *vec = &iov[seg++];
+		unsigned long user_addr = (unsigned long) vec->iov_base;
+		size_t size = vec->iov_len;
+
+                page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
+                if (page_count < 0) {
+                        nfs_free_user_pages(pages, 0, 0);
+			if (tot_bytes > 0)
+				break;
+                        return page_count;
+                }
+
+		result = nfs_direct_read_seg(inode, ctx, user_addr, size,
+				file_offset, pages, page_count);
+
+		if (result <= 0) {
+			if (tot_bytes > 0)
+				break;
+			return result;
+		}
+		tot_bytes += result;
+		file_offset += result;
+		if (result < size)
+			break;
+	}
+
+	return tot_bytes;
+}
+
+/**
+ * nfs_direct_write_seg - Write out one iov segment.  Generate separate
+ *                        write RPCs for each "wsize" bytes, then commit.
+ * @inode: target inode
+ * @ctx: target file open context
+ * user_addr: starting address of this segment of user's buffer
+ * count: size of this segment
+ * file_offset: offset in file to begin the operation
+ * @pages: array of addresses of page structs defining user's buffer
+ * nr_pages: size of pages array
+ */
+static ssize_t nfs_direct_write_seg(struct inode *inode,
+		struct nfs_open_context *ctx, unsigned long user_addr,
+		size_t count, loff_t file_offset, struct page **pages,
+		int nr_pages)
+{
+	const unsigned int wsize = NFS_SERVER(inode)->wsize;
+	size_t request;
+	int curpage, need_commit;
+	ssize_t result, tot_bytes;
+	struct nfs_writeverf first_verf;
+	struct nfs_write_data *wdata;
+
+	wdata = nfs_writedata_alloc();
+	if (!wdata)
+		return -ENOMEM;
+
+	wdata->inode = inode;
+	wdata->cred = ctx->cred;
+	wdata->args.fh = NFS_FH(inode);
+	wdata->args.context = ctx;
+	wdata->args.stable = NFS_UNSTABLE;
+	if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
+		wdata->args.stable = NFS_FILE_SYNC;
+	wdata->res.fattr = &wdata->fattr;
+	wdata->res.verf = &wdata->verf;
+
+	nfs_begin_data_update(inode);
+retry:
+	need_commit = 0;
+	tot_bytes = 0;
+	curpage = 0;
+	request = count;
+	wdata->args.pgbase = user_addr & ~PAGE_MASK;
+	wdata->args.offset = file_offset;
+	do {
+		wdata->args.count = request;
+		if (wdata->args.count > wsize)
+			wdata->args.count = wsize;
+		wdata->args.pages = &pages[curpage];
+
+		dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
+			wdata->args.count, (long long) wdata->args.offset,
+			user_addr + tot_bytes, wdata->args.pgbase, curpage);
+
+		lock_kernel();
+		result = NFS_PROTO(inode)->write(wdata);
+		unlock_kernel();
+
+		if (result <= 0) {
+			if (tot_bytes > 0)
+				break;
+			goto out;
+		}
+
+		if (tot_bytes == 0)
+			memcpy(&first_verf.verifier, &wdata->verf.verifier,
+						sizeof(first_verf.verifier));
+		if (wdata->verf.committed != NFS_FILE_SYNC) {
+			need_commit = 1;
+			if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
+					sizeof(first_verf.verifier)));
+				goto sync_retry;
+		}
+
+		tot_bytes += result;
+
+		/* in case of a short write: stop now, let the app recover */
+		if (result < wdata->args.count)
+			break;
+
+		wdata->args.offset += result;
+		wdata->args.pgbase += result;
+		curpage += wdata->args.pgbase >> PAGE_SHIFT;
+		wdata->args.pgbase &= ~PAGE_MASK;
+		request -= result;
+	} while (request != 0);
+
+	/*
+	 * Commit data written so far, even in the event of an error
+	 */
+	if (need_commit) {
+		wdata->args.count = tot_bytes;
+		wdata->args.offset = file_offset;
+
+		lock_kernel();
+		result = NFS_PROTO(inode)->commit(wdata);
+		unlock_kernel();
+
+		if (result < 0 || memcmp(&first_verf.verifier,
+					 &wdata->verf.verifier,
+					 sizeof(first_verf.verifier)) != 0)
+			goto sync_retry;
+	}
+	result = tot_bytes;
+
+out:
+	nfs_end_data_update_defer(inode);
+	nfs_writedata_free(wdata);
+	return result;
+
+sync_retry:
+	wdata->args.stable = NFS_FILE_SYNC;
+	goto retry;
+}
+
+/**
+ * nfs_direct_write - For each iov segment, map the user's buffer
+ *                    then generate write and commit RPCs.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @iov: array of vectors that define I/O buffer
+ * file_offset: offset in file to begin the operation
+ * nr_segs: size of iovec array
+ *
+ * Upon return, generic_file_direct_IO invalidates any cached pages
+ * that non-direct readers might access, so they will pick up these
+ * writes immediately.
+ */
+static ssize_t nfs_direct_write(struct inode *inode,
+		struct nfs_open_context *ctx, const struct iovec *iov,
+		loff_t file_offset, unsigned long nr_segs)
+{
+	ssize_t tot_bytes = 0;
+	unsigned long seg = 0;
+
+	while ((seg < nr_segs) && (tot_bytes >= 0)) {
+		ssize_t result;
+		int page_count;
+		struct page **pages;
+		const struct iovec *vec = &iov[seg++];
+		unsigned long user_addr = (unsigned long) vec->iov_base;
+		size_t size = vec->iov_len;
+
+                page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
+                if (page_count < 0) {
+                        nfs_free_user_pages(pages, 0, 0);
+			if (tot_bytes > 0)
+				break;
+                        return page_count;
+                }
+
+		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
+				file_offset, pages, page_count);
+		nfs_free_user_pages(pages, page_count, 0);
+
+		if (result <= 0) {
+			if (tot_bytes > 0)
+				break;
+			return result;
+		}
+		tot_bytes += result;
+		file_offset += result;
+		if (result < size)
+			break;
+	}
+	return tot_bytes;
+}
+
+/**
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * file_offset: offset in file to begin the operation
+ * nr_segs: size of iovec array
+ *
+ */
+ssize_t
+nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+		loff_t file_offset, unsigned long nr_segs)
+{
+	ssize_t result = -EINVAL;
+	struct file *file = iocb->ki_filp;
+	struct nfs_open_context *ctx;
+	struct dentry *dentry = file->f_dentry;
+	struct inode *inode = dentry->d_inode;
+
+	/*
+	 * No support for async yet
+	 */
+	if (!is_sync_kiocb(iocb))
+		return result;
+
+	ctx = (struct nfs_open_context *)file->private_data;
+	switch (rw) {
+	case READ:
+		dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
+				dentry->d_name.name, file_offset, nr_segs);
+
+		result = nfs_direct_read(inode, ctx, iov,
+						file_offset, nr_segs);
+		break;
+	case WRITE:
+		dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
+				dentry->d_name.name, file_offset, nr_segs);
+
+		result = nfs_direct_write(inode, ctx, iov,
+						file_offset, nr_segs);
+		break;
+	default:
+		break;
+	}
+	return result;
+}
+
+/**
+ * nfs_file_direct_read - file direct read operation for NFS files
+ * @iocb: target I/O control block
+ * @buf: user's buffer into which to read data
+ * count: number of bytes to read
+ * pos: byte offset in file where reading starts
+ *
+ * We use this function for direct reads instead of calling
+ * generic_file_aio_read() in order to avoid gfar's check to see if
+ * the request starts before the end of the file.  For that check
+ * to work, we must generate a GETATTR before each direct read, and
+ * even then there is a window between the GETATTR and the subsequent
+ * READ where the file size could change.  So our preference is simply
+ * to do all reads the application wants, and the server will take
+ * care of managing the end of file boundary.
+ * 
+ * This function also eliminates unnecessarily updating the file's
+ * atime locally, as the NFS server sets the file's atime, and this
+ * client must read the updated atime from the server back into its
+ * cache.
+ */
+ssize_t
+nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+{
+	ssize_t retval = -EINVAL;
+	loff_t *ppos = &iocb->ki_pos;
+	struct file *file = iocb->ki_filp;
+	struct nfs_open_context *ctx =
+			(struct nfs_open_context *) file->private_data;
+	struct dentry *dentry = file->f_dentry;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = count,
+	};
+
+	dprintk("nfs: direct read(%s/%s, %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long) pos);
+
+	if (!is_sync_kiocb(iocb))
+		goto out;
+	if (count < 0)
+		goto out;
+	retval = -EFAULT;
+	if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+		goto out;
+	retval = 0;
+	if (!count)
+		goto out;
+
+	if (mapping->nrpages) {
+		retval = filemap_fdatawrite(mapping);
+		if (retval == 0)
+			retval = nfs_wb_all(inode);
+		if (retval == 0)
+			retval = filemap_fdatawait(mapping);
+		if (retval)
+			goto out;
+	}
+
+	retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+	if (retval > 0)
+		*ppos = pos + retval;
+
+out:
+	return retval;
+}
+
+/**
+ * nfs_file_direct_write - file direct write operation for NFS files
+ * @iocb: target I/O control block
+ * @buf: user's buffer from which to write data
+ * count: number of bytes to write
+ * pos: byte offset in file where writing starts
+ *
+ * We use this function for direct writes instead of calling
+ * generic_file_aio_write() in order to avoid taking the inode
+ * semaphore and updating the i_size.  The NFS server will set
+ * the new i_size and this client must read the updated size
+ * back into its cache.  We let the server do generic write
+ * parameter checking and report problems.
+ *
+ * We also avoid an unnecessary invocation of generic_osync_inode(),
+ * as it is fairly meaningless to sync the metadata of an NFS file.
+ *
+ * We eliminate local atime updates, see direct read above.
+ *
+ * We avoid unnecessary page cache invalidations for normal cached
+ * readers of this file.
+ *
+ * Note that O_APPEND is not supported for NFS direct writes, as there
+ * is no atomic O_APPEND write facility in the NFS protocol.
+ */
+ssize_t
+nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+{
+	ssize_t retval = -EINVAL;
+	loff_t *ppos = &iocb->ki_pos;
+	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	struct file *file = iocb->ki_filp;
+	struct nfs_open_context *ctx =
+			(struct nfs_open_context *) file->private_data;
+	struct dentry *dentry = file->f_dentry;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct iovec iov = {
+		.iov_base = (char __user *)buf,
+		.iov_len = count,
+	};
+
+	dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		inode->i_ino, (unsigned long) count, (unsigned long) pos);
+
+	if (!is_sync_kiocb(iocb))
+		goto out;
+	if (count < 0)
+		goto out;
+        if (pos < 0)
+		goto out;
+	retval = -EFAULT;
+	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+		goto out;
+        if (file->f_error) {
+                retval = file->f_error;
+                file->f_error = 0;
+                goto out;
+        }
+	retval = -EFBIG;
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			goto out;
+		}
+		if (count > limit - (unsigned long) pos)
+			count = limit - (unsigned long) pos;
+	}
+	retval = 0;
+	if (!count)
+		goto out;
+
+	if (mapping->nrpages) {
+		retval = filemap_fdatawrite(mapping);
+		if (retval == 0)
+			retval = nfs_wb_all(inode);
+		if (retval == 0)
+			retval = filemap_fdatawait(mapping);
+		if (retval)
+			goto out;
+	}
+
+	retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+	if (mapping->nrpages)
+		invalidate_inode_pages2(mapping);
+	if (retval > 0)
+		*ppos = pos + retval;
+
+out:
+	return retval;
+}
+
+int nfs_init_directcache(void)
+{
+	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
+						sizeof(struct nfs_direct_req),
+						0, SLAB_RECLAIM_ACCOUNT,
+						NULL, NULL);
+	if (nfs_direct_cachep == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_directcache(void)
+{
+	if (kmem_cache_destroy(nfs_direct_cachep))
+		printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
+}
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -0,0 +1,484 @@
+/*
+ *  linux/fs/nfs/file.c
+ *
+ *  Copyright (C) 1992  Rick Sladkey
+ *
+ *  Changes Copyright (C) 1994 by Florian La Roche
+ *   - Do not copy data too often around in the kernel.
+ *   - In nfs_file_read the return value of kmalloc wasn't checked.
+ *   - Put in a better version of read look-ahead buffering. Original idea
+ *     and implementation by Wai S Kok elekokws@ee.nus.sg.
+ *
+ *  Expire cache on write to a file by Wai S Kok (Oct 1994).
+ *
+ *  Total rewrite of read side for new NFS buffer cache.. Linus.
+ *
+ *  nfs regular file handling functions
+ */
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY		NFSDBG_FILE
+
+static int nfs_file_open(struct inode *, struct file *);
+static int nfs_file_release(struct inode *, struct file *);
+static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
+static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
+static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static int  nfs_file_flush(struct file *);
+static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
+static int nfs_check_flags(int flags);
+static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
+static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
+
+struct file_operations nfs_file_operations = {
+	.llseek		= remote_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read		= nfs_file_read,
+	.aio_write		= nfs_file_write,
+	.mmap		= nfs_file_mmap,
+	.open		= nfs_file_open,
+	.flush		= nfs_file_flush,
+	.release	= nfs_file_release,
+	.fsync		= nfs_fsync,
+	.lock		= nfs_lock,
+	.flock		= nfs_flock,
+	.sendfile	= nfs_file_sendfile,
+	.check_flags	= nfs_check_flags,
+};
+
+struct inode_operations nfs_file_inode_operations = {
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+};
+
+/* Hack for future NFS swap support */
+#ifndef IS_SWAPFILE
+# define IS_SWAPFILE(inode)	(0)
+#endif
+
+static int nfs_check_flags(int flags)
+{
+	if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Open file
+ */
+static int
+nfs_file_open(struct inode *inode, struct file *filp)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	int (*open)(struct inode *, struct file *);
+	int res;
+
+	res = nfs_check_flags(filp->f_flags);
+	if (res)
+		return res;
+
+	lock_kernel();
+	/* Do NFSv4 open() call */
+	if ((open = server->rpc_ops->file_open) != NULL)
+		res = open(inode, filp);
+	unlock_kernel();
+	return res;
+}
+
+static int
+nfs_file_release(struct inode *inode, struct file *filp)
+{
+	/* Ensure that dirty pages are flushed out with the right creds */
+	if (filp->f_mode & FMODE_WRITE)
+		filemap_fdatawrite(filp->f_mapping);
+	return NFS_PROTO(inode)->file_release(inode, filp);
+}
+
+/*
+ * Flush all dirty pages, and check for write errors.
+ *
+ */
+static int
+nfs_file_flush(struct file *file)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct inode	*inode = file->f_dentry->d_inode;
+	int		status;
+
+	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+	if ((file->f_mode & FMODE_WRITE) == 0)
+		return 0;
+	lock_kernel();
+	/* Ensure that data+attribute caches are up to date after close() */
+	status = nfs_wb_all(inode);
+	if (!status) {
+		status = ctx->error;
+		ctx->error = 0;
+		if (!status && !nfs_have_delegation(inode, FMODE_READ))
+			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	}
+	unlock_kernel();
+	return status;
+}
+
+static ssize_t
+nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+{
+	struct dentry * dentry = iocb->ki_filp->f_dentry;
+	struct inode * inode = dentry->d_inode;
+	ssize_t result;
+
+#ifdef CONFIG_NFS_DIRECTIO
+	if (iocb->ki_filp->f_flags & O_DIRECT)
+		return nfs_file_direct_read(iocb, buf, count, pos);
+#endif
+
+	dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long) pos);
+
+	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!result)
+		result = generic_file_aio_read(iocb, buf, count, pos);
+	return result;
+}
+
+static ssize_t
+nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
+		read_actor_t actor, void *target)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	ssize_t res;
+
+	dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long long) *ppos);
+
+	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!res)
+		res = generic_file_sendfile(filp, ppos, count, actor, target);
+	return res;
+}
+
+static int
+nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	struct dentry *dentry = file->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	int	status;
+
+	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!status)
+		status = generic_file_mmap(file, vma);
+	return status;
+}
+
+/*
+ * Flush any dirty pages for this process, and check for write errors.
+ * The return status from this call provides a reliable indication of
+ * whether any write errors occurred for this process.
+ */
+static int
+nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct inode *inode = dentry->d_inode;
+	int status;
+
+	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+	lock_kernel();
+	status = nfs_wb_all(inode);
+	if (!status) {
+		status = ctx->error;
+		ctx->error = 0;
+	}
+	unlock_kernel();
+	return status;
+}
+
+/*
+ * This does the "real" work of the write. The generic routine has
+ * allocated the page, locked it, done all the page alignment stuff
+ * calculations etc. Now we should just copy the data from user
+ * space and write it back to the real medium..
+ *
+ * If the writer ends up delaying the write, the writer needs to
+ * increment the page use counts until he is done with the page.
+ */
+static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+	return nfs_flush_incompatible(file, page);
+}
+
+static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+	long status;
+
+	lock_kernel();
+	status = nfs_updatepage(file, page, offset, to-offset);
+	unlock_kernel();
+	return status;
+}
+
+struct address_space_operations nfs_file_aops = {
+	.readpage = nfs_readpage,
+	.readpages = nfs_readpages,
+	.set_page_dirty = __set_page_dirty_nobuffers,
+	.writepage = nfs_writepage,
+	.writepages = nfs_writepages,
+	.prepare_write = nfs_prepare_write,
+	.commit_write = nfs_commit_write,
+#ifdef CONFIG_NFS_DIRECTIO
+	.direct_IO = nfs_direct_IO,
+#endif
+};
+
+/* 
+ * Write to a file (through the page cache).
+ */
+static ssize_t
+nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+{
+	struct dentry * dentry = iocb->ki_filp->f_dentry;
+	struct inode * inode = dentry->d_inode;
+	ssize_t result;
+
+#ifdef CONFIG_NFS_DIRECTIO
+	if (iocb->ki_filp->f_flags & O_DIRECT)
+		return nfs_file_direct_write(iocb, buf, count, pos);
+#endif
+
+	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		inode->i_ino, (unsigned long) count, (unsigned long) pos);
+
+	result = -EBUSY;
+	if (IS_SWAPFILE(inode))
+		goto out_swapfile;
+	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (result)
+		goto out;
+
+	result = count;
+	if (!count)
+		goto out;
+
+	result = generic_file_aio_write(iocb, buf, count, pos);
+out:
+	return result;
+
+out_swapfile:
+	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
+	goto out;
+}
+
+static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_mapping->host;
+	int status = 0;
+
+	lock_kernel();
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	else {
+		struct file_lock *cfl = posix_test_lock(filp, fl);
+
+		fl->fl_type = F_UNLCK;
+		if (cfl != NULL)
+			memcpy(fl, cfl, sizeof(*fl));
+	}
+	unlock_kernel();
+	return status;
+}
+
+static int do_vfs_lock(struct file *file, struct file_lock *fl)
+{
+	int res = 0;
+	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
+		case FL_POSIX:
+			res = posix_lock_file_wait(file, fl);
+			break;
+		case FL_FLOCK:
+			res = flock_lock_file_wait(file, fl);
+			break;
+		default:
+			BUG();
+	}
+	if (res < 0)
+		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
+				__FUNCTION__);
+	return res;
+}
+
+static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_mapping->host;
+	sigset_t oldset;
+	int status;
+
+	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
+	/*
+	 * Flush all pending writes before doing anything
+	 * with locks..
+	 */
+	filemap_fdatawrite(filp->f_mapping);
+	down(&inode->i_sem);
+	nfs_wb_all(inode);
+	up(&inode->i_sem);
+	filemap_fdatawait(filp->f_mapping);
+
+	/* NOTE: special case
+	 * 	If we're signalled while cleaning up locks on process exit, we
+	 * 	still need to complete the unlock.
+	 */
+	lock_kernel();
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	else
+		status = do_vfs_lock(filp, fl);
+	unlock_kernel();
+	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
+	return status;
+}
+
+static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_mapping->host;
+	sigset_t oldset;
+	int status;
+
+	rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
+	/*
+	 * Flush all pending writes before doing anything
+	 * with locks..
+	 */
+	status = filemap_fdatawrite(filp->f_mapping);
+	if (status == 0) {
+		down(&inode->i_sem);
+		status = nfs_wb_all(inode);
+		up(&inode->i_sem);
+		if (status == 0)
+			status = filemap_fdatawait(filp->f_mapping);
+	}
+	if (status < 0)
+		goto out;
+
+	lock_kernel();
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+		/* If we were signalled we still need to ensure that
+		 * we clean up any state on the server. We therefore
+		 * record the lock call as having succeeded in order to
+		 * ensure that locks_remove_posix() cleans it out when
+		 * the process exits.
+		 */
+		if (status == -EINTR || status == -ERESTARTSYS)
+			do_vfs_lock(filp, fl);
+	} else
+		status = do_vfs_lock(filp, fl);
+	unlock_kernel();
+	if (status < 0)
+		goto out;
+	/*
+	 * Make sure we clear the cache whenever we try to get the lock.
+	 * This makes locking act as a cache coherency point.
+	 */
+	filemap_fdatawrite(filp->f_mapping);
+	down(&inode->i_sem);
+	nfs_wb_all(inode);	/* we may have slept */
+	up(&inode->i_sem);
+	filemap_fdatawait(filp->f_mapping);
+	nfs_zap_caches(inode);
+out:
+	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
+	return status;
+}
+
+/*
+ * Lock a (portion of) a file
+ */
+static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode * inode = filp->f_mapping->host;
+
+	dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
+			inode->i_sb->s_id, inode->i_ino,
+			fl->fl_type, fl->fl_flags,
+			(long long)fl->fl_start, (long long)fl->fl_end);
+
+	if (!inode)
+		return -EINVAL;
+
+	/* No mandatory locks over NFS */
+	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+		return -ENOLCK;
+
+	if (IS_GETLK(cmd))
+		return do_getlk(filp, cmd, fl);
+	if (fl->fl_type == F_UNLCK)
+		return do_unlk(filp, cmd, fl);
+	return do_setlk(filp, cmd, fl);
+}
+
+/*
+ * Lock a (portion of) a file
+ */
+static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode * inode = filp->f_mapping->host;
+
+	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
+			inode->i_sb->s_id, inode->i_ino,
+			fl->fl_type, fl->fl_flags);
+
+	if (!inode)
+		return -EINVAL;
+
+	/*
+	 * No BSD flocks over NFS allowed.
+	 * Note: we could try to fake a POSIX lock request here by
+	 * using ((u32) filp | 0x80000000) or some such as the pid.
+	 * Not sure whether that would be unique, though, or whether
+	 * that would break in other places.
+	 */
+	if (!(fl->fl_flags & FL_FLOCK))
+		return -ENOLCK;
+
+	/* We're simulating flock() locks using posix locks on the server */
+	fl->fl_owner = (fl_owner_t)filp;
+	fl->fl_start = 0;
+	fl->fl_end = OFFSET_MAX;
+
+	if (fl->fl_type == F_UNLCK)
+		return do_unlk(filp, cmd, fl);
+	return do_setlk(filp, cmd, fl);
+}
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -0,0 +1,498 @@
+/*
+ * fs/nfs/idmap.c
+ *
+ *  UID and GID to name mapping for clients.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/sched.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/workqueue.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_fs.h>
+
+#include <linux/nfs_idmap.h>
+
+#define IDMAP_HASH_SZ          128
+
+struct idmap_hashent {
+	__u32 ih_id;
+	int ih_namelen;
+	char ih_name[IDMAP_NAMESZ];
+};
+
+struct idmap_hashtable {
+	__u8 h_type;
+	struct idmap_hashent h_entries[IDMAP_HASH_SZ];
+};
+
+struct idmap {
+	char                  idmap_path[48];
+	struct dentry        *idmap_dentry;
+	wait_queue_head_t     idmap_wq;
+	struct idmap_msg      idmap_im;
+	struct semaphore      idmap_lock;    /* Serializes upcalls */
+	struct semaphore      idmap_im_lock; /* Protects the hashtable */
+	struct idmap_hashtable idmap_user_hash;
+	struct idmap_hashtable idmap_group_hash;
+};
+
+static ssize_t   idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
+		     char __user *, size_t);
+static ssize_t   idmap_pipe_downcall(struct file *, const char __user *,
+		     size_t);
+void             idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
+
+static unsigned int fnvhash32(const void *, size_t);
+
+static struct rpc_pipe_ops idmap_upcall_ops = {
+        .upcall         = idmap_pipe_upcall,
+        .downcall       = idmap_pipe_downcall,
+        .destroy_msg    = idmap_pipe_destroy_msg,
+};
+
+void
+nfs_idmap_new(struct nfs4_client *clp)
+{
+	struct idmap *idmap;
+
+	if (clp->cl_idmap != NULL)
+		return;
+        if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+                return;
+
+	memset(idmap, 0, sizeof(*idmap));
+
+	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
+	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
+
+        idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+	    idmap, &idmap_upcall_ops, 0);
+        if (IS_ERR(idmap->idmap_dentry)) {
+		kfree(idmap);
+		return;
+	}
+
+        init_MUTEX(&idmap->idmap_lock);
+        init_MUTEX(&idmap->idmap_im_lock);
+	init_waitqueue_head(&idmap->idmap_wq);
+	idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
+	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
+
+	clp->cl_idmap = idmap;
+}
+
+void
+nfs_idmap_delete(struct nfs4_client *clp)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	if (!idmap)
+		return;
+	rpc_unlink(idmap->idmap_path);
+	clp->cl_idmap = NULL;
+	kfree(idmap);
+}
+
+/*
+ * Helper routines for manipulating the hashtable
+ */
+static inline struct idmap_hashent *
+idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
+{
+	return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
+}
+
+static struct idmap_hashent *
+idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
+{
+	struct idmap_hashent *he = idmap_name_hash(h, name, len);
+
+	if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
+		return NULL;
+	return he;
+}
+
+static inline struct idmap_hashent *
+idmap_id_hash(struct idmap_hashtable* h, __u32 id)
+{
+	return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ];
+}
+
+static struct idmap_hashent *
+idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
+{
+	struct idmap_hashent *he = idmap_id_hash(h, id);
+	if (he->ih_id != id || he->ih_namelen == 0)
+		return NULL;
+	return he;
+}
+
+/*
+ * Routines for allocating new entries in the hashtable.
+ * For now, we just have 1 entry per bucket, so it's all
+ * pretty trivial.
+ */
+static inline struct idmap_hashent *
+idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len)
+{
+	return idmap_name_hash(h, name, len);
+}
+
+static inline struct idmap_hashent *
+idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
+{
+	return idmap_id_hash(h, id);
+}
+
+static void
+idmap_update_entry(struct idmap_hashent *he, const char *name,
+		size_t namelen, __u32 id)
+{
+	he->ih_id = id;
+	memcpy(he->ih_name, name, namelen);
+	he->ih_name[namelen] = '\0';
+	he->ih_namelen = namelen;
+}
+
+/*
+ * Name -> ID
+ */
+static int
+nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
+		const char *name, size_t namelen, __u32 *id)
+{
+	struct rpc_pipe_msg msg;
+	struct idmap_msg *im;
+	struct idmap_hashent *he;
+	DECLARE_WAITQUEUE(wq, current);
+	int ret = -EIO;
+
+	im = &idmap->idmap_im;
+
+	/*
+	 * String sanity checks
+	 * Note that the userland daemon expects NUL terminated strings
+	 */
+	for (;;) {
+		if (namelen == 0)
+			return -EINVAL;
+		if (name[namelen-1] != '\0')
+			break;
+		namelen--;
+	}
+	if (namelen >= IDMAP_NAMESZ)
+		return -EINVAL;
+
+	down(&idmap->idmap_lock);
+	down(&idmap->idmap_im_lock);
+
+	he = idmap_lookup_name(h, name, namelen);
+	if (he != NULL) {
+		*id = he->ih_id;
+		ret = 0;
+		goto out;
+	}
+
+	memset(im, 0, sizeof(*im));
+	memcpy(im->im_name, name, namelen);
+
+	im->im_type = h->h_type;
+	im->im_conv = IDMAP_CONV_NAMETOID;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.data = im;
+	msg.len = sizeof(*im);
+
+	add_wait_queue(&idmap->idmap_wq, &wq);
+	if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
+		remove_wait_queue(&idmap->idmap_wq, &wq);
+		goto out;
+	}
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	up(&idmap->idmap_im_lock);
+	schedule();
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&idmap->idmap_wq, &wq);
+	down(&idmap->idmap_im_lock);
+
+	if (im->im_status & IDMAP_STATUS_SUCCESS) {
+		*id = im->im_id;
+		ret = 0;
+	}
+
+ out:
+	memset(im, 0, sizeof(*im));
+	up(&idmap->idmap_im_lock);
+	up(&idmap->idmap_lock);
+	return (ret);
+}
+
+/*
+ * ID -> Name
+ */
+static int
+nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
+		__u32 id, char *name)
+{
+	struct rpc_pipe_msg msg;
+	struct idmap_msg *im;
+	struct idmap_hashent *he;
+	DECLARE_WAITQUEUE(wq, current);
+	int ret = -EIO;
+	unsigned int len;
+
+	im = &idmap->idmap_im;
+
+	down(&idmap->idmap_lock);
+	down(&idmap->idmap_im_lock);
+
+	he = idmap_lookup_id(h, id);
+	if (he != 0) {
+		memcpy(name, he->ih_name, he->ih_namelen);
+		ret = he->ih_namelen;
+		goto out;
+	}
+
+	memset(im, 0, sizeof(*im));
+	im->im_type = h->h_type;
+	im->im_conv = IDMAP_CONV_IDTONAME;
+	im->im_id = id;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.data = im;
+	msg.len = sizeof(*im);
+
+	add_wait_queue(&idmap->idmap_wq, &wq);
+
+	if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
+		remove_wait_queue(&idmap->idmap_wq, &wq);
+		goto out;
+	}
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	up(&idmap->idmap_im_lock);
+	schedule();
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&idmap->idmap_wq, &wq);
+	down(&idmap->idmap_im_lock);
+
+	if (im->im_status & IDMAP_STATUS_SUCCESS) {
+		if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
+			goto out;
+		memcpy(name, im->im_name, len);
+		ret = len;
+	}
+
+ out:
+	memset(im, 0, sizeof(*im));
+	up(&idmap->idmap_im_lock);
+	up(&idmap->idmap_lock);
+	return ret;
+}
+
+/* RPC pipefs upcall/downcall routines */
+static ssize_t
+idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
+    char __user *dst, size_t buflen)
+{
+        char *data = (char *)msg->data + msg->copied;
+        ssize_t mlen = msg->len - msg->copied;
+        ssize_t left;
+
+        if (mlen > buflen)
+                mlen = buflen;
+
+        left = copy_to_user(dst, data, mlen);
+	if (left < 0) {
+		msg->errno = left;
+		return left;
+	}
+	mlen -= left;
+	msg->copied += mlen;
+	msg->errno = 0;
+        return mlen;
+}
+
+static ssize_t
+idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{
+        struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+	struct idmap *idmap = (struct idmap *)rpci->private;
+	struct idmap_msg im_in, *im = &idmap->idmap_im;
+	struct idmap_hashtable *h;
+	struct idmap_hashent *he = NULL;
+	int namelen_in;
+	int ret;
+
+        if (mlen != sizeof(im_in))
+                return (-ENOSPC);
+
+        if (copy_from_user(&im_in, src, mlen) != 0)
+		return (-EFAULT);
+
+	down(&idmap->idmap_im_lock);
+
+	ret = mlen;
+	im->im_status = im_in.im_status;
+	/* If we got an error, terminate now, and wake up pending upcalls */
+	if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
+		wake_up(&idmap->idmap_wq);
+		goto out;
+	}
+
+	/* Sanity checking of strings */
+	ret = -EINVAL;
+	namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
+	if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
+		goto out;
+
+	switch (im_in.im_type) {
+		case IDMAP_TYPE_USER:
+			h = &idmap->idmap_user_hash;
+			break;
+		case IDMAP_TYPE_GROUP:
+			h = &idmap->idmap_group_hash;
+			break;
+		default:
+			goto out;
+	}
+
+	switch (im_in.im_conv) {
+	case IDMAP_CONV_IDTONAME:
+		/* Did we match the current upcall? */
+		if (im->im_conv == IDMAP_CONV_IDTONAME
+				&& im->im_type == im_in.im_type
+				&& im->im_id == im_in.im_id) {
+			/* Yes: copy string, including the terminating '\0'  */
+			memcpy(im->im_name, im_in.im_name, namelen_in);
+			im->im_name[namelen_in] = '\0';
+			wake_up(&idmap->idmap_wq);
+		}
+		he = idmap_alloc_id(h, im_in.im_id);
+		break;
+	case IDMAP_CONV_NAMETOID:
+		/* Did we match the current upcall? */
+		if (im->im_conv == IDMAP_CONV_NAMETOID
+				&& im->im_type == im_in.im_type
+				&& strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
+				&& memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
+			im->im_id = im_in.im_id;
+			wake_up(&idmap->idmap_wq);
+		}
+		he = idmap_alloc_name(h, im_in.im_name, namelen_in);
+		break;
+	default:
+		goto out;
+	}
+
+	/* If the entry is valid, also copy it to the cache */
+	if (he != NULL)
+		idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
+	ret = mlen;
+out:
+	up(&idmap->idmap_im_lock);
+	return ret;
+}
+
+void
+idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+	struct idmap_msg *im = msg->data;
+	struct idmap *idmap = container_of(im, struct idmap, idmap_im); 
+
+	if (msg->errno >= 0)
+		return;
+	down(&idmap->idmap_im_lock);
+	im->im_status = IDMAP_STATUS_LOOKUPFAIL;
+	wake_up(&idmap->idmap_wq);
+	up(&idmap->idmap_im_lock);
+}
+
+/* 
+ * Fowler/Noll/Vo hash
+ *    http://www.isthe.com/chongo/tech/comp/fnv/
+ */
+
+#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
+#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */
+
+static unsigned int fnvhash32(const void *buf, size_t buflen)
+{
+	const unsigned char *p, *end = (const unsigned char *)buf + buflen;
+	unsigned int hash = FNV_1_32;
+
+	for (p = buf; p < end; p++) {
+		hash *= FNV_P_32;
+		hash ^= (unsigned int)*p;
+	}
+
+	return (hash);
+}
+
+int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
+}
+
+int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
+}
+
+int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
+}
+int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
+{
+	struct idmap *idmap = clp->cl_idmap;
+
+	return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
+}
+
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -0,0 +1,183 @@
+/*
+ * linux/fs/nfs/mount_clnt.c
+ *
+ * MOUNT client to support NFSroot.
+ *
+ * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/uio.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs_fs.h>
+
+#ifdef RPC_DEBUG
+# define NFSDBG_FACILITY	NFSDBG_ROOT
+#endif
+
+/*
+#define MOUNT_PROGRAM		100005
+#define MOUNT_VERSION		1
+#define MOUNT_MNT		1
+#define MOUNT_UMNT		3
+ */
+
+static struct rpc_clnt *	mnt_create(char *, struct sockaddr_in *,
+								int, int);
+static struct rpc_program	mnt_program;
+
+struct mnt_fhstatus {
+	unsigned int		status;
+	struct nfs_fh *		fh;
+};
+
+/*
+ * Obtain an NFS file handle for the given host and path
+ */
+int
+nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
+		int version, int protocol)
+{
+	struct rpc_clnt		*mnt_clnt;
+	struct mnt_fhstatus	result = {
+		.fh		= fh
+	};
+	char			hostname[32];
+	int			status;
+	int			call;
+
+	dprintk("NFS:      nfs_mount(%08x:%s)\n",
+			(unsigned)ntohl(addr->sin_addr.s_addr), path);
+
+	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
+	mnt_clnt = mnt_create(hostname, addr, version, protocol);
+	if (IS_ERR(mnt_clnt))
+		return PTR_ERR(mnt_clnt);
+
+	call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
+	status = rpc_call(mnt_clnt, call, path, &result, 0);
+	return status < 0? status : (result.status? -EACCES : 0);
+}
+
+static struct rpc_clnt *
+mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
+		int protocol)
+{
+	struct rpc_xprt	*xprt;
+	struct rpc_clnt	*clnt;
+
+	xprt = xprt_create_proto(protocol, srvaddr, NULL);
+	if (IS_ERR(xprt))
+		return (struct rpc_clnt *)xprt;
+
+	clnt = rpc_create_client(xprt, hostname,
+				&mnt_program, version,
+				RPC_AUTH_UNIX);
+	if (IS_ERR(clnt)) {
+		xprt_destroy(xprt);
+	} else {
+		clnt->cl_softrtry = 1;
+		clnt->cl_chatty   = 1;
+		clnt->cl_oneshot  = 1;
+		clnt->cl_intr = 1;
+	}
+	return clnt;
+}
+
+/*
+ * XDR encode/decode functions for MOUNT
+ */
+static int
+xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path)
+{
+	p = xdr_encode_string(p, path);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+{
+	struct nfs_fh *fh = res->fh;
+
+	if ((res->status = ntohl(*p++)) == 0) {
+		fh->size = NFS2_FHSIZE;
+		memcpy(fh->data, p, NFS2_FHSIZE);
+	}
+	return 0;
+}
+
+static int
+xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+{
+	struct nfs_fh *fh = res->fh;
+
+	if ((res->status = ntohl(*p++)) == 0) {
+		int size = ntohl(*p++);
+		if (size <= NFS3_FHSIZE) {
+			fh->size = size;
+			memcpy(fh->data, p, size);
+		} else
+			res->status = -EBADHANDLE;
+	}
+	return 0;
+}
+
+#define MNT_dirpath_sz		(1 + 256)
+#define MNT_fhstatus_sz		(1 + 8)
+
+static struct rpc_procinfo	mnt_procedures[] = {
+[MNTPROC_MNT] = {
+	  .p_proc		= MNTPROC_MNT,
+	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,	
+	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus,
+	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	},
+};
+
+static struct rpc_procinfo mnt3_procedures[] = {
+[MOUNTPROC3_MNT] = {
+	  .p_proc		= MOUNTPROC3_MNT,
+	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,
+	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus3,
+	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	},
+};
+
+
+static struct rpc_version	mnt_version1 = {
+		.number		= 1,
+		.nrprocs 	= 2,
+		.procs 		= mnt_procedures
+};
+
+static struct rpc_version       mnt_version3 = {
+		.number		= 3,
+		.nrprocs	= 2,
+		.procs		= mnt3_procedures
+};
+
+static struct rpc_version *	mnt_version[] = {
+	NULL,
+	&mnt_version1,
+	NULL,
+	&mnt_version3,
+};
+
+static struct rpc_stat		mnt_stats;
+
+static struct rpc_program	mnt_program = {
+	.name		= "mount",
+	.number		= NFS_MNT_PROGRAM,
+	.nrvers		= sizeof(mnt_version)/sizeof(mnt_version[0]),
+	.version	= mnt_version,
+	.stats		= &mnt_stats,
+};
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -0,0 +1,711 @@
+/*
+ * linux/fs/nfs/nfs2xdr.c
+ *
+ * XDR functions to encode/decode NFS RPC arguments and results.
+ *
+ * Copyright (C) 1992, 1993, 1994  Rick Sladkey
+ * Copyright (C) 1996 Olaf Kirch
+ * 04 Aug 1998  Ion Badulescu <ionut@cs.columbia.edu>
+ * 		FIFO's need special handling in NFSv2
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+
+#define NFSDBG_FACILITY		NFSDBG_XDR
+/* #define NFS_PARANOIA 1 */
+
+extern int			nfs_stat_to_errno(int stat);
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO		EIO
+
+/*
+ * Declare the space requirements for NFS arguments and replies as
+ * number of 32bit-words
+ */
+#define NFS_fhandle_sz		(8)
+#define NFS_sattr_sz		(8)
+#define NFS_filename_sz		(1+(NFS2_MAXNAMLEN>>2))
+#define NFS_path_sz		(1+(NFS2_MAXPATHLEN>>2))
+#define NFS_fattr_sz		(17)
+#define NFS_info_sz		(5)
+#define NFS_entry_sz		(NFS_filename_sz+3)
+
+#define NFS_diropargs_sz	(NFS_fhandle_sz+NFS_filename_sz)
+#define NFS_sattrargs_sz	(NFS_fhandle_sz+NFS_sattr_sz)
+#define NFS_readlinkargs_sz	(NFS_fhandle_sz)
+#define NFS_readargs_sz		(NFS_fhandle_sz+3)
+#define NFS_writeargs_sz	(NFS_fhandle_sz+4)
+#define NFS_createargs_sz	(NFS_diropargs_sz+NFS_sattr_sz)
+#define NFS_renameargs_sz	(NFS_diropargs_sz+NFS_diropargs_sz)
+#define NFS_linkargs_sz		(NFS_fhandle_sz+NFS_diropargs_sz)
+#define NFS_symlinkargs_sz	(NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
+#define NFS_readdirargs_sz	(NFS_fhandle_sz+2)
+
+#define NFS_attrstat_sz		(1+NFS_fattr_sz)
+#define NFS_diropres_sz		(1+NFS_fhandle_sz+NFS_fattr_sz)
+#define NFS_readlinkres_sz	(2)
+#define NFS_readres_sz		(1+NFS_fattr_sz+1)
+#define NFS_writeres_sz         (NFS_attrstat_sz)
+#define NFS_stat_sz		(1)
+#define NFS_readdirres_sz	(1)
+#define NFS_statfsres_sz	(1+NFS_info_sz)
+
+/*
+ * Common NFS XDR functions as inlines
+ */
+static inline u32 *
+xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle)
+{
+	memcpy(p, fhandle->data, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
+}
+
+static inline u32 *
+xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle)
+{
+	/* NFSv2 handles have a fixed length */
+	fhandle->size = NFS2_FHSIZE;
+	memcpy(fhandle->data, p, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
+}
+
+static inline u32*
+xdr_encode_time(u32 *p, struct timespec *timep)
+{
+	*p++ = htonl(timep->tv_sec);
+	/* Convert nanoseconds into microseconds */
+	*p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0);
+	return p;
+}
+
+static inline u32*
+xdr_encode_current_server_time(u32 *p, struct timespec *timep)
+{
+	/*
+	 * Passing the invalid value useconds=1000000 is a
+	 * Sun convention for "set to current server time".
+	 * It's needed to make permissions checks for the
+	 * "touch" program across v2 mounts to Solaris and
+	 * Irix boxes work correctly. See description of
+	 * sattr in section 6.1 of "NFS Illustrated" by
+	 * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
+	 */
+	*p++ = htonl(timep->tv_sec);
+	*p++ = htonl(1000000);
+	return p;
+}
+
+static inline u32*
+xdr_decode_time(u32 *p, struct timespec *timep)
+{
+	timep->tv_sec = ntohl(*p++);
+	/* Convert microseconds into nanoseconds */
+	timep->tv_nsec = ntohl(*p++) * 1000;
+	return p;
+}
+
+static u32 *
+xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
+{
+	u32 rdev;
+	fattr->type = (enum nfs_ftype) ntohl(*p++);
+	fattr->mode = ntohl(*p++);
+	fattr->nlink = ntohl(*p++);
+	fattr->uid = ntohl(*p++);
+	fattr->gid = ntohl(*p++);
+	fattr->size = ntohl(*p++);
+	fattr->du.nfs2.blocksize = ntohl(*p++);
+	rdev = ntohl(*p++);
+	fattr->du.nfs2.blocks = ntohl(*p++);
+	fattr->fsid_u.nfs3 = ntohl(*p++);
+	fattr->fileid = ntohl(*p++);
+	p = xdr_decode_time(p, &fattr->atime);
+	p = xdr_decode_time(p, &fattr->mtime);
+	p = xdr_decode_time(p, &fattr->ctime);
+	fattr->valid |= NFS_ATTR_FATTR;
+	fattr->rdev = new_decode_dev(rdev);
+	if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) {
+		fattr->type = NFFIFO;
+		fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
+		fattr->rdev = 0;
+	}
+	fattr->timestamp = jiffies;
+	return p;
+}
+
+#define SATTR(p, attr, flag, field) \
+        *p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0
+static inline u32 *
+xdr_encode_sattr(u32 *p, struct iattr *attr)
+{
+	SATTR(p, attr, ATTR_MODE, ia_mode);
+	SATTR(p, attr, ATTR_UID, ia_uid);
+	SATTR(p, attr, ATTR_GID, ia_gid);
+	SATTR(p, attr, ATTR_SIZE, ia_size);
+
+	if (attr->ia_valid & ATTR_ATIME_SET) {
+		p = xdr_encode_time(p, &attr->ia_atime);
+	} else if (attr->ia_valid & ATTR_ATIME) {
+		p = xdr_encode_current_server_time(p, &attr->ia_atime);
+	} else {
+		*p++ = ~(u32) 0;
+		*p++ = ~(u32) 0;
+	}
+
+	if (attr->ia_valid & ATTR_MTIME_SET) {
+		p = xdr_encode_time(p, &attr->ia_mtime);
+	} else if (attr->ia_valid & ATTR_MTIME) {
+		p = xdr_encode_current_server_time(p, &attr->ia_mtime);
+	} else {
+		*p++ = ~(u32) 0;	
+		*p++ = ~(u32) 0;
+	}
+  	return p;
+}
+#undef SATTR
+
+/*
+ * NFS encode functions
+ */
+/*
+ * Encode file handle argument
+ * GETATTR, READLINK, STATFS
+ */
+static int
+nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
+{
+	p = xdr_encode_fhandle(p, fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode SETATTR arguments
+ */
+static int
+nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_sattr(p, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode directory ops argument
+ * LOOKUP, REMOVE, RMDIR
+ */
+static int
+nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Arguments to a READ call. Since we read data directly into the page
+ * cache, we also set up the reply iovec here so that iov[1] points
+ * exactly to the page we want to fetch.
+ */
+static int
+nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	unsigned int replen;
+	u32 offset = (u32)args->offset;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(offset);
+	*p++ = htonl(count);
+	*p++ = htonl(count);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+			 args->pages, args->pgbase, count);
+	return 0;
+}
+
+/*
+ * Decode READ reply
+ */
+static int
+nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
+{
+	struct kvec *iov = req->rq_rcv_buf.head;
+	int	status, count, recvd, hdrlen;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	p = xdr_decode_fattr(p, res->fattr);
+
+	count = ntohl(*p++);
+	res->eof = 0;
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READ reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READ header is short. iovec will be shifted.\n");
+		xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
+	}
+
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (count > recvd) {
+		printk(KERN_WARNING "NFS: server cheating in read reply: "
+			"count %d > recvd %d\n", count, recvd);
+		count = recvd;
+	}
+
+	dprintk("RPC:      readres OK count %d\n", count);
+	if (count < res->count)
+		res->count = count;
+
+	return count;
+}
+
+
+/*
+ * Write arguments. Splice the buffer to be written into the iovec.
+ */
+static int
+nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+{
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	u32 offset = (u32)args->offset;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(offset);
+	*p++ = htonl(offset);
+	*p++ = htonl(count);
+	*p++ = htonl(count);
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+
+	/* Copy the page array */
+	xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
+	return 0;
+}
+
+/*
+ * Encode create arguments
+ * CREATE, MKDIR
+ */
+static int
+nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	p = xdr_encode_sattr(p, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode RENAME arguments
+ */
+static int
+nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_array(p, args->fromname, args->fromlen);
+	p = xdr_encode_fhandle(p, args->tofh);
+	p = xdr_encode_array(p, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode LINK arguments
+ */
+static int
+nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_fhandle(p, args->tofh);
+	p = xdr_encode_array(p, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode SYMLINK arguments
+ */
+static int
+nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fromfh);
+	p = xdr_encode_array(p, args->fromname, args->fromlen);
+	p = xdr_encode_array(p, args->topath, args->tolen);
+	p = xdr_encode_sattr(p, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode arguments to readdir call
+ */
+static int
+nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
+{
+	struct rpc_task	*task = req->rq_task;
+	struct rpc_auth	*auth = task->tk_auth;
+	unsigned int replen;
+	u32 count = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(args->cookie);
+	*p++ = htonl(count); /* see above */
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
+	return 0;
+}
+
+/*
+ * Decode the result of a readdir call.
+ * We're not really decoding anymore, we just leave the buffer untouched
+ * and only check that it is syntactically correct.
+ * The real decoding happens in nfs_decode_entry below, called directly
+ * from nfs_readdir for each entry.
+ */
+static int
+nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	struct page **page;
+	int hdrlen, recvd;
+	int status, nr;
+	unsigned int len, pglen;
+	u32 *end, *entry, *kaddr;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
+		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+	}
+
+	pglen = rcvbuf->page_len;
+	recvd = rcvbuf->len - hdrlen;
+	if (pglen > recvd)
+		pglen = recvd;
+	page = rcvbuf->pages;
+	kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
+	end = (u32 *)((char *)p + pglen);
+	entry = p;
+	for (nr = 0; *p++; nr++) {
+		if (p + 2 > end)
+			goto short_pkt;
+		p++; /* fileid */
+		len = ntohl(*p++);
+		p += XDR_QUADLEN(len) + 1;	/* name plus cookie */
+		if (len > NFS2_MAXNAMLEN) {
+			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
+						len);
+			goto err_unmap;
+		}
+		if (p + 2 > end)
+			goto short_pkt;
+		entry = p;
+	}
+	if (!nr && (entry[0] != 0 || entry[1] == 0))
+		goto short_pkt;
+ out:
+	kunmap_atomic(kaddr, KM_USER0);
+	return nr;
+ short_pkt:
+	entry[0] = entry[1] = 0;
+	/* truncate listing ? */
+	if (!nr) {
+		printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+		entry[1] = 1;
+	}
+	goto out;
+err_unmap:
+	nr = -errno_NFSERR_IO;
+	goto out;
+}
+
+u32 *
+nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{
+	if (!*p++) {
+		if (!*p)
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	entry->ino	  = ntohl(*p++);
+	entry->len	  = ntohl(*p++);
+	entry->name	  = (const char *) p;
+	p		 += XDR_QUADLEN(entry->len);
+	entry->prev_cookie	  = entry->cookie;
+	entry->cookie	  = ntohl(*p++);
+	entry->eof	  = !p[0] && p[1];
+
+	return p;
+}
+
+/*
+ * NFS XDR decode functions
+ */
+/*
+ * Decode simple status reply
+ */
+static int
+nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)) != 0)
+		status = -nfs_stat_to_errno(status);
+	return status;
+}
+
+/*
+ * Decode attrstat reply
+ * GETATTR, SETATTR, WRITE
+ */
+static int
+nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	xdr_decode_fattr(p, fattr);
+	return 0;
+}
+
+/*
+ * Decode diropres reply
+ * LOOKUP, CREATE, MKDIR
+ */
+static int
+nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	p = xdr_decode_fhandle(p, res->fh);
+	xdr_decode_fattr(p, res->fattr);
+	return 0;
+}
+
+/*
+ * Encode READLINK args
+ */
+static int
+nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
+	return 0;
+}
+
+/*
+ * Decode READLINK reply
+ */
+static int
+nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	int hdrlen, len, recvd;
+	char	*kaddr;
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+	/* Convert length of symlink */
+	len = ntohl(*p++);
+	if (len >= rcvbuf->page_len || len <= 0) {
+		dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		return -ENAMETOOLONG;
+	}
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
+		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+	}
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (recvd < len) {
+		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+				"count %u > recvd %u\n", len, recvd);
+		return -EIO;
+	}
+
+	/* NULL terminate the string we got */
+	kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
+	kaddr[len+rcvbuf->page_base] = '\0';
+	kunmap_atomic(kaddr, KM_USER0);
+	return 0;
+}
+
+/*
+ * Decode WRITE reply
+ */
+static int
+nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	res->verf->committed = NFS_FILE_SYNC;
+	return nfs_xdr_attrstat(req, p, res->fattr);
+}
+
+/*
+ * Decode STATFS reply
+ */
+static int
+nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
+{
+	int	status;
+
+	if ((status = ntohl(*p++)))
+		return -nfs_stat_to_errno(status);
+
+	res->tsize  = ntohl(*p++);
+	res->bsize  = ntohl(*p++);
+	res->blocks = ntohl(*p++);
+	res->bfree  = ntohl(*p++);
+	res->bavail = ntohl(*p++);
+	return 0;
+}
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ */
+static struct {
+	int stat;
+	int errno;
+} nfs_errtbl[] = {
+	{ NFS_OK,		0		},
+	{ NFSERR_PERM,		EPERM		},
+	{ NFSERR_NOENT,		ENOENT		},
+	{ NFSERR_IO,		errno_NFSERR_IO	},
+	{ NFSERR_NXIO,		ENXIO		},
+/*	{ NFSERR_EAGAIN,	EAGAIN		}, */
+	{ NFSERR_ACCES,		EACCES		},
+	{ NFSERR_EXIST,		EEXIST		},
+	{ NFSERR_XDEV,		EXDEV		},
+	{ NFSERR_NODEV,		ENODEV		},
+	{ NFSERR_NOTDIR,	ENOTDIR		},
+	{ NFSERR_ISDIR,		EISDIR		},
+	{ NFSERR_INVAL,		EINVAL		},
+	{ NFSERR_FBIG,		EFBIG		},
+	{ NFSERR_NOSPC,		ENOSPC		},
+	{ NFSERR_ROFS,		EROFS		},
+	{ NFSERR_MLINK,		EMLINK		},
+	{ NFSERR_NAMETOOLONG,	ENAMETOOLONG	},
+	{ NFSERR_NOTEMPTY,	ENOTEMPTY	},
+	{ NFSERR_DQUOT,		EDQUOT		},
+	{ NFSERR_STALE,		ESTALE		},
+	{ NFSERR_REMOTE,	EREMOTE		},
+#ifdef EWFLUSH
+	{ NFSERR_WFLUSH,	EWFLUSH		},
+#endif
+	{ NFSERR_BADHANDLE,	EBADHANDLE	},
+	{ NFSERR_NOT_SYNC,	ENOTSYNC	},
+	{ NFSERR_BAD_COOKIE,	EBADCOOKIE	},
+	{ NFSERR_NOTSUPP,	ENOTSUPP	},
+	{ NFSERR_TOOSMALL,	ETOOSMALL	},
+	{ NFSERR_SERVERFAULT,	ESERVERFAULT	},
+	{ NFSERR_BADTYPE,	EBADTYPE	},
+	{ NFSERR_JUKEBOX,	EJUKEBOX	},
+	{ -1,			EIO		}
+};
+
+/*
+ * Convert an NFS error code to a local one.
+ * This one is used jointly by NFSv2 and NFSv3.
+ */
+int
+nfs_stat_to_errno(int stat)
+{
+	int i;
+
+	for (i = 0; nfs_errtbl[i].stat != -1; i++) {
+		if (nfs_errtbl[i].stat == stat)
+			return nfs_errtbl[i].errno;
+	}
+	printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+	return nfs_errtbl[i].errno;
+}
+
+#ifndef MAX
+# define MAX(a, b)	(((a) > (b))? (a) : (b))
+#endif
+
+#define PROC(proc, argtype, restype, timer)				\
+[NFSPROC_##proc] = {							\
+	.p_proc	    =  NFSPROC_##proc,					\
+	.p_encode   =  (kxdrproc_t) nfs_xdr_##argtype,			\
+	.p_decode   =  (kxdrproc_t) nfs_xdr_##restype,			\
+	.p_bufsiz   =  MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2,	\
+	.p_timer    =  timer						\
+	}
+struct rpc_procinfo	nfs_procedures[] = {
+    PROC(GETATTR,	fhandle,	attrstat, 1),
+    PROC(SETATTR,	sattrargs,	attrstat, 0),
+    PROC(LOOKUP,	diropargs,	diropres, 2),
+    PROC(READLINK,	readlinkargs,	readlinkres, 3),
+    PROC(READ,		readargs,	readres, 3),
+    PROC(WRITE,		writeargs,	writeres, 4),
+    PROC(CREATE,	createargs,	diropres, 0),
+    PROC(REMOVE,	diropargs,	stat, 0),
+    PROC(RENAME,	renameargs,	stat, 0),
+    PROC(LINK,		linkargs,	stat, 0),
+    PROC(SYMLINK,	symlinkargs,	stat, 0),
+    PROC(MKDIR,		createargs,	diropres, 0),
+    PROC(RMDIR,		diropargs,	stat, 0),
+    PROC(READDIR,	readdirargs,	readdirres, 3),
+    PROC(STATFS,	fhandle,	statfsres, 0),
+};
+
+struct rpc_version		nfs_version2 = {
+	.number			= 2,
+	.nrprocs		= sizeof(nfs_procedures)/sizeof(nfs_procedures[0]),
+	.procs			= nfs_procedures
+};
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -0,0 +1,859 @@
+/*
+ *  linux/fs/nfs/nfs3proc.c
+ *
+ *  Client-side NFSv3 procedures stubs.
+ *
+ *  Copyright (C) 1997, Olaf Kirch
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+extern struct rpc_procinfo nfs3_procedures[];
+
+/* A wrapper to handle the EJUKEBOX error message */
+static int
+nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
+{
+	sigset_t oldset;
+	int res;
+	rpc_clnt_sigmask(clnt, &oldset);
+	do {
+		res = rpc_call_sync(clnt, msg, flags);
+		if (res != -EJUKEBOX)
+			break;
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
+		res = -ERESTARTSYS;
+	} while (!signalled());
+	rpc_clnt_sigunmask(clnt, &oldset);
+	return res;
+}
+
+static inline int
+nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
+{
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[proc],
+		.rpc_argp	= argp,
+		.rpc_resp	= resp,
+	};
+	return nfs3_rpc_wrapper(clnt, &msg, flags);
+}
+
+#define rpc_call(clnt, proc, argp, resp, flags) \
+		nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
+#define rpc_call_sync(clnt, msg, flags) \
+		nfs3_rpc_wrapper(clnt, msg, flags)
+
+static int
+nfs3_async_handle_jukebox(struct rpc_task *task)
+{
+	if (task->tk_status != -EJUKEBOX)
+		return 0;
+	task->tk_status = 0;
+	rpc_restart_call(task);
+	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
+	return 1;
+}
+
+/*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ */
+static int
+nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_fsinfo *info)
+{
+	int	status;
+
+	dprintk("%s: call  fsinfo\n", __FUNCTION__);
+	info->fattr->valid = 0;
+	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
+	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
+		status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+		dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
+	}
+	return status;
+}
+
+/*
+ * One function for each procedure in the NFS protocol.
+ */
+static int
+nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	int	status;
+
+	dprintk("NFS call  getattr\n");
+	fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_GETATTR,
+			  fhandle, fattr, 0);
+	dprintk("NFS reply getattr: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+			struct iattr *sattr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nfs3_sattrargs	arg = {
+		.fh		= NFS_FH(inode),
+		.sattr		= sattr,
+	};
+	int	status;
+
+	dprintk("NFS call  setattr\n");
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+	dprintk("NFS reply setattr: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_lookup(struct inode *dir, struct qstr *name,
+		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs_fattr	dir_attr;
+	struct nfs3_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= fhandle,
+		.fattr		= fattr
+	};
+	int			status;
+
+	dprintk("NFS call  lookup %s\n", name->name);
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
+	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
+		status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
+			 fhandle, fattr, 0);
+	dprintk("NFS reply lookup: %d\n", status);
+	if (status >= 0)
+		status = nfs_refresh_inode(dir, &dir_attr);
+	return status;
+}
+
+static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+{
+	struct nfs_fattr	fattr;
+	struct nfs3_accessargs	arg = {
+		.fh		= NFS_FH(inode),
+	};
+	struct nfs3_accessres	res = {
+		.fattr		= &fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+		.rpc_cred	= entry->cred
+	};
+	int mode = entry->mask;
+	int status;
+
+	dprintk("NFS call  access\n");
+	fattr.valid = 0;
+
+	if (mode & MAY_READ)
+		arg.access |= NFS3_ACCESS_READ;
+	if (S_ISDIR(inode->i_mode)) {
+		if (mode & MAY_WRITE)
+			arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE;
+		if (mode & MAY_EXEC)
+			arg.access |= NFS3_ACCESS_LOOKUP;
+	} else {
+		if (mode & MAY_WRITE)
+			arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
+		if (mode & MAY_EXEC)
+			arg.access |= NFS3_ACCESS_EXECUTE;
+	}
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	nfs_refresh_inode(inode, &fattr);
+	if (status == 0) {
+		entry->mask = 0;
+		if (res.access & NFS3_ACCESS_READ)
+			entry->mask |= MAY_READ;
+		if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE))
+			entry->mask |= MAY_WRITE;
+		if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
+			entry->mask |= MAY_EXEC;
+	}
+	dprintk("NFS reply access: %d\n", status);
+	return status;
+}
+
+static int nfs3_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs_fattr	fattr;
+	struct nfs3_readlinkargs args = {
+		.fh		= NFS_FH(inode),
+		.pgbase		= pgbase,
+		.pglen		= pglen,
+		.pages		= &page
+	};
+	int			status;
+
+	dprintk("NFS call  readlink\n");
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
+			  &args, &fattr, 0);
+	nfs_refresh_inode(inode, &fattr);
+	dprintk("NFS reply readlink: %d\n", status);
+	return status;
+}
+
+static int nfs3_proc_read(struct nfs_read_data *rdata)
+{
+	int			flags = rdata->flags;
+	struct inode *		inode = rdata->inode;
+	struct nfs_fattr *	fattr = rdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
+		.rpc_argp	= &rdata->args,
+		.rpc_resp	= &rdata->res,
+		.rpc_cred	= rdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
+			(long long) rdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+	if (status >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply read: %d\n", status);
+	return status;
+}
+
+static int nfs3_proc_write(struct nfs_write_data *wdata)
+{
+	int			rpcflags = wdata->flags;
+	struct inode *		inode = wdata->inode;
+	struct nfs_fattr *	fattr = wdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
+		.rpc_argp	= &wdata->args,
+		.rpc_resp	= &wdata->res,
+		.rpc_cred	= wdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
+			(long long) wdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
+	if (status >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply write: %d\n", status);
+	return status < 0? status : wdata->res.count;
+}
+
+static int nfs3_proc_commit(struct nfs_write_data *cdata)
+{
+	struct inode *		inode = cdata->inode;
+	struct nfs_fattr *	fattr = cdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
+		.rpc_argp	= &cdata->args,
+		.rpc_resp	= &cdata->res,
+		.rpc_cred	= cdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
+			(long long) cdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (status >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply commit: %d\n", status);
+	return status;
+}
+
+/*
+ * Create a regular file.
+ * For now, we don't implement O_EXCL.
+ */
+static int
+nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+		 int flags)
+{
+	struct nfs_fh		fhandle;
+	struct nfs_fattr	fattr;
+	struct nfs_fattr	dir_attr;
+	struct nfs3_createargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr,
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	dprintk("NFS call  create %s\n", dentry->d_name.name);
+	arg.createmode = NFS3_CREATE_UNCHECKED;
+	if (flags & O_EXCL) {
+		arg.createmode  = NFS3_CREATE_EXCLUSIVE;
+		arg.verifier[0] = jiffies;
+		arg.verifier[1] = current->pid;
+	}
+
+again:
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+
+	/* If the server doesn't support the exclusive creation semantics,
+	 * try again with simple 'guarded' mode. */
+	if (status == NFSERR_NOTSUPP) {
+		switch (arg.createmode) {
+			case NFS3_CREATE_EXCLUSIVE:
+				arg.createmode = NFS3_CREATE_GUARDED;
+				break;
+
+			case NFS3_CREATE_GUARDED:
+				arg.createmode = NFS3_CREATE_UNCHECKED;
+				break;
+
+			case NFS3_CREATE_UNCHECKED:
+				goto out;
+		}
+		goto again;
+	}
+
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+
+	/* When we created the file with exclusive semantics, make
+	 * sure we set the attributes afterwards. */
+	if (arg.createmode == NFS3_CREATE_EXCLUSIVE) {
+		dprintk("NFS call  setattr (post-create)\n");
+
+		if (!(sattr->ia_valid & ATTR_ATIME_SET))
+			sattr->ia_valid |= ATTR_ATIME;
+		if (!(sattr->ia_valid & ATTR_MTIME_SET))
+			sattr->ia_valid |= ATTR_MTIME;
+
+		/* Note: we could use a guarded setattr here, but I'm
+		 * not sure this buys us anything (and I'd have
+		 * to revamp the NFSv3 XDR code) */
+		status = nfs3_proc_setattr(dentry, &fattr, sattr);
+		nfs_refresh_inode(dentry->d_inode, &fattr);
+		dprintk("NFS reply setattr (post-create): %d\n", status);
+	}
+out:
+	dprintk("NFS reply create: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr	dir_attr;
+	struct nfs3_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_REMOVE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &dir_attr,
+	};
+	int			status;
+
+	dprintk("NFS call  remove %s\n", name->name);
+	dir_attr.valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply remove: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct unlinkxdr {
+		struct nfs3_diropargs arg;
+		struct nfs_fattr res;
+	} *ptr;
+
+	ptr = (struct unlinkxdr *)kmalloc(sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+	ptr->arg.fh = NFS_FH(dir->d_inode);
+	ptr->arg.name = name->name;
+	ptr->arg.len = name->len;
+	ptr->res.valid = 0;
+	msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
+	msg->rpc_argp = &ptr->arg;
+	msg->rpc_resp = &ptr->res;
+	return 0;
+}
+
+static int
+nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+	struct nfs_fattr	*dir_attr;
+
+	if (nfs3_async_handle_jukebox(task))
+		return 1;
+	if (msg->rpc_argp) {
+		dir_attr = (struct nfs_fattr*)msg->rpc_resp;
+		nfs_refresh_inode(dir->d_inode, dir_attr);
+		kfree(msg->rpc_argp);
+	}
+	return 0;
+}
+
+static int
+nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		 struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs_fattr	old_dir_attr, new_dir_attr;
+	struct nfs3_renameargs	arg = {
+		.fromfh		= NFS_FH(old_dir),
+		.fromname	= old_name->name,
+		.fromlen	= old_name->len,
+		.tofh		= NFS_FH(new_dir),
+		.toname		= new_name->name,
+		.tolen		= new_name->len
+	};
+	struct nfs3_renameres	res = {
+		.fromattr	= &old_dir_attr,
+		.toattr		= &new_dir_attr
+	};
+	int			status;
+
+	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+	old_dir_attr.valid = 0;
+	new_dir_attr.valid = 0;
+	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+	nfs_refresh_inode(old_dir, &old_dir_attr);
+	nfs_refresh_inode(new_dir, &new_dir_attr);
+	dprintk("NFS reply rename: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr	dir_attr, fattr;
+	struct nfs3_linkargs	arg = {
+		.fromfh		= NFS_FH(inode),
+		.tofh		= NFS_FH(dir),
+		.toname		= name->name,
+		.tolen		= name->len
+	};
+	struct nfs3_linkres	res = {
+		.dir_attr	= &dir_attr,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	dprintk("NFS call  link %s\n", name->name);
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	nfs_refresh_inode(inode, &fattr);
+	dprintk("NFS reply link: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+		  struct iattr *sattr, struct nfs_fh *fhandle,
+		  struct nfs_fattr *fattr)
+{
+	struct nfs_fattr	dir_attr;
+	struct nfs3_symlinkargs	arg = {
+		.fromfh		= NFS_FH(dir),
+		.fromname	= name->name,
+		.fromlen	= name->len,
+		.topath		= path->name,
+		.tolen		= path->len,
+		.sattr		= sattr
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= fhandle,
+		.fattr		= fattr
+	};
+	int			status;
+
+	if (path->len > NFS3_MAXPATHLEN)
+		return -ENAMETOOLONG;
+	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply symlink: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+{
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs3_mkdirargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	dprintk("NFS reply mkdir: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr	dir_attr;
+	struct nfs3_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	int			status;
+
+	dprintk("NFS call  rmdir %s\n", name->name);
+	dir_attr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply rmdir: %d\n", status);
+	return status;
+}
+
+/*
+ * The READDIR implementation is somewhat hackish - we pass the user buffer
+ * to the encode function, which installs it in the receive iovec.
+ * The decode function itself doesn't perform any decoding, it just makes
+ * sure the reply is syntactically correct.
+ *
+ * Also note that this implementation handles both plain readdir and
+ * readdirplus.
+ */
+static int
+nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		  u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs_fattr	dir_attr;
+	u32			*verf = NFS_COOKIEVERF(dir);
+	struct nfs3_readdirargs	arg = {
+		.fh		= NFS_FH(dir),
+		.cookie		= cookie,
+		.verf		= {verf[0], verf[1]},
+		.plus		= plus,
+		.count		= count,
+		.pages		= &page
+	};
+	struct nfs3_readdirres	res = {
+		.dir_attr	= &dir_attr,
+		.verf		= verf,
+		.plus		= plus
+	};
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+		.rpc_cred	= cred
+	};
+	int			status;
+
+	lock_kernel();
+
+	if (plus)
+		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
+
+	dprintk("NFS call  readdir%s %d\n",
+			plus? "plus" : "", (unsigned int) cookie);
+
+	dir_attr.valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply readdir: %d\n", status);
+	unlock_kernel();
+	return status;
+}
+
+static int
+nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+		dev_t rdev)
+{
+	struct nfs_fh fh;
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs3_mknodargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr,
+		.rdev		= rdev
+	};
+	struct nfs3_diropres	res = {
+		.dir_attr	= &dir_attr,
+		.fh		= &fh,
+		.fattr		= &fattr
+	};
+	int status;
+
+	switch (sattr->ia_mode & S_IFMT) {
+	case S_IFBLK:	arg.type = NF3BLK;  break;
+	case S_IFCHR:	arg.type = NF3CHR;  break;
+	case S_IFIFO:	arg.type = NF3FIFO; break;
+	case S_IFSOCK:	arg.type = NF3SOCK; break;
+	default:	return -EINVAL;
+	}
+
+	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
+			MAJOR(rdev), MINOR(rdev));
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fh, &fattr);
+	dprintk("NFS reply mknod: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsstat *stat)
+{
+	int	status;
+
+	dprintk("NFS call  fsstat\n");
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	dprintk("NFS reply statfs: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *info)
+{
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	return status;
+}
+
+static int
+nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *info)
+{
+	int	status;
+
+	dprintk("NFS call  pathconf\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	dprintk("NFS reply pathconf: %d\n", status);
+	return status;
+}
+
+extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
+
+static void
+nfs3_read_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+	/* Call back common NFS readpage processing */
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, &data->fattr);
+	nfs_readpage_result(task);
+}
+
+static void
+nfs3_proc_read_setup(struct nfs_read_data *data)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs3_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+	data = (struct nfs_write_data *)task->tk_calldata;
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, data->res.fattr);
+	nfs_writeback_done(task);
+}
+
+static void
+nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			stable;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	if (how & FLUSH_STABLE) {
+		if (!NFS_I(inode)->ncommit)
+			stable = NFS_FILE_SYNC;
+		else
+			stable = NFS_DATA_SYNC;
+	} else
+		stable = NFS_UNSTABLE;
+	data->args.stable = stable;
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs3_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+	data = (struct nfs_write_data *)task->tk_calldata;
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, data->res.fattr);
+	nfs_commit_done(task);
+}
+
+static void
+nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static int
+nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
+}
+
+struct nfs_rpc_ops	nfs_v3_clientops = {
+	.version	= 3,			/* protocol version */
+	.dentry_ops	= &nfs_dentry_operations,
+	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.getroot	= nfs3_proc_get_root,
+	.getattr	= nfs3_proc_getattr,
+	.setattr	= nfs3_proc_setattr,
+	.lookup		= nfs3_proc_lookup,
+	.access		= nfs3_proc_access,
+	.readlink	= nfs3_proc_readlink,
+	.read		= nfs3_proc_read,
+	.write		= nfs3_proc_write,
+	.commit		= nfs3_proc_commit,
+	.create		= nfs3_proc_create,
+	.remove		= nfs3_proc_remove,
+	.unlink_setup	= nfs3_proc_unlink_setup,
+	.unlink_done	= nfs3_proc_unlink_done,
+	.rename		= nfs3_proc_rename,
+	.link		= nfs3_proc_link,
+	.symlink	= nfs3_proc_symlink,
+	.mkdir		= nfs3_proc_mkdir,
+	.rmdir		= nfs3_proc_rmdir,
+	.readdir	= nfs3_proc_readdir,
+	.mknod		= nfs3_proc_mknod,
+	.statfs		= nfs3_proc_statfs,
+	.fsinfo		= nfs3_proc_fsinfo,
+	.pathconf	= nfs3_proc_pathconf,
+	.decode_dirent	= nfs3_decode_dirent,
+	.read_setup	= nfs3_proc_read_setup,
+	.write_setup	= nfs3_proc_write_setup,
+	.commit_setup	= nfs3_proc_commit_setup,
+	.file_open	= nfs_open,
+	.file_release	= nfs_release,
+	.lock		= nfs3_proc_lock,
+};
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -0,0 +1,148 @@
+/*
+ *  fs/nfs/nfs4renewd.c
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 "renew daemon", which wakes up periodically to
+ * send a RENEW, to keep state alive on the server.  The daemon is implemented
+ * as an rpc_task, not a real kernel thread, so it always runs in rpciod's
+ * context.  There is one renewd per nfs_server.
+ *
+ * TODO: If the send queue gets backlogged (e.g., if the server goes down),
+ * we will keep filling the queue with periodic RENEW requests.  We need a
+ * mechanism for ensuring that if renewd successfully sends off a request,
+ * then it only wakes up when the request is finished.  Maybe use the
+ * child task framework of the RPC layer?
+ */
+
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+#define NFSDBG_FACILITY	NFSDBG_PROC
+
+void
+nfs4_renew_state(void *data)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)data;
+	long lease, timeout;
+	unsigned long last, now;
+
+	down_read(&clp->cl_sem);
+	dprintk("%s: start\n", __FUNCTION__);
+	/* Are there any active superblocks? */
+	if (list_empty(&clp->cl_superblocks))
+		goto out; 
+	spin_lock(&clp->cl_lock);
+	lease = clp->cl_lease_time;
+	last = clp->cl_last_renewal;
+	now = jiffies;
+	timeout = (2 * lease) / 3 + (long)last - (long)now;
+	/* Are we close to a lease timeout? */
+	if (time_after(now, last + lease/3)) {
+		spin_unlock(&clp->cl_lock);
+		/* Queue an asynchronous RENEW. */
+		nfs4_proc_async_renew(clp);
+		timeout = (2 * lease) / 3;
+		spin_lock(&clp->cl_lock);
+	} else
+		dprintk("%s: failed to call renewd. Reason: lease not expired \n",
+				__FUNCTION__);
+	if (timeout < 5 * HZ)    /* safeguard */
+		timeout = 5 * HZ;
+	dprintk("%s: requeueing work. Lease period = %ld\n",
+			__FUNCTION__, (timeout + HZ - 1) / HZ);
+	cancel_delayed_work(&clp->cl_renewd);
+	schedule_delayed_work(&clp->cl_renewd, timeout);
+	spin_unlock(&clp->cl_lock);
+out:
+	up_read(&clp->cl_sem);
+	dprintk("%s: done\n", __FUNCTION__);
+}
+
+/* Must be called with clp->cl_sem locked for writes */
+void
+nfs4_schedule_state_renewal(struct nfs4_client *clp)
+{
+	long timeout;
+
+	spin_lock(&clp->cl_lock);
+	timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal
+		- (long)jiffies;
+	if (timeout < 5 * HZ)
+		timeout = 5 * HZ;
+	dprintk("%s: requeueing work. Lease period = %ld\n",
+			__FUNCTION__, (timeout + HZ - 1) / HZ);
+	cancel_delayed_work(&clp->cl_renewd);
+	schedule_delayed_work(&clp->cl_renewd, timeout);
+	spin_unlock(&clp->cl_lock);
+}
+
+void
+nfs4_renewd_prepare_shutdown(struct nfs_server *server)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+
+	if (!clp)
+		return;
+	flush_scheduled_work();
+	down_write(&clp->cl_sem);
+	if (!list_empty(&server->nfs4_siblings))
+		list_del_init(&server->nfs4_siblings);
+	up_write(&clp->cl_sem);
+}
+
+/* Must be called with clp->cl_sem locked for writes */
+void
+nfs4_kill_renewd(struct nfs4_client *clp)
+{
+	down_read(&clp->cl_sem);
+	if (!list_empty(&clp->cl_superblocks)) {
+		up_read(&clp->cl_sem);
+		return;
+	}
+	cancel_delayed_work(&clp->cl_renewd);
+	up_read(&clp->cl_sem);
+	flush_scheduled_work();
+}
+
+/*
+ * Local variables:
+ *   c-basic-offset: 8
+ * End:
+ */
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -0,0 +1,932 @@
+/*
+ *  fs/nfs/nfs4state.c
+ *
+ *  Client-side XDR for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 state model.  For the time being,
+ * this is minimal, but will be made much more complex in a
+ * subsequent patch.
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_idmap.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+
+#include "callback.h"
+#include "delegation.h"
+
+#define OPENOWNER_POOL_SIZE	8
+
+static DEFINE_SPINLOCK(state_spinlock);
+
+nfs4_stateid zero_stateid;
+
+#if 0
+nfs4_stateid one_stateid =
+	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+#endif
+
+static LIST_HEAD(nfs4_clientid_list);
+
+static void nfs4_recover_state(void *);
+extern void nfs4_renew_state(void *);
+
+void
+init_nfsv4_state(struct nfs_server *server)
+{
+	server->nfs4_state = NULL;
+	INIT_LIST_HEAD(&server->nfs4_siblings);
+}
+
+void
+destroy_nfsv4_state(struct nfs_server *server)
+{
+	if (server->mnt_path) {
+		kfree(server->mnt_path);
+		server->mnt_path = NULL;
+	}
+	if (server->nfs4_state) {
+		nfs4_put_client(server->nfs4_state);
+		server->nfs4_state = NULL;
+	}
+}
+
+/*
+ * nfs4_get_client(): returns an empty client structure
+ * nfs4_put_client(): drops reference to client structure
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+static struct nfs4_client *
+nfs4_alloc_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp;
+
+	if (nfs_callback_up() < 0)
+		return NULL;
+	if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
+		nfs_callback_down();
+		return NULL;
+	}
+	memset(clp, 0, sizeof(*clp));
+	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+	init_rwsem(&clp->cl_sem);
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_state_owners);
+	INIT_LIST_HEAD(&clp->cl_unused);
+	spin_lock_init(&clp->cl_lock);
+	atomic_set(&clp->cl_count, 1);
+	INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp);
+	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	INIT_LIST_HEAD(&clp->cl_superblocks);
+	init_waitqueue_head(&clp->cl_waitq);
+	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
+	clp->cl_boot_time = CURRENT_TIME;
+	clp->cl_state = 1 << NFS4CLNT_OK;
+	return clp;
+}
+
+static void
+nfs4_free_client(struct nfs4_client *clp)
+{
+	struct nfs4_state_owner *sp;
+
+	while (!list_empty(&clp->cl_unused)) {
+		sp = list_entry(clp->cl_unused.next,
+				struct nfs4_state_owner,
+				so_list);
+		list_del(&sp->so_list);
+		kfree(sp);
+	}
+	BUG_ON(!list_empty(&clp->cl_state_owners));
+	if (clp->cl_cred)
+		put_rpccred(clp->cl_cred);
+	nfs_idmap_delete(clp);
+	if (clp->cl_rpcclient)
+		rpc_shutdown_client(clp->cl_rpcclient);
+	kfree(clp);
+	nfs_callback_down();
+}
+
+static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp;
+	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
+		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
+			atomic_inc(&clp->cl_count);
+			return clp;
+		}
+	}
+	return NULL;
+}
+
+struct nfs4_client *nfs4_find_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp;
+	spin_lock(&state_spinlock);
+	clp = __nfs4_find_client(addr);
+	spin_unlock(&state_spinlock);
+	return clp;
+}
+
+struct nfs4_client *
+nfs4_get_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp, *new = NULL;
+
+	spin_lock(&state_spinlock);
+	for (;;) {
+		clp = __nfs4_find_client(addr);
+		if (clp != NULL)
+			break;
+		clp = new;
+		if (clp != NULL) {
+			list_add(&clp->cl_servers, &nfs4_clientid_list);
+			new = NULL;
+			break;
+		}
+		spin_unlock(&state_spinlock);
+		new = nfs4_alloc_client(addr);
+		spin_lock(&state_spinlock);
+		if (new == NULL)
+			break;
+	}
+	spin_unlock(&state_spinlock);
+	if (new)
+		nfs4_free_client(new);
+	return clp;
+}
+
+void
+nfs4_put_client(struct nfs4_client *clp)
+{
+	if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
+		return;
+	list_del(&clp->cl_servers);
+	spin_unlock(&state_spinlock);
+	BUG_ON(!list_empty(&clp->cl_superblocks));
+	wake_up_all(&clp->cl_waitq);
+	rpc_wake_up(&clp->cl_rpcwaitq);
+	nfs4_kill_renewd(clp);
+	nfs4_free_client(clp);
+}
+
+static int __nfs4_init_client(struct nfs4_client *clp)
+{
+	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport);
+	if (status == 0)
+		status = nfs4_proc_setclientid_confirm(clp);
+	if (status == 0)
+		nfs4_schedule_state_renewal(clp);
+	return status;
+}
+
+int nfs4_init_client(struct nfs4_client *clp)
+{
+	return nfs4_map_errors(__nfs4_init_client(clp));
+}
+
+u32
+nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+{
+	return clp->cl_lockowner_id ++;
+}
+
+static struct nfs4_state_owner *
+nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+	struct nfs4_state_owner *sp = NULL;
+
+	if (!list_empty(&clp->cl_unused)) {
+		sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
+		atomic_inc(&sp->so_count);
+		sp->so_cred = cred;
+		list_move(&sp->so_list, &clp->cl_state_owners);
+		clp->cl_nunused--;
+	}
+	return sp;
+}
+
+static struct nfs4_state_owner *
+nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+	struct nfs4_state_owner *sp, *res = NULL;
+
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		if (sp->so_cred != cred)
+			continue;
+		atomic_inc(&sp->so_count);
+		/* Move to the head of the list */
+		list_move(&sp->so_list, &clp->cl_state_owners);
+		res = sp;
+		break;
+	}
+	return res;
+}
+
+/*
+ * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
+ * create a new state_owner.
+ *
+ */
+static struct nfs4_state_owner *
+nfs4_alloc_state_owner(void)
+{
+	struct nfs4_state_owner *sp;
+
+	sp = kmalloc(sizeof(*sp),GFP_KERNEL);
+	if (!sp)
+		return NULL;
+	init_MUTEX(&sp->so_sema);
+	sp->so_seqid = 0;                 /* arbitrary */
+	INIT_LIST_HEAD(&sp->so_states);
+	INIT_LIST_HEAD(&sp->so_delegations);
+	atomic_set(&sp->so_count, 1);
+	return sp;
+}
+
+void
+nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+{
+	struct nfs4_client *clp = sp->so_client;
+	spin_lock(&clp->cl_lock);
+	list_del_init(&sp->so_list);
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Note: must be called with clp->cl_sem held in order to prevent races
+ *       with reboot recovery!
+ */
+struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs4_state_owner *sp, *new;
+
+	get_rpccred(cred);
+	new = nfs4_alloc_state_owner();
+	spin_lock(&clp->cl_lock);
+	sp = nfs4_find_state_owner(clp, cred);
+	if (sp == NULL)
+		sp = nfs4_client_grab_unused(clp, cred);
+	if (sp == NULL && new != NULL) {
+		list_add(&new->so_list, &clp->cl_state_owners);
+		new->so_client = clp;
+		new->so_id = nfs4_alloc_lockowner_id(clp);
+		new->so_cred = cred;
+		sp = new;
+		new = NULL;
+	}
+	spin_unlock(&clp->cl_lock);
+	if (new)
+		kfree(new);
+	if (sp != NULL)
+		return sp;
+	put_rpccred(cred);
+	return NULL;
+}
+
+/*
+ * Must be called with clp->cl_sem held in order to avoid races
+ * with state recovery...
+ */
+void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+{
+	struct nfs4_client *clp = sp->so_client;
+	struct rpc_cred *cred = sp->so_cred;
+
+	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+		return;
+	if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
+		goto out_free;
+	if (list_empty(&sp->so_list))
+		goto out_free;
+	list_move(&sp->so_list, &clp->cl_unused);
+	clp->cl_nunused++;
+	spin_unlock(&clp->cl_lock);
+	put_rpccred(cred);
+	cred = NULL;
+	return;
+out_free:
+	list_del(&sp->so_list);
+	spin_unlock(&clp->cl_lock);
+	put_rpccred(cred);
+	kfree(sp);
+}
+
+static struct nfs4_state *
+nfs4_alloc_open_state(void)
+{
+	struct nfs4_state *state;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+	state->state = 0;
+	state->nreaders = 0;
+	state->nwriters = 0;
+	state->flags = 0;
+	memset(state->stateid.data, 0, sizeof(state->stateid.data));
+	atomic_set(&state->count, 1);
+	INIT_LIST_HEAD(&state->lock_states);
+	init_MUTEX(&state->lock_sema);
+	rwlock_init(&state->state_lock);
+	return state;
+}
+
+static struct nfs4_state *
+__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs4_state *state;
+
+	mode &= (FMODE_READ|FMODE_WRITE);
+	list_for_each_entry(state, &nfsi->open_states, inode_states) {
+		if (state->owner->so_cred != cred)
+			continue;
+		if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
+			continue;
+		if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
+			continue;
+		if ((state->state & mode) != mode)
+			continue;
+		atomic_inc(&state->count);
+		if (mode & FMODE_READ)
+			state->nreaders++;
+		if (mode & FMODE_WRITE)
+			state->nwriters++;
+		return state;
+	}
+	return NULL;
+}
+
+static struct nfs4_state *
+__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs4_state *state;
+
+	list_for_each_entry(state, &nfsi->open_states, inode_states) {
+		/* Is this in the process of being freed? */
+		if (state->nreaders == 0 && state->nwriters == 0)
+			continue;
+		if (state->owner == owner) {
+			atomic_inc(&state->count);
+			return state;
+		}
+	}
+	return NULL;
+}
+
+struct nfs4_state *
+nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
+{
+	struct nfs4_state *state;
+
+	spin_lock(&inode->i_lock);
+	state = __nfs4_find_state(inode, cred, mode);
+	spin_unlock(&inode->i_lock);
+	return state;
+}
+
+static void
+nfs4_free_open_state(struct nfs4_state *state)
+{
+	kfree(state);
+}
+
+struct nfs4_state *
+nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
+{
+	struct nfs4_state *state, *new;
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	spin_lock(&inode->i_lock);
+	state = __nfs4_find_state_byowner(inode, owner);
+	spin_unlock(&inode->i_lock);
+	if (state)
+		goto out;
+	new = nfs4_alloc_open_state();
+	spin_lock(&inode->i_lock);
+	state = __nfs4_find_state_byowner(inode, owner);
+	if (state == NULL && new != NULL) {
+		state = new;
+		/* Caller *must* be holding owner->so_sem */
+		/* Note: The reclaim code dictates that we add stateless
+		 * and read-only stateids to the end of the list */
+		list_add_tail(&state->open_states, &owner->so_states);
+		state->owner = owner;
+		atomic_inc(&owner->so_count);
+		list_add(&state->inode_states, &nfsi->open_states);
+		state->inode = igrab(inode);
+		spin_unlock(&inode->i_lock);
+	} else {
+		spin_unlock(&inode->i_lock);
+		if (new)
+			nfs4_free_open_state(new);
+	}
+out:
+	return state;
+}
+
+/*
+ * Beware! Caller must be holding exactly one
+ * reference to clp->cl_sem and owner->so_sema!
+ */
+void nfs4_put_open_state(struct nfs4_state *state)
+{
+	struct inode *inode = state->inode;
+	struct nfs4_state_owner *owner = state->owner;
+
+	if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
+		return;
+	if (!list_empty(&state->inode_states))
+		list_del(&state->inode_states);
+	spin_unlock(&inode->i_lock);
+	list_del(&state->open_states);
+	iput(inode);
+	BUG_ON (state->state != 0);
+	nfs4_free_open_state(state);
+	nfs4_put_state_owner(owner);
+}
+
+/*
+ * Beware! Caller must be holding no references to clp->cl_sem!
+ * of owner->so_sema!
+ */
+void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+{
+	struct inode *inode = state->inode;
+	struct nfs4_state_owner *owner = state->owner;
+	struct nfs4_client *clp = owner->so_client;
+	int newstate;
+
+	atomic_inc(&owner->so_count);
+	down_read(&clp->cl_sem);
+	down(&owner->so_sema);
+	/* Protect against nfs4_find_state() */
+	spin_lock(&inode->i_lock);
+	if (mode & FMODE_READ)
+		state->nreaders--;
+	if (mode & FMODE_WRITE)
+		state->nwriters--;
+	if (state->nwriters == 0) {
+		if (state->nreaders == 0)
+			list_del_init(&state->inode_states);
+		/* See reclaim code */
+		list_move_tail(&state->open_states, &owner->so_states);
+	}
+	spin_unlock(&inode->i_lock);
+	newstate = 0;
+	if (state->state != 0) {
+		if (state->nreaders)
+			newstate |= FMODE_READ;
+		if (state->nwriters)
+			newstate |= FMODE_WRITE;
+		if (state->state == newstate)
+			goto out;
+		if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
+			return;
+	}
+out:
+	nfs4_put_open_state(state);
+	up(&owner->so_sema);
+	nfs4_put_state_owner(owner);
+	up_read(&clp->cl_sem);
+}
+
+/*
+ * Search the state->lock_states for an existing lock_owner
+ * that is compatible with current->files
+ */
+static struct nfs4_lock_state *
+__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	struct nfs4_lock_state *pos;
+	list_for_each_entry(pos, &state->lock_states, ls_locks) {
+		if (pos->ls_owner != fl_owner)
+			continue;
+		atomic_inc(&pos->ls_count);
+		return pos;
+	}
+	return NULL;
+}
+
+struct nfs4_lock_state *
+nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	struct nfs4_lock_state *lsp;
+	read_lock(&state->state_lock);
+	lsp = __nfs4_find_lock_state(state, fl_owner);
+	read_unlock(&state->state_lock);
+	return lsp;
+}
+
+/*
+ * Return a compatible lock_state. If no initialized lock_state structure
+ * exists, return an uninitialized one.
+ *
+ * The caller must be holding state->lock_sema
+ */
+static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	struct nfs4_lock_state *lsp;
+	struct nfs4_client *clp = state->owner->so_client;
+
+	lsp = kmalloc(sizeof(*lsp), GFP_KERNEL);
+	if (lsp == NULL)
+		return NULL;
+	lsp->ls_flags = 0;
+	lsp->ls_seqid = 0;	/* arbitrary */
+	lsp->ls_id = -1; 
+	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
+	atomic_set(&lsp->ls_count, 1);
+	lsp->ls_owner = fl_owner;
+	INIT_LIST_HEAD(&lsp->ls_locks);
+	spin_lock(&clp->cl_lock);
+	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
+	spin_unlock(&clp->cl_lock);
+	return lsp;
+}
+
+/*
+ * Return a compatible lock_state. If no initialized lock_state structure
+ * exists, return an uninitialized one.
+ *
+ * The caller must be holding state->lock_sema and clp->cl_sem
+ */
+struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
+{
+	struct nfs4_lock_state * lsp;
+	
+	lsp = nfs4_find_lock_state(state, owner);
+	if (lsp == NULL)
+		lsp = nfs4_alloc_lock_state(state, owner);
+	return lsp;
+}
+
+/*
+ * Byte-range lock aware utility to initialize the stateid of read/write
+ * requests.
+ */
+void
+nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	if (test_bit(LK_STATE_IN_USE, &state->flags)) {
+		struct nfs4_lock_state *lsp;
+
+		lsp = nfs4_find_lock_state(state, fl_owner);
+		if (lsp) {
+			memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+			nfs4_put_lock_state(lsp);
+			return;
+		}
+	}
+	memcpy(dst, &state->stateid, sizeof(*dst));
+}
+
+/*
+* Called with state->lock_sema and clp->cl_sem held.
+*/
+void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+{
+	if (status == NFS_OK || seqid_mutating_err(-status))
+		lsp->ls_seqid++;
+}
+
+/* 
+* Check to see if the request lock (type FL_UNLK) effects the fl lock.
+*
+* fl and request must have the same posix owner
+*
+* return: 
+* 0 -> fl not effected by request
+* 1 -> fl consumed by request
+*/
+
+static int
+nfs4_check_unlock(struct file_lock *fl, struct file_lock *request)
+{
+	if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Post an initialized lock_state on the state->lock_states list.
+ */
+void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+{
+	if (!list_empty(&lsp->ls_locks))
+		return;
+	atomic_inc(&lsp->ls_count);
+	write_lock(&state->state_lock);
+	list_add(&lsp->ls_locks, &state->lock_states);
+	set_bit(LK_STATE_IN_USE, &state->flags);
+	write_unlock(&state->state_lock);
+}
+
+/* 
+ * to decide to 'reap' lock state:
+ * 1) search i_flock for file_locks with fl.lock_state = to ls.
+ * 2) determine if unlock will consume found lock. 
+ * 	if so, reap
+ *
+ * 	else, don't reap.
+ *
+ */
+void
+nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+{
+	struct inode *inode = state->inode;
+	struct file_lock *fl;
+
+	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
+		if (!(fl->fl_flags & FL_POSIX))
+			continue;
+		if (fl->fl_owner != lsp->ls_owner)
+			continue;
+		/* Exit if we find at least one lock which is not consumed */
+		if (nfs4_check_unlock(fl,request) == 0)
+			return;
+	}
+
+	write_lock(&state->state_lock);
+	list_del_init(&lsp->ls_locks);
+	if (list_empty(&state->lock_states))
+		clear_bit(LK_STATE_IN_USE, &state->flags);
+	write_unlock(&state->state_lock);
+	nfs4_put_lock_state(lsp);
+}
+
+/*
+ * Release reference to lock_state, and free it if we see that
+ * it is no longer in use
+ */
+void
+nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+{
+	if (!atomic_dec_and_test(&lsp->ls_count))
+		return;
+	BUG_ON (!list_empty(&lsp->ls_locks));
+	kfree(lsp);
+}
+
+/*
+* Called with sp->so_sema and clp->cl_sem held.
+*
+* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
+* failed with a seqid incrementing error -
+* see comments nfs_fs.h:seqid_mutating_error()
+*/
+void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
+{
+	if (status == NFS_OK || seqid_mutating_err(-status))
+		sp->so_seqid++;
+	/* If the server returns BAD_SEQID, unhash state_owner here */
+	if (status == -NFS4ERR_BAD_SEQID)
+		nfs4_drop_state_owner(sp);
+}
+
+static int reclaimer(void *);
+struct reclaimer_args {
+	struct nfs4_client *clp;
+	struct completion complete;
+};
+
+/*
+ * State recovery routine
+ */
+void
+nfs4_recover_state(void *data)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)data;
+	struct reclaimer_args args = {
+		.clp = clp,
+	};
+	might_sleep();
+
+	init_completion(&args.complete);
+
+	if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
+		goto out_failed_clear;
+	wait_for_completion(&args.complete);
+	return;
+out_failed_clear:
+	set_bit(NFS4CLNT_OK, &clp->cl_state);
+	wake_up_all(&clp->cl_waitq);
+	rpc_wake_up(&clp->cl_rpcwaitq);
+}
+
+/*
+ * Schedule a state recovery attempt
+ */
+void
+nfs4_schedule_state_recovery(struct nfs4_client *clp)
+{
+	if (!clp)
+		return;
+	if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state))
+		schedule_work(&clp->cl_recoverd);
+}
+
+static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state)
+{
+	struct inode *inode = state->inode;
+	struct file_lock *fl;
+	int status = 0;
+
+	for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
+		if (!(fl->fl_flags & FL_POSIX))
+			continue;
+		if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
+			continue;
+		status = ops->recover_lock(state, fl);
+		if (status >= 0)
+			continue;
+		switch (status) {
+			default:
+				printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
+						__FUNCTION__, status);
+			case -NFS4ERR_EXPIRED:
+			case -NFS4ERR_NO_GRACE:
+			case -NFS4ERR_RECLAIM_BAD:
+			case -NFS4ERR_RECLAIM_CONFLICT:
+				/* kill_proc(fl->fl_owner, SIGLOST, 1); */
+				break;
+			case -NFS4ERR_STALE_CLIENTID:
+				goto out_err;
+		}
+	}
+	return 0;
+out_err:
+	return status;
+}
+
+static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp)
+{
+	struct nfs4_state *state;
+	struct nfs4_lock_state *lock;
+	int status = 0;
+
+	/* Note: we rely on the sp->so_states list being ordered 
+	 * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
+	 * states first.
+	 * This is needed to ensure that the server won't give us any
+	 * read delegations that we have to return if, say, we are
+	 * recovering after a network partition or a reboot from a
+	 * server that doesn't support a grace period.
+	 */
+	list_for_each_entry(state, &sp->so_states, open_states) {
+		if (state->state == 0)
+			continue;
+		status = ops->recover_open(sp, state);
+		list_for_each_entry(lock, &state->lock_states, ls_locks)
+			lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
+		if (status >= 0) {
+			status = nfs4_reclaim_locks(ops, state);
+			if (status < 0)
+				goto out_err;
+			list_for_each_entry(lock, &state->lock_states, ls_locks) {
+				if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
+					printk("%s: Lock reclaim failed!\n",
+							__FUNCTION__);
+			}
+			continue;
+		}
+		switch (status) {
+			default:
+				printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
+						__FUNCTION__, status);
+			case -ENOENT:
+			case -NFS4ERR_RECLAIM_BAD:
+			case -NFS4ERR_RECLAIM_CONFLICT:
+				/*
+				 * Open state on this file cannot be recovered
+				 * All we can do is revert to using the zero stateid.
+				 */
+				memset(state->stateid.data, 0,
+					sizeof(state->stateid.data));
+				/* Mark the file as being 'closed' */
+				state->state = 0;
+				break;
+			case -NFS4ERR_EXPIRED:
+			case -NFS4ERR_NO_GRACE:
+			case -NFS4ERR_STALE_CLIENTID:
+				goto out_err;
+		}
+	}
+	return 0;
+out_err:
+	return status;
+}
+
+static int reclaimer(void *ptr)
+{
+	struct reclaimer_args *args = (struct reclaimer_args *)ptr;
+	struct nfs4_client *clp = args->clp;
+	struct nfs4_state_owner *sp;
+	struct nfs4_state_recovery_ops *ops;
+	int status = 0;
+
+	daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr));
+	allow_signal(SIGKILL);
+
+	atomic_inc(&clp->cl_count);
+	complete(&args->complete);
+
+	/* Ensure exclusive access to NFSv4 state */
+	lock_kernel();
+	down_write(&clp->cl_sem);
+	/* Are there any NFS mounts out there? */
+	if (list_empty(&clp->cl_superblocks))
+		goto out;
+restart_loop:
+	status = nfs4_proc_renew(clp);
+	switch (status) {
+		case 0:
+		case -NFS4ERR_CB_PATH_DOWN:
+			goto out;
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_LEASE_MOVED:
+			ops = &nfs4_reboot_recovery_ops;
+			break;
+		default:
+			ops = &nfs4_network_partition_recovery_ops;
+	};
+	status = __nfs4_init_client(clp);
+	if (status)
+		goto out_error;
+	/* Mark all delegations for reclaim */
+	nfs_delegation_mark_reclaim(clp);
+	/* Note: list is protected by exclusive lock on cl->cl_sem */
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		status = nfs4_reclaim_open_state(ops, sp);
+		if (status < 0) {
+			if (status == -NFS4ERR_NO_GRACE) {
+				ops = &nfs4_network_partition_recovery_ops;
+				status = nfs4_reclaim_open_state(ops, sp);
+			}
+			if (status == -NFS4ERR_STALE_CLIENTID)
+				goto restart_loop;
+			if (status == -NFS4ERR_EXPIRED)
+				goto restart_loop;
+		}
+	}
+	nfs_delegation_reap_unclaimed(clp);
+out:
+	set_bit(NFS4CLNT_OK, &clp->cl_state);
+	up_write(&clp->cl_sem);
+	unlock_kernel();
+	wake_up_all(&clp->cl_waitq);
+	rpc_wake_up(&clp->cl_rpcwaitq);
+	if (status == -NFS4ERR_CB_PATH_DOWN)
+		nfs_handle_cb_pathdown(clp);
+	nfs4_put_client(clp);
+	return 0;
+out_error:
+	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
+				NIPQUAD(clp->cl_addr.s_addr), -status);
+	goto out;
+}
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -0,0 +1,513 @@
+/*
+ *  $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $
+ *
+ *  Copyright (C) 1995, 1996  Gero Kuhlmann <gero@gkminix.han.de>
+ *
+ *  Allow an NFS filesystem to be mounted as root. The way this works is:
+ *     (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
+ *     (2) Handle RPC negotiation with the system which replied to RARP or
+ *         was reported as a boot server by BOOTP or manually.
+ *     (3) The actual mounting is done later, when init() is running.
+ *
+ *
+ *	Changes:
+ *
+ *	Alan Cox	:	Removed get_address name clash with FPU.
+ *	Alan Cox	:	Reformatted a bit.
+ *	Gero Kuhlmann	:	Code cleanup
+ *	Michael Rausch  :	Fixed recognition of an incoming RARP answer.
+ *	Martin Mares	: (2.0)	Auto-configuration via BOOTP supported.
+ *	Martin Mares	:	Manual selection of interface & BOOTP/RARP.
+ *	Martin Mares	:	Using network routes instead of host routes,
+ *				allowing the default configuration to be used
+ *				for normal operation of the host.
+ *	Martin Mares	:	Randomized timer with exponential backoff
+ *				installed to minimize network congestion.
+ *	Martin Mares	:	Code cleanup.
+ *	Martin Mares	: (2.1)	BOOTP and RARP made configuration options.
+ *	Martin Mares	:	Server hostname generation fixed.
+ *	Gerd Knorr	:	Fixed wired inode handling
+ *	Martin Mares	: (2.2)	"0.0.0.0" addresses from command line ignored.
+ *	Martin Mares	:	RARP replies not tested for server address.
+ *	Gero Kuhlmann	: (2.3) Some bug fixes and code cleanup again (please
+ *				send me your new patches _before_ bothering
+ *				Linus so that I don' always have to cleanup
+ *				_afterwards_ - thanks)
+ *	Gero Kuhlmann	:	Last changes of Martin Mares undone.
+ *	Gero Kuhlmann	: 	RARP replies are tested for specified server
+ *				again. However, it's now possible to have
+ *				different RARP and NFS servers.
+ *	Gero Kuhlmann	:	"0.0.0.0" addresses from command line are
+ *				now mapped to INADDR_NONE.
+ *	Gero Kuhlmann	:	Fixed a bug which prevented BOOTP path name
+ *				from being used (thanks to Leo Spiekman)
+ *	Andy Walker	:	Allow to specify the NFS server in nfs_root
+ *				without giving a path name
+ *	Swen Th<54>mmler	:	Allow to specify the NFS options in nfs_root
+ *				without giving a path name. Fix BOOTP request
+ *				for domainname (domainname is NIS domain, not
+ *				DNS domain!). Skip dummy devices for BOOTP.
+ *	Jacek Zapala	:	Fixed a bug which prevented server-ip address
+ *				from nfsroot parameter from being used.
+ *	Olaf Kirch	:	Adapted to new NFS code.
+ *	Jakub Jelinek	:	Free used code segment.
+ *	Marko Kohtala	:	Fixed some bugs.
+ *	Martin Mares	:	Debug message cleanup
+ *	Martin Mares	:	Changed to use the new generic IP layer autoconfig
+ *				code. BOOTP and RARP moved there.
+ *	Martin Mares	:	Default path now contains host name instead of
+ *				host IP address (but host name defaults to IP
+ *				address anyway).
+ *	Martin Mares	:	Use root_server_addr appropriately during setup.
+ *	Martin Mares	:	Rewrote parameter parsing, now hopefully giving
+ *				correct overriding.
+ *	Trond Myklebust :	Add in preliminary support for NFSv3 and TCP.
+ *				Fix bug in root_nfs_addr(). nfs_data.namlen
+ *				is NOT for the length of the hostname.
+ *	Hua Qin		:	Support for mounting root file system via
+ *				NFS over TCP.
+ *	Fabian Frederick:	Option parser rebuilt (using parser lib)
+*/
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/in.h>
+#include <linux/major.h>
+#include <linux/utsname.h>
+#include <linux/inet.h>
+#include <linux/root_dev.h>
+#include <net/ipconfig.h>
+#include <linux/parser.h>
+
+/* Define this to allow debugging output */
+#undef NFSROOT_DEBUG
+#define NFSDBG_FACILITY NFSDBG_ROOT
+
+/* Default path we try to mount. "%s" gets replaced by our IP address */
+#define NFS_ROOT		"/tftpboot/%s"
+
+/* Parameters passed from the kernel command line */
+static char nfs_root_name[256] __initdata = "";
+
+/* Address of NFS server */
+static __u32 servaddr __initdata = 0;
+
+/* Name of directory to mount */
+static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, };
+
+/* NFS-related data */
+static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
+static int nfs_port __initdata = 0;		/* Port to connect to for NFS */
+static int mount_port __initdata = 0;		/* Mount daemon port number */
+
+
+/***************************************************************************
+
+			     Parsing of options
+
+ ***************************************************************************/
+
+enum {
+	/* Options that take integer arguments */
+	Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
+	Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
+	/* Options that take no arguments */
+	Opt_soft, Opt_hard, Opt_intr,
+	Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
+	Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
+	/* Error token */
+	Opt_err
+};
+
+static match_table_t __initdata tokens = {
+	{Opt_port, "port=%u"},
+	{Opt_rsize, "rsize=%u"},
+	{Opt_wsize, "wsize=%u"},
+	{Opt_timeo, "timeo=%u"},
+	{Opt_retrans, "retrans=%u"},
+	{Opt_acregmin, "acregmin=%u"},
+	{Opt_acregmax, "acregmax=%u"},
+	{Opt_acdirmin, "acdirmin=%u"},
+	{Opt_acdirmax, "acdirmax=%u"},
+	{Opt_soft, "soft"},
+	{Opt_hard, "hard"},
+	{Opt_intr, "intr"},
+	{Opt_nointr, "nointr"},
+	{Opt_posix, "posix"},
+	{Opt_noposix, "noposix"},
+	{Opt_cto, "cto"},
+	{Opt_nocto, "nocto"},
+	{Opt_ac, "ac"},
+	{Opt_noac, "noac"},
+	{Opt_lock, "lock"},
+	{Opt_nolock, "nolock"},
+	{Opt_v2, "nfsvers=2"},
+	{Opt_v2, "v2"},
+	{Opt_v3, "nfsvers=3"},
+	{Opt_v3, "v3"},
+	{Opt_udp, "proto=udp"},
+	{Opt_udp, "udp"},
+	{Opt_tcp, "proto=tcp"},
+	{Opt_tcp, "tcp"},
+	{Opt_err, NULL}
+	
+};
+
+/*
+ *  Parse option string.
+ */
+
+static int __init root_nfs_parse(char *name, char *buf)
+{
+
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+
+	if (!name)
+		return 1;
+
+	/* Set the NFS remote path */
+	p = strsep(&name, ",");
+	if (p[0] != '\0' && strcmp(p, "default") != 0)
+		strlcpy(buf, p, NFS_MAXPATHLEN);
+
+	while ((p = strsep (&name, ",")) != NULL) {
+		int token; 
+		if (!*p)
+			continue;
+		token = match_token(p, tokens, args);
+
+		/* %u tokens only. Beware if you add new tokens! */
+		if (token < Opt_soft && match_int(&args[0], &option))
+			return 0;
+		switch (token) {
+			case Opt_port:
+				nfs_port = option;
+				break;
+			case Opt_rsize:
+				nfs_data.rsize = option;
+				break;
+			case Opt_wsize:
+				nfs_data.wsize = option;
+				break;
+			case Opt_timeo:
+				nfs_data.timeo = option;
+				break;
+			case Opt_retrans:
+				nfs_data.retrans = option;
+				break;
+			case Opt_acregmin:
+				nfs_data.acregmin = option;
+				break;
+			case Opt_acregmax:
+				nfs_data.acregmax = option;
+				break;
+			case Opt_acdirmin:
+				nfs_data.acdirmin = option;
+				break;
+			case Opt_acdirmax:
+				nfs_data.acdirmax = option;
+				break;
+			case Opt_soft:
+				nfs_data.flags |= NFS_MOUNT_SOFT;
+				break;
+			case Opt_hard:
+				nfs_data.flags &= ~NFS_MOUNT_SOFT;
+				break;
+			case Opt_intr:
+				nfs_data.flags |= NFS_MOUNT_INTR;
+				break;
+			case Opt_nointr:
+				nfs_data.flags &= ~NFS_MOUNT_INTR;
+				break;
+			case Opt_posix:
+				nfs_data.flags |= NFS_MOUNT_POSIX;
+				break;
+			case Opt_noposix:
+				nfs_data.flags &= ~NFS_MOUNT_POSIX;
+				break;
+			case Opt_cto:
+				nfs_data.flags &= ~NFS_MOUNT_NOCTO;
+				break;
+			case Opt_nocto:
+				nfs_data.flags |= NFS_MOUNT_NOCTO;
+				break;
+			case Opt_ac:
+				nfs_data.flags &= ~NFS_MOUNT_NOAC;
+				break;
+			case Opt_noac:
+				nfs_data.flags |= NFS_MOUNT_NOAC;
+				break;
+			case Opt_lock:
+				nfs_data.flags &= ~NFS_MOUNT_NONLM;
+				break;
+			case Opt_nolock:
+				nfs_data.flags |= NFS_MOUNT_NONLM;
+				break;
+			case Opt_v2:
+				nfs_data.flags &= ~NFS_MOUNT_VER3;
+				break;
+			case Opt_v3:
+				nfs_data.flags |= NFS_MOUNT_VER3;
+				break;
+			case Opt_udp:
+				nfs_data.flags &= ~NFS_MOUNT_TCP;
+				break;
+			case Opt_tcp:
+				nfs_data.flags |= NFS_MOUNT_TCP;
+				break;
+			default : 
+				return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ *  Prepare the NFS data structure and parse all options.
+ */
+static int __init root_nfs_name(char *name)
+{
+	static char buf[NFS_MAXPATHLEN] __initdata;
+	char *cp;
+
+	/* Set some default values */
+	memset(&nfs_data, 0, sizeof(nfs_data));
+	nfs_port          = -1;
+	nfs_data.version  = NFS_MOUNT_VERSION;
+	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
+	nfs_data.rsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	nfs_data.wsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	nfs_data.acregmin = 3;
+	nfs_data.acregmax = 60;
+	nfs_data.acdirmin = 30;
+	nfs_data.acdirmax = 60;
+	strcpy(buf, NFS_ROOT);
+
+	/* Process options received from the remote server */
+	root_nfs_parse(root_server_path, buf);
+
+	/* Override them by options set on kernel command-line */
+	root_nfs_parse(name, buf);
+
+	cp = system_utsname.nodename;
+	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
+		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
+		return -1;
+	}
+	sprintf(nfs_path, buf, cp);
+
+	return 1;
+}
+
+
+/*
+ *  Get NFS server address.
+ */
+static int __init root_nfs_addr(void)
+{
+	if ((servaddr = root_server_addr) == INADDR_NONE) {
+		printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
+		return -1;
+	}
+
+	snprintf(nfs_data.hostname, sizeof(nfs_data.hostname),
+		 "%u.%u.%u.%u", NIPQUAD(servaddr));
+	return 0;
+}
+
+/*
+ *  Tell the user what's going on.
+ */
+#ifdef NFSROOT_DEBUG
+static void __init root_nfs_print(void)
+{
+	printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
+		nfs_path, nfs_data.hostname);
+	printk(KERN_NOTICE "Root-NFS:     rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
+		nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
+	printk(KERN_NOTICE "Root-NFS:     acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
+		nfs_data.acregmin, nfs_data.acregmax,
+		nfs_data.acdirmin, nfs_data.acdirmax);
+	printk(KERN_NOTICE "Root-NFS:     nfsd port = %d, mountd port = %d, flags = %08x\n",
+		nfs_port, mount_port, nfs_data.flags);
+}
+#endif
+
+
+static int __init root_nfs_init(void)
+{
+#ifdef NFSROOT_DEBUG
+	nfs_debug |= NFSDBG_ROOT;
+#endif
+
+	/*
+	 * Decode the root directory path name and NFS options from
+	 * the kernel command line. This has to go here in order to
+	 * be able to use the client IP address for the remote root
+	 * directory (necessary for pure RARP booting).
+	 */
+	if (root_nfs_name(nfs_root_name) < 0 ||
+	    root_nfs_addr() < 0)
+		return -1;
+
+#ifdef NFSROOT_DEBUG
+	root_nfs_print();
+#endif
+
+	return 0;
+}
+
+
+/*
+ *  Parse NFS server and directory information passed on the kernel
+ *  command line.
+ */
+static int __init nfs_root_setup(char *line)
+{
+	ROOT_DEV = Root_NFS;
+	if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
+		strlcpy(nfs_root_name, line, sizeof(nfs_root_name));
+	} else {
+		int n = strlen(line) + sizeof(NFS_ROOT) - 1;
+		if (n >= sizeof(nfs_root_name))
+			line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0';
+		sprintf(nfs_root_name, NFS_ROOT, line);
+	}
+	root_server_addr = root_nfs_parse_addr(nfs_root_name);
+	return 1;
+}
+
+__setup("nfsroot=", nfs_root_setup);
+
+/***************************************************************************
+
+	       Routines to actually mount the root directory
+
+ ***************************************************************************/
+
+/*
+ *  Construct sockaddr_in from address and port number.
+ */
+static inline void
+set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port)
+{
+	sin->sin_family = AF_INET;
+	sin->sin_addr.s_addr = addr;
+	sin->sin_port = port;
+}
+
+/*
+ *  Query server portmapper for the port of a daemon program.
+ */
+static int __init root_nfs_getport(int program, int version, int proto)
+{
+	struct sockaddr_in sin;
+
+	printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
+		program, version, NIPQUAD(servaddr));
+	set_sockaddr(&sin, servaddr, 0);
+	return rpc_getport_external(&sin, program, version, proto);
+}
+
+
+/*
+ *  Use portmapper to find mountd and nfsd port numbers if not overriden
+ *  by the user. Use defaults if portmapper is not available.
+ *  XXX: Is there any nfs server with no portmapper?
+ */
+static int __init root_nfs_ports(void)
+{
+	int port;
+	int nfsd_ver, mountd_ver;
+	int nfsd_port, mountd_port;
+	int proto;
+
+	if (nfs_data.flags & NFS_MOUNT_VER3) {
+		nfsd_ver = NFS3_VERSION;
+		mountd_ver = NFS_MNT3_VERSION;
+		nfsd_port = NFS_PORT;
+		mountd_port = NFS_MNT_PORT;
+	} else {
+		nfsd_ver = NFS2_VERSION;
+		mountd_ver = NFS_MNT_VERSION;
+		nfsd_port = NFS_PORT;
+		mountd_port = NFS_MNT_PORT;
+	}
+
+	proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+
+	if (nfs_port < 0) {
+		if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
+			printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
+					"number from server, using default\n");
+			port = nfsd_port;
+		}
+		nfs_port = htons(port);
+		dprintk("Root-NFS: Portmapper on server returned %d "
+			"as nfsd port\n", port);
+	}
+
+	if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) {
+		printk(KERN_ERR "Root-NFS: Unable to get mountd port "
+				"number from server, using default\n");
+		port = mountd_port;
+	}
+	mount_port = htons(port);
+	dprintk("Root-NFS: mountd port is %d\n", port);
+
+	return 0;
+}
+
+
+/*
+ *  Get a file handle from the server for the directory which is to be
+ *  mounted.
+ */
+static int __init root_nfs_get_handle(void)
+{
+	struct nfs_fh fh;
+	struct sockaddr_in sin;
+	int status;
+	int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
+					IPPROTO_TCP : IPPROTO_UDP;
+	int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
+					NFS_MNT3_VERSION : NFS_MNT_VERSION;
+
+	set_sockaddr(&sin, servaddr, mount_port);
+	status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
+	if (status < 0)
+		printk(KERN_ERR "Root-NFS: Server returned error %d "
+				"while mounting %s\n", status, nfs_path);
+	else {
+		nfs_data.root.size = fh.size;
+		memcpy(nfs_data.root.data, fh.data, fh.size);
+	}
+
+	return status;
+}
+
+/*
+ *  Get the NFS port numbers and file handle, and return the prepared 'data'
+ *  argument for mount() if everything went OK. Return NULL otherwise.
+ */
+void * __init nfs_root_data(void)
+{
+	if (root_nfs_init() < 0
+	 || root_nfs_ports() < 0
+	 || root_nfs_get_handle() < 0)
+		return NULL;
+	set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port);
+	return (void*)&nfs_data;
+}
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -0,0 +1,309 @@
+/*
+ * linux/fs/nfs/pagelist.c
+ *
+ * A set of helper functions for managing NFS read and write requests.
+ * The main purpose of these routines is to provide support for the
+ * coalescing of several requests into a single RPC call.
+ *
+ * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_page.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+
+#define NFS_PARANOIA 1
+
+static kmem_cache_t *nfs_page_cachep;
+
+static inline struct nfs_page *
+nfs_page_alloc(void)
+{
+	struct nfs_page	*p;
+	p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->wb_list);
+	}
+	return p;
+}
+
+static inline void
+nfs_page_free(struct nfs_page *p)
+{
+	kmem_cache_free(nfs_page_cachep, p);
+}
+
+/**
+ * nfs_create_request - Create an NFS read/write request.
+ * @file: file descriptor to use
+ * @inode: inode to which the request is attached
+ * @page: page to write
+ * @offset: starting offset within the page for the write
+ * @count: number of bytes to read/write
+ *
+ * The page must be locked by the caller. This makes sure we never
+ * create two different requests for the same page, and avoids
+ * a possible deadlock when we reach the hard limit on the number
+ * of dirty pages.
+ * User should ensure it is safe to sleep in this function.
+ */
+struct nfs_page *
+nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+		   struct page *page,
+		   unsigned int offset, unsigned int count)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_page		*req;
+
+	/* Deal with hard limits.  */
+	for (;;) {
+		/* try to allocate the request struct */
+		req = nfs_page_alloc();
+		if (req != NULL)
+			break;
+
+		/* Try to free up at least one request in order to stay
+		 * below the hard limit
+		 */
+		if (signalled() && (server->flags & NFS_MOUNT_INTR))
+			return ERR_PTR(-ERESTARTSYS);
+		yield();
+	}
+
+	/* Initialize the request struct. Initially, we assume a
+	 * long write-back delay. This will be adjusted in
+	 * update_nfs_request below if the region is not locked. */
+	req->wb_page    = page;
+	atomic_set(&req->wb_complete, 0);
+	req->wb_index	= page->index;
+	page_cache_get(page);
+	req->wb_offset  = offset;
+	req->wb_pgbase	= offset;
+	req->wb_bytes   = count;
+	atomic_set(&req->wb_count, 1);
+	req->wb_context = get_nfs_open_context(ctx);
+
+	return req;
+}
+
+/**
+ * nfs_unlock_request - Unlock request and wake up sleepers.
+ * @req:
+ */
+void nfs_unlock_request(struct nfs_page *req)
+{
+	if (!NFS_WBACK_BUSY(req)) {
+		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
+		BUG();
+	}
+	smp_mb__before_clear_bit();
+	clear_bit(PG_BUSY, &req->wb_flags);
+	smp_mb__after_clear_bit();
+	wake_up_all(&req->wb_context->waitq);
+	nfs_release_request(req);
+}
+
+/**
+ * nfs_clear_request - Free up all resources allocated to the request
+ * @req:
+ *
+ * Release page resources associated with a write request after it
+ * has completed.
+ */
+void nfs_clear_request(struct nfs_page *req)
+{
+	if (req->wb_page) {
+		page_cache_release(req->wb_page);
+		req->wb_page = NULL;
+	}
+}
+
+
+/**
+ * nfs_release_request - Release the count on an NFS read/write request
+ * @req: request to release
+ *
+ * Note: Should never be called with the spinlock held!
+ */
+void
+nfs_release_request(struct nfs_page *req)
+{
+	if (!atomic_dec_and_test(&req->wb_count))
+		return;
+
+#ifdef NFS_PARANOIA
+	BUG_ON (!list_empty(&req->wb_list));
+	BUG_ON (NFS_WBACK_BUSY(req));
+#endif
+
+	/* Release struct file or cached credential */
+	nfs_clear_request(req);
+	put_nfs_open_context(req->wb_context);
+	nfs_page_free(req);
+}
+
+/**
+ * nfs_list_add_request - Insert a request into a sorted list
+ * @req: request
+ * @head: head of list into which to insert the request.
+ *
+ * Note that the wb_list is sorted by page index in order to facilitate
+ * coalescing of requests.
+ * We use an insertion sort that is optimized for the case of appended
+ * writes.
+ */
+void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+{
+	struct list_head *pos;
+
+#ifdef NFS_PARANOIA
+	if (!list_empty(&req->wb_list)) {
+		printk(KERN_ERR "NFS: Add to list failed!\n");
+		BUG();
+	}
+#endif
+	list_for_each_prev(pos, head) {
+		struct nfs_page	*p = nfs_list_entry(pos);
+		if (p->wb_index < req->wb_index)
+			break;
+	}
+	list_add(&req->wb_list, pos);
+	req->wb_list_head = head;
+}
+
+/**
+ * nfs_wait_on_request - Wait for a request to complete.
+ * @req: request to wait upon.
+ *
+ * Interruptible by signals only if mounted with intr flag.
+ * The user is responsible for holding a count on the request.
+ */
+int
+nfs_wait_on_request(struct nfs_page *req)
+{
+	struct inode	*inode = req->wb_context->dentry->d_inode;
+        struct rpc_clnt	*clnt = NFS_CLIENT(inode);
+
+	if (!NFS_WBACK_BUSY(req))
+		return 0;
+	return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req));
+}
+
+/**
+ * nfs_coalesce_requests - Split coalesced requests out from a list.
+ * @head: source list
+ * @dst: destination list
+ * @nmax: maximum number of requests to coalesce
+ *
+ * Moves a maximum of 'nmax' elements from one list to another.
+ * The elements are checked to ensure that they form a contiguous set
+ * of pages, and that the RPC credentials are the same.
+ */
+int
+nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
+		      unsigned int nmax)
+{
+	struct nfs_page		*req = NULL;
+	unsigned int		npages = 0;
+
+	while (!list_empty(head)) {
+		struct nfs_page	*prev = req;
+
+		req = nfs_list_entry(head->next);
+		if (prev) {
+			if (req->wb_context->cred != prev->wb_context->cred)
+				break;
+			if (req->wb_context->lockowner != prev->wb_context->lockowner)
+				break;
+			if (req->wb_context->state != prev->wb_context->state)
+				break;
+			if (req->wb_index != (prev->wb_index + 1))
+				break;
+
+			if (req->wb_pgbase != 0)
+				break;
+		}
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, dst);
+		npages++;
+		if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE)
+			break;
+		if (npages >= nmax)
+			break;
+	}
+	return npages;
+}
+
+/**
+ * nfs_scan_list - Scan a list for matching requests
+ * @head: One of the NFS inode request lists
+ * @dst: Destination list
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+ * Moves elements from one of the inode request lists.
+ * If the number of requests is set to 0, the entire address_space
+ * starting at index idx_start, is scanned.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ * You must be holding the inode's req_lock when calling this function
+ */
+int
+nfs_scan_list(struct list_head *head, struct list_head *dst,
+	      unsigned long idx_start, unsigned int npages)
+{
+	struct list_head	*pos, *tmp;
+	struct nfs_page		*req;
+	unsigned long		idx_end;
+	int			res;
+
+	res = 0;
+	if (npages == 0)
+		idx_end = ~0;
+	else
+		idx_end = idx_start + npages - 1;
+
+	list_for_each_safe(pos, tmp, head) {
+
+		req = nfs_list_entry(pos);
+
+		if (req->wb_index < idx_start)
+			continue;
+		if (req->wb_index > idx_end)
+			break;
+
+		if (!nfs_lock_request(req))
+			continue;
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, dst);
+		res++;
+	}
+	return res;
+}
+
+int nfs_init_nfspagecache(void)
+{
+	nfs_page_cachep = kmem_cache_create("nfs_page",
+					    sizeof(struct nfs_page),
+					    0, SLAB_HWCACHE_ALIGN,
+					    NULL, NULL);
+	if (nfs_page_cachep == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_nfspagecache(void)
+{
+	if (kmem_cache_destroy(nfs_page_cachep))
+		printk(KERN_INFO "nfs_page: not all structures were freed\n");
+}
+
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -0,0 +1,655 @@
+/*
+ *  linux/fs/nfs/proc.c
+ *
+ *  Copyright (C) 1992, 1993, 1994  Rick Sladkey
+ *
+ *  OS-independent nfs remote procedure call functions
+ *
+ *  Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers
+ *  so at last we can have decent(ish) throughput off a 
+ *  Sun server.
+ *
+ *  Coding optimized and cleaned up by Florian La Roche.
+ *  Note: Error returns are optimized for NFS_OK, which isn't translated via
+ *  nfs_stat_to_errno(), but happens to be already the right return code.
+ *
+ *  Also, the code currently doesn't check the size of the packet, when
+ *  it decodes the packet.
+ *
+ *  Feel free to fix it and mail me the diffs if it worries you.
+ *
+ *  Completely rewritten to support the new RPC call interface;
+ *  rewrote and moved the entire XDR stuff to xdr.c
+ *  --Olaf Kirch June 1996
+ *
+ *  The code below initializes all auto variables explicitly, otherwise
+ *  it will fail to work as a module (gcc generates a memset call for an
+ *  incomplete struct).
+ */
+
+#include <linux/types.h>
+#include <linux/param.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PROC
+
+extern struct rpc_procinfo nfs_procedures[];
+
+/*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ */
+static int
+nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		  struct nfs_fsinfo *info)
+{
+	struct nfs_fattr *fattr = info->fattr;
+	struct nfs2_fsstat fsinfo;
+	int status;
+
+	dprintk("%s: call getattr\n", __FUNCTION__);
+	fattr->valid = 0;
+	status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
+	if (status)
+		return status;
+	dprintk("%s: call statfs\n", __FUNCTION__);
+	status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
+	if (status)
+		return status;
+	info->rtmax  = NFS_MAXDATA;
+	info->rtpref = fsinfo.tsize;
+	info->rtmult = fsinfo.bsize;
+	info->wtmax  = NFS_MAXDATA;
+	info->wtpref = fsinfo.tsize;
+	info->wtmult = fsinfo.bsize;
+	info->dtpref = fsinfo.tsize;
+	info->maxfilesize = 0x7FFFFFFF;
+	info->lease_time = 0;
+	return 0;
+}
+
+/*
+ * One function for each procedure in the NFS protocol.
+ */
+static int
+nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	int	status;
+
+	dprintk("NFS call  getattr\n");
+	fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_GETATTR,
+				fhandle, fattr, 0);
+	dprintk("NFS reply getattr: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+		 struct iattr *sattr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nfs_sattrargs	arg = { 
+		.fh	= NFS_FH(inode),
+		.sattr	= sattr
+	};
+	int	status;
+
+	dprintk("NFS call  setattr\n");
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+	dprintk("NFS reply setattr: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_lookup(struct inode *dir, struct qstr *name,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	struct nfs_diropok	res = {
+		.fh		= fhandle,
+		.fattr		= fattr
+	};
+	int			status;
+
+	dprintk("NFS call  lookup %s\n", name->name);
+	fattr->valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+	dprintk("NFS reply lookup: %d\n", status);
+	return status;
+}
+
+static int nfs_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs_readlinkargs	args = {
+		.fh		= NFS_FH(inode),
+		.pgbase		= pgbase,
+		.pglen		= pglen,
+		.pages		= &page
+	};
+	int			status;
+
+	dprintk("NFS call  readlink\n");
+	status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+	dprintk("NFS reply readlink: %d\n", status);
+	return status;
+}
+
+static int nfs_proc_read(struct nfs_read_data *rdata)
+{
+	int			flags = rdata->flags;
+	struct inode *		inode = rdata->inode;
+	struct nfs_fattr *	fattr = rdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READ],
+		.rpc_argp	= &rdata->args,
+		.rpc_resp	= &rdata->res,
+		.rpc_cred	= rdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
+			(long long) rdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+	if (status >= 0) {
+		nfs_refresh_inode(inode, fattr);
+		/* Emulate the eof flag, which isn't normally needed in NFSv2
+		 * as it is guaranteed to always return the file attributes
+		 */
+		if (rdata->args.offset + rdata->args.count >= fattr->size)
+			rdata->res.eof = 1;
+	}
+	dprintk("NFS reply read: %d\n", status);
+	return status;
+}
+
+static int nfs_proc_write(struct nfs_write_data *wdata)
+{
+	int			flags = wdata->flags;
+	struct inode *		inode = wdata->inode;
+	struct nfs_fattr *	fattr = wdata->res.fattr;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
+		.rpc_argp	= &wdata->args,
+		.rpc_resp	= &wdata->res,
+		.rpc_cred	= wdata->cred,
+	};
+	int			status;
+
+	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
+			(long long) wdata->args.offset);
+	fattr->valid = 0;
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+	if (status >= 0) {
+		nfs_refresh_inode(inode, fattr);
+		wdata->res.count = wdata->args.count;
+		wdata->verf.committed = NFS_FILE_SYNC;
+	}
+	dprintk("NFS reply write: %d\n", status);
+	return status < 0? status : wdata->res.count;
+}
+
+static int
+nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+		int flags)
+{
+	struct nfs_fh		fhandle;
+	struct nfs_fattr	fattr;
+	struct nfs_createargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs_diropok	res = {
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	fattr.valid = 0;
+	dprintk("NFS call  create %s\n", dentry->d_name.name);
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	dprintk("NFS reply create: %d\n", status);
+	return status;
+}
+
+/*
+ * In NFSv2, mknod is grafted onto the create call.
+ */
+static int
+nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+	       dev_t rdev)
+{
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+	struct nfs_createargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs_diropok	res = {
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int status, mode;
+
+	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
+
+	mode = sattr->ia_mode;
+	if (S_ISFIFO(mode)) {
+		sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR;
+		sattr->ia_valid &= ~ATTR_SIZE;
+	} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
+		sattr->ia_valid |= ATTR_SIZE;
+		sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
+	}
+
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+
+	if (status == -EINVAL && S_ISFIFO(mode)) {
+		sattr->ia_mode = mode;
+		fattr.valid = 0;
+		status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	}
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	dprintk("NFS reply mknod: %d\n", status);
+	return status;
+}
+  
+static int
+nfs_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	struct rpc_message	msg = { 
+		.rpc_proc	= &nfs_procedures[NFSPROC_REMOVE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+		.rpc_cred	= NULL
+	};
+	int			status;
+
+	dprintk("NFS call  remove %s\n", name->name);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+	dprintk("NFS reply remove: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct nfs_diropargs	*arg;
+
+	arg = (struct nfs_diropargs *)kmalloc(sizeof(*arg), GFP_KERNEL);
+	if (!arg)
+		return -ENOMEM;
+	arg->fh = NFS_FH(dir->d_inode);
+	arg->name = name->name;
+	arg->len = name->len;
+	msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
+	msg->rpc_argp = arg;
+	return 0;
+}
+
+static int
+nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+	
+	if (msg->rpc_argp)
+		kfree(msg->rpc_argp);
+	return 0;
+}
+
+static int
+nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs_renameargs	arg = {
+		.fromfh		= NFS_FH(old_dir),
+		.fromname	= old_name->name,
+		.fromlen	= old_name->len,
+		.tofh		= NFS_FH(new_dir),
+		.toname		= new_name->name,
+		.tolen		= new_name->len
+	};
+	int			status;
+
+	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+	status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+	dprintk("NFS reply rename: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs_linkargs	arg = {
+		.fromfh		= NFS_FH(inode),
+		.tofh		= NFS_FH(dir),
+		.toname		= name->name,
+		.tolen		= name->len
+	};
+	int			status;
+
+	dprintk("NFS call  link %s\n", name->name);
+	status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+	dprintk("NFS reply link: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+		 struct iattr *sattr, struct nfs_fh *fhandle,
+		 struct nfs_fattr *fattr)
+{
+	struct nfs_symlinkargs	arg = {
+		.fromfh		= NFS_FH(dir),
+		.fromname	= name->name,
+		.fromlen	= name->len,
+		.topath		= path->name,
+		.tolen		= path->len,
+		.sattr		= sattr
+	};
+	int			status;
+
+	if (path->len > NFS2_MAXPATHLEN)
+		return -ENAMETOOLONG;
+	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+	fattr->valid = 0;
+	fhandle->size = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+	dprintk("NFS reply symlink: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+{
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+	struct nfs_createargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs_diropok	res = {
+		.fh		= &fhandle,
+		.fattr		= &fattr
+	};
+	int			status;
+
+	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	dprintk("NFS reply mkdir: %d\n", status);
+	return status;
+}
+
+static int
+nfs_proc_rmdir(struct inode *dir, struct qstr *name)
+{
+	struct nfs_diropargs	arg = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	int			status;
+
+	dprintk("NFS call  rmdir %s\n", name->name);
+	status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+	dprintk("NFS reply rmdir: %d\n", status);
+	return status;
+}
+
+/*
+ * The READDIR implementation is somewhat hackish - we pass a temporary
+ * buffer to the encode function, which installs it in the receive
+ * the receive iovec. The decode function just parses the reply to make
+ * sure it is syntactically correct; the entries itself are decoded
+ * from nfs_readdir by calling the decode_entry function directly.
+ */
+static int
+nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		 u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs_readdirargs	arg = {
+		.fh		= NFS_FH(dir),
+		.cookie		= cookie,
+		.count		= count,
+		.pages		= &page
+	};
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+		.rpc_cred	= cred
+	};
+	int			status;
+
+	lock_kernel();
+
+	dprintk("NFS call  readdir %d\n", (unsigned int)cookie);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+	dprintk("NFS reply readdir: %d\n", status);
+	unlock_kernel();
+	return status;
+}
+
+static int
+nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+			struct nfs_fsstat *stat)
+{
+	struct nfs2_fsstat fsinfo;
+	int	status;
+
+	dprintk("NFS call  statfs\n");
+	stat->fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	dprintk("NFS reply statfs: %d\n", status);
+	if (status)
+		goto out;
+	stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize;
+	stat->fbytes = (u64)fsinfo.bfree  * fsinfo.bsize;
+	stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize;
+	stat->tfiles = 0;
+	stat->ffiles = 0;
+	stat->afiles = 0;
+out:
+	return status;
+}
+
+static int
+nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+			struct nfs_fsinfo *info)
+{
+	struct nfs2_fsstat fsinfo;
+	int	status;
+
+	dprintk("NFS call  fsinfo\n");
+	info->fattr->valid = 0;
+	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	dprintk("NFS reply fsinfo: %d\n", status);
+	if (status)
+		goto out;
+	info->rtmax  = NFS_MAXDATA;
+	info->rtpref = fsinfo.tsize;
+	info->rtmult = fsinfo.bsize;
+	info->wtmax  = NFS_MAXDATA;
+	info->wtpref = fsinfo.tsize;
+	info->wtmult = fsinfo.bsize;
+	info->dtpref = fsinfo.tsize;
+	info->maxfilesize = 0x7FFFFFFF;
+	info->lease_time = 0;
+out:
+	return status;
+}
+
+static int
+nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		  struct nfs_pathconf *info)
+{
+	info->max_link = 0;
+	info->max_namelen = NFS2_MAXNAMLEN;
+	return 0;
+}
+
+extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
+
+static void
+nfs_read_done(struct rpc_task *task)
+{
+	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+
+	if (task->tk_status >= 0) {
+		nfs_refresh_inode(data->inode, data->res.fattr);
+		/* Emulate the eof flag, which isn't normally needed in NFSv2
+		 * as it is guaranteed to always return the file attributes
+		 */
+		if (data->args.offset + data->args.count >= data->res.fattr->size)
+			data->res.eof = 1;
+	}
+	nfs_readpage_result(task);
+}
+
+static void
+nfs_proc_read_setup(struct nfs_read_data *data)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READ],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, data->res.fattr);
+	nfs_writeback_done(task);
+}
+
+static void
+nfs_proc_write_setup(struct nfs_write_data *data, int how)
+{
+	struct rpc_task		*task = &data->task;
+	struct inode		*inode = data->inode;
+	int			flags;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+
+	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
+	data->args.stable = NFS_FILE_SYNC;
+
+	/* Set the initial flags for the task.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void
+nfs_proc_commit_setup(struct nfs_write_data *data, int how)
+{
+	BUG();
+}
+
+static int
+nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
+}
+
+
+struct nfs_rpc_ops	nfs_v2_clientops = {
+	.version	= 2,		       /* protocol version */
+	.dentry_ops	= &nfs_dentry_operations,
+	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.getroot	= nfs_proc_get_root,
+	.getattr	= nfs_proc_getattr,
+	.setattr	= nfs_proc_setattr,
+	.lookup		= nfs_proc_lookup,
+	.access		= NULL,		       /* access */
+	.readlink	= nfs_proc_readlink,
+	.read		= nfs_proc_read,
+	.write		= nfs_proc_write,
+	.commit		= NULL,		       /* commit */
+	.create		= nfs_proc_create,
+	.remove		= nfs_proc_remove,
+	.unlink_setup	= nfs_proc_unlink_setup,
+	.unlink_done	= nfs_proc_unlink_done,
+	.rename		= nfs_proc_rename,
+	.link		= nfs_proc_link,
+	.symlink	= nfs_proc_symlink,
+	.mkdir		= nfs_proc_mkdir,
+	.rmdir		= nfs_proc_rmdir,
+	.readdir	= nfs_proc_readdir,
+	.mknod		= nfs_proc_mknod,
+	.statfs		= nfs_proc_statfs,
+	.fsinfo		= nfs_proc_fsinfo,
+	.pathconf	= nfs_proc_pathconf,
+	.decode_dirent	= nfs_decode_dirent,
+	.read_setup	= nfs_proc_read_setup,
+	.write_setup	= nfs_proc_write_setup,
+	.commit_setup	= nfs_proc_commit_setup,
+	.file_open	= nfs_open,
+	.file_release	= nfs_release,
+	.lock		= nfs_proc_lock,
+};
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -0,0 +1,618 @@
+/*
+ * linux/fs/nfs/read.c
+ *
+ * Block I/O for NFS
+ *
+ * Partial copy of Linus' read cache modifications to fs/nfs/file.c
+ * modified for async RPC by okir@monad.swb.de
+ *
+ * We do an ugly hack here in order to return proper error codes to the
+ * user program when a read request failed: since generic_file_read
+ * only checks the return value of inode->i_op->readpage() which is always 0
+ * for async RPC, we set the error bit of the page to 1 when an error occurs,
+ * and make nfs_readpage transmit requests synchronously when encountering this.
+ * This is only a small problem, though, since we now retry all operations
+ * within the RPC code when root squashing is suspected.
+ */
+
+#include <linux/config.h>
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+
+#include <asm/system.h>
+
+#define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+
+static int nfs_pagein_one(struct list_head *, struct inode *);
+static void nfs_readpage_result_partial(struct nfs_read_data *, int);
+static void nfs_readpage_result_full(struct nfs_read_data *, int);
+
+static kmem_cache_t *nfs_rdata_cachep;
+mempool_t *nfs_rdata_mempool;
+
+#define MIN_POOL_READ	(32)
+
+void nfs_readdata_release(struct rpc_task *task)
+{
+        struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
+        nfs_readdata_free(data);
+}
+
+static
+unsigned int nfs_page_length(struct inode *inode, struct page *page)
+{
+	loff_t i_size = i_size_read(inode);
+	unsigned long idx;
+
+	if (i_size <= 0)
+		return 0;
+	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
+	if (page->index > idx)
+		return 0;
+	if (page->index != idx)
+		return PAGE_CACHE_SIZE;
+	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
+}
+
+static
+int nfs_return_empty_page(struct page *page)
+{
+	memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
+	SetPageUptodate(page);
+	unlock_page(page);
+	return 0;
+}
+
+/*
+ * Read a page synchronously.
+ */
+static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
+		struct page *page)
+{
+	unsigned int	rsize = NFS_SERVER(inode)->rsize;
+	unsigned int	count = PAGE_CACHE_SIZE;
+	int		result;
+	struct nfs_read_data *rdata;
+
+	rdata = nfs_readdata_alloc();
+	if (!rdata)
+		return -ENOMEM;
+
+	memset(rdata, 0, sizeof(*rdata));
+	rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+	rdata->cred = ctx->cred;
+	rdata->inode = inode;
+	INIT_LIST_HEAD(&rdata->pages);
+	rdata->args.fh = NFS_FH(inode);
+	rdata->args.context = ctx;
+	rdata->args.pages = &page;
+	rdata->args.pgbase = 0UL;
+	rdata->args.count = rsize;
+	rdata->res.fattr = &rdata->fattr;
+
+	dprintk("NFS: nfs_readpage_sync(%p)\n", page);
+
+	/*
+	 * This works now because the socket layer never tries to DMA
+	 * into this buffer directly.
+	 */
+	do {
+		if (count < rsize)
+			rdata->args.count = count;
+		rdata->res.count = rdata->args.count;
+		rdata->args.offset = page_offset(page) + rdata->args.pgbase;
+
+		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
+			NFS_SERVER(inode)->hostname,
+			inode->i_sb->s_id,
+			(long long)NFS_FILEID(inode),
+			(unsigned long long)rdata->args.pgbase,
+			rdata->args.count);
+
+		lock_kernel();
+		result = NFS_PROTO(inode)->read(rdata);
+		unlock_kernel();
+
+		/*
+		 * Even if we had a partial success we can't mark the page
+		 * cache valid.
+		 */
+		if (result < 0) {
+			if (result == -EISDIR)
+				result = -EINVAL;
+			goto io_error;
+		}
+		count -= result;
+		rdata->args.pgbase += result;
+		/* Note: result == 0 should only happen if we're caching
+		 * a write that extends the file and punches a hole.
+		 */
+		if (rdata->res.eof != 0 || result == 0)
+			break;
+	} while (count);
+	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+
+	if (count)
+		memclear_highpage_flush(page, rdata->args.pgbase, count);
+	SetPageUptodate(page);
+	if (PageError(page))
+		ClearPageError(page);
+	result = 0;
+
+io_error:
+	unlock_page(page);
+	nfs_readdata_free(rdata);
+	return result;
+}
+
+static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
+		struct page *page)
+{
+	LIST_HEAD(one_request);
+	struct nfs_page	*new;
+	unsigned int len;
+
+	len = nfs_page_length(inode, page);
+	if (len == 0)
+		return nfs_return_empty_page(page);
+	new = nfs_create_request(ctx, inode, page, 0, len);
+	if (IS_ERR(new)) {
+		unlock_page(page);
+		return PTR_ERR(new);
+	}
+	if (len < PAGE_CACHE_SIZE)
+		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
+
+	nfs_lock_request(new);
+	nfs_list_add_request(new, &one_request);
+	nfs_pagein_one(&one_request, inode);
+	return 0;
+}
+
+static void nfs_readpage_release(struct nfs_page *req)
+{
+	unlock_page(req->wb_page);
+
+	nfs_clear_request(req);
+	nfs_release_request(req);
+	nfs_unlock_request(req);
+
+	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
+			req->wb_context->dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_bytes,
+			(long long)req_offset(req));
+}
+
+/*
+ * Set up the NFS read request struct
+ */
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+		unsigned int count, unsigned int offset)
+{
+	struct inode		*inode;
+
+	data->req	  = req;
+	data->inode	  = inode = req->wb_context->dentry->d_inode;
+	data->cred	  = req->wb_context->cred;
+
+	data->args.fh     = NFS_FH(inode);
+	data->args.offset = req_offset(req) + offset;
+	data->args.pgbase = req->wb_pgbase + offset;
+	data->args.pages  = data->pagevec;
+	data->args.count  = count;
+	data->args.context = req->wb_context;
+
+	data->res.fattr   = &data->fattr;
+	data->res.count   = count;
+	data->res.eof     = 0;
+
+	NFS_PROTO(inode)->read_setup(data);
+
+	data->task.tk_cookie = (unsigned long)inode;
+	data->task.tk_calldata = data;
+	/* Release requests */
+	data->task.tk_release = nfs_readdata_release;
+
+	dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+			data->task.tk_pid,
+			inode->i_sb->s_id,
+			(long long)NFS_FILEID(inode),
+			count,
+			(unsigned long long)data->args.offset);
+}
+
+static void
+nfs_async_read_error(struct list_head *head)
+{
+	struct nfs_page	*req;
+
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		SetPageError(req->wb_page);
+		nfs_readpage_release(req);
+	}
+}
+
+/*
+ * Start an async read operation
+ */
+static void nfs_execute_read(struct nfs_read_data *data)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
+	sigset_t oldset;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	lock_kernel();
+	rpc_execute(&data->task);
+	unlock_kernel();
+	rpc_clnt_sigunmask(clnt, &oldset);
+}
+
+/*
+ * Generate multiple requests to fill a single page.
+ *
+ * We optimize to reduce the number of read operations on the wire.  If we
+ * detect that we're reading a page, or an area of a page, that is past the
+ * end of file, we do not generate NFS read operations but just clear the
+ * parts of the page that would have come back zero from the server anyway.
+ *
+ * We rely on the cached value of i_size to make this determination; another
+ * client can fill pages on the server past our cached end-of-file, but we
+ * won't see the new data until our attribute cache is updated.  This is more
+ * or less conventional NFS client behavior.
+ */
+static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
+{
+	struct nfs_page *req = nfs_list_entry(head->next);
+	struct page *page = req->wb_page;
+	struct nfs_read_data *data;
+	unsigned int rsize = NFS_SERVER(inode)->rsize;
+	unsigned int nbytes, offset;
+	int requests = 0;
+	LIST_HEAD(list);
+
+	nfs_list_remove_request(req);
+
+	nbytes = req->wb_bytes;
+	for(;;) {
+		data = nfs_readdata_alloc();
+		if (!data)
+			goto out_bad;
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, &list);
+		requests++;
+		if (nbytes <= rsize)
+			break;
+		nbytes -= rsize;
+	}
+	atomic_set(&req->wb_complete, requests);
+
+	ClearPageError(page);
+	offset = 0;
+	nbytes = req->wb_bytes;
+	do {
+		data = list_entry(list.next, struct nfs_read_data, pages);
+		list_del_init(&data->pages);
+
+		data->pagevec[0] = page;
+		data->complete = nfs_readpage_result_partial;
+
+		if (nbytes > rsize) {
+			nfs_read_rpcsetup(req, data, rsize, offset);
+			offset += rsize;
+			nbytes -= rsize;
+		} else {
+			nfs_read_rpcsetup(req, data, nbytes, offset);
+			nbytes = 0;
+		}
+		nfs_execute_read(data);
+	} while (nbytes != 0);
+
+	return 0;
+
+out_bad:
+	while (!list_empty(&list)) {
+		data = list_entry(list.next, struct nfs_read_data, pages);
+		list_del(&data->pages);
+		nfs_readdata_free(data);
+	}
+	SetPageError(page);
+	nfs_readpage_release(req);
+	return -ENOMEM;
+}
+
+static int nfs_pagein_one(struct list_head *head, struct inode *inode)
+{
+	struct nfs_page		*req;
+	struct page		**pages;
+	struct nfs_read_data	*data;
+	unsigned int		count;
+
+	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
+		return nfs_pagein_multi(head, inode);
+
+	data = nfs_readdata_alloc();
+	if (!data)
+		goto out_bad;
+
+	INIT_LIST_HEAD(&data->pages);
+	pages = data->pagevec;
+	count = 0;
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, &data->pages);
+		ClearPageError(req->wb_page);
+		*pages++ = req->wb_page;
+		count += req->wb_bytes;
+	}
+	req = nfs_list_entry(data->pages.next);
+
+	data->complete = nfs_readpage_result_full;
+	nfs_read_rpcsetup(req, data, count, 0);
+
+	nfs_execute_read(data);
+	return 0;
+out_bad:
+	nfs_async_read_error(head);
+	return -ENOMEM;
+}
+
+static int
+nfs_pagein_list(struct list_head *head, int rpages)
+{
+	LIST_HEAD(one_request);
+	struct nfs_page		*req;
+	int			error = 0;
+	unsigned int		pages = 0;
+
+	while (!list_empty(head)) {
+		pages += nfs_coalesce_requests(head, &one_request, rpages);
+		req = nfs_list_entry(one_request.next);
+		error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
+		if (error < 0)
+			break;
+	}
+	if (error >= 0)
+		return pages;
+
+	nfs_async_read_error(head);
+	return error;
+}
+
+/*
+ * Handle a read reply that fills part of a page.
+ */
+static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+{
+	struct nfs_page *req = data->req;
+	struct page *page = req->wb_page;
+ 
+	if (status >= 0) {
+		unsigned int request = data->args.count;
+		unsigned int result = data->res.count;
+
+		if (result < request) {
+			memclear_highpage_flush(page,
+						data->args.pgbase + result,
+						request - result);
+		}
+	} else
+		SetPageError(page);
+
+	if (atomic_dec_and_test(&req->wb_complete)) {
+		if (!PageError(page))
+			SetPageUptodate(page);
+		nfs_readpage_release(req);
+	}
+}
+
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+{
+	unsigned int count = data->res.count;
+
+	while (!list_empty(&data->pages)) {
+		struct nfs_page *req = nfs_list_entry(data->pages.next);
+		struct page *page = req->wb_page;
+		nfs_list_remove_request(req);
+
+		if (status >= 0) {
+			if (count < PAGE_CACHE_SIZE) {
+				if (count < req->wb_bytes)
+					memclear_highpage_flush(page,
+							req->wb_pgbase + count,
+							req->wb_bytes - count);
+				count = 0;
+			} else
+				count -= PAGE_CACHE_SIZE;
+			SetPageUptodate(page);
+		} else
+			SetPageError(page);
+		nfs_readpage_release(req);
+	}
+}
+
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+void nfs_readpage_result(struct rpc_task *task)
+{
+	struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
+	struct nfs_readargs *argp = &data->args;
+	struct nfs_readres *resp = &data->res;
+	int status = task->tk_status;
+
+	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
+		task->tk_pid, status);
+
+	/* Is this a short read? */
+	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+		/* Has the server at least made some progress? */
+		if (resp->count != 0) {
+			/* Yes, so retry the read at the end of the data */
+			argp->offset += resp->count;
+			argp->pgbase += resp->count;
+			argp->count -= resp->count;
+			rpc_restart_call(task);
+			return;
+		}
+		task->tk_status = -EIO;
+	}
+	NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
+	data->complete(data, status);
+}
+
+/*
+ * Read a page over NFS.
+ * We read the page synchronously in the following case:
+ *  -	The error flag is set for this page. This happens only when a
+ *	previous async read operation failed.
+ */
+int nfs_readpage(struct file *file, struct page *page)
+{
+	struct nfs_open_context *ctx;
+	struct inode *inode = page->mapping->host;
+	int		error;
+
+	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
+		page, PAGE_CACHE_SIZE, page->index);
+	/*
+	 * Try to flush any pending writes to the file..
+	 *
+	 * NOTE! Because we own the page lock, there cannot
+	 * be any new pending writes generated at this point
+	 * for this page (other pages can be written to).
+	 */
+	error = nfs_wb_page(inode, page);
+	if (error)
+		goto out_error;
+
+	if (file == NULL) {
+		ctx = nfs_find_open_context(inode, FMODE_READ);
+		if (ctx == NULL)
+			return -EBADF;
+	} else
+		ctx = get_nfs_open_context((struct nfs_open_context *)
+				file->private_data);
+	if (!IS_SYNC(inode)) {
+		error = nfs_readpage_async(ctx, inode, page);
+		goto out;
+	}
+
+	error = nfs_readpage_sync(ctx, inode, page);
+	if (error < 0 && IS_SWAPFILE(inode))
+		printk("Aiee.. nfs swap-in of page failed!\n");
+out:
+	put_nfs_open_context(ctx);
+	return error;
+
+out_error:
+	unlock_page(page);
+	return error;
+}
+
+struct nfs_readdesc {
+	struct list_head *head;
+	struct nfs_open_context *ctx;
+};
+
+static int
+readpage_async_filler(void *data, struct page *page)
+{
+	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
+	struct inode *inode = page->mapping->host;
+	struct nfs_page *new;
+	unsigned int len;
+
+	nfs_wb_page(inode, page);
+	len = nfs_page_length(inode, page);
+	if (len == 0)
+		return nfs_return_empty_page(page);
+	new = nfs_create_request(desc->ctx, inode, page, 0, len);
+	if (IS_ERR(new)) {
+			SetPageError(page);
+			unlock_page(page);
+			return PTR_ERR(new);
+	}
+	if (len < PAGE_CACHE_SIZE)
+		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
+	nfs_lock_request(new);
+	nfs_list_add_request(new, desc->head);
+	return 0;
+}
+
+int nfs_readpages(struct file *filp, struct address_space *mapping,
+		struct list_head *pages, unsigned nr_pages)
+{
+	LIST_HEAD(head);
+	struct nfs_readdesc desc = {
+		.head		= &head,
+	};
+	struct inode *inode = mapping->host;
+	struct nfs_server *server = NFS_SERVER(inode);
+	int ret;
+
+	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
+			inode->i_sb->s_id,
+			(long long)NFS_FILEID(inode),
+			nr_pages);
+
+	if (filp == NULL) {
+		desc.ctx = nfs_find_open_context(inode, FMODE_READ);
+		if (desc.ctx == NULL)
+			return -EBADF;
+	} else
+		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
+				filp->private_data);
+	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+	if (!list_empty(&head)) {
+		int err = nfs_pagein_list(&head, server->rpages);
+		if (!ret)
+			ret = err;
+	}
+	put_nfs_open_context(desc.ctx);
+	return ret;
+}
+
+int nfs_init_readpagecache(void)
+{
+	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
+					     sizeof(struct nfs_read_data),
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL, NULL);
+	if (nfs_rdata_cachep == NULL)
+		return -ENOMEM;
+
+	nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
+					   mempool_alloc_slab,
+					   mempool_free_slab,
+					   nfs_rdata_cachep);
+	if (nfs_rdata_mempool == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_readpagecache(void)
+{
+	mempool_destroy(nfs_rdata_mempool);
+	if (kmem_cache_destroy(nfs_rdata_cachep))
+		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
+}
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -0,0 +1,117 @@
+/*
+ *  linux/fs/nfs/symlink.c
+ *
+ *  Copyright (C) 1992  Rick Sladkey
+ *
+ *  Optimization changes Copyright (C) 1994 Florian La Roche
+ *
+ *  Jun 7 1999, cache symlink lookups in the page cache.  -DaveM
+ *
+ *  nfs symlink handling code
+ */
+
+#define NFS_NEED_XDR_TYPES
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/namei.h>
+
+/* Symlink caching in the page cache is even more simplistic
+ * and straight-forward than readdir caching.
+ *
+ * At the beginning of the page we store pointer to struct page in question,
+ * simplifying nfs_put_link() (if inode got invalidated we can't find the page
+ * to be freed via pagecache lookup).
+ * The NUL-terminated string follows immediately thereafter.
+ */
+
+struct nfs_symlink {
+	struct page *page;
+	char body[0];
+};
+
+static int nfs_symlink_filler(struct inode *inode, struct page *page)
+{
+	const unsigned int pgbase = offsetof(struct nfs_symlink, body);
+	const unsigned int pglen = PAGE_SIZE - pgbase;
+	int error;
+
+	lock_kernel();
+	error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen);
+	unlock_kernel();
+	if (error < 0)
+		goto error;
+	SetPageUptodate(page);
+	unlock_page(page);
+	return 0;
+
+error:
+	SetPageError(page);
+	unlock_page(page);
+	return -EIO;
+}
+
+static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct inode *inode = dentry->d_inode;
+	struct page *page;
+	struct nfs_symlink *p;
+	void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode));
+	if (err)
+		goto read_failed;
+	page = read_cache_page(&inode->i_data, 0,
+				(filler_t *)nfs_symlink_filler, inode);
+	if (IS_ERR(page)) {
+		err = page;
+		goto read_failed;
+	}
+	if (!PageUptodate(page)) {
+		err = ERR_PTR(-EIO);
+		goto getlink_read_error;
+	}
+	p = kmap(page);
+	p->page = page;
+	nd_set_link(nd, p->body);
+	return 0;
+
+getlink_read_error:
+	page_cache_release(page);
+read_failed:
+	nd_set_link(nd, err);
+	return 0;
+}
+
+static void nfs_put_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s)) {
+		struct nfs_symlink *p;
+		struct page *page;
+
+		p = container_of(s, struct nfs_symlink, body[0]);
+		page = p->page;
+
+		kunmap(page);
+		page_cache_release(page);
+	}
+}
+
+/*
+ * symlinks can't do much...
+ */
+struct inode_operations nfs_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= nfs_follow_link,
+	.put_link	= nfs_put_link,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+};
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -0,0 +1,227 @@
+/*
+ *  linux/fs/nfs/unlink.c
+ *
+ * nfs sillydelete handling
+ *
+ * NOTE: we rely on holding the BKL for list manipulation protection.
+ */
+
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dcache.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+
+
+struct nfs_unlinkdata {
+	struct nfs_unlinkdata	*next;
+	struct dentry	*dir, *dentry;
+	struct qstr	name;
+	struct rpc_task	task;
+	struct rpc_cred	*cred;
+	unsigned int	count;
+};
+
+static struct nfs_unlinkdata	*nfs_deletes;
+static RPC_WAITQ(nfs_delete_queue, "nfs_delete_queue");
+
+/**
+ * nfs_detach_unlinkdata - Remove asynchronous unlink from global list
+ * @data: pointer to descriptor
+ */
+static inline void
+nfs_detach_unlinkdata(struct nfs_unlinkdata *data)
+{
+	struct nfs_unlinkdata	**q;
+
+	for (q = &nfs_deletes; *q != NULL; q = &((*q)->next)) {
+		if (*q == data) {
+			*q = data->next;
+			break;
+		}
+	}
+}
+
+/**
+ * nfs_put_unlinkdata - release data from a sillydelete operation.
+ * @data: pointer to unlink structure.
+ */
+static void
+nfs_put_unlinkdata(struct nfs_unlinkdata *data)
+{
+	if (--data->count == 0) {
+		nfs_detach_unlinkdata(data);
+		if (data->name.name != NULL)
+			kfree(data->name.name);
+		kfree(data);
+	}
+}
+
+#define NAME_ALLOC_LEN(len)	((len+16) & ~15)
+/**
+ * nfs_copy_dname - copy dentry name to data structure
+ * @dentry: pointer to dentry
+ * @data: nfs_unlinkdata
+ */
+static inline void
+nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
+{
+	char		*str;
+	int		len = dentry->d_name.len;
+
+	str = kmalloc(NAME_ALLOC_LEN(len), GFP_KERNEL);
+	if (!str)
+		return;
+	memcpy(str, dentry->d_name.name, len);
+	if (!data->name.len) {
+		data->name.len = len;
+		data->name.name = str;
+	} else
+		kfree(str);
+}
+
+/**
+ * nfs_async_unlink_init - Initialize the RPC info
+ * @task: rpc_task of the sillydelete
+ *
+ * We delay initializing RPC info until after the call to dentry_iput()
+ * in order to minimize races against rename().
+ */
+static void
+nfs_async_unlink_init(struct rpc_task *task)
+{
+	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct dentry		*dir = data->dir;
+	struct rpc_message	msg = {
+		.rpc_cred	= data->cred,
+	};
+	int			status = -ENOENT;
+
+	if (!data->name.len)
+		goto out_err;
+
+	status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name);
+	if (status < 0)
+		goto out_err;
+	nfs_begin_data_update(dir->d_inode);
+	rpc_call_setup(task, &msg, 0);
+	return;
+ out_err:
+	rpc_exit(task, status);
+}
+
+/**
+ * nfs_async_unlink_done - Sillydelete post-processing
+ * @task: rpc_task of the sillydelete
+ *
+ * Do the directory attribute update.
+ */
+static void
+nfs_async_unlink_done(struct rpc_task *task)
+{
+	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct dentry		*dir = data->dir;
+	struct inode		*dir_i;
+
+	if (!dir)
+		return;
+	dir_i = dir->d_inode;
+	nfs_end_data_update(dir_i);
+	if (NFS_PROTO(dir_i)->unlink_done(dir, task))
+		return;
+	put_rpccred(data->cred);
+	data->cred = NULL;
+	dput(dir);
+}
+
+/**
+ * nfs_async_unlink_release - Release the sillydelete data.
+ * @task: rpc_task of the sillydelete
+ *
+ * We need to call nfs_put_unlinkdata as a 'tk_release' task since the
+ * rpc_task would be freed too.
+ */
+static void
+nfs_async_unlink_release(struct rpc_task *task)
+{
+	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	nfs_put_unlinkdata(data);
+}
+
+/**
+ * nfs_async_unlink - asynchronous unlinking of a file
+ * @dentry: dentry to unlink
+ */
+int
+nfs_async_unlink(struct dentry *dentry)
+{
+	struct dentry	*dir = dentry->d_parent;
+	struct nfs_unlinkdata	*data;
+	struct rpc_task	*task;
+	struct rpc_clnt	*clnt = NFS_CLIENT(dir->d_inode);
+	int		status = -ENOMEM;
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out;
+	memset(data, 0, sizeof(*data));
+
+	data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
+	if (IS_ERR(data->cred)) {
+		status = PTR_ERR(data->cred);
+		goto out_free;
+	}
+	data->dir = dget(dir);
+	data->dentry = dentry;
+
+	data->next = nfs_deletes;
+	nfs_deletes = data;
+	data->count = 1;
+
+	task = &data->task;
+	rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC);
+	task->tk_calldata = data;
+	task->tk_action = nfs_async_unlink_init;
+	task->tk_release = nfs_async_unlink_release;
+
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
+	spin_unlock(&dentry->d_lock);
+
+	rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL);
+	status = 0;
+ out:
+	return status;
+out_free:
+	kfree(data);
+	return status;
+}
+
+/**
+ * nfs_complete_unlink - Initialize completion of the sillydelete
+ * @dentry: dentry to delete
+ *
+ * Since we're most likely to be called by dentry_iput(), we
+ * only use the dentry to find the sillydelete. We then copy the name
+ * into the qstr.
+ */
+void
+nfs_complete_unlink(struct dentry *dentry)
+{
+	struct nfs_unlinkdata	*data;
+
+	for(data = nfs_deletes; data != NULL; data = data->next) {
+		if (dentry == data->dentry)
+			break;
+	}
+	if (!data)
+		return;
+	data->count++;
+	nfs_copy_dname(dentry, data);
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+	spin_unlock(&dentry->d_lock);
+	rpc_wake_up_task(&data->task);
+	nfs_put_unlinkdata(data);
+}
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c