Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
This commit is contained in:
Linus Torvalds
2005-04-16 15:20:36 -07:00
커밋 1da177e4c3
17291개의 변경된 파일6718755개의 추가작업 그리고 0개의 파일을 삭제

15
fs/nfs/Makefile Normal file
파일 보기

@@ -0,0 +1,15 @@
#
# Makefile for the Linux nfs filesystem routines.
#
obj-$(CONFIG_NFS_FS) += nfs.o
nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \
proc.o read.o symlink.o unlink.o write.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
delegation.o idmap.o \
callback.o callback_xdr.o callback_proc.o
nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
nfs-objs := $(nfs-y)

187
fs/nfs/callback.c Normal file
파일 보기

@@ -0,0 +1,187 @@
/*
* linux/fs/nfs/callback.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFSv4 callback handling
*/
#include <linux/config.h>
#include <linux/completion.h>
#include <linux/ip.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/nfs_fs.h>
#include "callback.h"
#define NFSDBG_FACILITY NFSDBG_CALLBACK
struct nfs_callback_data {
unsigned int users;
struct svc_serv *serv;
pid_t pid;
struct completion started;
struct completion stopped;
};
static struct nfs_callback_data nfs_callback_info;
static DECLARE_MUTEX(nfs_callback_sema);
static struct svc_program nfs4_callback_program;
unsigned short nfs_callback_tcpport;
/*
* This is the callback kernel thread.
*/
static void nfs_callback_svc(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
int err;
__module_get(THIS_MODULE);
lock_kernel();
nfs_callback_info.pid = current->pid;
daemonize("nfsv4-svc");
/* Process request with signals blocked, but allow SIGKILL. */
allow_signal(SIGKILL);
complete(&nfs_callback_info.started);
while (nfs_callback_info.users != 0 || !signalled()) {
/*
* Listen for a request on the socket
*/
err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT);
if (err == -EAGAIN || err == -EINTR)
continue;
if (err < 0) {
printk(KERN_WARNING
"%s: terminating on error %d\n",
__FUNCTION__, -err);
break;
}
dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
svc_process(serv, rqstp);
}
nfs_callback_info.pid = 0;
complete(&nfs_callback_info.stopped);
unlock_kernel();
module_put_and_exit(0);
}
/*
* Bring up the server process if it is not already up.
*/
int nfs_callback_up(void)
{
struct svc_serv *serv;
struct svc_sock *svsk;
int ret = 0;
lock_kernel();
down(&nfs_callback_sema);
if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
goto out;
init_completion(&nfs_callback_info.started);
init_completion(&nfs_callback_info.stopped);
serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE);
ret = -ENOMEM;
if (!serv)
goto out_err;
/* FIXME: We don't want to register this socket with the portmapper */
ret = svc_makesock(serv, IPPROTO_TCP, 0);
if (ret < 0)
goto out_destroy;
if (!list_empty(&serv->sv_permsocks)) {
svsk = list_entry(serv->sv_permsocks.next,
struct svc_sock, sk_list);
nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
} else
BUG();
ret = svc_create_thread(nfs_callback_svc, serv);
if (ret < 0)
goto out_destroy;
nfs_callback_info.serv = serv;
wait_for_completion(&nfs_callback_info.started);
out:
up(&nfs_callback_sema);
unlock_kernel();
return ret;
out_destroy:
svc_destroy(serv);
out_err:
nfs_callback_info.users--;
goto out;
}
/*
* Kill the server process if it is not already up.
*/
int nfs_callback_down(void)
{
int ret = 0;
lock_kernel();
down(&nfs_callback_sema);
if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
goto out;
kill_proc(nfs_callback_info.pid, SIGKILL, 1);
wait_for_completion(&nfs_callback_info.stopped);
out:
up(&nfs_callback_sema);
unlock_kernel();
return ret;
}
static int nfs_callback_authenticate(struct svc_rqst *rqstp)
{
struct in_addr *addr = &rqstp->rq_addr.sin_addr;
struct nfs4_client *clp;
/* Don't talk to strangers */
clp = nfs4_find_client(addr);
if (clp == NULL)
return SVC_DROP;
dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
nfs4_put_client(clp);
switch (rqstp->rq_authop->flavour) {
case RPC_AUTH_NULL:
if (rqstp->rq_proc != CB_NULL)
return SVC_DENIED;
break;
case RPC_AUTH_UNIX:
break;
case RPC_AUTH_GSS:
/* FIXME: RPCSEC_GSS handling? */
default:
return SVC_DENIED;
}
return SVC_OK;
}
/*
* Define NFS4 callback program
*/
extern struct svc_version nfs4_callback_version1;
static struct svc_version *nfs4_callback_version[] = {
[1] = &nfs4_callback_version1,
};
static struct svc_stat nfs4_callback_stats;
static struct svc_program nfs4_callback_program = {
.pg_prog = NFS4_CALLBACK, /* RPC service number */
.pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
.pg_vers = nfs4_callback_version, /* version table */
.pg_name = "NFSv4 callback", /* service name */
.pg_class = "nfs", /* authentication class */
.pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate,
};

70
fs/nfs/callback.h Normal file
파일 보기

@@ -0,0 +1,70 @@
/*
* linux/fs/nfs/callback.h
*
* Copyright (C) 2004 Trond Myklebust
*
* NFSv4 callback definitions
*/
#ifndef __LINUX_FS_NFS_CALLBACK_H
#define __LINUX_FS_NFS_CALLBACK_H
#define NFS4_CALLBACK 0x40000000
#define NFS4_CALLBACK_XDRSIZE 2048
#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
enum nfs4_callback_procnum {
CB_NULL = 0,
CB_COMPOUND = 1,
};
enum nfs4_callback_opnum {
OP_CB_GETATTR = 3,
OP_CB_RECALL = 4,
OP_CB_ILLEGAL = 10044,
};
struct cb_compound_hdr_arg {
int taglen;
const char *tag;
unsigned int callback_ident;
unsigned nops;
};
struct cb_compound_hdr_res {
uint32_t *status;
int taglen;
const char *tag;
uint32_t *nops;
};
struct cb_getattrargs {
struct sockaddr_in *addr;
struct nfs_fh fh;
uint32_t bitmap[2];
};
struct cb_getattrres {
uint32_t status;
uint32_t bitmap[2];
uint64_t size;
uint64_t change_attr;
struct timespec ctime;
struct timespec mtime;
};
struct cb_recallargs {
struct sockaddr_in *addr;
struct nfs_fh fh;
nfs4_stateid stateid;
uint32_t truncate;
};
extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
extern int nfs_callback_up(void);
extern int nfs_callback_down(void);
extern unsigned short nfs_callback_tcpport;
#endif /* __LINUX_FS_NFS_CALLBACK_H */

85
fs/nfs/callback_proc.c Normal file
파일 보기

@@ -0,0 +1,85 @@
/*
* linux/fs/nfs/callback_proc.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFSv4 callback procedures
*/
#include <linux/config.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include "callback.h"
#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_CALLBACK
unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
{
struct nfs4_client *clp;
struct nfs_delegation *delegation;
struct nfs_inode *nfsi;
struct inode *inode;
res->bitmap[0] = res->bitmap[1] = 0;
res->status = htonl(NFS4ERR_BADHANDLE);
clp = nfs4_find_client(&args->addr->sin_addr);
if (clp == NULL)
goto out;
inode = nfs_delegation_find_inode(clp, &args->fh);
if (inode == NULL)
goto out_putclient;
nfsi = NFS_I(inode);
down_read(&nfsi->rwsem);
delegation = nfsi->delegation;
if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
goto out_iput;
res->size = i_size_read(inode);
res->change_attr = NFS_CHANGE_ATTR(inode);
res->ctime = inode->i_ctime;
res->mtime = inode->i_mtime;
res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
args->bitmap[0];
res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
args->bitmap[1];
res->status = 0;
out_iput:
up_read(&nfsi->rwsem);
iput(inode);
out_putclient:
nfs4_put_client(clp);
out:
dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
return res->status;
}
unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
{
struct nfs4_client *clp;
struct inode *inode;
unsigned res;
res = htonl(NFS4ERR_BADHANDLE);
clp = nfs4_find_client(&args->addr->sin_addr);
if (clp == NULL)
goto out;
inode = nfs_delegation_find_inode(clp, &args->fh);
if (inode == NULL)
goto out_putclient;
/* Set up a helper thread to actually return the delegation */
switch(nfs_async_inode_return_delegation(inode, &args->stateid)) {
case 0:
res = 0;
break;
case -ENOENT:
res = htonl(NFS4ERR_BAD_STATEID);
break;
default:
res = htonl(NFS4ERR_RESOURCE);
}
iput(inode);
out_putclient:
nfs4_put_client(clp);
out:
dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
return res;
}

481
fs/nfs/callback_xdr.c Normal file
파일 보기

@@ -0,0 +1,481 @@
/*
* linux/fs/nfs/callback_xdr.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFSv4 callback encode/decode procedures
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sunrpc/svc.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include "callback.h"
#define CB_OP_TAGLEN_MAXSZ (512)
#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
#define CB_OP_GETATTR_BITMAP_MAXSZ (4)
#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
CB_OP_GETATTR_BITMAP_MAXSZ + \
2 + 2 + 3 + 3)
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define NFSDBG_FACILITY NFSDBG_CALLBACK
typedef unsigned (*callback_process_op_t)(void *, void *);
typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
struct callback_op {
callback_process_op_t process_op;
callback_decode_arg_t decode_args;
callback_encode_res_t encode_res;
long res_maxsize;
};
static struct callback_op callback_ops[];
static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
{
return htonl(NFS4_OK);
}
static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
{
return xdr_argsize_check(rqstp, p);
}
static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
{
return xdr_ressize_check(rqstp, p);
}
static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
{
uint32_t *p;
p = xdr_inline_decode(xdr, nbytes);
if (unlikely(p == NULL))
printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n");
return p;
}
static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
{
uint32_t *p;
p = read_buf(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
*len = ntohl(*p);
if (*len != 0) {
p = read_buf(xdr, *len);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
*str = (const char *)p;
} else
*str = NULL;
return 0;
}
static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
uint32_t *p;
p = read_buf(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
fh->size = ntohl(*p);
if (fh->size > NFS4_FHSIZE)
return htonl(NFS4ERR_BADHANDLE);
p = read_buf(xdr, fh->size);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(&fh->data[0], p, fh->size);
memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size);
return 0;
}
static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
{
uint32_t *p;
unsigned int attrlen;
p = read_buf(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
attrlen = ntohl(*p);
p = read_buf(xdr, attrlen << 2);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
if (likely(attrlen > 0))
bitmap[0] = ntohl(*p++);
if (attrlen > 1)
bitmap[1] = ntohl(*p);
return 0;
}
static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
uint32_t *p;
p = read_buf(xdr, 16);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(stateid->data, p, 16);
return 0;
}
static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
{
uint32_t *p;
unsigned int minor_version;
unsigned status;
status = decode_string(xdr, &hdr->taglen, &hdr->tag);
if (unlikely(status != 0))
return status;
/* We do not like overly long tags! */
if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) {
printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
__FUNCTION__, hdr->taglen);
return htonl(NFS4ERR_RESOURCE);
}
p = read_buf(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
minor_version = ntohl(*p++);
/* Check minor version is zero. */
if (minor_version != 0) {
printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n",
__FUNCTION__, minor_version);
return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
}
hdr->callback_ident = ntohl(*p++);
hdr->nops = ntohl(*p);
return 0;
}
static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
{
uint32_t *p;
p = read_buf(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
*op = ntohl(*p);
return 0;
}
static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
{
unsigned status;
status = decode_fh(xdr, &args->fh);
if (unlikely(status != 0))
goto out;
args->addr = &rqstp->rq_addr;
status = decode_bitmap(xdr, args->bitmap);
out:
dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
return status;
}
static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
{
uint32_t *p;
unsigned status;
args->addr = &rqstp->rq_addr;
status = decode_stateid(xdr, &args->stateid);
if (unlikely(status != 0))
goto out;
p = read_buf(xdr, 4);
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_RESOURCE);
goto out;
}
args->truncate = ntohl(*p);
status = decode_fh(xdr, &args->fh);
out:
dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
return 0;
}
static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
uint32_t *p;
p = xdr_reserve_space(xdr, 4 + len);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
xdr_encode_opaque(p, str, len);
return 0;
}
#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
{
uint32_t bm[2];
uint32_t *p;
bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0);
bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1);
if (bm[1] != 0) {
p = xdr_reserve_space(xdr, 16);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
*p++ = htonl(2);
*p++ = bm[0];
*p++ = bm[1];
} else if (bm[0] != 0) {
p = xdr_reserve_space(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
*p++ = htonl(1);
*p++ = bm[0];
} else {
p = xdr_reserve_space(xdr, 8);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
*p++ = htonl(0);
}
*savep = p;
return 0;
}
static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
{
uint32_t *p;
if (!(bitmap[0] & FATTR4_WORD0_CHANGE))
return 0;
p = xdr_reserve_space(xdr, 8);
if (unlikely(p == 0))
return htonl(NFS4ERR_RESOURCE);
p = xdr_encode_hyper(p, change);
return 0;
}
static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
{
uint32_t *p;
if (!(bitmap[0] & FATTR4_WORD0_SIZE))
return 0;
p = xdr_reserve_space(xdr, 8);
if (unlikely(p == 0))
return htonl(NFS4ERR_RESOURCE);
p = xdr_encode_hyper(p, size);
return 0;
}
static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
{
uint32_t *p;
p = xdr_reserve_space(xdr, 12);
if (unlikely(p == 0))
return htonl(NFS4ERR_RESOURCE);
p = xdr_encode_hyper(p, time->tv_sec);
*p = htonl(time->tv_nsec);
return 0;
}
static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
return 0;
return encode_attr_time(xdr,time);
}
static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY))
return 0;
return encode_attr_time(xdr,time);
}
static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
{
unsigned status;
hdr->status = xdr_reserve_space(xdr, 4);
if (unlikely(hdr->status == NULL))
return htonl(NFS4ERR_RESOURCE);
status = encode_string(xdr, hdr->taglen, hdr->tag);
if (unlikely(status != 0))
return status;
hdr->nops = xdr_reserve_space(xdr, 4);
if (unlikely(hdr->nops == NULL))
return htonl(NFS4ERR_RESOURCE);
return 0;
}
static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
{
uint32_t *p;
p = xdr_reserve_space(xdr, 8);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
*p++ = htonl(op);
*p = res;
return 0;
}
static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
{
uint32_t *savep;
unsigned status = res->status;
if (unlikely(status != 0))
goto out;
status = encode_attr_bitmap(xdr, res->bitmap, &savep);
if (unlikely(status != 0))
goto out;
status = encode_attr_change(xdr, res->bitmap, res->change_attr);
if (unlikely(status != 0))
goto out;
status = encode_attr_size(xdr, res->bitmap, res->size);
if (unlikely(status != 0))
goto out;
status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
if (unlikely(status != 0))
goto out;
status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
out:
dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
return status;
}
static unsigned process_op(struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
struct xdr_stream *xdr_out, void *resp)
{
struct callback_op *op;
unsigned int op_nr;
unsigned int status = 0;
long maxlen;
unsigned res;
dprintk("%s: start\n", __FUNCTION__);
status = decode_op_hdr(xdr_in, &op_nr);
if (unlikely(status != 0)) {
op_nr = OP_CB_ILLEGAL;
op = &callback_ops[0];
} else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
op_nr = OP_CB_ILLEGAL;
op = &callback_ops[0];
status = htonl(NFS4ERR_OP_ILLEGAL);
} else
op = &callback_ops[op_nr];
maxlen = xdr_out->end - xdr_out->p;
if (maxlen > 0 && maxlen < PAGE_SIZE) {
if (likely(status == 0 && op->decode_args != NULL))
status = op->decode_args(rqstp, xdr_in, argp);
if (likely(status == 0 && op->process_op != NULL))
status = op->process_op(argp, resp);
} else
status = htonl(NFS4ERR_RESOURCE);
res = encode_op_hdr(xdr_out, op_nr, status);
if (status == 0)
status = res;
if (op->encode_res != NULL && status == 0)
status = op->encode_res(rqstp, xdr_out, resp);
dprintk("%s: done, status = %d\n", __FUNCTION__, status);
return status;
}
/*
* Decode, process and encode a COMPOUND
*/
static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
{
struct cb_compound_hdr_arg hdr_arg;
struct cb_compound_hdr_res hdr_res;
struct xdr_stream xdr_in, xdr_out;
uint32_t *p;
unsigned int status;
unsigned int nops = 1;
dprintk("%s: start\n", __FUNCTION__);
xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
decode_compound_hdr_arg(&xdr_in, &hdr_arg);
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
encode_compound_hdr_res(&xdr_out, &hdr_res);
for (;;) {
status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp);
if (status != 0)
break;
if (nops == hdr_arg.nops)
break;
nops++;
}
*hdr_res.status = status;
*hdr_res.nops = htonl(nops);
dprintk("%s: done, status = %u\n", __FUNCTION__, status);
return rpc_success;
}
/*
* Define NFS4 callback COMPOUND ops.
*/
static struct callback_op callback_ops[] = {
[0] = {
.res_maxsize = CB_OP_HDR_RES_MAXSZ,
},
[OP_CB_GETATTR] = {
.process_op = (callback_process_op_t)nfs4_callback_getattr,
.decode_args = (callback_decode_arg_t)decode_getattr_args,
.encode_res = (callback_encode_res_t)encode_getattr_res,
.res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
},
[OP_CB_RECALL] = {
.process_op = (callback_process_op_t)nfs4_callback_recall,
.decode_args = (callback_decode_arg_t)decode_recall_args,
.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
}
};
/*
* Define NFS4 callback procedures
*/
static struct svc_procedure nfs4_callback_procedures1[] = {
[CB_NULL] = {
.pc_func = nfs4_callback_null,
.pc_decode = (kxdrproc_t)nfs4_decode_void,
.pc_encode = (kxdrproc_t)nfs4_encode_void,
.pc_xdrressize = 1,
},
[CB_COMPOUND] = {
.pc_func = nfs4_callback_compound,
.pc_encode = (kxdrproc_t)nfs4_encode_void,
.pc_argsize = 256,
.pc_ressize = 256,
.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
}
};
struct svc_version nfs4_callback_version1 = {
.vs_vers = 1,
.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
};

342
fs/nfs/delegation.c Normal file
파일 보기

@@ -0,0 +1,342 @@
/*
* linux/fs/nfs/delegation.c
*
* Copyright (C) 2004 Trond Myklebust
*
* NFS file delegation management
*
*/
#include <linux/config.h>
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include "delegation.h"
static struct nfs_delegation *nfs_alloc_delegation(void)
{
return (struct nfs_delegation *)kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL);
}
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
if (delegation->cred)
put_rpccred(delegation->cred);
kfree(delegation);
}
static void nfs_delegation_claim_opens(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
struct nfs4_state *state;
again:
spin_lock(&inode->i_lock);
list_for_each_entry(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
get_nfs_open_context(ctx);
spin_unlock(&inode->i_lock);
if (nfs4_open_delegation_recall(ctx->dentry, state) < 0)
return;
put_nfs_open_context(ctx);
goto again;
}
spin_unlock(&inode->i_lock);
}
/*
* Set up a delegation on an inode
*/
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
struct nfs_delegation *delegation = NFS_I(inode)->delegation;
if (delegation == NULL)
return;
memcpy(delegation->stateid.data, res->delegation.data,
sizeof(delegation->stateid.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
put_rpccred(cred);
delegation->cred = get_rpccred(cred);
delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
NFS_I(inode)->delegation_state = delegation->type;
smp_wmb();
}
/*
* Set up a delegation on an inode
*/
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int status = 0;
delegation = nfs_alloc_delegation();
if (delegation == NULL)
return -ENOMEM;
memcpy(delegation->stateid.data, res->delegation.data,
sizeof(delegation->stateid.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
delegation->cred = get_rpccred(cred);
delegation->inode = inode;
spin_lock(&clp->cl_lock);
if (nfsi->delegation == NULL) {
list_add(&delegation->super_list, &clp->cl_delegations);
nfsi->delegation = delegation;
nfsi->delegation_state = delegation->type;
delegation = NULL;
} else {
if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
sizeof(delegation->stateid)) != 0 ||
delegation->type != nfsi->delegation->type) {
printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
__FUNCTION__, NIPQUAD(clp->cl_addr));
status = -EIO;
}
}
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
kfree(delegation);
return status;
}
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
{
int res = 0;
__nfs_revalidate_inode(NFS_SERVER(inode), inode);
res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
nfs_free_delegation(delegation);
return res;
}
/* Sync all data to disk upon delegation return */
static void nfs_msync_inode(struct inode *inode)
{
filemap_fdatawrite(inode->i_mapping);
nfs_wb_all(inode);
filemap_fdatawait(inode->i_mapping);
}
/*
* Basic procedure for returning a delegation to the server
*/
int nfs_inode_return_delegation(struct inode *inode)
{
struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int res = 0;
nfs_msync_inode(inode);
down_read(&clp->cl_sem);
/* Guard against new delegated open calls */
down_write(&nfsi->rwsem);
spin_lock(&clp->cl_lock);
delegation = nfsi->delegation;
if (delegation != NULL) {
list_del_init(&delegation->super_list);
nfsi->delegation = NULL;
nfsi->delegation_state = 0;
}
spin_unlock(&clp->cl_lock);
nfs_delegation_claim_opens(inode);
up_write(&nfsi->rwsem);
up_read(&clp->cl_sem);
nfs_msync_inode(inode);
if (delegation != NULL)
res = nfs_do_return_delegation(inode, delegation);
return res;
}
/*
* Return all delegations associated to a super block
*/
void nfs_return_all_delegations(struct super_block *sb)
{
struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
struct nfs_delegation *delegation;
struct inode *inode;
if (clp == NULL)
return;
restart:
spin_lock(&clp->cl_lock);
list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
if (delegation->inode->i_sb != sb)
continue;
inode = igrab(delegation->inode);
if (inode == NULL)
continue;
spin_unlock(&clp->cl_lock);
nfs_inode_return_delegation(inode);
iput(inode);
goto restart;
}
spin_unlock(&clp->cl_lock);
}
/*
* Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
*/
void nfs_handle_cb_pathdown(struct nfs4_client *clp)
{
struct nfs_delegation *delegation;
struct inode *inode;
if (clp == NULL)
return;
restart:
spin_lock(&clp->cl_lock);
list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
inode = igrab(delegation->inode);
if (inode == NULL)
continue;
spin_unlock(&clp->cl_lock);
nfs_inode_return_delegation(inode);
iput(inode);
goto restart;
}
spin_unlock(&clp->cl_lock);
}
struct recall_threadargs {
struct inode *inode;
struct nfs4_client *clp;
const nfs4_stateid *stateid;
struct completion started;
int result;
};
static int recall_thread(void *data)
{
struct recall_threadargs *args = (struct recall_threadargs *)data;
struct inode *inode = igrab(args->inode);
struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
daemonize("nfsv4-delegreturn");
nfs_msync_inode(inode);
down_read(&clp->cl_sem);
down_write(&nfsi->rwsem);
spin_lock(&clp->cl_lock);
delegation = nfsi->delegation;
if (delegation != NULL && memcmp(delegation->stateid.data,
args->stateid->data,
sizeof(delegation->stateid.data)) == 0) {
list_del_init(&delegation->super_list);
nfsi->delegation = NULL;
nfsi->delegation_state = 0;
args->result = 0;
} else {
delegation = NULL;
args->result = -ENOENT;
}
spin_unlock(&clp->cl_lock);
complete(&args->started);
nfs_delegation_claim_opens(inode);
up_write(&nfsi->rwsem);
up_read(&clp->cl_sem);
nfs_msync_inode(inode);
if (delegation != NULL)
nfs_do_return_delegation(inode, delegation);
iput(inode);
module_put_and_exit(0);
}
/*
* Asynchronous delegation recall!
*/
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
{
struct recall_threadargs data = {
.inode = inode,
.stateid = stateid,
};
int status;
init_completion(&data.started);
__module_get(THIS_MODULE);
status = kernel_thread(recall_thread, &data, CLONE_KERNEL);
if (status < 0)
goto out_module_put;
wait_for_completion(&data.started);
return data.result;
out_module_put:
module_put(THIS_MODULE);
return status;
}
/*
* Retrieve the inode associated with a delegation
*/
struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
struct inode *res = NULL;
spin_lock(&clp->cl_lock);
list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
res = igrab(delegation->inode);
break;
}
}
spin_unlock(&clp->cl_lock);
return res;
}
/*
* Mark all delegations as needing to be reclaimed
*/
void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
{
struct nfs_delegation *delegation;
spin_lock(&clp->cl_lock);
list_for_each_entry(delegation, &clp->cl_delegations, super_list)
delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
spin_unlock(&clp->cl_lock);
}
/*
* Reap all unclaimed delegations after reboot recovery is done
*/
void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
{
struct nfs_delegation *delegation, *n;
LIST_HEAD(head);
spin_lock(&clp->cl_lock);
list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
continue;
list_move(&delegation->super_list, &head);
NFS_I(delegation->inode)->delegation = NULL;
NFS_I(delegation->inode)->delegation_state = 0;
}
spin_unlock(&clp->cl_lock);
while(!list_empty(&head)) {
delegation = list_entry(head.next, struct nfs_delegation, super_list);
list_del(&delegation->super_list);
nfs_free_delegation(delegation);
}
}

57
fs/nfs/delegation.h Normal file
파일 보기

@@ -0,0 +1,57 @@
/*
* linux/fs/nfs/delegation.h
*
* Copyright (c) Trond Myklebust
*
* Definitions pertaining to NFS delegated files
*/
#ifndef FS_NFS_DELEGATION_H
#define FS_NFS_DELEGATION_H
#if defined(CONFIG_NFS_V4)
/*
* NFSv4 delegation
*/
struct nfs_delegation {
struct list_head super_list;
struct rpc_cred *cred;
struct inode *inode;
nfs4_stateid stateid;
int type;
#define NFS_DELEGATION_NEED_RECLAIM 1
long flags;
loff_t maxsize;
};
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
int nfs_inode_return_delegation(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
void nfs_return_all_delegations(struct super_block *sb);
void nfs_handle_cb_pathdown(struct nfs4_client *clp);
void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
/* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
static inline int nfs_have_delegation(struct inode *inode, int flags)
{
flags &= FMODE_READ|FMODE_WRITE;
smp_rmb();
if ((NFS_I(inode)->delegation_state & flags) == flags)
return 1;
return 0;
}
#else
static inline int nfs_have_delegation(struct inode *inode, int flags)
{
return 0;
}
#endif
#endif

1562
fs/nfs/dir.c Normal file

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다. Load Diff

808
fs/nfs/direct.c Normal file
파일 보기

@@ -0,0 +1,808 @@
/*
* linux/fs/nfs/direct.c
*
* Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
*
* High-performance uncached I/O for the Linux NFS client
*
* There are important applications whose performance or correctness
* depends on uncached access to file data. Database clusters
* (multiple copies of the same instance running on separate hosts)
* implement their own cache coherency protocol that subsumes file
* system cache protocols. Applications that process datasets
* considerably larger than the client's memory do not always benefit
* from a local cache. A streaming video server, for instance, has no
* need to cache the contents of a file.
*
* When an application requests uncached I/O, all read and write requests
* are made directly to the server; data stored or fetched via these
* requests is not cached in the Linux page cache. The client does not
* correct unaligned requests from applications. All requested bytes are
* held on permanent storage before a direct write system call returns to
* an application.
*
* Solaris implements an uncached I/O facility called directio() that
* is used for backups and sequential I/O to very large files. Solaris
* also supports uncaching whole NFS partitions with "-o forcedirectio,"
* an undocumented mount option.
*
* Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
* help from Andrew Morton.
*
* 18 Dec 2001 Initial implementation for 2.4 --cel
* 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
*
*/
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/smp_lock.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#define NFSDBG_FACILITY NFSDBG_VFS
#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
static kmem_cache_t *nfs_direct_cachep;
/*
* This represents a set of asynchronous requests that we're waiting on
*/
struct nfs_direct_req {
struct kref kref; /* release manager */
struct list_head list; /* nfs_read_data structs */
wait_queue_head_t wait; /* wait for i/o completion */
struct page ** pages; /* pages in our buffer */
unsigned int npages; /* count of pages */
atomic_t complete, /* i/os we're waiting for */
count, /* bytes actually processed */
error; /* any reported error */
};
/**
* nfs_get_user_pages - find and set up pages underlying user's buffer
* rw: direction (read or write)
* user_addr: starting address of this segment of user's buffer
* count: size of this segment
* @pages: returned array of page struct pointers underlying user's buffer
*/
static inline int
nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
struct page ***pages)
{
int result = -ENOMEM;
unsigned long page_count;
size_t array_size;
/* set an arbitrary limit to prevent type overflow */
/* XXX: this can probably be as large as INT_MAX */
if (size > MAX_DIRECTIO_SIZE) {
*pages = NULL;
return -EFBIG;
}
page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
page_count -= user_addr >> PAGE_SHIFT;
array_size = (page_count * sizeof(struct page *));
*pages = kmalloc(array_size, GFP_KERNEL);
if (*pages) {
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
page_count, (rw == READ), 0,
*pages, NULL);
up_read(&current->mm->mmap_sem);
}
return result;
}
/**
* nfs_free_user_pages - tear down page struct array
* @pages: array of page struct pointers underlying target buffer
* @npages: number of pages in the array
* @do_dirty: dirty the pages as we release them
*/
static void
nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
{
int i;
for (i = 0; i < npages; i++) {
if (do_dirty)
set_page_dirty_lock(pages[i]);
page_cache_release(pages[i]);
}
kfree(pages);
}
/**
* nfs_direct_req_release - release nfs_direct_req structure for direct read
* @kref: kref object embedded in an nfs_direct_req structure
*
*/
static void nfs_direct_req_release(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
kmem_cache_free(nfs_direct_cachep, dreq);
}
/**
* nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
* @count: count of bytes for the read request
* @rsize: local rsize setting
*
* Note we also set the number of requests we have in the dreq when we are
* done. This prevents races with I/O completion so we will always wait
* until all requests have been dispatched and completed.
*/
static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
{
struct list_head *list;
struct nfs_direct_req *dreq;
unsigned int reads = 0;
dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
if (!dreq)
return NULL;
kref_init(&dreq->kref);
init_waitqueue_head(&dreq->wait);
INIT_LIST_HEAD(&dreq->list);
atomic_set(&dreq->count, 0);
atomic_set(&dreq->error, 0);
list = &dreq->list;
for(;;) {
struct nfs_read_data *data = nfs_readdata_alloc();
if (unlikely(!data)) {
while (!list_empty(list)) {
data = list_entry(list->next,
struct nfs_read_data, pages);
list_del(&data->pages);
nfs_readdata_free(data);
}
kref_put(&dreq->kref, nfs_direct_req_release);
return NULL;
}
INIT_LIST_HEAD(&data->pages);
list_add(&data->pages, list);
data->req = (struct nfs_page *) dreq;
reads++;
if (nbytes <= rsize)
break;
nbytes -= rsize;
}
kref_get(&dreq->kref);
atomic_set(&dreq->complete, reads);
return dreq;
}
/**
* nfs_direct_read_result - handle a read reply for a direct read request
* @data: address of NFS READ operation control block
* @status: status of this NFS READ operation
*
* We must hold a reference to all the pages in this direct read request
* until the RPCs complete. This could be long *after* we are woken up in
* nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
*/
static void nfs_direct_read_result(struct nfs_read_data *data, int status)
{
struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
if (likely(status >= 0))
atomic_add(data->res.count, &dreq->count);
else
atomic_set(&dreq->error, status);
if (unlikely(atomic_dec_and_test(&dreq->complete))) {
nfs_free_user_pages(dreq->pages, dreq->npages, 1);
wake_up(&dreq->wait);
kref_put(&dreq->kref, nfs_direct_req_release);
}
}
/**
* nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
* @dreq: address of nfs_direct_req struct for this request
* @inode: target inode
* @ctx: target file open context
* @user_addr: starting address of this segment of user's buffer
* @count: size of this segment
* @file_offset: offset in file to begin the operation
*
* For each nfs_read_data struct that was allocated on the list, dispatch
* an NFS READ operation
*/
static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset)
{
struct list_head *list = &dreq->list;
struct page **pages = dreq->pages;
unsigned int curpage, pgbase;
unsigned int rsize = NFS_SERVER(inode)->rsize;
curpage = 0;
pgbase = user_addr & ~PAGE_MASK;
do {
struct nfs_read_data *data;
unsigned int bytes;
bytes = rsize;
if (count < rsize)
bytes = count;
data = list_entry(list->next, struct nfs_read_data, pages);
list_del_init(&data->pages);
data->inode = inode;
data->cred = ctx->cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = file_offset;
data->args.pgbase = pgbase;
data->args.pages = &pages[curpage];
data->args.count = bytes;
data->res.fattr = &data->fattr;
data->res.eof = 0;
data->res.count = bytes;
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long) inode;
data->task.tk_calldata = data;
data->task.tk_release = nfs_readdata_release;
data->complete = nfs_direct_read_result;
lock_kernel();
rpc_execute(&data->task);
unlock_kernel();
dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
file_offset += bytes;
pgbase += bytes;
curpage += pgbase >> PAGE_SHIFT;
pgbase &= ~PAGE_MASK;
count -= bytes;
} while (count != 0);
}
/**
* nfs_direct_read_wait - wait for I/O completion for direct reads
* @dreq: request on which we are to wait
* @intr: whether or not this wait can be interrupted
*
* Collects and returns the final error value/byte-count.
*/
static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
{
int result = 0;
if (intr) {
result = wait_event_interruptible(dreq->wait,
(atomic_read(&dreq->complete) == 0));
} else {
wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
}
if (!result)
result = atomic_read(&dreq->error);
if (!result)
result = atomic_read(&dreq->count);
kref_put(&dreq->kref, nfs_direct_req_release);
return (ssize_t) result;
}
/**
* nfs_direct_read_seg - Read in one iov segment. Generate separate
* read RPCs for each "rsize" bytes.
* @inode: target inode
* @ctx: target file open context
* @user_addr: starting address of this segment of user's buffer
* @count: size of this segment
* @file_offset: offset in file to begin the operation
* @pages: array of addresses of page structs defining user's buffer
* @nr_pages: number of pages in the array
*
*/
static ssize_t nfs_direct_read_seg(struct inode *inode,
struct nfs_open_context *ctx, unsigned long user_addr,
size_t count, loff_t file_offset, struct page **pages,
unsigned int nr_pages)
{
ssize_t result;
sigset_t oldset;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_direct_req *dreq;
dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
if (!dreq)
return -ENOMEM;
dreq->pages = pages;
dreq->npages = nr_pages;
rpc_clnt_sigmask(clnt, &oldset);
nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
file_offset);
result = nfs_direct_read_wait(dreq, clnt->cl_intr);
rpc_clnt_sigunmask(clnt, &oldset);
return result;
}
/**
* nfs_direct_read - For each iov segment, map the user's buffer
* then generate read RPCs.
* @inode: target inode
* @ctx: target file open context
* @iov: array of vectors that define I/O buffer
* file_offset: offset in file to begin the operation
* nr_segs: size of iovec array
*
* We've already pushed out any non-direct writes so that this read
* will see them when we read from the server.
*/
static ssize_t
nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
const struct iovec *iov, loff_t file_offset,
unsigned long nr_segs)
{
ssize_t tot_bytes = 0;
unsigned long seg = 0;
while ((seg < nr_segs) && (tot_bytes >= 0)) {
ssize_t result;
int page_count;
struct page **pages;
const struct iovec *vec = &iov[seg++];
unsigned long user_addr = (unsigned long) vec->iov_base;
size_t size = vec->iov_len;
page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
if (page_count < 0) {
nfs_free_user_pages(pages, 0, 0);
if (tot_bytes > 0)
break;
return page_count;
}
result = nfs_direct_read_seg(inode, ctx, user_addr, size,
file_offset, pages, page_count);
if (result <= 0) {
if (tot_bytes > 0)
break;
return result;
}
tot_bytes += result;
file_offset += result;
if (result < size)
break;
}
return tot_bytes;
}
/**
* nfs_direct_write_seg - Write out one iov segment. Generate separate
* write RPCs for each "wsize" bytes, then commit.
* @inode: target inode
* @ctx: target file open context
* user_addr: starting address of this segment of user's buffer
* count: size of this segment
* file_offset: offset in file to begin the operation
* @pages: array of addresses of page structs defining user's buffer
* nr_pages: size of pages array
*/
static ssize_t nfs_direct_write_seg(struct inode *inode,
struct nfs_open_context *ctx, unsigned long user_addr,
size_t count, loff_t file_offset, struct page **pages,
int nr_pages)
{
const unsigned int wsize = NFS_SERVER(inode)->wsize;
size_t request;
int curpage, need_commit;
ssize_t result, tot_bytes;
struct nfs_writeverf first_verf;
struct nfs_write_data *wdata;
wdata = nfs_writedata_alloc();
if (!wdata)
return -ENOMEM;
wdata->inode = inode;
wdata->cred = ctx->cred;
wdata->args.fh = NFS_FH(inode);
wdata->args.context = ctx;
wdata->args.stable = NFS_UNSTABLE;
if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
wdata->args.stable = NFS_FILE_SYNC;
wdata->res.fattr = &wdata->fattr;
wdata->res.verf = &wdata->verf;
nfs_begin_data_update(inode);
retry:
need_commit = 0;
tot_bytes = 0;
curpage = 0;
request = count;
wdata->args.pgbase = user_addr & ~PAGE_MASK;
wdata->args.offset = file_offset;
do {
wdata->args.count = request;
if (wdata->args.count > wsize)
wdata->args.count = wsize;
wdata->args.pages = &pages[curpage];
dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
wdata->args.count, (long long) wdata->args.offset,
user_addr + tot_bytes, wdata->args.pgbase, curpage);
lock_kernel();
result = NFS_PROTO(inode)->write(wdata);
unlock_kernel();
if (result <= 0) {
if (tot_bytes > 0)
break;
goto out;
}
if (tot_bytes == 0)
memcpy(&first_verf.verifier, &wdata->verf.verifier,
sizeof(first_verf.verifier));
if (wdata->verf.committed != NFS_FILE_SYNC) {
need_commit = 1;
if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
sizeof(first_verf.verifier)));
goto sync_retry;
}
tot_bytes += result;
/* in case of a short write: stop now, let the app recover */
if (result < wdata->args.count)
break;
wdata->args.offset += result;
wdata->args.pgbase += result;
curpage += wdata->args.pgbase >> PAGE_SHIFT;
wdata->args.pgbase &= ~PAGE_MASK;
request -= result;
} while (request != 0);
/*
* Commit data written so far, even in the event of an error
*/
if (need_commit) {
wdata->args.count = tot_bytes;
wdata->args.offset = file_offset;
lock_kernel();
result = NFS_PROTO(inode)->commit(wdata);
unlock_kernel();
if (result < 0 || memcmp(&first_verf.verifier,
&wdata->verf.verifier,
sizeof(first_verf.verifier)) != 0)
goto sync_retry;
}
result = tot_bytes;
out:
nfs_end_data_update_defer(inode);
nfs_writedata_free(wdata);
return result;
sync_retry:
wdata->args.stable = NFS_FILE_SYNC;
goto retry;
}
/**
* nfs_direct_write - For each iov segment, map the user's buffer
* then generate write and commit RPCs.
* @inode: target inode
* @ctx: target file open context
* @iov: array of vectors that define I/O buffer
* file_offset: offset in file to begin the operation
* nr_segs: size of iovec array
*
* Upon return, generic_file_direct_IO invalidates any cached pages
* that non-direct readers might access, so they will pick up these
* writes immediately.
*/
static ssize_t nfs_direct_write(struct inode *inode,
struct nfs_open_context *ctx, const struct iovec *iov,
loff_t file_offset, unsigned long nr_segs)
{
ssize_t tot_bytes = 0;
unsigned long seg = 0;
while ((seg < nr_segs) && (tot_bytes >= 0)) {
ssize_t result;
int page_count;
struct page **pages;
const struct iovec *vec = &iov[seg++];
unsigned long user_addr = (unsigned long) vec->iov_base;
size_t size = vec->iov_len;
page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
if (page_count < 0) {
nfs_free_user_pages(pages, 0, 0);
if (tot_bytes > 0)
break;
return page_count;
}
result = nfs_direct_write_seg(inode, ctx, user_addr, size,
file_offset, pages, page_count);
nfs_free_user_pages(pages, page_count, 0);
if (result <= 0) {
if (tot_bytes > 0)
break;
return result;
}
tot_bytes += result;
file_offset += result;
if (result < size)
break;
}
return tot_bytes;
}
/**
* nfs_direct_IO - NFS address space operation for direct I/O
* rw: direction (read or write)
* @iocb: target I/O control block
* @iov: array of vectors that define I/O buffer
* file_offset: offset in file to begin the operation
* nr_segs: size of iovec array
*
*/
ssize_t
nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t file_offset, unsigned long nr_segs)
{
ssize_t result = -EINVAL;
struct file *file = iocb->ki_filp;
struct nfs_open_context *ctx;
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
/*
* No support for async yet
*/
if (!is_sync_kiocb(iocb))
return result;
ctx = (struct nfs_open_context *)file->private_data;
switch (rw) {
case READ:
dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
dentry->d_name.name, file_offset, nr_segs);
result = nfs_direct_read(inode, ctx, iov,
file_offset, nr_segs);
break;
case WRITE:
dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
dentry->d_name.name, file_offset, nr_segs);
result = nfs_direct_write(inode, ctx, iov,
file_offset, nr_segs);
break;
default:
break;
}
return result;
}
/**
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer into which to read data
* count: number of bytes to read
* pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
* the request starts before the end of the file. For that check
* to work, we must generate a GETATTR before each direct read, and
* even then there is a window between the GETATTR and the subsequent
* READ where the file size could change. So our preference is simply
* to do all reads the application wants, and the server will take
* care of managing the end of file boundary.
*
* This function also eliminates unnecessarily updating the file's
* atime locally, as the NFS server sets the file's atime, and this
* client must read the updated atime from the server back into its
* cache.
*/
ssize_t
nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
{
ssize_t retval = -EINVAL;
loff_t *ppos = &iocb->ki_pos;
struct file *file = iocb->ki_filp;
struct nfs_open_context *ctx =
(struct nfs_open_context *) file->private_data;
struct dentry *dentry = file->f_dentry;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct iovec iov = {
.iov_base = buf,
.iov_len = count,
};
dprintk("nfs: direct read(%s/%s, %lu@%lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
(unsigned long) count, (unsigned long) pos);
if (!is_sync_kiocb(iocb))
goto out;
if (count < 0)
goto out;
retval = -EFAULT;
if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
goto out;
retval = 0;
if (!count)
goto out;
if (mapping->nrpages) {
retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = nfs_wb_all(inode);
if (retval == 0)
retval = filemap_fdatawait(mapping);
if (retval)
goto out;
}
retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
if (retval > 0)
*ppos = pos + retval;
out:
return retval;
}
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer from which to write data
* count: number of bytes to write
* pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
* generic_file_aio_write() in order to avoid taking the inode
* semaphore and updating the i_size. The NFS server will set
* the new i_size and this client must read the updated size
* back into its cache. We let the server do generic write
* parameter checking and report problems.
*
* We also avoid an unnecessary invocation of generic_osync_inode(),
* as it is fairly meaningless to sync the metadata of an NFS file.
*
* We eliminate local atime updates, see direct read above.
*
* We avoid unnecessary page cache invalidations for normal cached
* readers of this file.
*
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
ssize_t
nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
{
ssize_t retval = -EINVAL;
loff_t *ppos = &iocb->ki_pos;
unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
struct file *file = iocb->ki_filp;
struct nfs_open_context *ctx =
(struct nfs_open_context *) file->private_data;
struct dentry *dentry = file->f_dentry;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct iovec iov = {
.iov_base = (char __user *)buf,
.iov_len = count,
};
dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
inode->i_ino, (unsigned long) count, (unsigned long) pos);
if (!is_sync_kiocb(iocb))
goto out;
if (count < 0)
goto out;
if (pos < 0)
goto out;
retval = -EFAULT;
if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
goto out;
if (file->f_error) {
retval = file->f_error;
file->f_error = 0;
goto out;
}
retval = -EFBIG;
if (limit != RLIM_INFINITY) {
if (pos >= limit) {
send_sig(SIGXFSZ, current, 0);
goto out;
}
if (count > limit - (unsigned long) pos)
count = limit - (unsigned long) pos;
}
retval = 0;
if (!count)
goto out;
if (mapping->nrpages) {
retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = nfs_wb_all(inode);
if (retval == 0)
retval = filemap_fdatawait(mapping);
if (retval)
goto out;
}
retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
if (mapping->nrpages)
invalidate_inode_pages2(mapping);
if (retval > 0)
*ppos = pos + retval;
out:
return retval;
}
int nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
sizeof(struct nfs_direct_req),
0, SLAB_RECLAIM_ACCOUNT,
NULL, NULL);
if (nfs_direct_cachep == NULL)
return -ENOMEM;
return 0;
}
void nfs_destroy_directcache(void)
{
if (kmem_cache_destroy(nfs_direct_cachep))
printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
}

484
fs/nfs/file.c Normal file
파일 보기

@@ -0,0 +1,484 @@
/*
* linux/fs/nfs/file.c
*
* Copyright (C) 1992 Rick Sladkey
*
* Changes Copyright (C) 1994 by Florian La Roche
* - Do not copy data too often around in the kernel.
* - In nfs_file_read the return value of kmalloc wasn't checked.
* - Put in a better version of read look-ahead buffering. Original idea
* and implementation by Wai S Kok elekokws@ee.nus.sg.
*
* Expire cache on write to a file by Wai S Kok (Oct 1994).
*
* Total rewrite of read side for new NFS buffer cache.. Linus.
*
* nfs regular file handling functions
*/
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_FILE
static int nfs_file_open(struct inode *, struct file *);
static int nfs_file_release(struct inode *, struct file *);
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
static int nfs_file_flush(struct file *);
static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
static int nfs_check_flags(int flags);
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
struct file_operations nfs_file_operations = {
.llseek = remote_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = nfs_file_read,
.aio_write = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
.release = nfs_file_release,
.fsync = nfs_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.sendfile = nfs_file_sendfile,
.check_flags = nfs_check_flags,
};
struct inode_operations nfs_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
#endif
static int nfs_check_flags(int flags)
{
if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
return -EINVAL;
return 0;
}
/*
* Open file
*/
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
struct nfs_server *server = NFS_SERVER(inode);
int (*open)(struct inode *, struct file *);
int res;
res = nfs_check_flags(filp->f_flags);
if (res)
return res;
lock_kernel();
/* Do NFSv4 open() call */
if ((open = server->rpc_ops->file_open) != NULL)
res = open(inode, filp);
unlock_kernel();
return res;
}
static int
nfs_file_release(struct inode *inode, struct file *filp)
{
/* Ensure that dirty pages are flushed out with the right creds */
if (filp->f_mode & FMODE_WRITE)
filemap_fdatawrite(filp->f_mapping);
return NFS_PROTO(inode)->file_release(inode, filp);
}
/*
* Flush all dirty pages, and check for write errors.
*
*/
static int
nfs_file_flush(struct file *file)
{
struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
struct inode *inode = file->f_dentry->d_inode;
int status;
dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
lock_kernel();
/* Ensure that data+attribute caches are up to date after close() */
status = nfs_wb_all(inode);
if (!status) {
status = ctx->error;
ctx->error = 0;
if (!status && !nfs_have_delegation(inode, FMODE_READ))
__nfs_revalidate_inode(NFS_SERVER(inode), inode);
}
unlock_kernel();
return status;
}
static ssize_t
nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_read(iocb, buf, count, pos);
#endif
dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
(unsigned long) count, (unsigned long) pos);
result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (!result)
result = generic_file_aio_read(iocb, buf, count, pos);
return result;
}
static ssize_t
nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
read_actor_t actor, void *target)
{
struct dentry *dentry = filp->f_dentry;
struct inode *inode = dentry->d_inode;
ssize_t res;
dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
(unsigned long) count, (unsigned long long) *ppos);
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (!res)
res = generic_file_sendfile(filp, ppos, count, actor, target);
return res;
}
static int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
int status;
dfprintk(VFS, "nfs: mmap(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (!status)
status = generic_file_mmap(file, vma);
return status;
}
/*
* Flush any dirty pages for this process, and check for write errors.
* The return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*/
static int
nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
struct inode *inode = dentry->d_inode;
int status;
dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
lock_kernel();
status = nfs_wb_all(inode);
if (!status) {
status = ctx->error;
ctx->error = 0;
}
unlock_kernel();
return status;
}
/*
* This does the "real" work of the write. The generic routine has
* allocated the page, locked it, done all the page alignment stuff
* calculations etc. Now we should just copy the data from user
* space and write it back to the real medium..
*
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
return nfs_flush_incompatible(file, page);
}
static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
long status;
lock_kernel();
status = nfs_updatepage(file, page, offset, to-offset);
unlock_kernel();
return status;
}
struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
.set_page_dirty = __set_page_dirty_nobuffers,
.writepage = nfs_writepage,
.writepages = nfs_writepages,
.prepare_write = nfs_prepare_write,
.commit_write = nfs_commit_write,
#ifdef CONFIG_NFS_DIRECTIO
.direct_IO = nfs_direct_IO,
#endif
};
/*
* Write to a file (through the page cache).
*/
static ssize_t
nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_write(iocb, buf, count, pos);
#endif
dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
inode->i_ino, (unsigned long) count, (unsigned long) pos);
result = -EBUSY;
if (IS_SWAPFILE(inode))
goto out_swapfile;
result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (result)
goto out;
result = count;
if (!count)
goto out;
result = generic_file_aio_write(iocb, buf, count, pos);
out:
return result;
out_swapfile:
printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
goto out;
}
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int status = 0;
lock_kernel();
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else {
struct file_lock *cfl = posix_test_lock(filp, fl);
fl->fl_type = F_UNLCK;
if (cfl != NULL)
memcpy(fl, cfl, sizeof(*fl));
}
unlock_kernel();
return status;
}
static int do_vfs_lock(struct file *file, struct file_lock *fl)
{
int res = 0;
switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
case FL_POSIX:
res = posix_lock_file_wait(file, fl);
break;
case FL_FLOCK:
res = flock_lock_file_wait(file, fl);
break;
default:
BUG();
}
if (res < 0)
printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
__FUNCTION__);
return res;
}
static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
sigset_t oldset;
int status;
rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
*/
filemap_fdatawrite(filp->f_mapping);
down(&inode->i_sem);
nfs_wb_all(inode);
up(&inode->i_sem);
filemap_fdatawait(filp->f_mapping);
/* NOTE: special case
* If we're signalled while cleaning up locks on process exit, we
* still need to complete the unlock.
*/
lock_kernel();
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else
status = do_vfs_lock(filp, fl);
unlock_kernel();
rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
sigset_t oldset;
int status;
rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
*/
status = filemap_fdatawrite(filp->f_mapping);
if (status == 0) {
down(&inode->i_sem);
status = nfs_wb_all(inode);
up(&inode->i_sem);
if (status == 0)
status = filemap_fdatawait(filp->f_mapping);
}
if (status < 0)
goto out;
lock_kernel();
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
/* If we were signalled we still need to ensure that
* we clean up any state on the server. We therefore
* record the lock call as having succeeded in order to
* ensure that locks_remove_posix() cleans it out when
* the process exits.
*/
if (status == -EINTR || status == -ERESTARTSYS)
do_vfs_lock(filp, fl);
} else
status = do_vfs_lock(filp, fl);
unlock_kernel();
if (status < 0)
goto out;
/*
* Make sure we clear the cache whenever we try to get the lock.
* This makes locking act as a cache coherency point.
*/
filemap_fdatawrite(filp->f_mapping);
down(&inode->i_sem);
nfs_wb_all(inode); /* we may have slept */
up(&inode->i_sem);
filemap_fdatawait(filp->f_mapping);
nfs_zap_caches(inode);
out:
rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
/*
* Lock a (portion of) a file
*/
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode * inode = filp->f_mapping->host;
dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
if (!inode)
return -EINVAL;
/* No mandatory locks over NFS */
if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
return -ENOLCK;
if (IS_GETLK(cmd))
return do_getlk(filp, cmd, fl);
if (fl->fl_type == F_UNLCK)
return do_unlk(filp, cmd, fl);
return do_setlk(filp, cmd, fl);
}
/*
* Lock a (portion of) a file
*/
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode * inode = filp->f_mapping->host;
dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags);
if (!inode)
return -EINVAL;
/*
* No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by
* using ((u32) filp | 0x80000000) or some such as the pid.
* Not sure whether that would be unique, though, or whether
* that would break in other places.
*/
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
/* We're simulating flock() locks using posix locks on the server */
fl->fl_owner = (fl_owner_t)filp;
fl->fl_start = 0;
fl->fl_end = OFFSET_MAX;
if (fl->fl_type == F_UNLCK)
return do_unlk(filp, cmd, fl);
return do_setlk(filp, cmd, fl);
}

498
fs/nfs/idmap.c Normal file
파일 보기

@@ -0,0 +1,498 @@
/*
* fs/nfs/idmap.c
*
* UID and GID to name mapping for clients.
*
* Copyright (c) 2002 The Regents of the University of Michigan.
* All rights reserved.
*
* Marius Aamodt Eriksen <marius@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_idmap.h>
#define IDMAP_HASH_SZ 128
struct idmap_hashent {
__u32 ih_id;
int ih_namelen;
char ih_name[IDMAP_NAMESZ];
};
struct idmap_hashtable {
__u8 h_type;
struct idmap_hashent h_entries[IDMAP_HASH_SZ];
};
struct idmap {
char idmap_path[48];
struct dentry *idmap_dentry;
wait_queue_head_t idmap_wq;
struct idmap_msg idmap_im;
struct semaphore idmap_lock; /* Serializes upcalls */
struct semaphore idmap_im_lock; /* Protects the hashtable */
struct idmap_hashtable idmap_user_hash;
struct idmap_hashtable idmap_group_hash;
};
static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
char __user *, size_t);
static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
size_t);
void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
static unsigned int fnvhash32(const void *, size_t);
static struct rpc_pipe_ops idmap_upcall_ops = {
.upcall = idmap_pipe_upcall,
.downcall = idmap_pipe_downcall,
.destroy_msg = idmap_pipe_destroy_msg,
};
void
nfs_idmap_new(struct nfs4_client *clp)
{
struct idmap *idmap;
if (clp->cl_idmap != NULL)
return;
if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
return;
memset(idmap, 0, sizeof(*idmap));
snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
"%s/idmap", clp->cl_rpcclient->cl_pathname);
idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
idmap, &idmap_upcall_ops, 0);
if (IS_ERR(idmap->idmap_dentry)) {
kfree(idmap);
return;
}
init_MUTEX(&idmap->idmap_lock);
init_MUTEX(&idmap->idmap_im_lock);
init_waitqueue_head(&idmap->idmap_wq);
idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
clp->cl_idmap = idmap;
}
void
nfs_idmap_delete(struct nfs4_client *clp)
{
struct idmap *idmap = clp->cl_idmap;
if (!idmap)
return;
rpc_unlink(idmap->idmap_path);
clp->cl_idmap = NULL;
kfree(idmap);
}
/*
* Helper routines for manipulating the hashtable
*/
static inline struct idmap_hashent *
idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
{
return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
}
static struct idmap_hashent *
idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
{
struct idmap_hashent *he = idmap_name_hash(h, name, len);
if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
return NULL;
return he;
}
static inline struct idmap_hashent *
idmap_id_hash(struct idmap_hashtable* h, __u32 id)
{
return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ];
}
static struct idmap_hashent *
idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
{
struct idmap_hashent *he = idmap_id_hash(h, id);
if (he->ih_id != id || he->ih_namelen == 0)
return NULL;
return he;
}
/*
* Routines for allocating new entries in the hashtable.
* For now, we just have 1 entry per bucket, so it's all
* pretty trivial.
*/
static inline struct idmap_hashent *
idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len)
{
return idmap_name_hash(h, name, len);
}
static inline struct idmap_hashent *
idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
{
return idmap_id_hash(h, id);
}
static void
idmap_update_entry(struct idmap_hashent *he, const char *name,
size_t namelen, __u32 id)
{
he->ih_id = id;
memcpy(he->ih_name, name, namelen);
he->ih_name[namelen] = '\0';
he->ih_namelen = namelen;
}
/*
* Name -> ID
*/
static int
nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
const char *name, size_t namelen, __u32 *id)
{
struct rpc_pipe_msg msg;
struct idmap_msg *im;
struct idmap_hashent *he;
DECLARE_WAITQUEUE(wq, current);
int ret = -EIO;
im = &idmap->idmap_im;
/*
* String sanity checks
* Note that the userland daemon expects NUL terminated strings
*/
for (;;) {
if (namelen == 0)
return -EINVAL;
if (name[namelen-1] != '\0')
break;
namelen--;
}
if (namelen >= IDMAP_NAMESZ)
return -EINVAL;
down(&idmap->idmap_lock);
down(&idmap->idmap_im_lock);
he = idmap_lookup_name(h, name, namelen);
if (he != NULL) {
*id = he->ih_id;
ret = 0;
goto out;
}
memset(im, 0, sizeof(*im));
memcpy(im->im_name, name, namelen);
im->im_type = h->h_type;
im->im_conv = IDMAP_CONV_NAMETOID;
memset(&msg, 0, sizeof(msg));
msg.data = im;
msg.len = sizeof(*im);
add_wait_queue(&idmap->idmap_wq, &wq);
if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
remove_wait_queue(&idmap->idmap_wq, &wq);
goto out;
}
set_current_state(TASK_UNINTERRUPTIBLE);
up(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
down(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
*id = im->im_id;
ret = 0;
}
out:
memset(im, 0, sizeof(*im));
up(&idmap->idmap_im_lock);
up(&idmap->idmap_lock);
return (ret);
}
/*
* ID -> Name
*/
static int
nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
__u32 id, char *name)
{
struct rpc_pipe_msg msg;
struct idmap_msg *im;
struct idmap_hashent *he;
DECLARE_WAITQUEUE(wq, current);
int ret = -EIO;
unsigned int len;
im = &idmap->idmap_im;
down(&idmap->idmap_lock);
down(&idmap->idmap_im_lock);
he = idmap_lookup_id(h, id);
if (he != 0) {
memcpy(name, he->ih_name, he->ih_namelen);
ret = he->ih_namelen;
goto out;
}
memset(im, 0, sizeof(*im));
im->im_type = h->h_type;
im->im_conv = IDMAP_CONV_IDTONAME;
im->im_id = id;
memset(&msg, 0, sizeof(msg));
msg.data = im;
msg.len = sizeof(*im);
add_wait_queue(&idmap->idmap_wq, &wq);
if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
remove_wait_queue(&idmap->idmap_wq, &wq);
goto out;
}
set_current_state(TASK_UNINTERRUPTIBLE);
up(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
down(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
goto out;
memcpy(name, im->im_name, len);
ret = len;
}
out:
memset(im, 0, sizeof(*im));
up(&idmap->idmap_im_lock);
up(&idmap->idmap_lock);
return ret;
}
/* RPC pipefs upcall/downcall routines */
static ssize_t
idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
char __user *dst, size_t buflen)
{
char *data = (char *)msg->data + msg->copied;
ssize_t mlen = msg->len - msg->copied;
ssize_t left;
if (mlen > buflen)
mlen = buflen;
left = copy_to_user(dst, data, mlen);
if (left < 0) {
msg->errno = left;
return left;
}
mlen -= left;
msg->copied += mlen;
msg->errno = 0;
return mlen;
}
static ssize_t
idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
struct idmap *idmap = (struct idmap *)rpci->private;
struct idmap_msg im_in, *im = &idmap->idmap_im;
struct idmap_hashtable *h;
struct idmap_hashent *he = NULL;
int namelen_in;
int ret;
if (mlen != sizeof(im_in))
return (-ENOSPC);
if (copy_from_user(&im_in, src, mlen) != 0)
return (-EFAULT);
down(&idmap->idmap_im_lock);
ret = mlen;
im->im_status = im_in.im_status;
/* If we got an error, terminate now, and wake up pending upcalls */
if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
wake_up(&idmap->idmap_wq);
goto out;
}
/* Sanity checking of strings */
ret = -EINVAL;
namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
goto out;
switch (im_in.im_type) {
case IDMAP_TYPE_USER:
h = &idmap->idmap_user_hash;
break;
case IDMAP_TYPE_GROUP:
h = &idmap->idmap_group_hash;
break;
default:
goto out;
}
switch (im_in.im_conv) {
case IDMAP_CONV_IDTONAME:
/* Did we match the current upcall? */
if (im->im_conv == IDMAP_CONV_IDTONAME
&& im->im_type == im_in.im_type
&& im->im_id == im_in.im_id) {
/* Yes: copy string, including the terminating '\0' */
memcpy(im->im_name, im_in.im_name, namelen_in);
im->im_name[namelen_in] = '\0';
wake_up(&idmap->idmap_wq);
}
he = idmap_alloc_id(h, im_in.im_id);
break;
case IDMAP_CONV_NAMETOID:
/* Did we match the current upcall? */
if (im->im_conv == IDMAP_CONV_NAMETOID
&& im->im_type == im_in.im_type
&& strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
&& memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
im->im_id = im_in.im_id;
wake_up(&idmap->idmap_wq);
}
he = idmap_alloc_name(h, im_in.im_name, namelen_in);
break;
default:
goto out;
}
/* If the entry is valid, also copy it to the cache */
if (he != NULL)
idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
ret = mlen;
out:
up(&idmap->idmap_im_lock);
return ret;
}
void
idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
struct idmap_msg *im = msg->data;
struct idmap *idmap = container_of(im, struct idmap, idmap_im);
if (msg->errno >= 0)
return;
down(&idmap->idmap_im_lock);
im->im_status = IDMAP_STATUS_LOOKUPFAIL;
wake_up(&idmap->idmap_wq);
up(&idmap->idmap_im_lock);
}
/*
* Fowler/Noll/Vo hash
* http://www.isthe.com/chongo/tech/comp/fnv/
*/
#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */
static unsigned int fnvhash32(const void *buf, size_t buflen)
{
const unsigned char *p, *end = (const unsigned char *)buf + buflen;
unsigned int hash = FNV_1_32;
for (p = buf; p < end; p++) {
hash *= FNV_P_32;
hash ^= (unsigned int)*p;
}
return (hash);
}
int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
{
struct idmap *idmap = clp->cl_idmap;
return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
}
int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
{
struct idmap *idmap = clp->cl_idmap;
return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
}
int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
{
struct idmap *idmap = clp->cl_idmap;
return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
}
int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
{
struct idmap *idmap = clp->cl_idmap;
return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
}

2003
fs/nfs/inode.c Normal file

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다. Load Diff

183
fs/nfs/mount_clnt.c Normal file
파일 보기

@@ -0,0 +1,183 @@
/*
* linux/fs/nfs/mount_clnt.c
*
* MOUNT client to support NFSroot.
*
* Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/uio.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs_fs.h>
#ifdef RPC_DEBUG
# define NFSDBG_FACILITY NFSDBG_ROOT
#endif
/*
#define MOUNT_PROGRAM 100005
#define MOUNT_VERSION 1
#define MOUNT_MNT 1
#define MOUNT_UMNT 3
*/
static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *,
int, int);
static struct rpc_program mnt_program;
struct mnt_fhstatus {
unsigned int status;
struct nfs_fh * fh;
};
/*
* Obtain an NFS file handle for the given host and path
*/
int
nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
int version, int protocol)
{
struct rpc_clnt *mnt_clnt;
struct mnt_fhstatus result = {
.fh = fh
};
char hostname[32];
int status;
int call;
dprintk("NFS: nfs_mount(%08x:%s)\n",
(unsigned)ntohl(addr->sin_addr.s_addr), path);
sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
mnt_clnt = mnt_create(hostname, addr, version, protocol);
if (IS_ERR(mnt_clnt))
return PTR_ERR(mnt_clnt);
call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
status = rpc_call(mnt_clnt, call, path, &result, 0);
return status < 0? status : (result.status? -EACCES : 0);
}
static struct rpc_clnt *
mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
int protocol)
{
struct rpc_xprt *xprt;
struct rpc_clnt *clnt;
xprt = xprt_create_proto(protocol, srvaddr, NULL);
if (IS_ERR(xprt))
return (struct rpc_clnt *)xprt;
clnt = rpc_create_client(xprt, hostname,
&mnt_program, version,
RPC_AUTH_UNIX);
if (IS_ERR(clnt)) {
xprt_destroy(xprt);
} else {
clnt->cl_softrtry = 1;
clnt->cl_chatty = 1;
clnt->cl_oneshot = 1;
clnt->cl_intr = 1;
}
return clnt;
}
/*
* XDR encode/decode functions for MOUNT
*/
static int
xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path)
{
p = xdr_encode_string(p, path);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
static int
xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
{
struct nfs_fh *fh = res->fh;
if ((res->status = ntohl(*p++)) == 0) {
fh->size = NFS2_FHSIZE;
memcpy(fh->data, p, NFS2_FHSIZE);
}
return 0;
}
static int
xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
{
struct nfs_fh *fh = res->fh;
if ((res->status = ntohl(*p++)) == 0) {
int size = ntohl(*p++);
if (size <= NFS3_FHSIZE) {
fh->size = size;
memcpy(fh->data, p, size);
} else
res->status = -EBADHANDLE;
}
return 0;
}
#define MNT_dirpath_sz (1 + 256)
#define MNT_fhstatus_sz (1 + 8)
static struct rpc_procinfo mnt_procedures[] = {
[MNTPROC_MNT] = {
.p_proc = MNTPROC_MNT,
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus,
.p_bufsiz = MNT_dirpath_sz << 2,
},
};
static struct rpc_procinfo mnt3_procedures[] = {
[MOUNTPROC3_MNT] = {
.p_proc = MOUNTPROC3_MNT,
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
.p_bufsiz = MNT_dirpath_sz << 2,
},
};
static struct rpc_version mnt_version1 = {
.number = 1,
.nrprocs = 2,
.procs = mnt_procedures
};
static struct rpc_version mnt_version3 = {
.number = 3,
.nrprocs = 2,
.procs = mnt3_procedures
};
static struct rpc_version * mnt_version[] = {
NULL,
&mnt_version1,
NULL,
&mnt_version3,
};
static struct rpc_stat mnt_stats;
static struct rpc_program mnt_program = {
.name = "mount",
.number = NFS_MNT_PROGRAM,
.nrvers = sizeof(mnt_version)/sizeof(mnt_version[0]),
.version = mnt_version,
.stats = &mnt_stats,
};

711
fs/nfs/nfs2xdr.c Normal file
파일 보기

@@ -0,0 +1,711 @@
/*
* linux/fs/nfs/nfs2xdr.c
*
* XDR functions to encode/decode NFS RPC arguments and results.
*
* Copyright (C) 1992, 1993, 1994 Rick Sladkey
* Copyright (C) 1996 Olaf Kirch
* 04 Aug 1998 Ion Badulescu <ionut@cs.columbia.edu>
* FIFO's need special handling in NFSv2
*/
#include <linux/param.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
#define NFSDBG_FACILITY NFSDBG_XDR
/* #define NFS_PARANOIA 1 */
extern int nfs_stat_to_errno(int stat);
/* Mapping from NFS error code to "errno" error code. */
#define errno_NFSERR_IO EIO
/*
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
*/
#define NFS_fhandle_sz (8)
#define NFS_sattr_sz (8)
#define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2))
#define NFS_path_sz (1+(NFS2_MAXPATHLEN>>2))
#define NFS_fattr_sz (17)
#define NFS_info_sz (5)
#define NFS_entry_sz (NFS_filename_sz+3)
#define NFS_diropargs_sz (NFS_fhandle_sz+NFS_filename_sz)
#define NFS_sattrargs_sz (NFS_fhandle_sz+NFS_sattr_sz)
#define NFS_readlinkargs_sz (NFS_fhandle_sz)
#define NFS_readargs_sz (NFS_fhandle_sz+3)
#define NFS_writeargs_sz (NFS_fhandle_sz+4)
#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz)
#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz)
#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz)
#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
#define NFS_readdirargs_sz (NFS_fhandle_sz+2)
#define NFS_attrstat_sz (1+NFS_fattr_sz)
#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
#define NFS_readlinkres_sz (2)
#define NFS_readres_sz (1+NFS_fattr_sz+1)
#define NFS_writeres_sz (NFS_attrstat_sz)
#define NFS_stat_sz (1)
#define NFS_readdirres_sz (1)
#define NFS_statfsres_sz (1+NFS_info_sz)
/*
* Common NFS XDR functions as inlines
*/
static inline u32 *
xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle)
{
memcpy(p, fhandle->data, NFS2_FHSIZE);
return p + XDR_QUADLEN(NFS2_FHSIZE);
}
static inline u32 *
xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle)
{
/* NFSv2 handles have a fixed length */
fhandle->size = NFS2_FHSIZE;
memcpy(fhandle->data, p, NFS2_FHSIZE);
return p + XDR_QUADLEN(NFS2_FHSIZE);
}
static inline u32*
xdr_encode_time(u32 *p, struct timespec *timep)
{
*p++ = htonl(timep->tv_sec);
/* Convert nanoseconds into microseconds */
*p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0);
return p;
}
static inline u32*
xdr_encode_current_server_time(u32 *p, struct timespec *timep)
{
/*
* Passing the invalid value useconds=1000000 is a
* Sun convention for "set to current server time".
* It's needed to make permissions checks for the
* "touch" program across v2 mounts to Solaris and
* Irix boxes work correctly. See description of
* sattr in section 6.1 of "NFS Illustrated" by
* Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
*/
*p++ = htonl(timep->tv_sec);
*p++ = htonl(1000000);
return p;
}
static inline u32*
xdr_decode_time(u32 *p, struct timespec *timep)
{
timep->tv_sec = ntohl(*p++);
/* Convert microseconds into nanoseconds */
timep->tv_nsec = ntohl(*p++) * 1000;
return p;
}
static u32 *
xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
{
u32 rdev;
fattr->type = (enum nfs_ftype) ntohl(*p++);
fattr->mode = ntohl(*p++);
fattr->nlink = ntohl(*p++);
fattr->uid = ntohl(*p++);
fattr->gid = ntohl(*p++);
fattr->size = ntohl(*p++);
fattr->du.nfs2.blocksize = ntohl(*p++);
rdev = ntohl(*p++);
fattr->du.nfs2.blocks = ntohl(*p++);
fattr->fsid_u.nfs3 = ntohl(*p++);
fattr->fileid = ntohl(*p++);
p = xdr_decode_time(p, &fattr->atime);
p = xdr_decode_time(p, &fattr->mtime);
p = xdr_decode_time(p, &fattr->ctime);
fattr->valid |= NFS_ATTR_FATTR;
fattr->rdev = new_decode_dev(rdev);
if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) {
fattr->type = NFFIFO;
fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
fattr->rdev = 0;
}
fattr->timestamp = jiffies;
return p;
}
#define SATTR(p, attr, flag, field) \
*p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0
static inline u32 *
xdr_encode_sattr(u32 *p, struct iattr *attr)
{
SATTR(p, attr, ATTR_MODE, ia_mode);
SATTR(p, attr, ATTR_UID, ia_uid);
SATTR(p, attr, ATTR_GID, ia_gid);
SATTR(p, attr, ATTR_SIZE, ia_size);
if (attr->ia_valid & ATTR_ATIME_SET) {
p = xdr_encode_time(p, &attr->ia_atime);
} else if (attr->ia_valid & ATTR_ATIME) {
p = xdr_encode_current_server_time(p, &attr->ia_atime);
} else {
*p++ = ~(u32) 0;
*p++ = ~(u32) 0;
}
if (attr->ia_valid & ATTR_MTIME_SET) {
p = xdr_encode_time(p, &attr->ia_mtime);
} else if (attr->ia_valid & ATTR_MTIME) {
p = xdr_encode_current_server_time(p, &attr->ia_mtime);
} else {
*p++ = ~(u32) 0;
*p++ = ~(u32) 0;
}
return p;
}
#undef SATTR
/*
* NFS encode functions
*/
/*
* Encode file handle argument
* GETATTR, READLINK, STATFS
*/
static int
nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
{
p = xdr_encode_fhandle(p, fh);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
/*
* Encode SETATTR arguments
*/
static int
nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args)
{
p = xdr_encode_fhandle(p, args->fh);
p = xdr_encode_sattr(p, args->sattr);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
/*
* Encode directory ops argument
* LOOKUP, REMOVE, RMDIR
*/
static int
nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
{
p = xdr_encode_fhandle(p, args->fh);
p = xdr_encode_array(p, args->name, args->len);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
/*
* Arguments to a READ call. Since we read data directly into the page
* cache, we also set up the reply iovec here so that iov[1] points
* exactly to the page we want to fetch.
*/
static int
nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
{
struct rpc_auth *auth = req->rq_task->tk_auth;
unsigned int replen;
u32 offset = (u32)args->offset;
u32 count = args->count;
p = xdr_encode_fhandle(p, args->fh);
*p++ = htonl(offset);
*p++ = htonl(count);
*p++ = htonl(count);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
/* Inline the page array */
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
xdr_inline_pages(&req->rq_rcv_buf, replen,
args->pages, args->pgbase, count);
return 0;
}
/*
* Decode READ reply
*/
static int
nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
{
struct kvec *iov = req->rq_rcv_buf.head;
int status, count, recvd, hdrlen;
if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status);
p = xdr_decode_fattr(p, res->fattr);
count = ntohl(*p++);
res->eof = 0;
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
printk(KERN_WARNING "NFS: READ reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
dprintk("NFS: READ header is short. iovec will be shifted.\n");
xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
}
recvd = req->rq_rcv_buf.len - hdrlen;
if (count > recvd) {
printk(KERN_WARNING "NFS: server cheating in read reply: "
"count %d > recvd %d\n", count, recvd);
count = recvd;
}
dprintk("RPC: readres OK count %d\n", count);
if (count < res->count)
res->count = count;
return count;
}
/*
* Write arguments. Splice the buffer to be written into the iovec.
*/
static int
nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
{
struct xdr_buf *sndbuf = &req->rq_snd_buf;
u32 offset = (u32)args->offset;
u32 count = args->count;
p = xdr_encode_fhandle(p, args->fh);
*p++ = htonl(offset);
*p++ = htonl(offset);
*p++ = htonl(count);
*p++ = htonl(count);
sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
/* Copy the page array */
xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
return 0;
}
/*
* Encode create arguments
* CREATE, MKDIR
*/
static int
nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
{
p = xdr_encode_fhandle(p, args->fh);
p = xdr_encode_array(p, args->name, args->len);
p = xdr_encode_sattr(p, args->sattr);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
/*
* Encode RENAME arguments
*/
static int
nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
{
p = xdr_encode_fhandle(p, args->fromfh);
p = xdr_encode_array(p, args->fromname, args->fromlen);
p = xdr_encode_fhandle(p, args->tofh);
p = xdr_encode_array(p, args->toname, args->tolen);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
/*
* Encode LINK arguments
*/
static int
nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
{
p = xdr_encode_fhandle(p, args->fromfh);
p = xdr_encode_fhandle(p, args->tofh);
p = xdr_encode_array(p, args->toname, args->tolen);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
/*
* Encode SYMLINK arguments
*/
static int
nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
{
p = xdr_encode_fhandle(p, args->fromfh);
p = xdr_encode_array(p, args->fromname, args->fromlen);
p = xdr_encode_array(p, args->topath, args->tolen);
p = xdr_encode_sattr(p, args->sattr);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
/*
* Encode arguments to readdir call
*/
static int
nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
{
struct rpc_task *task = req->rq_task;
struct rpc_auth *auth = task->tk_auth;
unsigned int replen;
u32 count = args->count;
p = xdr_encode_fhandle(p, args->fh);
*p++ = htonl(args->cookie);
*p++ = htonl(count); /* see above */
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
/* Inline the page array */
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
return 0;
}
/*
* Decode the result of a readdir call.
* We're not really decoding anymore, we just leave the buffer untouched
* and only check that it is syntactically correct.
* The real decoding happens in nfs_decode_entry below, called directly
* from nfs_readdir for each entry.
*/
static int
nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy)
{
struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
struct kvec *iov = rcvbuf->head;
struct page **page;
int hdrlen, recvd;
int status, nr;
unsigned int len, pglen;
u32 *end, *entry, *kaddr;
if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status);
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
}
pglen = rcvbuf->page_len;
recvd = rcvbuf->len - hdrlen;
if (pglen > recvd)
pglen = recvd;
page = rcvbuf->pages;
kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
end = (u32 *)((char *)p + pglen);
entry = p;
for (nr = 0; *p++; nr++) {
if (p + 2 > end)
goto short_pkt;
p++; /* fileid */
len = ntohl(*p++);
p += XDR_QUADLEN(len) + 1; /* name plus cookie */
if (len > NFS2_MAXNAMLEN) {
printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
len);
goto err_unmap;
}
if (p + 2 > end)
goto short_pkt;
entry = p;
}
if (!nr && (entry[0] != 0 || entry[1] == 0))
goto short_pkt;
out:
kunmap_atomic(kaddr, KM_USER0);
return nr;
short_pkt:
entry[0] = entry[1] = 0;
/* truncate listing ? */
if (!nr) {
printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
entry[1] = 1;
}
goto out;
err_unmap:
nr = -errno_NFSERR_IO;
goto out;
}
u32 *
nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
{
if (!*p++) {
if (!*p)
return ERR_PTR(-EAGAIN);
entry->eof = 1;
return ERR_PTR(-EBADCOOKIE);
}
entry->ino = ntohl(*p++);
entry->len = ntohl(*p++);
entry->name = (const char *) p;
p += XDR_QUADLEN(entry->len);
entry->prev_cookie = entry->cookie;
entry->cookie = ntohl(*p++);
entry->eof = !p[0] && p[1];
return p;
}
/*
* NFS XDR decode functions
*/
/*
* Decode simple status reply
*/
static int
nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy)
{
int status;
if ((status = ntohl(*p++)) != 0)
status = -nfs_stat_to_errno(status);
return status;
}
/*
* Decode attrstat reply
* GETATTR, SETATTR, WRITE
*/
static int
nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
{
int status;
if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status);
xdr_decode_fattr(p, fattr);
return 0;
}
/*
* Decode diropres reply
* LOOKUP, CREATE, MKDIR
*/
static int
nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res)
{
int status;
if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status);
p = xdr_decode_fhandle(p, res->fh);
xdr_decode_fattr(p, res->fattr);
return 0;
}
/*
* Encode READLINK args
*/
static int
nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args)
{
struct rpc_auth *auth = req->rq_task->tk_auth;
unsigned int replen;
p = xdr_encode_fhandle(p, args->fh);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
/* Inline the page array */
replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
return 0;
}
/*
* Decode READLINK reply
*/
static int
nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
{
struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
struct kvec *iov = rcvbuf->head;
int hdrlen, len, recvd;
char *kaddr;
int status;
if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status);
/* Convert length of symlink */
len = ntohl(*p++);
if (len >= rcvbuf->page_len || len <= 0) {
dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
return -ENAMETOOLONG;
}
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
"length %d > %Zu\n", hdrlen, iov->iov_len);
return -errno_NFSERR_IO;
} else if (iov->iov_len != hdrlen) {
dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
}
recvd = req->rq_rcv_buf.len - hdrlen;
if (recvd < len) {
printk(KERN_WARNING "NFS: server cheating in readlink reply: "
"count %u > recvd %u\n", len, recvd);
return -EIO;
}
/* NULL terminate the string we got */
kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
kaddr[len+rcvbuf->page_base] = '\0';
kunmap_atomic(kaddr, KM_USER0);
return 0;
}
/*
* Decode WRITE reply
*/
static int
nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
{
res->verf->committed = NFS_FILE_SYNC;
return nfs_xdr_attrstat(req, p, res->fattr);
}
/*
* Decode STATFS reply
*/
static int
nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
{
int status;
if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status);
res->tsize = ntohl(*p++);
res->bsize = ntohl(*p++);
res->blocks = ntohl(*p++);
res->bfree = ntohl(*p++);
res->bavail = ntohl(*p++);
return 0;
}
/*
* We need to translate between nfs status return values and
* the local errno values which may not be the same.
*/
static struct {
int stat;
int errno;
} nfs_errtbl[] = {
{ NFS_OK, 0 },
{ NFSERR_PERM, EPERM },
{ NFSERR_NOENT, ENOENT },
{ NFSERR_IO, errno_NFSERR_IO },
{ NFSERR_NXIO, ENXIO },
/* { NFSERR_EAGAIN, EAGAIN }, */
{ NFSERR_ACCES, EACCES },
{ NFSERR_EXIST, EEXIST },
{ NFSERR_XDEV, EXDEV },
{ NFSERR_NODEV, ENODEV },
{ NFSERR_NOTDIR, ENOTDIR },
{ NFSERR_ISDIR, EISDIR },
{ NFSERR_INVAL, EINVAL },
{ NFSERR_FBIG, EFBIG },
{ NFSERR_NOSPC, ENOSPC },
{ NFSERR_ROFS, EROFS },
{ NFSERR_MLINK, EMLINK },
{ NFSERR_NAMETOOLONG, ENAMETOOLONG },
{ NFSERR_NOTEMPTY, ENOTEMPTY },
{ NFSERR_DQUOT, EDQUOT },
{ NFSERR_STALE, ESTALE },
{ NFSERR_REMOTE, EREMOTE },
#ifdef EWFLUSH
{ NFSERR_WFLUSH, EWFLUSH },
#endif
{ NFSERR_BADHANDLE, EBADHANDLE },
{ NFSERR_NOT_SYNC, ENOTSYNC },
{ NFSERR_BAD_COOKIE, EBADCOOKIE },
{ NFSERR_NOTSUPP, ENOTSUPP },
{ NFSERR_TOOSMALL, ETOOSMALL },
{ NFSERR_SERVERFAULT, ESERVERFAULT },
{ NFSERR_BADTYPE, EBADTYPE },
{ NFSERR_JUKEBOX, EJUKEBOX },
{ -1, EIO }
};
/*
* Convert an NFS error code to a local one.
* This one is used jointly by NFSv2 and NFSv3.
*/
int
nfs_stat_to_errno(int stat)
{
int i;
for (i = 0; nfs_errtbl[i].stat != -1; i++) {
if (nfs_errtbl[i].stat == stat)
return nfs_errtbl[i].errno;
}
printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
return nfs_errtbl[i].errno;
}
#ifndef MAX
# define MAX(a, b) (((a) > (b))? (a) : (b))
#endif
#define PROC(proc, argtype, restype, timer) \
[NFSPROC_##proc] = { \
.p_proc = NFSPROC_##proc, \
.p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs_xdr_##restype, \
.p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
.p_timer = timer \
}
struct rpc_procinfo nfs_procedures[] = {
PROC(GETATTR, fhandle, attrstat, 1),
PROC(SETATTR, sattrargs, attrstat, 0),
PROC(LOOKUP, diropargs, diropres, 2),
PROC(READLINK, readlinkargs, readlinkres, 3),
PROC(READ, readargs, readres, 3),
PROC(WRITE, writeargs, writeres, 4),
PROC(CREATE, createargs, diropres, 0),
PROC(REMOVE, diropargs, stat, 0),
PROC(RENAME, renameargs, stat, 0),
PROC(LINK, linkargs, stat, 0),
PROC(SYMLINK, symlinkargs, stat, 0),
PROC(MKDIR, createargs, diropres, 0),
PROC(RMDIR, diropargs, stat, 0),
PROC(READDIR, readdirargs, readdirres, 3),
PROC(STATFS, fhandle, statfsres, 0),
};
struct rpc_version nfs_version2 = {
.number = 2,
.nrprocs = sizeof(nfs_procedures)/sizeof(nfs_procedures[0]),
.procs = nfs_procedures
};

859
fs/nfs/nfs3proc.c Normal file
파일 보기

@@ -0,0 +1,859 @@
/*
* linux/fs/nfs/nfs3proc.c
*
* Client-side NFSv3 procedures stubs.
*
* Copyright (C) 1997, Olaf Kirch
*/
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/lockd/bind.h>
#include <linux/smp_lock.h>
#define NFSDBG_FACILITY NFSDBG_PROC
extern struct rpc_procinfo nfs3_procedures[];
/* A wrapper to handle the EJUKEBOX error message */
static int
nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
sigset_t oldset;
int res;
rpc_clnt_sigmask(clnt, &oldset);
do {
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!signalled());
rpc_clnt_sigunmask(clnt, &oldset);
return res;
}
static inline int
nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
{
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[proc],
.rpc_argp = argp,
.rpc_resp = resp,
};
return nfs3_rpc_wrapper(clnt, &msg, flags);
}
#define rpc_call(clnt, proc, argp, resp, flags) \
nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
#define rpc_call_sync(clnt, msg, flags) \
nfs3_rpc_wrapper(clnt, msg, flags)
static int
nfs3_async_handle_jukebox(struct rpc_task *task)
{
if (task->tk_status != -EJUKEBOX)
return 0;
task->tk_status = 0;
rpc_restart_call(task);
rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
return 1;
}
/*
* Bare-bones access to getattr: this is for nfs_read_super.
*/
static int
nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
int status;
dprintk("%s: call fsinfo\n", __FUNCTION__);
info->fattr->valid = 0;
status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
}
return status;
}
/*
* One function for each procedure in the NFS protocol.
*/
static int
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
int status;
dprintk("NFS call getattr\n");
fattr->valid = 0;
status = rpc_call(server->client, NFS3PROC_GETATTR,
fhandle, fattr, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
static int
nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct iattr *sattr)
{
struct inode *inode = dentry->d_inode;
struct nfs3_sattrargs arg = {
.fh = NFS_FH(inode),
.sattr = sattr,
};
int status;
dprintk("NFS call setattr\n");
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
dprintk("NFS reply setattr: %d\n", status);
return status;
}
static int
nfs3_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_fattr dir_attr;
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
struct nfs3_diropres res = {
.dir_attr = &dir_attr,
.fh = fhandle,
.fattr = fattr
};
int status;
dprintk("NFS call lookup %s\n", name->name);
dir_attr.valid = 0;
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
fhandle, fattr, 0);
dprintk("NFS reply lookup: %d\n", status);
if (status >= 0)
status = nfs_refresh_inode(dir, &dir_attr);
return status;
}
static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs_fattr fattr;
struct nfs3_accessargs arg = {
.fh = NFS_FH(inode),
};
struct nfs3_accessres res = {
.fattr = &fattr,
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
.rpc_cred = entry->cred
};
int mode = entry->mask;
int status;
dprintk("NFS call access\n");
fattr.valid = 0;
if (mode & MAY_READ)
arg.access |= NFS3_ACCESS_READ;
if (S_ISDIR(inode->i_mode)) {
if (mode & MAY_WRITE)
arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE;
if (mode & MAY_EXEC)
arg.access |= NFS3_ACCESS_LOOKUP;
} else {
if (mode & MAY_WRITE)
arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
if (mode & MAY_EXEC)
arg.access |= NFS3_ACCESS_EXECUTE;
}
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_refresh_inode(inode, &fattr);
if (status == 0) {
entry->mask = 0;
if (res.access & NFS3_ACCESS_READ)
entry->mask |= MAY_READ;
if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE))
entry->mask |= MAY_WRITE;
if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
entry->mask |= MAY_EXEC;
}
dprintk("NFS reply access: %d\n", status);
return status;
}
static int nfs3_proc_readlink(struct inode *inode, struct page *page,
unsigned int pgbase, unsigned int pglen)
{
struct nfs_fattr fattr;
struct nfs3_readlinkargs args = {
.fh = NFS_FH(inode),
.pgbase = pgbase,
.pglen = pglen,
.pages = &page
};
int status;
dprintk("NFS call readlink\n");
fattr.valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
&args, &fattr, 0);
nfs_refresh_inode(inode, &fattr);
dprintk("NFS reply readlink: %d\n", status);
return status;
}
static int nfs3_proc_read(struct nfs_read_data *rdata)
{
int flags = rdata->flags;
struct inode * inode = rdata->inode;
struct nfs_fattr * fattr = rdata->res.fattr;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READ],
.rpc_argp = &rdata->args,
.rpc_resp = &rdata->res,
.rpc_cred = rdata->cred,
};
int status;
dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
(long long) rdata->args.offset);
fattr->valid = 0;
status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
if (status >= 0)
nfs_refresh_inode(inode, fattr);
dprintk("NFS reply read: %d\n", status);
return status;
}
static int nfs3_proc_write(struct nfs_write_data *wdata)
{
int rpcflags = wdata->flags;
struct inode * inode = wdata->inode;
struct nfs_fattr * fattr = wdata->res.fattr;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
.rpc_argp = &wdata->args,
.rpc_resp = &wdata->res,
.rpc_cred = wdata->cred,
};
int status;
dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
(long long) wdata->args.offset);
fattr->valid = 0;
status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
if (status >= 0)
nfs_refresh_inode(inode, fattr);
dprintk("NFS reply write: %d\n", status);
return status < 0? status : wdata->res.count;
}
static int nfs3_proc_commit(struct nfs_write_data *cdata)
{
struct inode * inode = cdata->inode;
struct nfs_fattr * fattr = cdata->res.fattr;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
.rpc_argp = &cdata->args,
.rpc_resp = &cdata->res,
.rpc_cred = cdata->cred,
};
int status;
dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
(long long) cdata->args.offset);
fattr->valid = 0;
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status >= 0)
nfs_refresh_inode(inode, fattr);
dprintk("NFS reply commit: %d\n", status);
return status;
}
/*
* Create a regular file.
* For now, we don't implement O_EXCL.
*/
static int
nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct nfs_fh fhandle;
struct nfs_fattr fattr;
struct nfs_fattr dir_attr;
struct nfs3_createargs arg = {
.fh = NFS_FH(dir),
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr,
};
struct nfs3_diropres res = {
.dir_attr = &dir_attr,
.fh = &fhandle,
.fattr = &fattr
};
int status;
dprintk("NFS call create %s\n", dentry->d_name.name);
arg.createmode = NFS3_CREATE_UNCHECKED;
if (flags & O_EXCL) {
arg.createmode = NFS3_CREATE_EXCLUSIVE;
arg.verifier[0] = jiffies;
arg.verifier[1] = current->pid;
}
again:
dir_attr.valid = 0;
fattr.valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
nfs_refresh_inode(dir, &dir_attr);
/* If the server doesn't support the exclusive creation semantics,
* try again with simple 'guarded' mode. */
if (status == NFSERR_NOTSUPP) {
switch (arg.createmode) {
case NFS3_CREATE_EXCLUSIVE:
arg.createmode = NFS3_CREATE_GUARDED;
break;
case NFS3_CREATE_GUARDED:
arg.createmode = NFS3_CREATE_UNCHECKED;
break;
case NFS3_CREATE_UNCHECKED:
goto out;
}
goto again;
}
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
if (status != 0)
goto out;
/* When we created the file with exclusive semantics, make
* sure we set the attributes afterwards. */
if (arg.createmode == NFS3_CREATE_EXCLUSIVE) {
dprintk("NFS call setattr (post-create)\n");
if (!(sattr->ia_valid & ATTR_ATIME_SET))
sattr->ia_valid |= ATTR_ATIME;
if (!(sattr->ia_valid & ATTR_MTIME_SET))
sattr->ia_valid |= ATTR_MTIME;
/* Note: we could use a guarded setattr here, but I'm
* not sure this buys us anything (and I'd have
* to revamp the NFSv3 XDR code) */
status = nfs3_proc_setattr(dentry, &fattr, sattr);
nfs_refresh_inode(dentry->d_inode, &fattr);
dprintk("NFS reply setattr (post-create): %d\n", status);
}
out:
dprintk("NFS reply create: %d\n", status);
return status;
}
static int
nfs3_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_fattr dir_attr;
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE],
.rpc_argp = &arg,
.rpc_resp = &dir_attr,
};
int status;
dprintk("NFS call remove %s\n", name->name);
dir_attr.valid = 0;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_refresh_inode(dir, &dir_attr);
dprintk("NFS reply remove: %d\n", status);
return status;
}
static int
nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
{
struct unlinkxdr {
struct nfs3_diropargs arg;
struct nfs_fattr res;
} *ptr;
ptr = (struct unlinkxdr *)kmalloc(sizeof(*ptr), GFP_KERNEL);
if (!ptr)
return -ENOMEM;
ptr->arg.fh = NFS_FH(dir->d_inode);
ptr->arg.name = name->name;
ptr->arg.len = name->len;
ptr->res.valid = 0;
msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
msg->rpc_argp = &ptr->arg;
msg->rpc_resp = &ptr->res;
return 0;
}
static int
nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
{
struct rpc_message *msg = &task->tk_msg;
struct nfs_fattr *dir_attr;
if (nfs3_async_handle_jukebox(task))
return 1;
if (msg->rpc_argp) {
dir_attr = (struct nfs_fattr*)msg->rpc_resp;
nfs_refresh_inode(dir->d_inode, dir_attr);
kfree(msg->rpc_argp);
}
return 0;
}
static int
nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
struct inode *new_dir, struct qstr *new_name)
{
struct nfs_fattr old_dir_attr, new_dir_attr;
struct nfs3_renameargs arg = {
.fromfh = NFS_FH(old_dir),
.fromname = old_name->name,
.fromlen = old_name->len,
.tofh = NFS_FH(new_dir),
.toname = new_name->name,
.tolen = new_name->len
};
struct nfs3_renameres res = {
.fromattr = &old_dir_attr,
.toattr = &new_dir_attr
};
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
old_dir_attr.valid = 0;
new_dir_attr.valid = 0;
status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
nfs_refresh_inode(old_dir, &old_dir_attr);
nfs_refresh_inode(new_dir, &new_dir_attr);
dprintk("NFS reply rename: %d\n", status);
return status;
}
static int
nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs_fattr dir_attr, fattr;
struct nfs3_linkargs arg = {
.fromfh = NFS_FH(inode),
.tofh = NFS_FH(dir),
.toname = name->name,
.tolen = name->len
};
struct nfs3_linkres res = {
.dir_attr = &dir_attr,
.fattr = &fattr
};
int status;
dprintk("NFS call link %s\n", name->name);
dir_attr.valid = 0;
fattr.valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
nfs_refresh_inode(dir, &dir_attr);
nfs_refresh_inode(inode, &fattr);
dprintk("NFS reply link: %d\n", status);
return status;
}
static int
nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
struct iattr *sattr, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
struct nfs_fattr dir_attr;
struct nfs3_symlinkargs arg = {
.fromfh = NFS_FH(dir),
.fromname = name->name,
.fromlen = name->len,
.topath = path->name,
.tolen = path->len,
.sattr = sattr
};
struct nfs3_diropres res = {
.dir_attr = &dir_attr,
.fh = fhandle,
.fattr = fattr
};
int status;
if (path->len > NFS3_MAXPATHLEN)
return -ENAMETOOLONG;
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
dir_attr.valid = 0;
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
nfs_refresh_inode(dir, &dir_attr);
dprintk("NFS reply symlink: %d\n", status);
return status;
}
static int
nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
struct nfs_fh fhandle;
struct nfs_fattr fattr, dir_attr;
struct nfs3_mkdirargs arg = {
.fh = NFS_FH(dir),
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr
};
struct nfs3_diropres res = {
.dir_attr = &dir_attr,
.fh = &fhandle,
.fattr = &fattr
};
int status;
dprintk("NFS call mkdir %s\n", dentry->d_name.name);
dir_attr.valid = 0;
fattr.valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
nfs_refresh_inode(dir, &dir_attr);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
dprintk("NFS reply mkdir: %d\n", status);
return status;
}
static int
nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
{
struct nfs_fattr dir_attr;
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
int status;
dprintk("NFS call rmdir %s\n", name->name);
dir_attr.valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
nfs_refresh_inode(dir, &dir_attr);
dprintk("NFS reply rmdir: %d\n", status);
return status;
}
/*
* The READDIR implementation is somewhat hackish - we pass the user buffer
* to the encode function, which installs it in the receive iovec.
* The decode function itself doesn't perform any decoding, it just makes
* sure the reply is syntactically correct.
*
* Also note that this implementation handles both plain readdir and
* readdirplus.
*/
static int
nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page *page, unsigned int count, int plus)
{
struct inode *dir = dentry->d_inode;
struct nfs_fattr dir_attr;
u32 *verf = NFS_COOKIEVERF(dir);
struct nfs3_readdirargs arg = {
.fh = NFS_FH(dir),
.cookie = cookie,
.verf = {verf[0], verf[1]},
.plus = plus,
.count = count,
.pages = &page
};
struct nfs3_readdirres res = {
.dir_attr = &dir_attr,
.verf = verf,
.plus = plus
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READDIR],
.rpc_argp = &arg,
.rpc_resp = &res,
.rpc_cred = cred
};
int status;
lock_kernel();
if (plus)
msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
dprintk("NFS call readdir%s %d\n",
plus? "plus" : "", (unsigned int) cookie);
dir_attr.valid = 0;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_refresh_inode(dir, &dir_attr);
dprintk("NFS reply readdir: %d\n", status);
unlock_kernel();
return status;
}
static int
nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
dev_t rdev)
{
struct nfs_fh fh;
struct nfs_fattr fattr, dir_attr;
struct nfs3_mknodargs arg = {
.fh = NFS_FH(dir),
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr,
.rdev = rdev
};
struct nfs3_diropres res = {
.dir_attr = &dir_attr,
.fh = &fh,
.fattr = &fattr
};
int status;
switch (sattr->ia_mode & S_IFMT) {
case S_IFBLK: arg.type = NF3BLK; break;
case S_IFCHR: arg.type = NF3CHR; break;
case S_IFIFO: arg.type = NF3FIFO; break;
case S_IFSOCK: arg.type = NF3SOCK; break;
default: return -EINVAL;
}
dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name,
MAJOR(rdev), MINOR(rdev));
dir_attr.valid = 0;
fattr.valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
nfs_refresh_inode(dir, &dir_attr);
if (status == 0)
status = nfs_instantiate(dentry, &fh, &fattr);
dprintk("NFS reply mknod: %d\n", status);
return status;
}
static int
nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
int status;
dprintk("NFS call fsstat\n");
stat->fattr->valid = 0;
status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
dprintk("NFS reply statfs: %d\n", status);
return status;
}
static int
nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
int status;
dprintk("NFS call fsinfo\n");
info->fattr->valid = 0;
status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
dprintk("NFS reply fsinfo: %d\n", status);
return status;
}
static int
nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *info)
{
int status;
dprintk("NFS call pathconf\n");
info->fattr->valid = 0;
status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
dprintk("NFS reply pathconf: %d\n", status);
return status;
}
extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
static void
nfs3_read_done(struct rpc_task *task)
{
struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
if (nfs3_async_handle_jukebox(task))
return;
/* Call back common NFS readpage processing */
if (task->tk_status >= 0)
nfs_refresh_inode(data->inode, &data->fattr);
nfs_readpage_result(task);
}
static void
nfs3_proc_read_setup(struct nfs_read_data *data)
{
struct rpc_task *task = &data->task;
struct inode *inode = data->inode;
int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READ],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
/* N.B. Do we need to test? Never called for swapfile inode */
flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
/* Finalize the task. */
rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags);
rpc_call_setup(task, &msg, 0);
}
static void
nfs3_write_done(struct rpc_task *task)
{
struct nfs_write_data *data;
if (nfs3_async_handle_jukebox(task))
return;
data = (struct nfs_write_data *)task->tk_calldata;
if (task->tk_status >= 0)
nfs_refresh_inode(data->inode, data->res.fattr);
nfs_writeback_done(task);
}
static void
nfs3_proc_write_setup(struct nfs_write_data *data, int how)
{
struct rpc_task *task = &data->task;
struct inode *inode = data->inode;
int stable;
int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
if (how & FLUSH_STABLE) {
if (!NFS_I(inode)->ncommit)
stable = NFS_FILE_SYNC;
else
stable = NFS_DATA_SYNC;
} else
stable = NFS_UNSTABLE;
data->args.stable = stable;
/* Set the initial flags for the task. */
flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
/* Finalize the task. */
rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags);
rpc_call_setup(task, &msg, 0);
}
static void
nfs3_commit_done(struct rpc_task *task)
{
struct nfs_write_data *data;
if (nfs3_async_handle_jukebox(task))
return;
data = (struct nfs_write_data *)task->tk_calldata;
if (task->tk_status >= 0)
nfs_refresh_inode(data->inode, data->res.fattr);
nfs_commit_done(task);
}
static void
nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
{
struct rpc_task *task = &data->task;
struct inode *inode = data->inode;
int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
/* Set the initial flags for the task. */
flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
/* Finalize the task. */
rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags);
rpc_call_setup(task, &msg, 0);
}
static int
nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
}
struct nfs_rpc_ops nfs_v3_clientops = {
.version = 3, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs_dir_inode_operations,
.getroot = nfs3_proc_get_root,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
.access = nfs3_proc_access,
.readlink = nfs3_proc_readlink,
.read = nfs3_proc_read,
.write = nfs3_proc_write,
.commit = nfs3_proc_commit,
.create = nfs3_proc_create,
.remove = nfs3_proc_remove,
.unlink_setup = nfs3_proc_unlink_setup,
.unlink_done = nfs3_proc_unlink_done,
.rename = nfs3_proc_rename,
.link = nfs3_proc_link,
.symlink = nfs3_proc_symlink,
.mkdir = nfs3_proc_mkdir,
.rmdir = nfs3_proc_rmdir,
.readdir = nfs3_proc_readdir,
.mknod = nfs3_proc_mknod,
.statfs = nfs3_proc_statfs,
.fsinfo = nfs3_proc_fsinfo,
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
.read_setup = nfs3_proc_read_setup,
.write_setup = nfs3_proc_write_setup,
.commit_setup = nfs3_proc_commit_setup,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs3_proc_lock,
};

1023
fs/nfs/nfs3xdr.c Normal file

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다. Load Diff

2786
fs/nfs/nfs4proc.c Normal file

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다. Load Diff

148
fs/nfs/nfs4renewd.c Normal file
파일 보기

@@ -0,0 +1,148 @@
/*
* fs/nfs/nfs4renewd.c
*
* Copyright (c) 2002 The Regents of the University of Michigan.
* All rights reserved.
*
* Kendrick Smith <kmsmith@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Implementation of the NFSv4 "renew daemon", which wakes up periodically to
* send a RENEW, to keep state alive on the server. The daemon is implemented
* as an rpc_task, not a real kernel thread, so it always runs in rpciod's
* context. There is one renewd per nfs_server.
*
* TODO: If the send queue gets backlogged (e.g., if the server goes down),
* we will keep filling the queue with periodic RENEW requests. We need a
* mechanism for ensuring that if renewd successfully sends off a request,
* then it only wakes up when the request is finished. Maybe use the
* child task framework of the RPC layer?
*/
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#define NFSDBG_FACILITY NFSDBG_PROC
void
nfs4_renew_state(void *data)
{
struct nfs4_client *clp = (struct nfs4_client *)data;
long lease, timeout;
unsigned long last, now;
down_read(&clp->cl_sem);
dprintk("%s: start\n", __FUNCTION__);
/* Are there any active superblocks? */
if (list_empty(&clp->cl_superblocks))
goto out;
spin_lock(&clp->cl_lock);
lease = clp->cl_lease_time;
last = clp->cl_last_renewal;
now = jiffies;
timeout = (2 * lease) / 3 + (long)last - (long)now;
/* Are we close to a lease timeout? */
if (time_after(now, last + lease/3)) {
spin_unlock(&clp->cl_lock);
/* Queue an asynchronous RENEW. */
nfs4_proc_async_renew(clp);
timeout = (2 * lease) / 3;
spin_lock(&clp->cl_lock);
} else
dprintk("%s: failed to call renewd. Reason: lease not expired \n",
__FUNCTION__);
if (timeout < 5 * HZ) /* safeguard */
timeout = 5 * HZ;
dprintk("%s: requeueing work. Lease period = %ld\n",
__FUNCTION__, (timeout + HZ - 1) / HZ);
cancel_delayed_work(&clp->cl_renewd);
schedule_delayed_work(&clp->cl_renewd, timeout);
spin_unlock(&clp->cl_lock);
out:
up_read(&clp->cl_sem);
dprintk("%s: done\n", __FUNCTION__);
}
/* Must be called with clp->cl_sem locked for writes */
void
nfs4_schedule_state_renewal(struct nfs4_client *clp)
{
long timeout;
spin_lock(&clp->cl_lock);
timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal
- (long)jiffies;
if (timeout < 5 * HZ)
timeout = 5 * HZ;
dprintk("%s: requeueing work. Lease period = %ld\n",
__FUNCTION__, (timeout + HZ - 1) / HZ);
cancel_delayed_work(&clp->cl_renewd);
schedule_delayed_work(&clp->cl_renewd, timeout);
spin_unlock(&clp->cl_lock);
}
void
nfs4_renewd_prepare_shutdown(struct nfs_server *server)
{
struct nfs4_client *clp = server->nfs4_state;
if (!clp)
return;
flush_scheduled_work();
down_write(&clp->cl_sem);
if (!list_empty(&server->nfs4_siblings))
list_del_init(&server->nfs4_siblings);
up_write(&clp->cl_sem);
}
/* Must be called with clp->cl_sem locked for writes */
void
nfs4_kill_renewd(struct nfs4_client *clp)
{
down_read(&clp->cl_sem);
if (!list_empty(&clp->cl_superblocks)) {
up_read(&clp->cl_sem);
return;
}
cancel_delayed_work(&clp->cl_renewd);
up_read(&clp->cl_sem);
flush_scheduled_work();
}
/*
* Local variables:
* c-basic-offset: 8
* End:
*/

932
fs/nfs/nfs4state.c Normal file
파일 보기

@@ -0,0 +1,932 @@
/*
* fs/nfs/nfs4state.c
*
* Client-side XDR for NFSv4.
*
* Copyright (c) 2002 The Regents of the University of Michigan.
* All rights reserved.
*
* Kendrick Smith <kmsmith@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Implementation of the NFSv4 state model. For the time being,
* this is minimal, but will be made much more complex in a
* subsequent patch.
*/
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_idmap.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include "callback.h"
#include "delegation.h"
#define OPENOWNER_POOL_SIZE 8
static DEFINE_SPINLOCK(state_spinlock);
nfs4_stateid zero_stateid;
#if 0
nfs4_stateid one_stateid =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
#endif
static LIST_HEAD(nfs4_clientid_list);
static void nfs4_recover_state(void *);
extern void nfs4_renew_state(void *);
void
init_nfsv4_state(struct nfs_server *server)
{
server->nfs4_state = NULL;
INIT_LIST_HEAD(&server->nfs4_siblings);
}
void
destroy_nfsv4_state(struct nfs_server *server)
{
if (server->mnt_path) {
kfree(server->mnt_path);
server->mnt_path = NULL;
}
if (server->nfs4_state) {
nfs4_put_client(server->nfs4_state);
server->nfs4_state = NULL;
}
}
/*
* nfs4_get_client(): returns an empty client structure
* nfs4_put_client(): drops reference to client structure
*
* Since these are allocated/deallocated very rarely, we don't
* bother putting them in a slab cache...
*/
static struct nfs4_client *
nfs4_alloc_client(struct in_addr *addr)
{
struct nfs4_client *clp;
if (nfs_callback_up() < 0)
return NULL;
if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
nfs_callback_down();
return NULL;
}
memset(clp, 0, sizeof(*clp));
memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
init_rwsem(&clp->cl_sem);
INIT_LIST_HEAD(&clp->cl_delegations);
INIT_LIST_HEAD(&clp->cl_state_owners);
INIT_LIST_HEAD(&clp->cl_unused);
spin_lock_init(&clp->cl_lock);
atomic_set(&clp->cl_count, 1);
INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp);
INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
INIT_LIST_HEAD(&clp->cl_superblocks);
init_waitqueue_head(&clp->cl_waitq);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
clp->cl_boot_time = CURRENT_TIME;
clp->cl_state = 1 << NFS4CLNT_OK;
return clp;
}
static void
nfs4_free_client(struct nfs4_client *clp)
{
struct nfs4_state_owner *sp;
while (!list_empty(&clp->cl_unused)) {
sp = list_entry(clp->cl_unused.next,
struct nfs4_state_owner,
so_list);
list_del(&sp->so_list);
kfree(sp);
}
BUG_ON(!list_empty(&clp->cl_state_owners));
if (clp->cl_cred)
put_rpccred(clp->cl_cred);
nfs_idmap_delete(clp);
if (clp->cl_rpcclient)
rpc_shutdown_client(clp->cl_rpcclient);
kfree(clp);
nfs_callback_down();
}
static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
{
struct nfs4_client *clp;
list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
atomic_inc(&clp->cl_count);
return clp;
}
}
return NULL;
}
struct nfs4_client *nfs4_find_client(struct in_addr *addr)
{
struct nfs4_client *clp;
spin_lock(&state_spinlock);
clp = __nfs4_find_client(addr);
spin_unlock(&state_spinlock);
return clp;
}
struct nfs4_client *
nfs4_get_client(struct in_addr *addr)
{
struct nfs4_client *clp, *new = NULL;
spin_lock(&state_spinlock);
for (;;) {
clp = __nfs4_find_client(addr);
if (clp != NULL)
break;
clp = new;
if (clp != NULL) {
list_add(&clp->cl_servers, &nfs4_clientid_list);
new = NULL;
break;
}
spin_unlock(&state_spinlock);
new = nfs4_alloc_client(addr);
spin_lock(&state_spinlock);
if (new == NULL)
break;
}
spin_unlock(&state_spinlock);
if (new)
nfs4_free_client(new);
return clp;
}
void
nfs4_put_client(struct nfs4_client *clp)
{
if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
return;
list_del(&clp->cl_servers);
spin_unlock(&state_spinlock);
BUG_ON(!list_empty(&clp->cl_superblocks));
wake_up_all(&clp->cl_waitq);
rpc_wake_up(&clp->cl_rpcwaitq);
nfs4_kill_renewd(clp);
nfs4_free_client(clp);
}
static int __nfs4_init_client(struct nfs4_client *clp)
{
int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport);
if (status == 0)
status = nfs4_proc_setclientid_confirm(clp);
if (status == 0)
nfs4_schedule_state_renewal(clp);
return status;
}
int nfs4_init_client(struct nfs4_client *clp)
{
return nfs4_map_errors(__nfs4_init_client(clp));
}
u32
nfs4_alloc_lockowner_id(struct nfs4_client *clp)
{
return clp->cl_lockowner_id ++;
}
static struct nfs4_state_owner *
nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
{
struct nfs4_state_owner *sp = NULL;
if (!list_empty(&clp->cl_unused)) {
sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
atomic_inc(&sp->so_count);
sp->so_cred = cred;
list_move(&sp->so_list, &clp->cl_state_owners);
clp->cl_nunused--;
}
return sp;
}
static struct nfs4_state_owner *
nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
{
struct nfs4_state_owner *sp, *res = NULL;
list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
if (sp->so_cred != cred)
continue;
atomic_inc(&sp->so_count);
/* Move to the head of the list */
list_move(&sp->so_list, &clp->cl_state_owners);
res = sp;
break;
}
return res;
}
/*
* nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
* create a new state_owner.
*
*/
static struct nfs4_state_owner *
nfs4_alloc_state_owner(void)
{
struct nfs4_state_owner *sp;
sp = kmalloc(sizeof(*sp),GFP_KERNEL);
if (!sp)
return NULL;
init_MUTEX(&sp->so_sema);
sp->so_seqid = 0; /* arbitrary */
INIT_LIST_HEAD(&sp->so_states);
INIT_LIST_HEAD(&sp->so_delegations);
atomic_set(&sp->so_count, 1);
return sp;
}
void
nfs4_drop_state_owner(struct nfs4_state_owner *sp)
{
struct nfs4_client *clp = sp->so_client;
spin_lock(&clp->cl_lock);
list_del_init(&sp->so_list);
spin_unlock(&clp->cl_lock);
}
/*
* Note: must be called with clp->cl_sem held in order to prevent races
* with reboot recovery!
*/
struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
{
struct nfs4_client *clp = server->nfs4_state;
struct nfs4_state_owner *sp, *new;
get_rpccred(cred);
new = nfs4_alloc_state_owner();
spin_lock(&clp->cl_lock);
sp = nfs4_find_state_owner(clp, cred);
if (sp == NULL)
sp = nfs4_client_grab_unused(clp, cred);
if (sp == NULL && new != NULL) {
list_add(&new->so_list, &clp->cl_state_owners);
new->so_client = clp;
new->so_id = nfs4_alloc_lockowner_id(clp);
new->so_cred = cred;
sp = new;
new = NULL;
}
spin_unlock(&clp->cl_lock);
if (new)
kfree(new);
if (sp != NULL)
return sp;
put_rpccred(cred);
return NULL;
}
/*
* Must be called with clp->cl_sem held in order to avoid races
* with state recovery...
*/
void nfs4_put_state_owner(struct nfs4_state_owner *sp)
{
struct nfs4_client *clp = sp->so_client;
struct rpc_cred *cred = sp->so_cred;
if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
return;
if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
goto out_free;
if (list_empty(&sp->so_list))
goto out_free;
list_move(&sp->so_list, &clp->cl_unused);
clp->cl_nunused++;
spin_unlock(&clp->cl_lock);
put_rpccred(cred);
cred = NULL;
return;
out_free:
list_del(&sp->so_list);
spin_unlock(&clp->cl_lock);
put_rpccred(cred);
kfree(sp);
}
static struct nfs4_state *
nfs4_alloc_open_state(void)
{
struct nfs4_state *state;
state = kmalloc(sizeof(*state), GFP_KERNEL);
if (!state)
return NULL;
state->state = 0;
state->nreaders = 0;
state->nwriters = 0;
state->flags = 0;
memset(state->stateid.data, 0, sizeof(state->stateid.data));
atomic_set(&state->count, 1);
INIT_LIST_HEAD(&state->lock_states);
init_MUTEX(&state->lock_sema);
rwlock_init(&state->state_lock);
return state;
}
static struct nfs4_state *
__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs4_state *state;
mode &= (FMODE_READ|FMODE_WRITE);
list_for_each_entry(state, &nfsi->open_states, inode_states) {
if (state->owner->so_cred != cred)
continue;
if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
continue;
if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
continue;
if ((state->state & mode) != mode)
continue;
atomic_inc(&state->count);
if (mode & FMODE_READ)
state->nreaders++;
if (mode & FMODE_WRITE)
state->nwriters++;
return state;
}
return NULL;
}
static struct nfs4_state *
__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs4_state *state;
list_for_each_entry(state, &nfsi->open_states, inode_states) {
/* Is this in the process of being freed? */
if (state->nreaders == 0 && state->nwriters == 0)
continue;
if (state->owner == owner) {
atomic_inc(&state->count);
return state;
}
}
return NULL;
}
struct nfs4_state *
nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
{
struct nfs4_state *state;
spin_lock(&inode->i_lock);
state = __nfs4_find_state(inode, cred, mode);
spin_unlock(&inode->i_lock);
return state;
}
static void
nfs4_free_open_state(struct nfs4_state *state)
{
kfree(state);
}
struct nfs4_state *
nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
{
struct nfs4_state *state, *new;
struct nfs_inode *nfsi = NFS_I(inode);
spin_lock(&inode->i_lock);
state = __nfs4_find_state_byowner(inode, owner);
spin_unlock(&inode->i_lock);
if (state)
goto out;
new = nfs4_alloc_open_state();
spin_lock(&inode->i_lock);
state = __nfs4_find_state_byowner(inode, owner);
if (state == NULL && new != NULL) {
state = new;
/* Caller *must* be holding owner->so_sem */
/* Note: The reclaim code dictates that we add stateless
* and read-only stateids to the end of the list */
list_add_tail(&state->open_states, &owner->so_states);
state->owner = owner;
atomic_inc(&owner->so_count);
list_add(&state->inode_states, &nfsi->open_states);
state->inode = igrab(inode);
spin_unlock(&inode->i_lock);
} else {
spin_unlock(&inode->i_lock);
if (new)
nfs4_free_open_state(new);
}
out:
return state;
}
/*
* Beware! Caller must be holding exactly one
* reference to clp->cl_sem and owner->so_sema!
*/
void nfs4_put_open_state(struct nfs4_state *state)
{
struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
return;
if (!list_empty(&state->inode_states))
list_del(&state->inode_states);
spin_unlock(&inode->i_lock);
list_del(&state->open_states);
iput(inode);
BUG_ON (state->state != 0);
nfs4_free_open_state(state);
nfs4_put_state_owner(owner);
}
/*
* Beware! Caller must be holding no references to clp->cl_sem!
* of owner->so_sema!
*/
void nfs4_close_state(struct nfs4_state *state, mode_t mode)
{
struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
struct nfs4_client *clp = owner->so_client;
int newstate;
atomic_inc(&owner->so_count);
down_read(&clp->cl_sem);
down(&owner->so_sema);
/* Protect against nfs4_find_state() */
spin_lock(&inode->i_lock);
if (mode & FMODE_READ)
state->nreaders--;
if (mode & FMODE_WRITE)
state->nwriters--;
if (state->nwriters == 0) {
if (state->nreaders == 0)
list_del_init(&state->inode_states);
/* See reclaim code */
list_move_tail(&state->open_states, &owner->so_states);
}
spin_unlock(&inode->i_lock);
newstate = 0;
if (state->state != 0) {
if (state->nreaders)
newstate |= FMODE_READ;
if (state->nwriters)
newstate |= FMODE_WRITE;
if (state->state == newstate)
goto out;
if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
return;
}
out:
nfs4_put_open_state(state);
up(&owner->so_sema);
nfs4_put_state_owner(owner);
up_read(&clp->cl_sem);
}
/*
* Search the state->lock_states for an existing lock_owner
* that is compatible with current->files
*/
static struct nfs4_lock_state *
__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *pos;
list_for_each_entry(pos, &state->lock_states, ls_locks) {
if (pos->ls_owner != fl_owner)
continue;
atomic_inc(&pos->ls_count);
return pos;
}
return NULL;
}
struct nfs4_lock_state *
nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *lsp;
read_lock(&state->state_lock);
lsp = __nfs4_find_lock_state(state, fl_owner);
read_unlock(&state->state_lock);
return lsp;
}
/*
* Return a compatible lock_state. If no initialized lock_state structure
* exists, return an uninitialized one.
*
* The caller must be holding state->lock_sema
*/
static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *lsp;
struct nfs4_client *clp = state->owner->so_client;
lsp = kmalloc(sizeof(*lsp), GFP_KERNEL);
if (lsp == NULL)
return NULL;
lsp->ls_flags = 0;
lsp->ls_seqid = 0; /* arbitrary */
lsp->ls_id = -1;
memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
atomic_set(&lsp->ls_count, 1);
lsp->ls_owner = fl_owner;
INIT_LIST_HEAD(&lsp->ls_locks);
spin_lock(&clp->cl_lock);
lsp->ls_id = nfs4_alloc_lockowner_id(clp);
spin_unlock(&clp->cl_lock);
return lsp;
}
/*
* Return a compatible lock_state. If no initialized lock_state structure
* exists, return an uninitialized one.
*
* The caller must be holding state->lock_sema and clp->cl_sem
*/
struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
{
struct nfs4_lock_state * lsp;
lsp = nfs4_find_lock_state(state, owner);
if (lsp == NULL)
lsp = nfs4_alloc_lock_state(state, owner);
return lsp;
}
/*
* Byte-range lock aware utility to initialize the stateid of read/write
* requests.
*/
void
nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
{
if (test_bit(LK_STATE_IN_USE, &state->flags)) {
struct nfs4_lock_state *lsp;
lsp = nfs4_find_lock_state(state, fl_owner);
if (lsp) {
memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
nfs4_put_lock_state(lsp);
return;
}
}
memcpy(dst, &state->stateid, sizeof(*dst));
}
/*
* Called with state->lock_sema and clp->cl_sem held.
*/
void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
{
if (status == NFS_OK || seqid_mutating_err(-status))
lsp->ls_seqid++;
}
/*
* Check to see if the request lock (type FL_UNLK) effects the fl lock.
*
* fl and request must have the same posix owner
*
* return:
* 0 -> fl not effected by request
* 1 -> fl consumed by request
*/
static int
nfs4_check_unlock(struct file_lock *fl, struct file_lock *request)
{
if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end)
return 1;
return 0;
}
/*
* Post an initialized lock_state on the state->lock_states list.
*/
void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
{
if (!list_empty(&lsp->ls_locks))
return;
atomic_inc(&lsp->ls_count);
write_lock(&state->state_lock);
list_add(&lsp->ls_locks, &state->lock_states);
set_bit(LK_STATE_IN_USE, &state->flags);
write_unlock(&state->state_lock);
}
/*
* to decide to 'reap' lock state:
* 1) search i_flock for file_locks with fl.lock_state = to ls.
* 2) determine if unlock will consume found lock.
* if so, reap
*
* else, don't reap.
*
*/
void
nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
{
struct inode *inode = state->inode;
struct file_lock *fl;
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & FL_POSIX))
continue;
if (fl->fl_owner != lsp->ls_owner)
continue;
/* Exit if we find at least one lock which is not consumed */
if (nfs4_check_unlock(fl,request) == 0)
return;
}
write_lock(&state->state_lock);
list_del_init(&lsp->ls_locks);
if (list_empty(&state->lock_states))
clear_bit(LK_STATE_IN_USE, &state->flags);
write_unlock(&state->state_lock);
nfs4_put_lock_state(lsp);
}
/*
* Release reference to lock_state, and free it if we see that
* it is no longer in use
*/
void
nfs4_put_lock_state(struct nfs4_lock_state *lsp)
{
if (!atomic_dec_and_test(&lsp->ls_count))
return;
BUG_ON (!list_empty(&lsp->ls_locks));
kfree(lsp);
}
/*
* Called with sp->so_sema and clp->cl_sem held.
*
* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
* failed with a seqid incrementing error -
* see comments nfs_fs.h:seqid_mutating_error()
*/
void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
{
if (status == NFS_OK || seqid_mutating_err(-status))
sp->so_seqid++;
/* If the server returns BAD_SEQID, unhash state_owner here */
if (status == -NFS4ERR_BAD_SEQID)
nfs4_drop_state_owner(sp);
}
static int reclaimer(void *);
struct reclaimer_args {
struct nfs4_client *clp;
struct completion complete;
};
/*
* State recovery routine
*/
void
nfs4_recover_state(void *data)
{
struct nfs4_client *clp = (struct nfs4_client *)data;
struct reclaimer_args args = {
.clp = clp,
};
might_sleep();
init_completion(&args.complete);
if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
goto out_failed_clear;
wait_for_completion(&args.complete);
return;
out_failed_clear:
set_bit(NFS4CLNT_OK, &clp->cl_state);
wake_up_all(&clp->cl_waitq);
rpc_wake_up(&clp->cl_rpcwaitq);
}
/*
* Schedule a state recovery attempt
*/
void
nfs4_schedule_state_recovery(struct nfs4_client *clp)
{
if (!clp)
return;
if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state))
schedule_work(&clp->cl_recoverd);
}
static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state)
{
struct inode *inode = state->inode;
struct file_lock *fl;
int status = 0;
for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
if (!(fl->fl_flags & FL_POSIX))
continue;
if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
continue;
status = ops->recover_lock(state, fl);
if (status >= 0)
continue;
switch (status) {
default:
printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
__FUNCTION__, status);
case -NFS4ERR_EXPIRED:
case -NFS4ERR_NO_GRACE:
case -NFS4ERR_RECLAIM_BAD:
case -NFS4ERR_RECLAIM_CONFLICT:
/* kill_proc(fl->fl_owner, SIGLOST, 1); */
break;
case -NFS4ERR_STALE_CLIENTID:
goto out_err;
}
}
return 0;
out_err:
return status;
}
static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp)
{
struct nfs4_state *state;
struct nfs4_lock_state *lock;
int status = 0;
/* Note: we rely on the sp->so_states list being ordered
* so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
* states first.
* This is needed to ensure that the server won't give us any
* read delegations that we have to return if, say, we are
* recovering after a network partition or a reboot from a
* server that doesn't support a grace period.
*/
list_for_each_entry(state, &sp->so_states, open_states) {
if (state->state == 0)
continue;
status = ops->recover_open(sp, state);
list_for_each_entry(lock, &state->lock_states, ls_locks)
lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
if (status >= 0) {
status = nfs4_reclaim_locks(ops, state);
if (status < 0)
goto out_err;
list_for_each_entry(lock, &state->lock_states, ls_locks) {
if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
printk("%s: Lock reclaim failed!\n",
__FUNCTION__);
}
continue;
}
switch (status) {
default:
printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
__FUNCTION__, status);
case -ENOENT:
case -NFS4ERR_RECLAIM_BAD:
case -NFS4ERR_RECLAIM_CONFLICT:
/*
* Open state on this file cannot be recovered
* All we can do is revert to using the zero stateid.
*/
memset(state->stateid.data, 0,
sizeof(state->stateid.data));
/* Mark the file as being 'closed' */
state->state = 0;
break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_NO_GRACE:
case -NFS4ERR_STALE_CLIENTID:
goto out_err;
}
}
return 0;
out_err:
return status;
}
static int reclaimer(void *ptr)
{
struct reclaimer_args *args = (struct reclaimer_args *)ptr;
struct nfs4_client *clp = args->clp;
struct nfs4_state_owner *sp;
struct nfs4_state_recovery_ops *ops;
int status = 0;
daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr));
allow_signal(SIGKILL);
atomic_inc(&clp->cl_count);
complete(&args->complete);
/* Ensure exclusive access to NFSv4 state */
lock_kernel();
down_write(&clp->cl_sem);
/* Are there any NFS mounts out there? */
if (list_empty(&clp->cl_superblocks))
goto out;
restart_loop:
status = nfs4_proc_renew(clp);
switch (status) {
case 0:
case -NFS4ERR_CB_PATH_DOWN:
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_LEASE_MOVED:
ops = &nfs4_reboot_recovery_ops;
break;
default:
ops = &nfs4_network_partition_recovery_ops;
};
status = __nfs4_init_client(clp);
if (status)
goto out_error;
/* Mark all delegations for reclaim */
nfs_delegation_mark_reclaim(clp);
/* Note: list is protected by exclusive lock on cl->cl_sem */
list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
status = nfs4_reclaim_open_state(ops, sp);
if (status < 0) {
if (status == -NFS4ERR_NO_GRACE) {
ops = &nfs4_network_partition_recovery_ops;
status = nfs4_reclaim_open_state(ops, sp);
}
if (status == -NFS4ERR_STALE_CLIENTID)
goto restart_loop;
if (status == -NFS4ERR_EXPIRED)
goto restart_loop;
}
}
nfs_delegation_reap_unclaimed(clp);
out:
set_bit(NFS4CLNT_OK, &clp->cl_state);
up_write(&clp->cl_sem);
unlock_kernel();
wake_up_all(&clp->cl_waitq);
rpc_wake_up(&clp->cl_rpcwaitq);
if (status == -NFS4ERR_CB_PATH_DOWN)
nfs_handle_cb_pathdown(clp);
nfs4_put_client(clp);
return 0;
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
NIPQUAD(clp->cl_addr.s_addr), -status);
goto out;
}
/*
* Local variables:
* c-basic-offset: 8
* End:
*/

4034
fs/nfs/nfs4xdr.c Normal file

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다. Load Diff

513
fs/nfs/nfsroot.c Normal file
파일 보기

@@ -0,0 +1,513 @@
/*
* $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $
*
* Copyright (C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de>
*
* Allow an NFS filesystem to be mounted as root. The way this works is:
* (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
* (2) Handle RPC negotiation with the system which replied to RARP or
* was reported as a boot server by BOOTP or manually.
* (3) The actual mounting is done later, when init() is running.
*
*
* Changes:
*
* Alan Cox : Removed get_address name clash with FPU.
* Alan Cox : Reformatted a bit.
* Gero Kuhlmann : Code cleanup
* Michael Rausch : Fixed recognition of an incoming RARP answer.
* Martin Mares : (2.0) Auto-configuration via BOOTP supported.
* Martin Mares : Manual selection of interface & BOOTP/RARP.
* Martin Mares : Using network routes instead of host routes,
* allowing the default configuration to be used
* for normal operation of the host.
* Martin Mares : Randomized timer with exponential backoff
* installed to minimize network congestion.
* Martin Mares : Code cleanup.
* Martin Mares : (2.1) BOOTP and RARP made configuration options.
* Martin Mares : Server hostname generation fixed.
* Gerd Knorr : Fixed wired inode handling
* Martin Mares : (2.2) "0.0.0.0" addresses from command line ignored.
* Martin Mares : RARP replies not tested for server address.
* Gero Kuhlmann : (2.3) Some bug fixes and code cleanup again (please
* send me your new patches _before_ bothering
* Linus so that I don' always have to cleanup
* _afterwards_ - thanks)
* Gero Kuhlmann : Last changes of Martin Mares undone.
* Gero Kuhlmann : RARP replies are tested for specified server
* again. However, it's now possible to have
* different RARP and NFS servers.
* Gero Kuhlmann : "0.0.0.0" addresses from command line are
* now mapped to INADDR_NONE.
* Gero Kuhlmann : Fixed a bug which prevented BOOTP path name
* from being used (thanks to Leo Spiekman)
* Andy Walker : Allow to specify the NFS server in nfs_root
* without giving a path name
* Swen Th<54>mmler : Allow to specify the NFS options in nfs_root
* without giving a path name. Fix BOOTP request
* for domainname (domainname is NIS domain, not
* DNS domain!). Skip dummy devices for BOOTP.
* Jacek Zapala : Fixed a bug which prevented server-ip address
* from nfsroot parameter from being used.
* Olaf Kirch : Adapted to new NFS code.
* Jakub Jelinek : Free used code segment.
* Marko Kohtala : Fixed some bugs.
* Martin Mares : Debug message cleanup
* Martin Mares : Changed to use the new generic IP layer autoconfig
* code. BOOTP and RARP moved there.
* Martin Mares : Default path now contains host name instead of
* host IP address (but host name defaults to IP
* address anyway).
* Martin Mares : Use root_server_addr appropriately during setup.
* Martin Mares : Rewrote parameter parsing, now hopefully giving
* correct overriding.
* Trond Myklebust : Add in preliminary support for NFSv3 and TCP.
* Fix bug in root_nfs_addr(). nfs_data.namlen
* is NOT for the length of the hostname.
* Hua Qin : Support for mounting root file system via
* NFS over TCP.
* Fabian Frederick: Option parser rebuilt (using parser lib)
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/in.h>
#include <linux/major.h>
#include <linux/utsname.h>
#include <linux/inet.h>
#include <linux/root_dev.h>
#include <net/ipconfig.h>
#include <linux/parser.h>
/* Define this to allow debugging output */
#undef NFSROOT_DEBUG
#define NFSDBG_FACILITY NFSDBG_ROOT
/* Default path we try to mount. "%s" gets replaced by our IP address */
#define NFS_ROOT "/tftpboot/%s"
/* Parameters passed from the kernel command line */
static char nfs_root_name[256] __initdata = "";
/* Address of NFS server */
static __u32 servaddr __initdata = 0;
/* Name of directory to mount */
static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, };
/* NFS-related data */
static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
static int nfs_port __initdata = 0; /* Port to connect to for NFS */
static int mount_port __initdata = 0; /* Mount daemon port number */
/***************************************************************************
Parsing of options
***************************************************************************/
enum {
/* Options that take integer arguments */
Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
/* Options that take no arguments */
Opt_soft, Opt_hard, Opt_intr,
Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac,
Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
/* Error token */
Opt_err
};
static match_table_t __initdata tokens = {
{Opt_port, "port=%u"},
{Opt_rsize, "rsize=%u"},
{Opt_wsize, "wsize=%u"},
{Opt_timeo, "timeo=%u"},
{Opt_retrans, "retrans=%u"},
{Opt_acregmin, "acregmin=%u"},
{Opt_acregmax, "acregmax=%u"},
{Opt_acdirmin, "acdirmin=%u"},
{Opt_acdirmax, "acdirmax=%u"},
{Opt_soft, "soft"},
{Opt_hard, "hard"},
{Opt_intr, "intr"},
{Opt_nointr, "nointr"},
{Opt_posix, "posix"},
{Opt_noposix, "noposix"},
{Opt_cto, "cto"},
{Opt_nocto, "nocto"},
{Opt_ac, "ac"},
{Opt_noac, "noac"},
{Opt_lock, "lock"},
{Opt_nolock, "nolock"},
{Opt_v2, "nfsvers=2"},
{Opt_v2, "v2"},
{Opt_v3, "nfsvers=3"},
{Opt_v3, "v3"},
{Opt_udp, "proto=udp"},
{Opt_udp, "udp"},
{Opt_tcp, "proto=tcp"},
{Opt_tcp, "tcp"},
{Opt_err, NULL}
};
/*
* Parse option string.
*/
static int __init root_nfs_parse(char *name, char *buf)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
if (!name)
return 1;
/* Set the NFS remote path */
p = strsep(&name, ",");
if (p[0] != '\0' && strcmp(p, "default") != 0)
strlcpy(buf, p, NFS_MAXPATHLEN);
while ((p = strsep (&name, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
/* %u tokens only. Beware if you add new tokens! */
if (token < Opt_soft && match_int(&args[0], &option))
return 0;
switch (token) {
case Opt_port:
nfs_port = option;
break;
case Opt_rsize:
nfs_data.rsize = option;
break;
case Opt_wsize:
nfs_data.wsize = option;
break;
case Opt_timeo:
nfs_data.timeo = option;
break;
case Opt_retrans:
nfs_data.retrans = option;
break;
case Opt_acregmin:
nfs_data.acregmin = option;
break;
case Opt_acregmax:
nfs_data.acregmax = option;
break;
case Opt_acdirmin:
nfs_data.acdirmin = option;
break;
case Opt_acdirmax:
nfs_data.acdirmax = option;
break;
case Opt_soft:
nfs_data.flags |= NFS_MOUNT_SOFT;
break;
case Opt_hard:
nfs_data.flags &= ~NFS_MOUNT_SOFT;
break;
case Opt_intr:
nfs_data.flags |= NFS_MOUNT_INTR;
break;
case Opt_nointr:
nfs_data.flags &= ~NFS_MOUNT_INTR;
break;
case Opt_posix:
nfs_data.flags |= NFS_MOUNT_POSIX;
break;
case Opt_noposix:
nfs_data.flags &= ~NFS_MOUNT_POSIX;
break;
case Opt_cto:
nfs_data.flags &= ~NFS_MOUNT_NOCTO;
break;
case Opt_nocto:
nfs_data.flags |= NFS_MOUNT_NOCTO;
break;
case Opt_ac:
nfs_data.flags &= ~NFS_MOUNT_NOAC;
break;
case Opt_noac:
nfs_data.flags |= NFS_MOUNT_NOAC;
break;
case Opt_lock:
nfs_data.flags &= ~NFS_MOUNT_NONLM;
break;
case Opt_nolock:
nfs_data.flags |= NFS_MOUNT_NONLM;
break;
case Opt_v2:
nfs_data.flags &= ~NFS_MOUNT_VER3;
break;
case Opt_v3:
nfs_data.flags |= NFS_MOUNT_VER3;
break;
case Opt_udp:
nfs_data.flags &= ~NFS_MOUNT_TCP;
break;
case Opt_tcp:
nfs_data.flags |= NFS_MOUNT_TCP;
break;
default :
return 0;
}
}
return 1;
}
/*
* Prepare the NFS data structure and parse all options.
*/
static int __init root_nfs_name(char *name)
{
static char buf[NFS_MAXPATHLEN] __initdata;
char *cp;
/* Set some default values */
memset(&nfs_data, 0, sizeof(nfs_data));
nfs_port = -1;
nfs_data.version = NFS_MOUNT_VERSION;
nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */
nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
nfs_data.acregmin = 3;
nfs_data.acregmax = 60;
nfs_data.acdirmin = 30;
nfs_data.acdirmax = 60;
strcpy(buf, NFS_ROOT);
/* Process options received from the remote server */
root_nfs_parse(root_server_path, buf);
/* Override them by options set on kernel command-line */
root_nfs_parse(name, buf);
cp = system_utsname.nodename;
if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
return -1;
}
sprintf(nfs_path, buf, cp);
return 1;
}
/*
* Get NFS server address.
*/
static int __init root_nfs_addr(void)
{
if ((servaddr = root_server_addr) == INADDR_NONE) {
printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
return -1;
}
snprintf(nfs_data.hostname, sizeof(nfs_data.hostname),
"%u.%u.%u.%u", NIPQUAD(servaddr));
return 0;
}
/*
* Tell the user what's going on.
*/
#ifdef NFSROOT_DEBUG
static void __init root_nfs_print(void)
{
printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
nfs_path, nfs_data.hostname);
printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
nfs_data.acregmin, nfs_data.acregmax,
nfs_data.acdirmin, nfs_data.acdirmax);
printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n",
nfs_port, mount_port, nfs_data.flags);
}
#endif
static int __init root_nfs_init(void)
{
#ifdef NFSROOT_DEBUG
nfs_debug |= NFSDBG_ROOT;
#endif
/*
* Decode the root directory path name and NFS options from
* the kernel command line. This has to go here in order to
* be able to use the client IP address for the remote root
* directory (necessary for pure RARP booting).
*/
if (root_nfs_name(nfs_root_name) < 0 ||
root_nfs_addr() < 0)
return -1;
#ifdef NFSROOT_DEBUG
root_nfs_print();
#endif
return 0;
}
/*
* Parse NFS server and directory information passed on the kernel
* command line.
*/
static int __init nfs_root_setup(char *line)
{
ROOT_DEV = Root_NFS;
if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
strlcpy(nfs_root_name, line, sizeof(nfs_root_name));
} else {
int n = strlen(line) + sizeof(NFS_ROOT) - 1;
if (n >= sizeof(nfs_root_name))
line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0';
sprintf(nfs_root_name, NFS_ROOT, line);
}
root_server_addr = root_nfs_parse_addr(nfs_root_name);
return 1;
}
__setup("nfsroot=", nfs_root_setup);
/***************************************************************************
Routines to actually mount the root directory
***************************************************************************/
/*
* Construct sockaddr_in from address and port number.
*/
static inline void
set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port)
{
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = addr;
sin->sin_port = port;
}
/*
* Query server portmapper for the port of a daemon program.
*/
static int __init root_nfs_getport(int program, int version, int proto)
{
struct sockaddr_in sin;
printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
program, version, NIPQUAD(servaddr));
set_sockaddr(&sin, servaddr, 0);
return rpc_getport_external(&sin, program, version, proto);
}
/*
* Use portmapper to find mountd and nfsd port numbers if not overriden
* by the user. Use defaults if portmapper is not available.
* XXX: Is there any nfs server with no portmapper?
*/
static int __init root_nfs_ports(void)
{
int port;
int nfsd_ver, mountd_ver;
int nfsd_port, mountd_port;
int proto;
if (nfs_data.flags & NFS_MOUNT_VER3) {
nfsd_ver = NFS3_VERSION;
mountd_ver = NFS_MNT3_VERSION;
nfsd_port = NFS_PORT;
mountd_port = NFS_MNT_PORT;
} else {
nfsd_ver = NFS2_VERSION;
mountd_ver = NFS_MNT_VERSION;
nfsd_port = NFS_PORT;
mountd_port = NFS_MNT_PORT;
}
proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
if (nfs_port < 0) {
if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
"number from server, using default\n");
port = nfsd_port;
}
nfs_port = htons(port);
dprintk("Root-NFS: Portmapper on server returned %d "
"as nfsd port\n", port);
}
if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) {
printk(KERN_ERR "Root-NFS: Unable to get mountd port "
"number from server, using default\n");
port = mountd_port;
}
mount_port = htons(port);
dprintk("Root-NFS: mountd port is %d\n", port);
return 0;
}
/*
* Get a file handle from the server for the directory which is to be
* mounted.
*/
static int __init root_nfs_get_handle(void)
{
struct nfs_fh fh;
struct sockaddr_in sin;
int status;
int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
IPPROTO_TCP : IPPROTO_UDP;
int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
NFS_MNT3_VERSION : NFS_MNT_VERSION;
set_sockaddr(&sin, servaddr, mount_port);
status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
if (status < 0)
printk(KERN_ERR "Root-NFS: Server returned error %d "
"while mounting %s\n", status, nfs_path);
else {
nfs_data.root.size = fh.size;
memcpy(nfs_data.root.data, fh.data, fh.size);
}
return status;
}
/*
* Get the NFS port numbers and file handle, and return the prepared 'data'
* argument for mount() if everything went OK. Return NULL otherwise.
*/
void * __init nfs_root_data(void)
{
if (root_nfs_init() < 0
|| root_nfs_ports() < 0
|| root_nfs_get_handle() < 0)
return NULL;
set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port);
return (void*)&nfs_data;
}

309
fs/nfs/pagelist.c Normal file
파일 보기

@@ -0,0 +1,309 @@
/*
* linux/fs/nfs/pagelist.c
*
* A set of helper functions for managing NFS read and write requests.
* The main purpose of these routines is to provide support for the
* coalescing of several requests into a single RPC call.
*
* Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
*
*/
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#define NFS_PARANOIA 1
static kmem_cache_t *nfs_page_cachep;
static inline struct nfs_page *
nfs_page_alloc(void)
{
struct nfs_page *p;
p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->wb_list);
}
return p;
}
static inline void
nfs_page_free(struct nfs_page *p)
{
kmem_cache_free(nfs_page_cachep, p);
}
/**
* nfs_create_request - Create an NFS read/write request.
* @file: file descriptor to use
* @inode: inode to which the request is attached
* @page: page to write
* @offset: starting offset within the page for the write
* @count: number of bytes to read/write
*
* The page must be locked by the caller. This makes sure we never
* create two different requests for the same page, and avoids
* a possible deadlock when we reach the hard limit on the number
* of dirty pages.
* User should ensure it is safe to sleep in this function.
*/
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
struct page *page,
unsigned int offset, unsigned int count)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_page *req;
/* Deal with hard limits. */
for (;;) {
/* try to allocate the request struct */
req = nfs_page_alloc();
if (req != NULL)
break;
/* Try to free up at least one request in order to stay
* below the hard limit
*/
if (signalled() && (server->flags & NFS_MOUNT_INTR))
return ERR_PTR(-ERESTARTSYS);
yield();
}
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
atomic_set(&req->wb_complete, 0);
req->wb_index = page->index;
page_cache_get(page);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
atomic_set(&req->wb_count, 1);
req->wb_context = get_nfs_open_context(ctx);
return req;
}
/**
* nfs_unlock_request - Unlock request and wake up sleepers.
* @req:
*/
void nfs_unlock_request(struct nfs_page *req)
{
if (!NFS_WBACK_BUSY(req)) {
printk(KERN_ERR "NFS: Invalid unlock attempted\n");
BUG();
}
smp_mb__before_clear_bit();
clear_bit(PG_BUSY, &req->wb_flags);
smp_mb__after_clear_bit();
wake_up_all(&req->wb_context->waitq);
nfs_release_request(req);
}
/**
* nfs_clear_request - Free up all resources allocated to the request
* @req:
*
* Release page resources associated with a write request after it
* has completed.
*/
void nfs_clear_request(struct nfs_page *req)
{
if (req->wb_page) {
page_cache_release(req->wb_page);
req->wb_page = NULL;
}
}
/**
* nfs_release_request - Release the count on an NFS read/write request
* @req: request to release
*
* Note: Should never be called with the spinlock held!
*/
void
nfs_release_request(struct nfs_page *req)
{
if (!atomic_dec_and_test(&req->wb_count))
return;
#ifdef NFS_PARANOIA
BUG_ON (!list_empty(&req->wb_list));
BUG_ON (NFS_WBACK_BUSY(req));
#endif
/* Release struct file or cached credential */
nfs_clear_request(req);
put_nfs_open_context(req->wb_context);
nfs_page_free(req);
}
/**
* nfs_list_add_request - Insert a request into a sorted list
* @req: request
* @head: head of list into which to insert the request.
*
* Note that the wb_list is sorted by page index in order to facilitate
* coalescing of requests.
* We use an insertion sort that is optimized for the case of appended
* writes.
*/
void
nfs_list_add_request(struct nfs_page *req, struct list_head *head)
{
struct list_head *pos;
#ifdef NFS_PARANOIA
if (!list_empty(&req->wb_list)) {
printk(KERN_ERR "NFS: Add to list failed!\n");
BUG();
}
#endif
list_for_each_prev(pos, head) {
struct nfs_page *p = nfs_list_entry(pos);
if (p->wb_index < req->wb_index)
break;
}
list_add(&req->wb_list, pos);
req->wb_list_head = head;
}
/**
* nfs_wait_on_request - Wait for a request to complete.
* @req: request to wait upon.
*
* Interruptible by signals only if mounted with intr flag.
* The user is responsible for holding a count on the request.
*/
int
nfs_wait_on_request(struct nfs_page *req)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
if (!NFS_WBACK_BUSY(req))
return 0;
return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req));
}
/**
* nfs_coalesce_requests - Split coalesced requests out from a list.
* @head: source list
* @dst: destination list
* @nmax: maximum number of requests to coalesce
*
* Moves a maximum of 'nmax' elements from one list to another.
* The elements are checked to ensure that they form a contiguous set
* of pages, and that the RPC credentials are the same.
*/
int
nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
unsigned int nmax)
{
struct nfs_page *req = NULL;
unsigned int npages = 0;
while (!list_empty(head)) {
struct nfs_page *prev = req;
req = nfs_list_entry(head->next);
if (prev) {
if (req->wb_context->cred != prev->wb_context->cred)
break;
if (req->wb_context->lockowner != prev->wb_context->lockowner)
break;
if (req->wb_context->state != prev->wb_context->state)
break;
if (req->wb_index != (prev->wb_index + 1))
break;
if (req->wb_pgbase != 0)
break;
}
nfs_list_remove_request(req);
nfs_list_add_request(req, dst);
npages++;
if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE)
break;
if (npages >= nmax)
break;
}
return npages;
}
/**
* nfs_scan_list - Scan a list for matching requests
* @head: One of the NFS inode request lists
* @dst: Destination list
* @idx_start: lower bound of page->index to scan
* @npages: idx_start + npages sets the upper bound to scan.
*
* Moves elements from one of the inode request lists.
* If the number of requests is set to 0, the entire address_space
* starting at index idx_start, is scanned.
* The requests are *not* checked to ensure that they form a contiguous set.
* You must be holding the inode's req_lock when calling this function
*/
int
nfs_scan_list(struct list_head *head, struct list_head *dst,
unsigned long idx_start, unsigned int npages)
{
struct list_head *pos, *tmp;
struct nfs_page *req;
unsigned long idx_end;
int res;
res = 0;
if (npages == 0)
idx_end = ~0;
else
idx_end = idx_start + npages - 1;
list_for_each_safe(pos, tmp, head) {
req = nfs_list_entry(pos);
if (req->wb_index < idx_start)
continue;
if (req->wb_index > idx_end)
break;
if (!nfs_lock_request(req))
continue;
nfs_list_remove_request(req);
nfs_list_add_request(req, dst);
res++;
}
return res;
}
int nfs_init_nfspagecache(void)
{
nfs_page_cachep = kmem_cache_create("nfs_page",
sizeof(struct nfs_page),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (nfs_page_cachep == NULL)
return -ENOMEM;
return 0;
}
void nfs_destroy_nfspagecache(void)
{
if (kmem_cache_destroy(nfs_page_cachep))
printk(KERN_INFO "nfs_page: not all structures were freed\n");
}

655
fs/nfs/proc.c Normal file
파일 보기

@@ -0,0 +1,655 @@
/*
* linux/fs/nfs/proc.c
*
* Copyright (C) 1992, 1993, 1994 Rick Sladkey
*
* OS-independent nfs remote procedure call functions
*
* Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers
* so at last we can have decent(ish) throughput off a
* Sun server.
*
* Coding optimized and cleaned up by Florian La Roche.
* Note: Error returns are optimized for NFS_OK, which isn't translated via
* nfs_stat_to_errno(), but happens to be already the right return code.
*
* Also, the code currently doesn't check the size of the packet, when
* it decodes the packet.
*
* Feel free to fix it and mail me the diffs if it worries you.
*
* Completely rewritten to support the new RPC call interface;
* rewrote and moved the entire XDR stuff to xdr.c
* --Olaf Kirch June 1996
*
* The code below initializes all auto variables explicitly, otherwise
* it will fail to work as a module (gcc generates a memset call for an
* incomplete struct).
*/
#include <linux/types.h>
#include <linux/param.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/lockd/bind.h>
#include <linux/smp_lock.h>
#define NFSDBG_FACILITY NFSDBG_PROC
extern struct rpc_procinfo nfs_procedures[];
/*
* Bare-bones access to getattr: this is for nfs_read_super.
*/
static int
nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs_fattr *fattr = info->fattr;
struct nfs2_fsstat fsinfo;
int status;
dprintk("%s: call getattr\n", __FUNCTION__);
fattr->valid = 0;
status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __FUNCTION__);
status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status)
return status;
info->rtmax = NFS_MAXDATA;
info->rtpref = fsinfo.tsize;
info->rtmult = fsinfo.bsize;
info->wtmax = NFS_MAXDATA;
info->wtpref = fsinfo.tsize;
info->wtmult = fsinfo.bsize;
info->dtpref = fsinfo.tsize;
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
return 0;
}
/*
* One function for each procedure in the NFS protocol.
*/
static int
nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
int status;
dprintk("NFS call getattr\n");
fattr->valid = 0;
status = rpc_call(server->client, NFSPROC_GETATTR,
fhandle, fattr, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
static int
nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct iattr *sattr)
{
struct inode *inode = dentry->d_inode;
struct nfs_sattrargs arg = {
.fh = NFS_FH(inode),
.sattr = sattr
};
int status;
dprintk("NFS call setattr\n");
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
dprintk("NFS reply setattr: %d\n", status);
return status;
}
static int
nfs_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
struct nfs_diropok res = {
.fh = fhandle,
.fattr = fattr
};
int status;
dprintk("NFS call lookup %s\n", name->name);
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
static int nfs_proc_readlink(struct inode *inode, struct page *page,
unsigned int pgbase, unsigned int pglen)
{
struct nfs_readlinkargs args = {
.fh = NFS_FH(inode),
.pgbase = pgbase,
.pglen = pglen,
.pages = &page
};
int status;
dprintk("NFS call readlink\n");
status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
dprintk("NFS reply readlink: %d\n", status);
return status;
}
static int nfs_proc_read(struct nfs_read_data *rdata)
{
int flags = rdata->flags;
struct inode * inode = rdata->inode;
struct nfs_fattr * fattr = rdata->res.fattr;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READ],
.rpc_argp = &rdata->args,
.rpc_resp = &rdata->res,
.rpc_cred = rdata->cred,
};
int status;
dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
(long long) rdata->args.offset);
fattr->valid = 0;
status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
if (status >= 0) {
nfs_refresh_inode(inode, fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes
*/
if (rdata->args.offset + rdata->args.count >= fattr->size)
rdata->res.eof = 1;
}
dprintk("NFS reply read: %d\n", status);
return status;
}
static int nfs_proc_write(struct nfs_write_data *wdata)
{
int flags = wdata->flags;
struct inode * inode = wdata->inode;
struct nfs_fattr * fattr = wdata->res.fattr;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_WRITE],
.rpc_argp = &wdata->args,
.rpc_resp = &wdata->res,
.rpc_cred = wdata->cred,
};
int status;
dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
(long long) wdata->args.offset);
fattr->valid = 0;
status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
if (status >= 0) {
nfs_refresh_inode(inode, fattr);
wdata->res.count = wdata->args.count;
wdata->verf.committed = NFS_FILE_SYNC;
}
dprintk("NFS reply write: %d\n", status);
return status < 0? status : wdata->res.count;
}
static int
nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct nfs_fh fhandle;
struct nfs_fattr fattr;
struct nfs_createargs arg = {
.fh = NFS_FH(dir),
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr
};
struct nfs_diropok res = {
.fh = &fhandle,
.fattr = &fattr
};
int status;
fattr.valid = 0;
dprintk("NFS call create %s\n", dentry->d_name.name);
status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
dprintk("NFS reply create: %d\n", status);
return status;
}
/*
* In NFSv2, mknod is grafted onto the create call.
*/
static int
nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
dev_t rdev)
{
struct nfs_fh fhandle;
struct nfs_fattr fattr;
struct nfs_createargs arg = {
.fh = NFS_FH(dir),
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr
};
struct nfs_diropok res = {
.fh = &fhandle,
.fattr = &fattr
};
int status, mode;
dprintk("NFS call mknod %s\n", dentry->d_name.name);
mode = sattr->ia_mode;
if (S_ISFIFO(mode)) {
sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR;
sattr->ia_valid &= ~ATTR_SIZE;
} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
sattr->ia_valid |= ATTR_SIZE;
sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
}
fattr.valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
if (status == -EINVAL && S_ISFIFO(mode)) {
sattr->ia_mode = mode;
fattr.valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
}
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
dprintk("NFS reply mknod: %d\n", status);
return status;
}
static int
nfs_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
.rpc_argp = &arg,
.rpc_resp = NULL,
.rpc_cred = NULL
};
int status;
dprintk("NFS call remove %s\n", name->name);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
dprintk("NFS reply remove: %d\n", status);
return status;
}
static int
nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
{
struct nfs_diropargs *arg;
arg = (struct nfs_diropargs *)kmalloc(sizeof(*arg), GFP_KERNEL);
if (!arg)
return -ENOMEM;
arg->fh = NFS_FH(dir->d_inode);
arg->name = name->name;
arg->len = name->len;
msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
msg->rpc_argp = arg;
return 0;
}
static int
nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
{
struct rpc_message *msg = &task->tk_msg;
if (msg->rpc_argp)
kfree(msg->rpc_argp);
return 0;
}
static int
nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
struct inode *new_dir, struct qstr *new_name)
{
struct nfs_renameargs arg = {
.fromfh = NFS_FH(old_dir),
.fromname = old_name->name,
.fromlen = old_name->len,
.tofh = NFS_FH(new_dir),
.toname = new_name->name,
.tolen = new_name->len
};
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
dprintk("NFS reply rename: %d\n", status);
return status;
}
static int
nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs_linkargs arg = {
.fromfh = NFS_FH(inode),
.tofh = NFS_FH(dir),
.toname = name->name,
.tolen = name->len
};
int status;
dprintk("NFS call link %s\n", name->name);
status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
dprintk("NFS reply link: %d\n", status);
return status;
}
static int
nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
struct iattr *sattr, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
struct nfs_symlinkargs arg = {
.fromfh = NFS_FH(dir),
.fromname = name->name,
.fromlen = name->len,
.topath = path->name,
.tolen = path->len,
.sattr = sattr
};
int status;
if (path->len > NFS2_MAXPATHLEN)
return -ENAMETOOLONG;
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
fattr->valid = 0;
fhandle->size = 0;
status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
dprintk("NFS reply symlink: %d\n", status);
return status;
}
static int
nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
struct nfs_fh fhandle;
struct nfs_fattr fattr;
struct nfs_createargs arg = {
.fh = NFS_FH(dir),
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr
};
struct nfs_diropok res = {
.fh = &fhandle,
.fattr = &fattr
};
int status;
dprintk("NFS call mkdir %s\n", dentry->d_name.name);
fattr.valid = 0;
status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
dprintk("NFS reply mkdir: %d\n", status);
return status;
}
static int
nfs_proc_rmdir(struct inode *dir, struct qstr *name)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len
};
int status;
dprintk("NFS call rmdir %s\n", name->name);
status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
dprintk("NFS reply rmdir: %d\n", status);
return status;
}
/*
* The READDIR implementation is somewhat hackish - we pass a temporary
* buffer to the encode function, which installs it in the receive
* the receive iovec. The decode function just parses the reply to make
* sure it is syntactically correct; the entries itself are decoded
* from nfs_readdir by calling the decode_entry function directly.
*/
static int
nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page *page, unsigned int count, int plus)
{
struct inode *dir = dentry->d_inode;
struct nfs_readdirargs arg = {
.fh = NFS_FH(dir),
.cookie = cookie,
.count = count,
.pages = &page
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READDIR],
.rpc_argp = &arg,
.rpc_resp = NULL,
.rpc_cred = cred
};
int status;
lock_kernel();
dprintk("NFS call readdir %d\n", (unsigned int)cookie);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
dprintk("NFS reply readdir: %d\n", status);
unlock_kernel();
return status;
}
static int
nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
struct nfs2_fsstat fsinfo;
int status;
dprintk("NFS call statfs\n");
stat->fattr->valid = 0;
status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
dprintk("NFS reply statfs: %d\n", status);
if (status)
goto out;
stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize;
stat->fbytes = (u64)fsinfo.bfree * fsinfo.bsize;
stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize;
stat->tfiles = 0;
stat->ffiles = 0;
stat->afiles = 0;
out:
return status;
}
static int
nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs2_fsstat fsinfo;
int status;
dprintk("NFS call fsinfo\n");
info->fattr->valid = 0;
status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
dprintk("NFS reply fsinfo: %d\n", status);
if (status)
goto out;
info->rtmax = NFS_MAXDATA;
info->rtpref = fsinfo.tsize;
info->rtmult = fsinfo.bsize;
info->wtmax = NFS_MAXDATA;
info->wtpref = fsinfo.tsize;
info->wtmult = fsinfo.bsize;
info->dtpref = fsinfo.tsize;
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
out:
return status;
}
static int
nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *info)
{
info->max_link = 0;
info->max_namelen = NFS2_MAXNAMLEN;
return 0;
}
extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
static void
nfs_read_done(struct rpc_task *task)
{
struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
if (task->tk_status >= 0) {
nfs_refresh_inode(data->inode, data->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes
*/
if (data->args.offset + data->args.count >= data->res.fattr->size)
data->res.eof = 1;
}
nfs_readpage_result(task);
}
static void
nfs_proc_read_setup(struct nfs_read_data *data)
{
struct rpc_task *task = &data->task;
struct inode *inode = data->inode;
int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READ],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
/* N.B. Do we need to test? Never called for swapfile inode */
flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
/* Finalize the task. */
rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags);
rpc_call_setup(task, &msg, 0);
}
static void
nfs_write_done(struct rpc_task *task)
{
struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
if (task->tk_status >= 0)
nfs_refresh_inode(data->inode, data->res.fattr);
nfs_writeback_done(task);
}
static void
nfs_proc_write_setup(struct nfs_write_data *data, int how)
{
struct rpc_task *task = &data->task;
struct inode *inode = data->inode;
int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_WRITE],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC;
/* Set the initial flags for the task. */
flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
/* Finalize the task. */
rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags);
rpc_call_setup(task, &msg, 0);
}
static void
nfs_proc_commit_setup(struct nfs_write_data *data, int how)
{
BUG();
}
static int
nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
}
struct nfs_rpc_ops nfs_v2_clientops = {
.version = 2, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs_dir_inode_operations,
.getroot = nfs_proc_get_root,
.getattr = nfs_proc_getattr,
.setattr = nfs_proc_setattr,
.lookup = nfs_proc_lookup,
.access = NULL, /* access */
.readlink = nfs_proc_readlink,
.read = nfs_proc_read,
.write = nfs_proc_write,
.commit = NULL, /* commit */
.create = nfs_proc_create,
.remove = nfs_proc_remove,
.unlink_setup = nfs_proc_unlink_setup,
.unlink_done = nfs_proc_unlink_done,
.rename = nfs_proc_rename,
.link = nfs_proc_link,
.symlink = nfs_proc_symlink,
.mkdir = nfs_proc_mkdir,
.rmdir = nfs_proc_rmdir,
.readdir = nfs_proc_readdir,
.mknod = nfs_proc_mknod,
.statfs = nfs_proc_statfs,
.fsinfo = nfs_proc_fsinfo,
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs_decode_dirent,
.read_setup = nfs_proc_read_setup,
.write_setup = nfs_proc_write_setup,
.commit_setup = nfs_proc_commit_setup,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs_proc_lock,
};

618
fs/nfs/read.c Normal file
파일 보기

@@ -0,0 +1,618 @@
/*
* linux/fs/nfs/read.c
*
* Block I/O for NFS
*
* Partial copy of Linus' read cache modifications to fs/nfs/file.c
* modified for async RPC by okir@monad.swb.de
*
* We do an ugly hack here in order to return proper error codes to the
* user program when a read request failed: since generic_file_read
* only checks the return value of inode->i_op->readpage() which is always 0
* for async RPC, we set the error bit of the page to 1 when an error occurs,
* and make nfs_readpage transmit requests synchronously when encountering this.
* This is only a small problem, though, since we now retry all operations
* within the RPC code when root squashing is suspected.
*/
#include <linux/config.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>
#include <asm/system.h>
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static int nfs_pagein_one(struct list_head *, struct inode *);
static void nfs_readpage_result_partial(struct nfs_read_data *, int);
static void nfs_readpage_result_full(struct nfs_read_data *, int);
static kmem_cache_t *nfs_rdata_cachep;
mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
void nfs_readdata_release(struct rpc_task *task)
{
struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
nfs_readdata_free(data);
}
static
unsigned int nfs_page_length(struct inode *inode, struct page *page)
{
loff_t i_size = i_size_read(inode);
unsigned long idx;
if (i_size <= 0)
return 0;
idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
if (page->index > idx)
return 0;
if (page->index != idx)
return PAGE_CACHE_SIZE;
return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
}
static
int nfs_return_empty_page(struct page *page)
{
memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
unlock_page(page);
return 0;
}
/*
* Read a page synchronously.
*/
static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
unsigned int rsize = NFS_SERVER(inode)->rsize;
unsigned int count = PAGE_CACHE_SIZE;
int result;
struct nfs_read_data *rdata;
rdata = nfs_readdata_alloc();
if (!rdata)
return -ENOMEM;
memset(rdata, 0, sizeof(*rdata));
rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
rdata->cred = ctx->cred;
rdata->inode = inode;
INIT_LIST_HEAD(&rdata->pages);
rdata->args.fh = NFS_FH(inode);
rdata->args.context = ctx;
rdata->args.pages = &page;
rdata->args.pgbase = 0UL;
rdata->args.count = rsize;
rdata->res.fattr = &rdata->fattr;
dprintk("NFS: nfs_readpage_sync(%p)\n", page);
/*
* This works now because the socket layer never tries to DMA
* into this buffer directly.
*/
do {
if (count < rsize)
rdata->args.count = count;
rdata->res.count = rdata->args.count;
rdata->args.offset = page_offset(page) + rdata->args.pgbase;
dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
NFS_SERVER(inode)->hostname,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
(unsigned long long)rdata->args.pgbase,
rdata->args.count);
lock_kernel();
result = NFS_PROTO(inode)->read(rdata);
unlock_kernel();
/*
* Even if we had a partial success we can't mark the page
* cache valid.
*/
if (result < 0) {
if (result == -EISDIR)
result = -EINVAL;
goto io_error;
}
count -= result;
rdata->args.pgbase += result;
/* Note: result == 0 should only happen if we're caching
* a write that extends the file and punches a hole.
*/
if (rdata->res.eof != 0 || result == 0)
break;
} while (count);
NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
if (count)
memclear_highpage_flush(page, rdata->args.pgbase, count);
SetPageUptodate(page);
if (PageError(page))
ClearPageError(page);
result = 0;
io_error:
unlock_page(page);
nfs_readdata_free(rdata);
return result;
}
static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
LIST_HEAD(one_request);
struct nfs_page *new;
unsigned int len;
len = nfs_page_length(inode, page);
if (len == 0)
return nfs_return_empty_page(page);
new = nfs_create_request(ctx, inode, page, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
return PTR_ERR(new);
}
if (len < PAGE_CACHE_SIZE)
memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
nfs_lock_request(new);
nfs_list_add_request(new, &one_request);
nfs_pagein_one(&one_request, inode);
return 0;
}
static void nfs_readpage_release(struct nfs_page *req)
{
unlock_page(req->wb_page);
nfs_clear_request(req);
nfs_release_request(req);
nfs_unlock_request(req);
dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
}
/*
* Set up the NFS read request struct
*/
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
unsigned int count, unsigned int offset)
{
struct inode *inode;
data->req = req;
data->inode = inode = req->wb_context->dentry->d_inode;
data->cred = req->wb_context->cred;
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
data->args.pgbase = req->wb_pgbase + offset;
data->args.pages = data->pagevec;
data->args.count = count;
data->args.context = req->wb_context;
data->res.fattr = &data->fattr;
data->res.count = count;
data->res.eof = 0;
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long)inode;
data->task.tk_calldata = data;
/* Release requests */
data->task.tk_release = nfs_readdata_release;
dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
count,
(unsigned long long)data->args.offset);
}
static void
nfs_async_read_error(struct list_head *head)
{
struct nfs_page *req;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
SetPageError(req->wb_page);
nfs_readpage_release(req);
}
}
/*
* Start an async read operation
*/
static void nfs_execute_read(struct nfs_read_data *data)
{
struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
sigset_t oldset;
rpc_clnt_sigmask(clnt, &oldset);
lock_kernel();
rpc_execute(&data->task);
unlock_kernel();
rpc_clnt_sigunmask(clnt, &oldset);
}
/*
* Generate multiple requests to fill a single page.
*
* We optimize to reduce the number of read operations on the wire. If we
* detect that we're reading a page, or an area of a page, that is past the
* end of file, we do not generate NFS read operations but just clear the
* parts of the page that would have come back zero from the server anyway.
*
* We rely on the cached value of i_size to make this determination; another
* client can fill pages on the server past our cached end-of-file, but we
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
{
struct nfs_page *req = nfs_list_entry(head->next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
unsigned int rsize = NFS_SERVER(inode)->rsize;
unsigned int nbytes, offset;
int requests = 0;
LIST_HEAD(list);
nfs_list_remove_request(req);
nbytes = req->wb_bytes;
for(;;) {
data = nfs_readdata_alloc();
if (!data)
goto out_bad;
INIT_LIST_HEAD(&data->pages);
list_add(&data->pages, &list);
requests++;
if (nbytes <= rsize)
break;
nbytes -= rsize;
}
atomic_set(&req->wb_complete, requests);
ClearPageError(page);
offset = 0;
nbytes = req->wb_bytes;
do {
data = list_entry(list.next, struct nfs_read_data, pages);
list_del_init(&data->pages);
data->pagevec[0] = page;
data->complete = nfs_readpage_result_partial;
if (nbytes > rsize) {
nfs_read_rpcsetup(req, data, rsize, offset);
offset += rsize;
nbytes -= rsize;
} else {
nfs_read_rpcsetup(req, data, nbytes, offset);
nbytes = 0;
}
nfs_execute_read(data);
} while (nbytes != 0);
return 0;
out_bad:
while (!list_empty(&list)) {
data = list_entry(list.next, struct nfs_read_data, pages);
list_del(&data->pages);
nfs_readdata_free(data);
}
SetPageError(page);
nfs_readpage_release(req);
return -ENOMEM;
}
static int nfs_pagein_one(struct list_head *head, struct inode *inode)
{
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
unsigned int count;
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
return nfs_pagein_multi(head, inode);
data = nfs_readdata_alloc();
if (!data)
goto out_bad;
INIT_LIST_HEAD(&data->pages);
pages = data->pagevec;
count = 0;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_list_add_request(req, &data->pages);
ClearPageError(req->wb_page);
*pages++ = req->wb_page;
count += req->wb_bytes;
}
req = nfs_list_entry(data->pages.next);
data->complete = nfs_readpage_result_full;
nfs_read_rpcsetup(req, data, count, 0);
nfs_execute_read(data);
return 0;
out_bad:
nfs_async_read_error(head);
return -ENOMEM;
}
static int
nfs_pagein_list(struct list_head *head, int rpages)
{
LIST_HEAD(one_request);
struct nfs_page *req;
int error = 0;
unsigned int pages = 0;
while (!list_empty(head)) {
pages += nfs_coalesce_requests(head, &one_request, rpages);
req = nfs_list_entry(one_request.next);
error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
if (error < 0)
break;
}
if (error >= 0)
return pages;
nfs_async_read_error(head);
return error;
}
/*
* Handle a read reply that fills part of a page.
*/
static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
{
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
if (status >= 0) {
unsigned int request = data->args.count;
unsigned int result = data->res.count;
if (result < request) {
memclear_highpage_flush(page,
data->args.pgbase + result,
request - result);
}
} else
SetPageError(page);
if (atomic_dec_and_test(&req->wb_complete)) {
if (!PageError(page))
SetPageUptodate(page);
nfs_readpage_release(req);
}
}
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
{
unsigned int count = data->res.count;
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
struct page *page = req->wb_page;
nfs_list_remove_request(req);
if (status >= 0) {
if (count < PAGE_CACHE_SIZE) {
if (count < req->wb_bytes)
memclear_highpage_flush(page,
req->wb_pgbase + count,
req->wb_bytes - count);
count = 0;
} else
count -= PAGE_CACHE_SIZE;
SetPageUptodate(page);
} else
SetPageError(page);
nfs_readpage_release(req);
}
}
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
void nfs_readpage_result(struct rpc_task *task)
{
struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
struct nfs_readargs *argp = &data->args;
struct nfs_readres *resp = &data->res;
int status = task->tk_status;
dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
task->tk_pid, status);
/* Is this a short read? */
if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Yes, so retry the read at the end of the data */
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call(task);
return;
}
task->tk_status = -EIO;
}
NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
data->complete(data, status);
}
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
* - The error flag is set for this page. This happens only when a
* previous async read operation failed.
*/
int nfs_readpage(struct file *file, struct page *page)
{
struct nfs_open_context *ctx;
struct inode *inode = page->mapping->host;
int error;
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_CACHE_SIZE, page->index);
/*
* Try to flush any pending writes to the file..
*
* NOTE! Because we own the page lock, there cannot
* be any new pending writes generated at this point
* for this page (other pages can be written to).
*/
error = nfs_wb_page(inode, page);
if (error)
goto out_error;
if (file == NULL) {
ctx = nfs_find_open_context(inode, FMODE_READ);
if (ctx == NULL)
return -EBADF;
} else
ctx = get_nfs_open_context((struct nfs_open_context *)
file->private_data);
if (!IS_SYNC(inode)) {
error = nfs_readpage_async(ctx, inode, page);
goto out;
}
error = nfs_readpage_sync(ctx, inode, page);
if (error < 0 && IS_SWAPFILE(inode))
printk("Aiee.. nfs swap-in of page failed!\n");
out:
put_nfs_open_context(ctx);
return error;
out_error:
unlock_page(page);
return error;
}
struct nfs_readdesc {
struct list_head *head;
struct nfs_open_context *ctx;
};
static int
readpage_async_filler(void *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
struct inode *inode = page->mapping->host;
struct nfs_page *new;
unsigned int len;
nfs_wb_page(inode, page);
len = nfs_page_length(inode, page);
if (len == 0)
return nfs_return_empty_page(page);
new = nfs_create_request(desc->ctx, inode, page, 0, len);
if (IS_ERR(new)) {
SetPageError(page);
unlock_page(page);
return PTR_ERR(new);
}
if (len < PAGE_CACHE_SIZE)
memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
nfs_lock_request(new);
nfs_list_add_request(new, desc->head);
return 0;
}
int nfs_readpages(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
LIST_HEAD(head);
struct nfs_readdesc desc = {
.head = &head,
};
struct inode *inode = mapping->host;
struct nfs_server *server = NFS_SERVER(inode);
int ret;
dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
nr_pages);
if (filp == NULL) {
desc.ctx = nfs_find_open_context(inode, FMODE_READ);
if (desc.ctx == NULL)
return -EBADF;
} else
desc.ctx = get_nfs_open_context((struct nfs_open_context *)
filp->private_data);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
if (!list_empty(&head)) {
int err = nfs_pagein_list(&head, server->rpages);
if (!ret)
ret = err;
}
put_nfs_open_context(desc.ctx);
return ret;
}
int nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
sizeof(struct nfs_read_data),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (nfs_rdata_cachep == NULL)
return -ENOMEM;
nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
mempool_alloc_slab,
mempool_free_slab,
nfs_rdata_cachep);
if (nfs_rdata_mempool == NULL)
return -ENOMEM;
return 0;
}
void nfs_destroy_readpagecache(void)
{
mempool_destroy(nfs_rdata_mempool);
if (kmem_cache_destroy(nfs_rdata_cachep))
printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
}

117
fs/nfs/symlink.c Normal file
파일 보기

@@ -0,0 +1,117 @@
/*
* linux/fs/nfs/symlink.c
*
* Copyright (C) 1992 Rick Sladkey
*
* Optimization changes Copyright (C) 1994 Florian La Roche
*
* Jun 7 1999, cache symlink lookups in the page cache. -DaveM
*
* nfs symlink handling code
*/
#define NFS_NEED_XDR_TYPES
#include <linux/time.h>
#include <linux/errno.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
#include <linux/pagemap.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/namei.h>
/* Symlink caching in the page cache is even more simplistic
* and straight-forward than readdir caching.
*
* At the beginning of the page we store pointer to struct page in question,
* simplifying nfs_put_link() (if inode got invalidated we can't find the page
* to be freed via pagecache lookup).
* The NUL-terminated string follows immediately thereafter.
*/
struct nfs_symlink {
struct page *page;
char body[0];
};
static int nfs_symlink_filler(struct inode *inode, struct page *page)
{
const unsigned int pgbase = offsetof(struct nfs_symlink, body);
const unsigned int pglen = PAGE_SIZE - pgbase;
int error;
lock_kernel();
error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen);
unlock_kernel();
if (error < 0)
goto error;
SetPageUptodate(page);
unlock_page(page);
return 0;
error:
SetPageError(page);
unlock_page(page);
return -EIO;
}
static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct page *page;
struct nfs_symlink *p;
void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode));
if (err)
goto read_failed;
page = read_cache_page(&inode->i_data, 0,
(filler_t *)nfs_symlink_filler, inode);
if (IS_ERR(page)) {
err = page;
goto read_failed;
}
if (!PageUptodate(page)) {
err = ERR_PTR(-EIO);
goto getlink_read_error;
}
p = kmap(page);
p->page = page;
nd_set_link(nd, p->body);
return 0;
getlink_read_error:
page_cache_release(page);
read_failed:
nd_set_link(nd, err);
return 0;
}
static void nfs_put_link(struct dentry *dentry, struct nameidata *nd)
{
char *s = nd_get_link(nd);
if (!IS_ERR(s)) {
struct nfs_symlink *p;
struct page *page;
p = container_of(s, struct nfs_symlink, body[0]);
page = p->page;
kunmap(page);
page_cache_release(page);
}
}
/*
* symlinks can't do much...
*/
struct inode_operations nfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = nfs_follow_link,
.put_link = nfs_put_link,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};

227
fs/nfs/unlink.c Normal file
파일 보기

@@ -0,0 +1,227 @@
/*
* linux/fs/nfs/unlink.c
*
* nfs sillydelete handling
*
* NOTE: we rely on holding the BKL for list manipulation protection.
*/
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/dcache.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
struct nfs_unlinkdata {
struct nfs_unlinkdata *next;
struct dentry *dir, *dentry;
struct qstr name;
struct rpc_task task;
struct rpc_cred *cred;
unsigned int count;
};
static struct nfs_unlinkdata *nfs_deletes;
static RPC_WAITQ(nfs_delete_queue, "nfs_delete_queue");
/**
* nfs_detach_unlinkdata - Remove asynchronous unlink from global list
* @data: pointer to descriptor
*/
static inline void
nfs_detach_unlinkdata(struct nfs_unlinkdata *data)
{
struct nfs_unlinkdata **q;
for (q = &nfs_deletes; *q != NULL; q = &((*q)->next)) {
if (*q == data) {
*q = data->next;
break;
}
}
}
/**
* nfs_put_unlinkdata - release data from a sillydelete operation.
* @data: pointer to unlink structure.
*/
static void
nfs_put_unlinkdata(struct nfs_unlinkdata *data)
{
if (--data->count == 0) {
nfs_detach_unlinkdata(data);
if (data->name.name != NULL)
kfree(data->name.name);
kfree(data);
}
}
#define NAME_ALLOC_LEN(len) ((len+16) & ~15)
/**
* nfs_copy_dname - copy dentry name to data structure
* @dentry: pointer to dentry
* @data: nfs_unlinkdata
*/
static inline void
nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
{
char *str;
int len = dentry->d_name.len;
str = kmalloc(NAME_ALLOC_LEN(len), GFP_KERNEL);
if (!str)
return;
memcpy(str, dentry->d_name.name, len);
if (!data->name.len) {
data->name.len = len;
data->name.name = str;
} else
kfree(str);
}
/**
* nfs_async_unlink_init - Initialize the RPC info
* @task: rpc_task of the sillydelete
*
* We delay initializing RPC info until after the call to dentry_iput()
* in order to minimize races against rename().
*/
static void
nfs_async_unlink_init(struct rpc_task *task)
{
struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata;
struct dentry *dir = data->dir;
struct rpc_message msg = {
.rpc_cred = data->cred,
};
int status = -ENOENT;
if (!data->name.len)
goto out_err;
status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name);
if (status < 0)
goto out_err;
nfs_begin_data_update(dir->d_inode);
rpc_call_setup(task, &msg, 0);
return;
out_err:
rpc_exit(task, status);
}
/**
* nfs_async_unlink_done - Sillydelete post-processing
* @task: rpc_task of the sillydelete
*
* Do the directory attribute update.
*/
static void
nfs_async_unlink_done(struct rpc_task *task)
{
struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata;
struct dentry *dir = data->dir;
struct inode *dir_i;
if (!dir)
return;
dir_i = dir->d_inode;
nfs_end_data_update(dir_i);
if (NFS_PROTO(dir_i)->unlink_done(dir, task))
return;
put_rpccred(data->cred);
data->cred = NULL;
dput(dir);
}
/**
* nfs_async_unlink_release - Release the sillydelete data.
* @task: rpc_task of the sillydelete
*
* We need to call nfs_put_unlinkdata as a 'tk_release' task since the
* rpc_task would be freed too.
*/
static void
nfs_async_unlink_release(struct rpc_task *task)
{
struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata;
nfs_put_unlinkdata(data);
}
/**
* nfs_async_unlink - asynchronous unlinking of a file
* @dentry: dentry to unlink
*/
int
nfs_async_unlink(struct dentry *dentry)
{
struct dentry *dir = dentry->d_parent;
struct nfs_unlinkdata *data;
struct rpc_task *task;
struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
int status = -ENOMEM;
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out;
memset(data, 0, sizeof(*data));
data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
if (IS_ERR(data->cred)) {
status = PTR_ERR(data->cred);
goto out_free;
}
data->dir = dget(dir);
data->dentry = dentry;
data->next = nfs_deletes;
nfs_deletes = data;
data->count = 1;
task = &data->task;
rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC);
task->tk_calldata = data;
task->tk_action = nfs_async_unlink_init;
task->tk_release = nfs_async_unlink_release;
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_NFSFS_RENAMED;
spin_unlock(&dentry->d_lock);
rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL);
status = 0;
out:
return status;
out_free:
kfree(data);
return status;
}
/**
* nfs_complete_unlink - Initialize completion of the sillydelete
* @dentry: dentry to delete
*
* Since we're most likely to be called by dentry_iput(), we
* only use the dentry to find the sillydelete. We then copy the name
* into the qstr.
*/
void
nfs_complete_unlink(struct dentry *dentry)
{
struct nfs_unlinkdata *data;
for(data = nfs_deletes; data != NULL; data = data->next) {
if (dentry == data->dentry)
break;
}
if (!data)
return;
data->count++;
nfs_copy_dname(dentry, data);
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
spin_unlock(&dentry->d_lock);
rpc_wake_up_task(&data->task);
nfs_put_unlinkdata(data);
}

1431
fs/nfs/write.c Normal file

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다. Load Diff