nfs/blocklayout: add SCSI layout support

This is a trivial extension to the block layout driver to support the
new SCSI layouts draft.  There are three changes:

 - device identifcation through the SCSI VPD page.  This allows us to
   directly use the udev generated persistent device names instead of
   requiring an expensive lookup by crawling every block device node
   in /dev and reading a signature for it.
 - use of SCSI persistent reservations to protect device access and
   allow for robust fencing.  On the client sides this just means
   registering and unregistering a server supplied key.
 - an optimized LAYOUTCOMMIT payload that doesn't send unessecary
   fields to the server.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
This commit is contained in:
Christoph Hellwig
2016-03-04 20:46:15 +01:00
committed by J. Bruce Fields
parent 40cf446b94
commit d9186c0397
5 changed files with 239 additions and 26 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
* Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/vmalloc.h>
@@ -462,10 +462,12 @@ out:
return err;
}
static size_t ext_tree_layoutupdate_size(size_t count)
static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
{
return sizeof(__be32) /* number of entries */ +
PNFS_BLOCK_EXTENT_SIZE * count;
if (bl->bl_scsi_layout)
return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
else
return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
}
static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
@@ -482,6 +484,23 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
}
}
static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
{
p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
NFS4_DEVICEID4_SIZE);
p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
p = xdr_encode_hyper(p, 0LL);
*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
return p;
}
static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
{
p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
}
static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
size_t buffer_size, size_t *count)
{
@@ -495,19 +514,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
continue;
(*count)++;
if (ext_tree_layoutupdate_size(*count) > buffer_size) {
if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
/* keep counting.. */
ret = -ENOSPC;
continue;
}
p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
NFS4_DEVICEID4_SIZE);
p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
p = xdr_encode_hyper(p, 0LL);
*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
if (bl->bl_scsi_layout)
p = encode_scsi_range(be, p);
else
p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING;
}
spin_unlock(&bl->bl_ext_lock);
@@ -536,7 +552,7 @@ retry:
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
buffer_size = ext_tree_layoutupdate_size(count);
buffer_size = ext_tree_layoutupdate_size(bl, count);
count = 0;
arg->layoutupdate_pages =
@@ -555,7 +571,7 @@ retry:
}
*start_p = cpu_to_be32(count);
arg->layoutupdate_len = ext_tree_layoutupdate_size(count);
arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
void *p = start_p, *end = p + arg->layoutupdate_len;