Merge tag 'ceph-for-4.20-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The highlights are:

   - a series that fixes some old memory allocation issues in libceph
     (myself). We no longer allocate memory in places where allocation
     failures cannot be handled and BUG when the allocation fails.

   - support for copy_file_range() syscall (Luis Henriques). If size and
     alignment conditions are met, it leverages RADOS copy-from
     operation. Otherwise, a local copy is performed.

   - a patch that reduces memory requirement of ceph_sync_read() from
     the size of the entire read to the size of one object (Zheng Yan).

   - fallocate() syscall is now restricted to FALLOC_FL_PUNCH_HOLE (Luis
     Henriques)"

* tag 'ceph-for-4.20-rc1' of git://github.com/ceph/ceph-client: (25 commits)
  ceph: new mount option to disable usage of copy-from op
  ceph: support copy_file_range file operation
  libceph: support the RADOS copy-from operation
  ceph: add non-blocking parameter to ceph_try_get_caps()
  libceph: check reply num_data_items in setup_request_data()
  libceph: preallocate message data items
  libceph, rbd, ceph: move ceph_osdc_alloc_messages() calls
  libceph: introduce alloc_watch_request()
  libceph: assign cookies in linger_submit()
  libceph: enable fallback to ceph_msg_new() in ceph_msgpool_get()
  ceph: num_ops is off by one in ceph_aio_retry_work()
  libceph: no need to call osd_req_opcode_valid() in osd_req_encode_op()
  ceph: set timeout conditionally in __cap_delay_requeue
  libceph: don't consume a ref on pagelist in ceph_msg_data_add_pagelist()
  libceph: introduce ceph_pagelist_alloc()
  libceph: osd_req_op_cls_init() doesn't need to take opcode
  libceph: bump CEPH_MSG_MAX_DATA_LEN
  ceph: only allow punch hole mode in fallocate
  ceph: refactor ceph_sync_read()
  ceph: check if LOOKUPNAME request was aborted when filling trace
  ...
This commit is contained in:
Linus Torvalds
2018-10-31 14:42:31 -07:00
21 changed files with 900 additions and 404 deletions

View File

@@ -81,7 +81,13 @@ struct ceph_options {
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
/*
* Handle the largest possible rbd object in one message.
* There is no limit on the size of cephfs objects, but it has to obey
* rsize and wsize mount options anyway.
*/
#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024)
#define CEPH_AUTH_NAME_DEFAULT "guest"

View File

@@ -82,22 +82,6 @@ enum ceph_msg_data_type {
CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */
};
static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
{
switch (type) {
case CEPH_MSG_DATA_NONE:
case CEPH_MSG_DATA_PAGES:
case CEPH_MSG_DATA_PAGELIST:
#ifdef CONFIG_BLOCK
case CEPH_MSG_DATA_BIO:
#endif /* CONFIG_BLOCK */
case CEPH_MSG_DATA_BVECS:
return true;
default:
return false;
}
}
#ifdef CONFIG_BLOCK
struct ceph_bio_iter {
@@ -181,7 +165,6 @@ struct ceph_bvec_iter {
} while (0)
struct ceph_msg_data {
struct list_head links; /* ceph_msg->data */
enum ceph_msg_data_type type;
union {
#ifdef CONFIG_BLOCK
@@ -202,7 +185,6 @@ struct ceph_msg_data {
struct ceph_msg_data_cursor {
size_t total_resid; /* across all data items */
struct list_head *data_head; /* = &ceph_msg->data */
struct ceph_msg_data *data; /* current data item */
size_t resid; /* bytes not yet consumed */
@@ -240,7 +222,9 @@ struct ceph_msg {
struct ceph_buffer *middle;
size_t data_length;
struct list_head data;
struct ceph_msg_data *data;
int num_data_items;
int max_data_items;
struct ceph_msg_data_cursor cursor;
struct ceph_connection *con;
@@ -381,6 +365,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
struct ceph_bvec_iter *bvec_pos);
struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
gfp_t flags, bool can_fail);
extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
bool can_fail);

View File

@@ -13,14 +13,15 @@ struct ceph_msgpool {
mempool_t *pool;
int type; /* preallocated message type */
int front_len; /* preallocated payload size */
int max_data_items;
};
extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
int front_len, int size, bool blocking,
const char *name);
int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
int front_len, int max_data_items, int size,
const char *name);
extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
int front_len);
struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len,
int max_data_items);
extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
#endif

View File

@@ -136,6 +136,13 @@ struct ceph_osd_req_op {
u64 expected_object_size;
u64 expected_write_size;
} alloc_hint;
struct {
u64 snapid;
u64 src_version;
u8 flags;
u32 src_fadvise_flags;
struct ceph_osd_data osd_data;
} copy_from;
};
};
@@ -444,9 +451,8 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
struct page **pages, u64 length,
u32 alignment, bool pages_from_pool,
bool own_pages);
extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
const char *class, const char *method);
int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
const char *class, const char *method);
extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
u16 opcode, const char *name, const void *value,
size_t size, u8 cmp_op, u8 cmp_mode);
@@ -511,6 +517,16 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
struct timespec64 *mtime,
struct page **pages, int nr_pages);
int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
u64 src_snapid, u64 src_version,
struct ceph_object_id *src_oid,
struct ceph_object_locator *src_oloc,
u32 src_fadvise_flags,
struct ceph_object_id *dst_oid,
struct ceph_object_locator *dst_oloc,
u32 dst_fadvise_flags,
u8 copy_from_flags);
/* watch/notify */
struct ceph_osd_linger_request *
ceph_osdc_watch(struct ceph_osd_client *osdc,

View File

@@ -23,16 +23,7 @@ struct ceph_pagelist_cursor {
size_t room; /* room remaining to reset to */
};
static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
{
INIT_LIST_HEAD(&pl->head);
pl->mapped_tail = NULL;
pl->length = 0;
pl->room = 0;
INIT_LIST_HEAD(&pl->free_list);
pl->num_pages_free = 0;
refcount_set(&pl->refcnt, 1);
}
struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags);
extern void ceph_pagelist_release(struct ceph_pagelist *pl);

View File

@@ -410,6 +410,14 @@ enum {
enum {
CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */
CEPH_OSD_OP_FLAG_FADVISE_RANDOM = 0x4, /* the op is random */
CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL = 0x8, /* the op is sequential */
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED = 0x10,/* data will be accessed in
the near future */
CEPH_OSD_OP_FLAG_FADVISE_DONTNEED = 0x20,/* data will not be accessed
in the near future */
CEPH_OSD_OP_FLAG_FADVISE_NOCACHE = 0x40,/* data will be accessed only
once by this client */
};
#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
@@ -431,6 +439,15 @@ enum {
CEPH_OSD_CMPXATTR_MODE_U64 = 2
};
enum {
CEPH_OSD_COPY_FROM_FLAG_FLUSH = 1, /* part of a flush operation */
CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 2, /* ignore pool overlay */
CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE = 4, /* ignore osd cache logic */
CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 8, /* map snap direct to
* cloneid */
CEPH_OSD_COPY_FROM_FLAG_RWORDERED = 16, /* order with write */
};
enum {
CEPH_OSD_WATCH_OP_UNWATCH = 0,
CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1,
@@ -497,6 +514,17 @@ struct ceph_osd_op {
__le64 expected_object_size;
__le64 expected_write_size;
} __attribute__ ((packed)) alloc_hint;
struct {
__le64 snapid;
__le64 src_version;
__u8 flags; /* CEPH_OSD_COPY_FROM_FLAG_* */
/*
* CEPH_OSD_OP_FLAG_FADVISE_*: fadvise flags
* for src object, flags for dest object are in
* ceph_osd_op::flags.
*/
__le32 src_fadvise_flags;
} __attribute__ ((packed)) copy_from;
};
__le32 payload_len;
} __attribute__ ((packed));