osd_client.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef _FS_CEPH_OSD_CLIENT_H
  3. #define _FS_CEPH_OSD_CLIENT_H
  4. #include <linux/bitrev.h>
  5. #include <linux/completion.h>
  6. #include <linux/kref.h>
  7. #include <linux/mempool.h>
  8. #include <linux/rbtree.h>
  9. #include <linux/refcount.h>
  10. #include <linux/ktime.h>
  11. #include <linux/ceph/types.h>
  12. #include <linux/ceph/osdmap.h>
  13. #include <linux/ceph/messenger.h>
  14. #include <linux/ceph/msgpool.h>
  15. #include <linux/ceph/auth.h>
  16. #include <linux/ceph/pagelist.h>
  /*
   * Forward declarations for types this header refers to by pointer before
   * (or without) their full definition.
   */
  struct ceph_msg;
  struct ceph_osd_client;
  struct ceph_osd_request;
  struct ceph_snap_context;
  /*
   * Completion callback for async writepages.  The callback takes a pointer
   * to an OSD request and returns nothing; struct ceph_osd_request keeps one
   * of these in its r_callback member.
   */
  typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *req);
  /*
   * Sentinel OSD number (-1).  The replacement list is parenthesized so the
   * negative constant always expands as a single operand, whatever expression
   * it is pasted into.
   * NOTE(review): presumably the o_osd value of ceph_osd_client::homeless_osd
   * -- confirm against osd_client.c.
   */
  #define CEPH_HOMELESS_OSD (-1)
  26. /* a given osd we're communicating with */
  27. struct ceph_osd {
  28. refcount_t o_ref;
  29. struct ceph_osd_client *o_osdc;
  30. int o_osd;
  31. int o_incarnation;
  32. struct rb_node o_node;
  33. struct ceph_connection o_con;
  34. struct rb_root o_requests;
  35. struct rb_root o_linger_requests;
  36. struct rb_root o_backoff_mappings;
  37. struct rb_root o_backoffs_by_id;
  38. struct list_head o_osd_lru;
  39. struct ceph_auth_handshake o_auth;
  40. unsigned long lru_ttl;
  41. struct list_head o_keepalive_item;
  42. struct mutex lock;
  43. };
  /*
   * Op-count constants for OSD requests.
   * NOTE(review): presumably SLAB_OPS is the op count covered by the request
   * slab and MAX_OPS the upper bound on ops per request -- confirm in
   * osd_client.c.
   */
  #define CEPH_OSD_SLAB_OPS   2
  #define CEPH_OSD_MAX_OPS    16
  /*
   * Discriminator stored in ceph_osd_data::type.  The BIO value exists only
   * when CONFIG_BLOCK is set, so the numeric value of BVECS depends on the
   * kernel configuration.
   */
  enum ceph_osd_data_type {
      CEPH_OSD_DATA_TYPE_NONE = 0,
      CEPH_OSD_DATA_TYPE_PAGES,
      CEPH_OSD_DATA_TYPE_PAGELIST,
  #ifdef CONFIG_BLOCK
      CEPH_OSD_DATA_TYPE_BIO,
  #endif /* CONFIG_BLOCK */
      CEPH_OSD_DATA_TYPE_BVECS,
  };
  55. struct ceph_osd_data {
  56. enum ceph_osd_data_type type;
  57. union {
  58. struct {
  59. struct page **pages;
  60. u64 length;
  61. u32 alignment;
  62. bool pages_from_pool;
  63. bool own_pages;
  64. };
  65. struct ceph_pagelist *pagelist;
  66. #ifdef CONFIG_BLOCK
  67. struct {
  68. struct ceph_bio_iter bio_pos;
  69. u32 bio_length;
  70. };
  71. #endif /* CONFIG_BLOCK */
  72. struct {
  73. struct ceph_bvec_iter bvec_pos;
  74. u32 num_bvecs;
  75. };
  76. };
  77. };
  78. struct ceph_osd_req_op {
  79. u16 op; /* CEPH_OSD_OP_* */
  80. u32 flags; /* CEPH_OSD_OP_FLAG_* */
  81. u32 indata_len; /* request */
  82. u32 outdata_len; /* reply */
  83. s32 rval;
  84. union {
  85. struct ceph_osd_data raw_data_in;
  86. struct {
  87. u64 offset, length;
  88. u64 truncate_size;
  89. u32 truncate_seq;
  90. struct ceph_osd_data osd_data;
  91. } extent;
  92. struct {
  93. u32 name_len;
  94. u32 value_len;
  95. __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
  96. __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
  97. struct ceph_osd_data osd_data;
  98. } xattr;
  99. struct {
  100. const char *class_name;
  101. const char *method_name;
  102. struct ceph_osd_data request_info;
  103. struct ceph_osd_data request_data;
  104. struct ceph_osd_data response_data;
  105. __u8 class_len;
  106. __u8 method_len;
  107. u32 indata_len;
  108. } cls;
  109. struct {
  110. u64 cookie;
  111. __u8 op; /* CEPH_OSD_WATCH_OP_ */
  112. u32 gen;
  113. } watch;
  114. struct {
  115. struct ceph_osd_data request_data;
  116. } notify_ack;
  117. struct {
  118. u64 cookie;
  119. struct ceph_osd_data request_data;
  120. struct ceph_osd_data response_data;
  121. } notify;
  122. struct {
  123. struct ceph_osd_data response_data;
  124. } list_watchers;
  125. struct {
  126. u64 expected_object_size;
  127. u64 expected_write_size;
  128. u32 flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
  129. } alloc_hint;
  130. struct {
  131. u64 snapid;
  132. u64 src_version;
  133. u8 flags;
  134. u32 src_fadvise_flags;
  135. struct ceph_osd_data osd_data;
  136. } copy_from;
  137. };
  138. };
  139. struct ceph_osd_request_target {
  140. struct ceph_object_id base_oid;
  141. struct ceph_object_locator base_oloc;
  142. struct ceph_object_id target_oid;
  143. struct ceph_object_locator target_oloc;
  144. struct ceph_pg pgid; /* last raw pg we mapped to */
  145. struct ceph_spg spgid; /* last actual spg we mapped to */
  146. u32 pg_num;
  147. u32 pg_num_mask;
  148. struct ceph_osds acting;
  149. struct ceph_osds up;
  150. int size;
  151. int min_size;
  152. bool sort_bitwise;
  153. bool recovery_deletes;
  154. unsigned int flags; /* CEPH_OSD_FLAG_* */
  155. bool used_replica;
  156. bool paused;
  157. u32 epoch;
  158. u32 last_force_resend;
  159. int osd;
  160. };
  161. /* an in-flight request */
  162. struct ceph_osd_request {
  163. u64 r_tid; /* unique for this client */
  164. struct rb_node r_node;
  165. struct rb_node r_mc_node; /* map check */
  166. struct work_struct r_complete_work;
  167. struct ceph_osd *r_osd;
  168. struct ceph_osd_request_target r_t;
  169. #define r_base_oid r_t.base_oid
  170. #define r_base_oloc r_t.base_oloc
  171. #define r_flags r_t.flags
  172. struct ceph_msg *r_request, *r_reply;
  173. u32 r_sent; /* >0 if r_request is sending/sent */
  174. /* request osd ops array */
  175. unsigned int r_num_ops;
  176. int r_result;
  177. struct ceph_osd_client *r_osdc;
  178. struct kref r_kref;
  179. bool r_mempool;
  180. struct completion r_completion; /* private to osd_client.c */
  181. ceph_osdc_callback_t r_callback;
  182. struct inode *r_inode; /* for use by callbacks */
  183. struct list_head r_private_item; /* ditto */
  184. void *r_priv; /* ditto */
  185. /* set by submitter */
  186. u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */
  187. struct ceph_snap_context *r_snapc; /* for writes */
  188. struct timespec64 r_mtime; /* ditto */
  189. u64 r_data_offset; /* ditto */
  190. bool r_linger; /* don't resend on failure */
  191. /* internal */
  192. unsigned long r_stamp; /* jiffies, send or check time */
  193. unsigned long r_start_stamp; /* jiffies */
  194. ktime_t r_start_latency; /* ktime_t */
  195. ktime_t r_end_latency; /* ktime_t */
  196. int r_attempts;
  197. u32 r_map_dne_bound;
  198. struct ceph_osd_req_op r_ops[];
  199. };
  200. struct ceph_request_redirect {
  201. struct ceph_object_locator oloc;
  202. };
  203. /*
  204. * osd request identifier
  205. *
  206. * caller name + incarnation# + tid to unique identify this request
  207. */
  208. struct ceph_osd_reqid {
  209. struct ceph_entity_name name;
  210. __le64 tid;
  211. __le32 inc;
  212. } __packed;
  213. struct ceph_blkin_trace_info {
  214. __le64 trace_id;
  215. __le64 span_id;
  216. __le64 parent_span_id;
  217. } __packed;
  218. typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
  219. u64 notifier_id, void *data, size_t data_len);
  220. typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
  221. struct ceph_osd_linger_request {
  222. struct ceph_osd_client *osdc;
  223. u64 linger_id;
  224. bool committed;
  225. bool is_watch; /* watch or notify */
  226. struct ceph_osd *osd;
  227. struct ceph_osd_request *reg_req;
  228. struct ceph_osd_request *ping_req;
  229. unsigned long ping_sent;
  230. unsigned long watch_valid_thru;
  231. struct list_head pending_lworks;
  232. struct ceph_osd_request_target t;
  233. u32 map_dne_bound;
  234. struct timespec64 mtime;
  235. struct kref kref;
  236. struct mutex lock;
  237. struct rb_node node; /* osd */
  238. struct rb_node osdc_node; /* osdc */
  239. struct rb_node mc_node; /* map check */
  240. struct list_head scan_item;
  241. struct completion reg_commit_wait;
  242. struct completion notify_finish_wait;
  243. int reg_commit_error;
  244. int notify_finish_error;
  245. int last_error;
  246. u32 register_gen;
  247. u64 notify_id;
  248. rados_watchcb2_t wcb;
  249. rados_watcherrcb_t errcb;
  250. void *data;
  251. struct ceph_pagelist *request_pl;
  252. struct page **notify_id_pages;
  253. struct page ***preply_pages;
  254. size_t *preply_len;
  255. };
  256. struct ceph_watch_item {
  257. struct ceph_entity_name name;
  258. u64 cookie;
  259. struct ceph_entity_addr addr;
  260. };
  261. struct ceph_spg_mapping {
  262. struct rb_node node;
  263. struct ceph_spg spgid;
  264. struct rb_root backoffs;
  265. };
  266. struct ceph_hobject_id {
  267. void *key;
  268. size_t key_len;
  269. void *oid;
  270. size_t oid_len;
  271. u64 snapid;
  272. u32 hash;
  273. u8 is_max;
  274. void *nspace;
  275. size_t nspace_len;
  276. s64 pool;
  277. /* cache */
  278. u32 hash_reverse_bits;
  279. };
  280. static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
  281. {
  282. hoid->hash_reverse_bits = bitrev32(hoid->hash);
  283. }
  284. /*
  285. * PG-wide backoff: [begin, end)
  286. * per-object backoff: begin == end
  287. */
  288. struct ceph_osd_backoff {
  289. struct rb_node spg_node;
  290. struct rb_node id_node;
  291. struct ceph_spg spgid;
  292. u64 id;
  293. struct ceph_hobject_id *begin;
  294. struct ceph_hobject_id *end;
  295. };
  /*
   * 64-bit constant with the top 16 bits set.
   * NOTE(review): presumably the base from which linger ids are numbered
   * (see ceph_osd_client::last_linger_id) -- confirm in osd_client.c.
   */
  #define CEPH_LINGER_ID_START    0xffff000000000000ULL
  297. struct ceph_osd_client {
  298. struct ceph_client *client;
  299. struct ceph_osdmap *osdmap; /* current map */
  300. struct rw_semaphore lock;
  301. struct rb_root osds; /* osds */
  302. struct list_head osd_lru; /* idle osds */
  303. spinlock_t osd_lru_lock;
  304. u32 epoch_barrier;
  305. struct ceph_osd homeless_osd;
  306. atomic64_t last_tid; /* tid of last request */
  307. u64 last_linger_id;
  308. struct rb_root linger_requests; /* lingering requests */
  309. struct rb_root map_checks;
  310. struct rb_root linger_map_checks;
  311. atomic_t num_requests;
  312. atomic_t num_homeless;
  313. int abort_err;
  314. struct delayed_work timeout_work;
  315. struct delayed_work osds_timeout_work;
  316. #ifdef CONFIG_DEBUG_FS
  317. struct dentry *debugfs_file;
  318. #endif
  319. mempool_t *req_mempool;
  320. struct ceph_msgpool msgpool_op;
  321. struct ceph_msgpool msgpool_op_reply;
  322. struct workqueue_struct *notify_wq;
  323. struct workqueue_struct *completion_wq;
  324. };
  325. static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
  326. {
  327. return osdc->osdmap->flags & flag;
  328. }
  329. extern int ceph_osdc_setup(void);
  330. extern void ceph_osdc_cleanup(void);
  331. extern int ceph_osdc_init(struct ceph_osd_client *osdc,
  332. struct ceph_client *client);
  333. extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
  334. extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
  335. extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
  336. struct ceph_msg *msg);
  337. extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
  338. struct ceph_msg *msg);
  339. void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
  340. void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
  341. void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
  /*
   * Evaluate to a pointer to member `fld` of the `typ` parameter block in op
   * number `whch` of request `oreq`.
   *
   * `oreq` and `whch` are captured in locals, so each is evaluated exactly
   * once.  An index at or beyond r_num_ops trips BUG_ON().  Relies on the
   * GNU statement-expression extension.
   */
  #define osd_req_op_data(oreq, whch, typ, fld)                           \
  ({                                                                      \
      struct ceph_osd_request *__opd_req = (oreq);                        \
      unsigned int __opd_idx = (whch);                                    \
                                                                          \
      BUG_ON(__opd_idx >= __opd_req->r_num_ops);                          \
      &__opd_req->r_ops[__opd_idx].typ.fld;                               \
  })
  349. struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req,
  350. unsigned int which, u16 opcode, u32 flags);
  351. extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
  352. unsigned int which,
  353. struct page **pages, u64 length,
  354. u32 alignment, bool pages_from_pool,
  355. bool own_pages);
  356. extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
  357. unsigned int which, u16 opcode,
  358. u64 offset, u64 length,
  359. u64 truncate_size, u32 truncate_seq);
  360. extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
  361. unsigned int which, u64 length);
  362. extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
  363. unsigned int which, u64 offset_inc);
  364. extern struct ceph_osd_data *osd_req_op_extent_osd_data(
  365. struct ceph_osd_request *osd_req,
  366. unsigned int which);
  367. extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
  368. unsigned int which,
  369. struct page **pages, u64 length,
  370. u32 alignment, bool pages_from_pool,
  371. bool own_pages);
  372. extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
  373. unsigned int which,
  374. struct ceph_pagelist *pagelist);
  375. #ifdef CONFIG_BLOCK
  376. void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
  377. unsigned int which,
  378. struct ceph_bio_iter *bio_pos,
  379. u32 bio_length);
  380. #endif /* CONFIG_BLOCK */
  381. void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
  382. unsigned int which,
  383. struct bio_vec *bvecs, u32 num_bvecs,
  384. u32 bytes);
  385. void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
  386. unsigned int which,
  387. struct ceph_bvec_iter *bvec_pos);
  388. extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
  389. unsigned int which,
  390. struct ceph_pagelist *pagelist);
  391. extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
  392. unsigned int which,
  393. struct page **pages, u64 length,
  394. u32 alignment, bool pages_from_pool,
  395. bool own_pages);
  396. void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
  397. unsigned int which,
  398. struct bio_vec *bvecs, u32 num_bvecs,
  399. u32 bytes);
  400. extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
  401. unsigned int which,
  402. struct page **pages, u64 length,
  403. u32 alignment, bool pages_from_pool,
  404. bool own_pages);
  405. int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
  406. const char *class, const char *method);
  407. extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
  408. u16 opcode, const char *name, const void *value,
  409. size_t size, u8 cmp_op, u8 cmp_mode);
  410. extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
  411. unsigned int which,
  412. u64 expected_object_size,
  413. u64 expected_write_size,
  414. u32 flags);
  415. extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
  416. u64 src_snapid, u64 src_version,
  417. struct ceph_object_id *src_oid,
  418. struct ceph_object_locator *src_oloc,
  419. u32 src_fadvise_flags,
  420. u32 dst_fadvise_flags,
  421. u32 truncate_seq, u64 truncate_size,
  422. u8 copy_from_flags);
  423. extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
  424. struct ceph_snap_context *snapc,
  425. unsigned int num_ops,
  426. bool use_mempool,
  427. gfp_t gfp_flags);
  428. int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
  429. extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
  430. struct ceph_file_layout *layout,
  431. struct ceph_vino vino,
  432. u64 offset, u64 *len,
  433. unsigned int which, int num_ops,
  434. int opcode, int flags,
  435. struct ceph_snap_context *snapc,
  436. u32 truncate_seq, u64 truncate_size,
  437. bool use_mempool);
  438. extern void ceph_osdc_get_request(struct ceph_osd_request *req);
  439. extern void ceph_osdc_put_request(struct ceph_osd_request *req);
  440. void ceph_osdc_start_request(struct ceph_osd_client *osdc,
  441. struct ceph_osd_request *req);
  442. extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
  443. extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
  444. struct ceph_osd_request *req);
  445. extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
  446. extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
  447. void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
  448. int ceph_osdc_call(struct ceph_osd_client *osdc,
  449. struct ceph_object_id *oid,
  450. struct ceph_object_locator *oloc,
  451. const char *class, const char *method,
  452. unsigned int flags,
  453. struct page *req_page, size_t req_len,
  454. struct page **resp_pages, size_t *resp_len);
  455. /* watch/notify */
  456. struct ceph_osd_linger_request *
  457. ceph_osdc_watch(struct ceph_osd_client *osdc,
  458. struct ceph_object_id *oid,
  459. struct ceph_object_locator *oloc,
  460. rados_watchcb2_t wcb,
  461. rados_watcherrcb_t errcb,
  462. void *data);
  463. int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
  464. struct ceph_osd_linger_request *lreq);
  465. int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
  466. struct ceph_object_id *oid,
  467. struct ceph_object_locator *oloc,
  468. u64 notify_id,
  469. u64 cookie,
  470. void *payload,
  471. u32 payload_len);
  472. int ceph_osdc_notify(struct ceph_osd_client *osdc,
  473. struct ceph_object_id *oid,
  474. struct ceph_object_locator *oloc,
  475. void *payload,
  476. u32 payload_len,
  477. u32 timeout,
  478. struct page ***preply_pages,
  479. size_t *preply_len);
  480. int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
  481. struct ceph_osd_linger_request *lreq);
  482. int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
  483. struct ceph_object_id *oid,
  484. struct ceph_object_locator *oloc,
  485. struct ceph_watch_item **watchers,
  486. u32 *num_watchers);
  487. #endif