printk_ringbuffer.c 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/kernel.h>
  3. #include <linux/irqflags.h>
  4. #include <linux/string.h>
  5. #include <linux/errno.h>
  6. #include <linux/bug.h>
  7. #include "printk_ringbuffer.h"
  8. /**
  9. * DOC: printk_ringbuffer overview
  10. *
  11. * Data Structure
  12. * --------------
  13. * The printk_ringbuffer is made up of 2 internal ringbuffers:
  14. *
  15. * desc_ring
  16. * A ring of descriptors and their meta data (such as sequence number,
  17. * timestamp, loglevel, etc.) as well as internal state information about
  18. * the record and logical positions specifying where in the other
  19. * ringbuffer the text strings are located.
  20. *
  21. * text_data_ring
  22. * A ring of data blocks. A data block consists of an unsigned long
  23. * integer (ID) that maps to a desc_ring index followed by the text
  24. * string of the record.
  25. *
  26. * The internal state information of a descriptor is the key element to allow
  27. * readers and writers to locklessly synchronize access to the data.
  28. *
  29. * Implementation
  30. * --------------
  31. *
  32. * Descriptor Ring
  33. * ~~~~~~~~~~~~~~~
  34. * The descriptor ring is an array of descriptors. A descriptor contains
  35. * essential meta data to track the data of a printk record using
  36. * blk_lpos structs pointing to associated text data blocks (see
  37. * "Data Rings" below). Each descriptor is assigned an ID that maps
  38. * directly to index values of the descriptor array and has a state. The ID
  39. * and the state are bitwise combined into a single descriptor field named
  40. * @state_var, allowing ID and state to be synchronously and atomically
  41. * updated.
  42. *
  43. * Descriptors have four states:
  44. *
  45. * reserved
  46. * A writer is modifying the record.
  47. *
  48. * committed
  49. * The record and all its data are written. A writer can reopen the
  50. * descriptor (transitioning it back to reserved), but in the committed
  51. * state the data is consistent.
  52. *
  53. * finalized
  54. * The record and all its data are complete and available for reading. A
  55. * writer cannot reopen the descriptor.
  56. *
  57. * reusable
  58. * The record exists, but its text and/or meta data may no longer be
  59. * available.
  60. *
  61. * Querying the @state_var of a record requires providing the ID of the
  62. * descriptor to query. This can yield a possible fifth (pseudo) state:
  63. *
  64. * miss
  65. * The descriptor being queried has an unexpected ID.
  66. *
  67. * The descriptor ring has a @tail_id that contains the ID of the oldest
  68. * descriptor and @head_id that contains the ID of the newest descriptor.
  69. *
  70. * When a new descriptor should be created (and the ring is full), the tail
  71. * descriptor is invalidated by first transitioning to the reusable state and
  72. * then invalidating all tail data blocks up to and including the data blocks
  73. * associated with the tail descriptor (for the text ring). Then
  74. * @tail_id is advanced, followed by advancing @head_id. And finally the
  75. * @state_var of the new descriptor is initialized to the new ID and reserved
  76. * state.
  77. *
  78. * The @tail_id can only be advanced if the new @tail_id would be in the
  79. * committed or reusable queried state. This makes it possible that a valid
  80. * sequence number of the tail is always available.
  81. *
  82. * Descriptor Finalization
  83. * ~~~~~~~~~~~~~~~~~~~~~~~
  84. * When a writer calls the commit function prb_commit(), record data is
  85. * fully stored and is consistent within the ringbuffer. However, a writer can
  86. * reopen that record, claiming exclusive access (as with prb_reserve()), and
  87. * modify that record. When finished, the writer must again commit the record.
  88. *
  89. * In order for a record to be made available to readers (and also become
  90. * recyclable for writers), it must be finalized. A finalized record cannot be
  91. * reopened and can never become "unfinalized". Record finalization can occur
  92. * in three different scenarios:
  93. *
  94. * 1) A writer can simultaneously commit and finalize its record by calling
  95. * prb_final_commit() instead of prb_commit().
  96. *
  97. * 2) When a new record is reserved and the previous record has been
  98. * committed via prb_commit(), that previous record is automatically
  99. * finalized.
  100. *
  101. * 3) When a record is committed via prb_commit() and a newer record
  102. * already exists, the record being committed is automatically finalized.
  103. *
  104. * Data Ring
  105. * ~~~~~~~~~
  106. * The text data ring is a byte array composed of data blocks. Data blocks are
  107. * referenced by blk_lpos structs that point to the logical position of the
  108. * beginning of a data block and the beginning of the next adjacent data
  109. * block. Logical positions are mapped directly to index values of the byte
  110. * array ringbuffer.
  111. *
  112. * Each data block consists of an ID followed by the writer data. The ID is
  113. * the identifier of a descriptor that is associated with the data block. A
  114. * given data block is considered valid if all of the following conditions
  115. * are met:
  116. *
  117. * 1) The descriptor associated with the data block is in the committed
  118. * or finalized queried state.
  119. *
  120. * 2) The blk_lpos struct within the descriptor associated with the data
  121. * block references back to the same data block.
  122. *
  123. * 3) The data block is within the head/tail logical position range.
  124. *
  125. * If the writer data of a data block would extend beyond the end of the
  126. * byte array, only the ID of the data block is stored at the logical
  127. * position and the full data block (ID and writer data) is stored at the
  128. * beginning of the byte array. The referencing blk_lpos will point to the
  129. * ID before the wrap and the next data block will be at the logical
  130. * position adjacent the full data block after the wrap.
  131. *
  132. * Data rings have a @tail_lpos that points to the beginning of the oldest
  133. * data block and a @head_lpos that points to the logical position of the
  134. * next (not yet existing) data block.
  135. *
  136. * When a new data block should be created (and the ring is full), tail data
  137. * blocks will first be invalidated by putting their associated descriptors
  138. * into the reusable state and then pushing the @tail_lpos forward beyond
  139. * them. Then the @head_lpos is pushed forward and is associated with a new
  140. * descriptor. If a data block is not valid, the @tail_lpos cannot be
  141. * advanced beyond it.
  142. *
  143. * Info Array
  144. * ~~~~~~~~~~
  145. * The general meta data of printk records are stored in printk_info structs,
  146. * stored in an array with the same number of elements as the descriptor ring.
  147. * Each info corresponds to the descriptor of the same index in the
  148. * descriptor ring. Info validity is confirmed by evaluating the corresponding
  149. * descriptor before and after loading the info.
  150. *
  151. * Usage
  152. * -----
  153. * Here are some simple examples demonstrating writers and readers. For the
  154. * examples a global ringbuffer (test_rb) is available (which is not the
  155. * actual ringbuffer used by printk)::
  156. *
  157. * DEFINE_PRINTKRB(test_rb, 15, 5);
  158. *
  159. * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of
  160. * 1 MiB (2 ^ (15 + 5)) for text data.
  161. *
  162. * Sample writer code::
  163. *
  164. * const char *textstr = "message text";
  165. * struct prb_reserved_entry e;
  166. * struct printk_record r;
  167. *
  168. * // specify how much to allocate
  169. * prb_rec_init_wr(&r, strlen(textstr) + 1);
  170. *
  171. * if (prb_reserve(&e, &test_rb, &r)) {
  172. * snprintf(r.text_buf, r.text_buf_size, "%s", textstr);
  173. *
  174. * r.info->text_len = strlen(textstr);
  175. * r.info->ts_nsec = local_clock();
  176. * r.info->caller_id = printk_caller_id();
  177. *
  178. * // commit and finalize the record
  179. * prb_final_commit(&e);
  180. * }
  181. *
  182. * Note that additional writer functions are available to extend a record
  183. * after it has been committed but not yet finalized. This can be done as
  184. * long as no new records have been reserved and the caller is the same.
  185. *
  186. * Sample writer code (record extending)::
  187. *
  188. * // alternate rest of previous example
  189. *
  190. * r.info->text_len = strlen(textstr);
  191. * r.info->ts_nsec = local_clock();
  192. * r.info->caller_id = printk_caller_id();
  193. *
  194. * // commit the record (but do not finalize yet)
  195. * prb_commit(&e);
  196. * }
  197. *
  198. * ...
  199. *
  200. * // specify additional 5 bytes text space to extend
  201. * prb_rec_init_wr(&r, 5);
  202. *
  203. * // try to extend, but only if it does not exceed 32 bytes
  204. * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) {
  205. * snprintf(&r.text_buf[r.info->text_len],
  206. * r.text_buf_size - r.info->text_len, "hello");
  207. *
  208. * r.info->text_len += 5;
  209. *
  210. * // commit and finalize the record
  211. * prb_final_commit(&e);
  212. * }
  213. *
  214. * Sample reader code::
  215. *
  216. * struct printk_info info;
  217. * struct printk_record r;
  218. * char text_buf[32];
  219. * u64 seq;
  220. *
  221. * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf));
  222. *
  223. * prb_for_each_record(0, &test_rb, &seq, &r) {
  224. * if (info.seq != seq)
  225. * pr_warn("lost %llu records\n", info.seq - seq);
  226. *
  227. * if (info.text_len > r.text_buf_size) {
  228. * pr_warn("record %llu text truncated\n", info.seq);
  229. * text_buf[r.text_buf_size - 1] = 0;
  230. * }
  231. *
  232. * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec,
  233. * &text_buf[0]);
  234. * }
  235. *
  236. * Note that additional less convenient reader functions are available to
  237. * allow complex record access.
  238. *
  239. * ABA Issues
  240. * ~~~~~~~~~~
  241. * To help avoid ABA issues, descriptors are referenced by IDs (array index
  242. * values combined with tagged bits counting array wraps) and data blocks are
  243. * referenced by logical positions (array index values combined with tagged
  244. * bits counting array wraps). However, on 32-bit systems the number of
  245. * tagged bits is relatively small such that an ABA incident is (at least
  246. * theoretically) possible. For example, if 4 million maximally sized (1KiB)
  247. * printk messages were to occur in NMI context on a 32-bit system, the
  248. * interrupted context would not be able to recognize that the 32-bit integer
  249. * completely wrapped and thus represents a different data block than the one
  250. * the interrupted context expects.
  251. *
  252. * To help combat this possibility, additional state checking is performed
  253. * (such as using cmpxchg() even though set() would suffice). These extra
  254. * checks are commented as such and will hopefully catch any ABA issue that
  255. * a 32-bit system might experience.
  256. *
  257. * Memory Barriers
  258. * ~~~~~~~~~~~~~~~
  259. * Multiple memory barriers are used. To simplify proving correctness and
  260. * generating litmus tests, lines of code related to memory barriers
  261. * (loads, stores, and the associated memory barriers) are labeled::
  262. *
  263. * LMM(function:letter)
  264. *
  265. * Comments reference the labels using only the "function:letter" part.
  266. *
  267. * The memory barrier pairs and their ordering are:
  268. *
  269. * desc_reserve:D / desc_reserve:B
  270. * push descriptor tail (id), then push descriptor head (id)
  271. *
  272. * desc_reserve:D / data_push_tail:B
  273. * push data tail (lpos), then set new descriptor reserved (state)
  274. *
  275. * desc_reserve:D / desc_push_tail:C
  276. * push descriptor tail (id), then set new descriptor reserved (state)
  277. *
  278. * desc_reserve:D / prb_first_seq:C
  279. * push descriptor tail (id), then set new descriptor reserved (state)
  280. *
  281. * desc_reserve:F / desc_read:D
  282. * set new descriptor id and reserved (state), then allow writer changes
  283. *
  284. * data_alloc:A (or data_realloc:A) / desc_read:D
  285. * set old descriptor reusable (state), then modify new data block area
  286. *
  287. * data_alloc:A (or data_realloc:A) / data_push_tail:B
  288. * push data tail (lpos), then modify new data block area
  289. *
  290. * _prb_commit:B / desc_read:B
  291. * store writer changes, then set new descriptor committed (state)
  292. *
  293. * desc_reopen_last:A / _prb_commit:B
  294. * set descriptor reserved (state), then read descriptor data
  295. *
  296. * _prb_commit:B / desc_reserve:D
  297. * set new descriptor committed (state), then check descriptor head (id)
  298. *
  299. * data_push_tail:D / data_push_tail:A
  300. * set descriptor reusable (state), then push data tail (lpos)
  301. *
  302. * desc_push_tail:B / desc_reserve:D
  303. * set descriptor reusable (state), then push descriptor tail (id)
  304. */
  /*
   * Size of a data ring's byte array and the matching index mask. The size
   * comes from the ring's @size_bits via _DATA_SIZE() (defined outside this
   * section); using "size - 1" as a mask assumes the size is a power of 2.
   */
  305. #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits)
  306. #define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1)
  /*
   * Number of descriptors in a descriptor ring and the matching index mask,
   * derived from the ring's @count_bits via _DESCS_COUNT() in the same way.
   */
  307. #define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits)
  308. #define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1)
  309. /* Determine the data array index from a logical position. */
  310. #define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring))
  311. /* Determine the desc array index from an ID or sequence number. */
  312. #define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring))
  313. /* Determine how many times the data array has wrapped. */
  314. #define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits)
  315. /*
   * Determine if a logical position refers to a data-less block. Bit 0 of
   * the position is the marker; a block is data-less only when both its
   * @begin and @next positions carry it.
   */
  316. #define LPOS_DATALESS(lpos) ((lpos) & 1UL)
  317. #define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \
  318. LPOS_DATALESS((blk)->next))
  319. /* Get the logical position at index 0 of the current wrap. */
  320. #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \
  321. ((lpos) & ~DATA_SIZE_MASK(data_ring))
  322. /*
   * Get the ID for the same index of the previous wrap as the given ID:
   * step back by one full descriptor count and normalize with DESC_ID().
   */
  323. #define DESC_ID_PREV_WRAP(desc_ring, id) \
  324. DESC_ID((id) - DESCS_COUNT(desc_ring))
  325. /*
  326. * A data block: mapped directly to the beginning of the data block area
  327. * specified as a logical position within the data ring.
  328. *
  329. * @id: the ID of the associated descriptor
  330. * @data: the writer data
  331. *
  332. * Note that the size of a data block is only known by its associated
  333. * descriptor.
   *
   * The struct is never allocated on its own: to_block() casts an address
   * inside the data array to this type.
  334. */
  335. struct prb_data_block {
  /* Descriptor ID stored at the very start of the block. */
  336. unsigned long id;
  /* Flexible array member: the writer data follows @id directly. */
  337. char data[];
  338. };
  339. /*
  340. * Return the descriptor associated with @n. @n can be either a
  341. * descriptor ID or a sequence number.
  342. */
  343. static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n)
  344. {
  345. return &desc_ring->descs[DESC_INDEX(desc_ring, n)];
  346. }
  347. /*
  348. * Return the printk_info associated with @n. @n can be either a
  349. * descriptor ID or a sequence number.
  350. */
  351. static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n)
  352. {
  353. return &desc_ring->infos[DESC_INDEX(desc_ring, n)];
  354. }
  355. static struct prb_data_block *to_block(struct prb_data_ring *data_ring,
  356. unsigned long begin_lpos)
  357. {
  358. return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)];
  359. }
  360. /*
  361. * Increase the data size to account for data block meta data plus any
  362. * padding so that the adjacent data block is aligned on the ID size.
  363. */
  364. static unsigned int to_blk_size(unsigned int size)
  365. {
  366. struct prb_data_block *db = NULL;
  367. size += sizeof(*db);
  368. size = ALIGN(size, sizeof(db->id));
  369. return size;
  370. }
  371. /*
  372. * Sanity checker for reserve size. The ringbuffer code assumes that a data
  373. * block does not exceed the maximum possible size that could fit within the
  374. * ringbuffer. This function provides that basic size check so that the
  375. * assumption is safe.
  376. */
  377. static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
  378. {
  379. struct prb_data_block *db = NULL;
  380. if (size == 0)
  381. return true;
  382. /*
  383. * Ensure the alignment padded size could possibly fit in the data
  384. * array. The largest possible data block must still leave room for
  385. * at least the ID of the next block.
  386. */
  387. size = to_blk_size(size);
  388. if (size > DATA_SIZE(data_ring) - sizeof(db->id))
  389. return false;
  390. return true;
  391. }
  392. /* Query the state of a descriptor. */
  393. static enum desc_state get_desc_state(unsigned long id,
  394. unsigned long state_val)
  395. {
  396. if (id != DESC_ID(state_val))
  397. return desc_miss;
  398. return DESC_STATE(state_val);
  399. }
  400. /*
  401. * Get a copy of a specified descriptor and return its queried state. If the
  402. * descriptor is in an inconsistent state (miss or reserved), the caller can
  403. * only expect the descriptor's @state_var field to be valid.
  404. *
  405. * The sequence number and caller_id can be optionally retrieved. Like all
  406. * non-state_var data, they are only valid if the descriptor is in a
  407. * consistent state.
  408. */
  409. static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
  410. unsigned long id, struct prb_desc *desc_out,
  411. u64 *seq_out, u32 *caller_id_out)
  412. {
  413. struct printk_info *info = to_info(desc_ring, id);
  414. struct prb_desc *desc = to_desc(desc_ring, id);
  415. atomic_long_t *state_var = &desc->state_var;
  416. enum desc_state d_state;
  417. unsigned long state_val;
  418. /* Check the descriptor state. */
  419. state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */
  420. d_state = get_desc_state(id, state_val);
  421. if (d_state == desc_miss || d_state == desc_reserved) {
  422. /*
  423. * The descriptor is in an inconsistent state. Set at least
  424. * @state_var so that the caller can see the details of
  425. * the inconsistent state.
  426. */
  427. goto out;
  428. }
  429. /*
  430. * Guarantee the state is loaded before copying the descriptor
  431. * content. This avoids copying obsolete descriptor content that might
  432. * not apply to the descriptor state. This pairs with _prb_commit:B.
  433. *
  434. * Memory barrier involvement:
  435. *
  436. * If desc_read:A reads from _prb_commit:B, then desc_read:C reads
  437. * from _prb_commit:A.
  438. *
  439. * Relies on:
  440. *
  441. * WMB from _prb_commit:A to _prb_commit:B
  442. * matching
  443. * RMB from desc_read:A to desc_read:C
  444. */
  445. smp_rmb(); /* LMM(desc_read:B) */
  446. /*
  447. * Copy the descriptor data. The data is not valid until the
  448. * state has been re-checked. A memcpy() for all of @desc
  449. * cannot be used because of the atomic_t @state_var field.
  450. */
  451. if (desc_out) {
  452. memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
  453. sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
  454. }
  455. if (seq_out)
  456. *seq_out = info->seq; /* also part of desc_read:C */
  457. if (caller_id_out)
  458. *caller_id_out = info->caller_id; /* also part of desc_read:C */
  459. /*
  460. * 1. Guarantee the descriptor content is loaded before re-checking
  461. * the state. This avoids reading an obsolete descriptor state
  462. * that may not apply to the copied content. This pairs with
  463. * desc_reserve:F.
  464. *
  465. * Memory barrier involvement:
  466. *
  467. * If desc_read:C reads from desc_reserve:G, then desc_read:E
  468. * reads from desc_reserve:F.
  469. *
  470. * Relies on:
  471. *
  472. * WMB from desc_reserve:F to desc_reserve:G
  473. * matching
  474. * RMB from desc_read:C to desc_read:E
  475. *
  476. * 2. Guarantee the record data is loaded before re-checking the
  477. * state. This avoids reading an obsolete descriptor state that may
  478. * not apply to the copied data. This pairs with data_alloc:A and
  479. * data_realloc:A.
  480. *
  481. * Memory barrier involvement:
  482. *
  483. * If copy_data:A reads from data_alloc:B, then desc_read:E
  484. * reads from desc_make_reusable:A.
  485. *
  486. * Relies on:
  487. *
  488. * MB from desc_make_reusable:A to data_alloc:B
  489. * matching
  490. * RMB from desc_read:C to desc_read:E
  491. *
  492. * Note: desc_make_reusable:A and data_alloc:B can be different
  493. * CPUs. However, the data_alloc:B CPU (which performs the
  494. * full memory barrier) must have previously seen
  495. * desc_make_reusable:A.
  496. */
  497. smp_rmb(); /* LMM(desc_read:D) */
  498. /*
  499. * The data has been copied. Return the current descriptor state,
  500. * which may have changed since the load above.
  501. */
  502. state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */
  503. d_state = get_desc_state(id, state_val);
  504. out:
  505. if (desc_out)
  506. atomic_long_set(&desc_out->state_var, state_val);
  507. return d_state;
  508. }
  509. /*
  510. * Take a specified descriptor out of the finalized state by attempting
  511. * the transition from finalized to reusable. Either this context or some
  512. * other context will have been successful.
  513. */
  514. static void desc_make_reusable(struct prb_desc_ring *desc_ring,
  515. unsigned long id)
  516. {
  517. unsigned long val_finalized = DESC_SV(id, desc_finalized);
  518. unsigned long val_reusable = DESC_SV(id, desc_reusable);
  519. struct prb_desc *desc = to_desc(desc_ring, id);
  520. atomic_long_t *state_var = &desc->state_var;
  521. atomic_long_cmpxchg_relaxed(state_var, val_finalized,
  522. val_reusable); /* LMM(desc_make_reusable:A) */
  523. }
  524. /*
  525. * Given the text data ring, put the associated descriptor of each
  526. * data block from @lpos_begin until @lpos_end into the reusable state.
  527. *
  528. * If there is any problem making the associated descriptor reusable, either
  529. * the descriptor has not yet been finalized or another writer context has
  530. * already pushed the tail lpos past the problematic data block. Regardless,
  531. * on error the caller can re-load the tail lpos to determine the situation.
  532. */
  533. static bool data_make_reusable(struct printk_ringbuffer *rb,
  534. unsigned long lpos_begin,
  535. unsigned long lpos_end,
  536. unsigned long *lpos_out)
  537. {
  538. struct prb_data_ring *data_ring = &rb->text_data_ring;
  539. struct prb_desc_ring *desc_ring = &rb->desc_ring;
  540. struct prb_data_block *blk;
  541. enum desc_state d_state;
  542. struct prb_desc desc;
  543. struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos;
  544. unsigned long id;
  545. /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */
  546. while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) {
  547. blk = to_block(data_ring, lpos_begin);
  548. /*
  549. * Load the block ID from the data block. This is a data race
  550. * against a writer that may have newly reserved this data
  551. * area. If the loaded value matches a valid descriptor ID,
  552. * the blk_lpos of that descriptor will be checked to make
  553. * sure it points back to this data block. If the check fails,
  554. * the data area has been recycled by another writer.
  555. */
  556. id = blk->id; /* LMM(data_make_reusable:A) */
  557. d_state = desc_read(desc_ring, id, &desc,
  558. NULL, NULL); /* LMM(data_make_reusable:B) */
  559. switch (d_state) {
  560. case desc_miss:
  561. case desc_reserved:
  562. case desc_committed:
  563. return false;
  564. case desc_finalized:
  565. /*
  566. * This data block is invalid if the descriptor
  567. * does not point back to it.
  568. */
  569. if (blk_lpos->begin != lpos_begin)
  570. return false;
  571. desc_make_reusable(desc_ring, id);
  572. break;
  573. case desc_reusable:
  574. /*
  575. * This data block is invalid if the descriptor
  576. * does not point back to it.
  577. */
  578. if (blk_lpos->begin != lpos_begin)
  579. return false;
  580. break;
  581. }
  582. /* Advance @lpos_begin to the next data block. */
  583. lpos_begin = blk_lpos->next;
  584. }
  585. *lpos_out = lpos_begin;
  586. return true;
  587. }
  588. /*
  589. * Advance the data ring tail to at least @lpos. This function puts
  590. * descriptors into the reusable state if the tail is pushed beyond
  591. * their associated data block.
  592. */
  593. static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos)
  594. {
  595. struct prb_data_ring *data_ring = &rb->text_data_ring;
  596. unsigned long tail_lpos_new;
  597. unsigned long tail_lpos;
  598. unsigned long next_lpos;
  599. /* If @lpos is from a data-less block, there is nothing to do. */
  600. if (LPOS_DATALESS(lpos))
  601. return true;
  602. /*
  603. * Any descriptor states that have transitioned to reusable due to the
  604. * data tail being pushed to this loaded value will be visible to this
  605. * CPU. This pairs with data_push_tail:D.
  606. *
  607. * Memory barrier involvement:
  608. *
  609. * If data_push_tail:A reads from data_push_tail:D, then this CPU can
  610. * see desc_make_reusable:A.
  611. *
  612. * Relies on:
  613. *
  614. * MB from desc_make_reusable:A to data_push_tail:D
  615. * matches
  616. * READFROM from data_push_tail:D to data_push_tail:A
  617. * thus
  618. * READFROM from desc_make_reusable:A to this CPU
  619. */
  620. tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */
  621. /*
  622. * Loop until the tail lpos is at or beyond @lpos. This condition
  623. * may already be satisfied, resulting in no full memory barrier
  624. * from data_push_tail:D being performed. However, since this CPU
  625. * sees the new tail lpos, any descriptor states that transitioned to
  626. * the reusable state must already be visible.
  627. */
  628. while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) {
  629. /*
  630. * Make all descriptors reusable that are associated with
  631. * data blocks before @lpos.
  632. */
  633. if (!data_make_reusable(rb, tail_lpos, lpos, &next_lpos)) {
  634. /*
  635. * 1. Guarantee the block ID loaded in
  636. * data_make_reusable() is performed before
  637. * reloading the tail lpos. The failed
  638. * data_make_reusable() may be due to a newly
  639. * recycled data area causing the tail lpos to
  640. * have been previously pushed. This pairs with
  641. * data_alloc:A and data_realloc:A.
  642. *
  643. * Memory barrier involvement:
  644. *
  645. * If data_make_reusable:A reads from data_alloc:B,
  646. * then data_push_tail:C reads from
  647. * data_push_tail:D.
  648. *
  649. * Relies on:
  650. *
  651. * MB from data_push_tail:D to data_alloc:B
  652. * matching
  653. * RMB from data_make_reusable:A to
  654. * data_push_tail:C
  655. *
  656. * Note: data_push_tail:D and data_alloc:B can be
  657. * different CPUs. However, the data_alloc:B
  658. * CPU (which performs the full memory
  659. * barrier) must have previously seen
  660. * data_push_tail:D.
  661. *
  662. * 2. Guarantee the descriptor state loaded in
  663. * data_make_reusable() is performed before
  664. * reloading the tail lpos. The failed
  665. * data_make_reusable() may be due to a newly
  666. * recycled descriptor causing the tail lpos to
  667. * have been previously pushed. This pairs with
  668. * desc_reserve:D.
  669. *
  670. * Memory barrier involvement:
  671. *
  672. * If data_make_reusable:B reads from
  673. * desc_reserve:F, then data_push_tail:C reads
  674. * from data_push_tail:D.
  675. *
  676. * Relies on:
  677. *
  678. * MB from data_push_tail:D to desc_reserve:F
  679. * matching
  680. * RMB from data_make_reusable:B to
  681. * data_push_tail:C
  682. *
  683. * Note: data_push_tail:D and desc_reserve:F can
  684. * be different CPUs. However, the
  685. * desc_reserve:F CPU (which performs the
  686. * full memory barrier) must have previously
  687. * seen data_push_tail:D.
  688. */
  689. smp_rmb(); /* LMM(data_push_tail:B) */
  690. tail_lpos_new = atomic_long_read(&data_ring->tail_lpos
  691. ); /* LMM(data_push_tail:C) */
  692. if (tail_lpos_new == tail_lpos)
  693. return false;
  694. /* Another CPU pushed the tail. Try again. */
  695. tail_lpos = tail_lpos_new;
  696. continue;
  697. }
  698. /*
  699. * Guarantee any descriptor states that have transitioned to
  700. * reusable are stored before pushing the tail lpos. A full
  701. * memory barrier is needed since other CPUs may have made
  702. * the descriptor states reusable. This pairs with
  703. * data_push_tail:A.
  704. */
  705. if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos,
  706. next_lpos)) { /* LMM(data_push_tail:D) */
  707. break;
  708. }
  709. }
  710. return true;
  711. }
  712. /*
  713. * Advance the desc ring tail. This function advances the tail by one
  714. * descriptor, thus invalidating the oldest descriptor. Before advancing
  715. * the tail, the tail descriptor is made reusable and all data blocks up to
  716. * and including the descriptor's data block are invalidated (i.e. the data
  717. * ring tail is pushed past the data block of the descriptor being made
  718. * reusable).
  719. */
  720. static bool desc_push_tail(struct printk_ringbuffer *rb,
  721. unsigned long tail_id)
  722. {
  723. struct prb_desc_ring *desc_ring = &rb->desc_ring;
  724. enum desc_state d_state;
  725. struct prb_desc desc;
  726. d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL);
  727. switch (d_state) {
  728. case desc_miss:
  729. /*
  730. * If the ID is exactly 1 wrap behind the expected, it is
  731. * in the process of being reserved by another writer and
  732. * must be considered reserved.
  733. */
  734. if (DESC_ID(atomic_long_read(&desc.state_var)) ==
  735. DESC_ID_PREV_WRAP(desc_ring, tail_id)) {
  736. return false;
  737. }
  738. /*
  739. * The ID has changed. Another writer must have pushed the
  740. * tail and recycled the descriptor already. Success is
  741. * returned because the caller is only interested in the
  742. * specified tail being pushed, which it was.
  743. */
  744. return true;
  745. case desc_reserved:
  746. case desc_committed:
  747. return false;
  748. case desc_finalized:
  749. desc_make_reusable(desc_ring, tail_id);
  750. break;
  751. case desc_reusable:
  752. break;
  753. }
  754. /*
  755. * Data blocks must be invalidated before their associated
  756. * descriptor can be made available for recycling. Invalidating
  757. * them later is not possible because there is no way to trust
  758. * data blocks once their associated descriptor is gone.
  759. */
  760. if (!data_push_tail(rb, desc.text_blk_lpos.next))
  761. return false;
  762. /*
  763. * Check the next descriptor after @tail_id before pushing the tail
  764. * to it because the tail must always be in a finalized or reusable
  765. * state. The implementation of prb_first_seq() relies on this.
  766. *
  767. * A successful read implies that the next descriptor is less than or
  768. * equal to @head_id so there is no risk of pushing the tail past the
  769. * head.
  770. */
  771. d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc,
  772. NULL, NULL); /* LMM(desc_push_tail:A) */
  773. if (d_state == desc_finalized || d_state == desc_reusable) {
  774. /*
  775. * Guarantee any descriptor states that have transitioned to
  776. * reusable are stored before pushing the tail ID. This allows
  777. * verifying the recycled descriptor state. A full memory
  778. * barrier is needed since other CPUs may have made the
  779. * descriptor states reusable. This pairs with desc_reserve:D.
  780. */
  781. atomic_long_cmpxchg(&desc_ring->tail_id, tail_id,
  782. DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */
  783. } else {
  784. /*
  785. * Guarantee the last state load from desc_read() is before
  786. * reloading @tail_id in order to see a new tail ID in the
  787. * case that the descriptor has been recycled. This pairs
  788. * with desc_reserve:D.
  789. *
  790. * Memory barrier involvement:
  791. *
  792. * If desc_push_tail:A reads from desc_reserve:F, then
  793. * desc_push_tail:D reads from desc_push_tail:B.
  794. *
  795. * Relies on:
  796. *
  797. * MB from desc_push_tail:B to desc_reserve:F
  798. * matching
  799. * RMB from desc_push_tail:A to desc_push_tail:D
  800. *
  801. * Note: desc_push_tail:B and desc_reserve:F can be different
  802. * CPUs. However, the desc_reserve:F CPU (which performs
  803. * the full memory barrier) must have previously seen
  804. * desc_push_tail:B.
  805. */
  806. smp_rmb(); /* LMM(desc_push_tail:C) */
  807. /*
  808. * Re-check the tail ID. The descriptor following @tail_id is
  809. * not in an allowed tail state. But if the tail has since
  810. * been moved by another CPU, then it does not matter.
  811. */
  812. if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */
  813. return false;
  814. }
  815. return true;
  816. }
  817. /* Reserve a new descriptor, invalidating the oldest if necessary. */
  818. static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
  819. {
  820. struct prb_desc_ring *desc_ring = &rb->desc_ring;
  821. unsigned long prev_state_val;
  822. unsigned long id_prev_wrap;
  823. struct prb_desc *desc;
  824. unsigned long head_id;
  825. unsigned long id;
  826. head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */
  827. do {
  828. id = DESC_ID(head_id + 1);
  829. id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id);
  830. /*
  831. * Guarantee the head ID is read before reading the tail ID.
  832. * Since the tail ID is updated before the head ID, this
  833. * guarantees that @id_prev_wrap is never ahead of the tail
  834. * ID. This pairs with desc_reserve:D.
  835. *
  836. * Memory barrier involvement:
  837. *
  838. * If desc_reserve:A reads from desc_reserve:D, then
  839. * desc_reserve:C reads from desc_push_tail:B.
  840. *
  841. * Relies on:
  842. *
  843. * MB from desc_push_tail:B to desc_reserve:D
  844. * matching
  845. * RMB from desc_reserve:A to desc_reserve:C
  846. *
  847. * Note: desc_push_tail:B and desc_reserve:D can be different
  848. * CPUs. However, the desc_reserve:D CPU (which performs
  849. * the full memory barrier) must have previously seen
  850. * desc_push_tail:B.
  851. */
  852. smp_rmb(); /* LMM(desc_reserve:B) */
  853. if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id
  854. )) { /* LMM(desc_reserve:C) */
  855. /*
  856. * Make space for the new descriptor by
  857. * advancing the tail.
  858. */
  859. if (!desc_push_tail(rb, id_prev_wrap))
  860. return false;
  861. }
  862. /*
  863. * 1. Guarantee the tail ID is read before validating the
  864. * recycled descriptor state. A read memory barrier is
  865. * sufficient for this. This pairs with desc_push_tail:B.
  866. *
  867. * Memory barrier involvement:
  868. *
  869. * If desc_reserve:C reads from desc_push_tail:B, then
  870. * desc_reserve:E reads from desc_make_reusable:A.
  871. *
  872. * Relies on:
  873. *
  874. * MB from desc_make_reusable:A to desc_push_tail:B
  875. * matching
  876. * RMB from desc_reserve:C to desc_reserve:E
  877. *
  878. * Note: desc_make_reusable:A and desc_push_tail:B can be
  879. * different CPUs. However, the desc_push_tail:B CPU
  880. * (which performs the full memory barrier) must have
  881. * previously seen desc_make_reusable:A.
  882. *
  883. * 2. Guarantee the tail ID is stored before storing the head
  884. * ID. This pairs with desc_reserve:B.
  885. *
  886. * 3. Guarantee any data ring tail changes are stored before
  887. * recycling the descriptor. Data ring tail changes can
  888. * happen via desc_push_tail()->data_push_tail(). A full
  889. * memory barrier is needed since another CPU may have
  890. * pushed the data ring tails. This pairs with
  891. * data_push_tail:B.
  892. *
  893. * 4. Guarantee a new tail ID is stored before recycling the
  894. * descriptor. A full memory barrier is needed since
  895. * another CPU may have pushed the tail ID. This pairs
  896. * with desc_push_tail:C and this also pairs with
  897. * prb_first_seq:C.
  898. *
  899. * 5. Guarantee the head ID is stored before trying to
  900. * finalize the previous descriptor. This pairs with
  901. * _prb_commit:B.
  902. */
  903. } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id,
  904. id)); /* LMM(desc_reserve:D) */
  905. desc = to_desc(desc_ring, id);
  906. /*
  907. * If the descriptor has been recycled, verify the old state val.
  908. * See "ABA Issues" about why this verification is performed.
  909. */
  910. prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */
  911. if (prev_state_val &&
  912. get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) {
  913. WARN_ON_ONCE(1);
  914. return false;
  915. }
  916. /*
  917. * Assign the descriptor a new ID and set its state to reserved.
  918. * See "ABA Issues" about why cmpxchg() instead of set() is used.
  919. *
  920. * Guarantee the new descriptor ID and state is stored before making
  921. * any other changes. A write memory barrier is sufficient for this.
  922. * This pairs with desc_read:D.
  923. */
  924. if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val,
  925. DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */
  926. WARN_ON_ONCE(1);
  927. return false;
  928. }
  929. /* Now data in @desc can be modified: LMM(desc_reserve:G) */
  930. *id_out = id;
  931. return true;
  932. }
  /*
   * Compute where a data block of @size bytes that begins at @lpos ends,
   * i.e. the lpos directly following the block.
   */
  static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
                                     unsigned long lpos, unsigned int size)
  {
      unsigned long end_lpos = lpos + size;

      /*
       * If begin and end fall into different wraps, the block wraps.
       * A wrapping data block keeps its data at the start of the ring,
       * so the end is @size past the start of the wrap that the
       * non-wrapping end would have landed in.
       */
      if (DATA_WRAPS(data_ring, lpos) != DATA_WRAPS(data_ring, end_lpos))
          end_lpos = DATA_THIS_WRAP_START_LPOS(data_ring, end_lpos) + size;

      return end_lpos;
  }
  947. /*
  948. * Allocate a new data block, invalidating the oldest data block(s)
  949. * if necessary. This function also associates the data block with
  950. * a specified descriptor.
  951. */
  952. static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,
  953. struct prb_data_blk_lpos *blk_lpos, unsigned long id)
  954. {
  955. struct prb_data_ring *data_ring = &rb->text_data_ring;
  956. struct prb_data_block *blk;
  957. unsigned long begin_lpos;
  958. unsigned long next_lpos;
  959. if (size == 0) {
  960. /* Specify a data-less block. */
  961. blk_lpos->begin = NO_LPOS;
  962. blk_lpos->next = NO_LPOS;
  963. return NULL;
  964. }
  965. size = to_blk_size(size);
  966. begin_lpos = atomic_long_read(&data_ring->head_lpos);
  967. do {
  968. next_lpos = get_next_lpos(data_ring, begin_lpos, size);
  969. if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) {
  970. /* Failed to allocate, specify a data-less block. */
  971. blk_lpos->begin = FAILED_LPOS;
  972. blk_lpos->next = FAILED_LPOS;
  973. return NULL;
  974. }
  975. /*
  976. * 1. Guarantee any descriptor states that have transitioned
  977. * to reusable are stored before modifying the newly
  978. * allocated data area. A full memory barrier is needed
  979. * since other CPUs may have made the descriptor states
  980. * reusable. See data_push_tail:A about why the reusable
  981. * states are visible. This pairs with desc_read:D.
  982. *
  983. * 2. Guarantee any updated tail lpos is stored before
  984. * modifying the newly allocated data area. Another CPU may
  985. * be in data_make_reusable() and is reading a block ID
  986. * from this area. data_make_reusable() can handle reading
  987. * a garbage block ID value, but then it must be able to
  988. * load a new tail lpos. A full memory barrier is needed
  989. * since other CPUs may have updated the tail lpos. This
  990. * pairs with data_push_tail:B.
  991. */
  992. } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos,
  993. next_lpos)); /* LMM(data_alloc:A) */
  994. blk = to_block(data_ring, begin_lpos);
  995. blk->id = id; /* LMM(data_alloc:B) */
  996. if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) {
  997. /* Wrapping data blocks store their data at the beginning. */
  998. blk = to_block(data_ring, 0);
  999. /*
  1000. * Store the ID on the wrapped block for consistency.
  1001. * The printk_ringbuffer does not actually use it.
  1002. */
  1003. blk->id = id;
  1004. }
  1005. blk_lpos->begin = begin_lpos;
  1006. blk_lpos->next = next_lpos;
  1007. return &blk->data[0];
  1008. }
  1009. /*
  1010. * Try to resize an existing data block associated with the descriptor
  1011. * specified by @id. If the resized data block should become wrapped, it
  1012. * copies the old data to the new data block. If @size yields a data block
  1013. * with the same or less size, the data block is left as is.
  1014. *
  1015. * Fail if this is not the last allocated data block or if there is not
  1016. * enough space or it is not possible make enough space.
  1017. *
  1018. * Return a pointer to the beginning of the entire data buffer or NULL on
  1019. * failure.
  1020. */
  1021. static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size,
  1022. struct prb_data_blk_lpos *blk_lpos, unsigned long id)
  1023. {
  1024. struct prb_data_ring *data_ring = &rb->text_data_ring;
  1025. struct prb_data_block *blk;
  1026. unsigned long head_lpos;
  1027. unsigned long next_lpos;
  1028. bool wrapped;
  1029. /* Reallocation only works if @blk_lpos is the newest data block. */
  1030. head_lpos = atomic_long_read(&data_ring->head_lpos);
  1031. if (head_lpos != blk_lpos->next)
  1032. return NULL;
  1033. /* Keep track if @blk_lpos was a wrapping data block. */
  1034. wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next));
  1035. size = to_blk_size(size);
  1036. next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size);
  1037. /* If the data block does not increase, there is nothing to do. */
  1038. if (head_lpos - next_lpos < DATA_SIZE(data_ring)) {
  1039. if (wrapped)
  1040. blk = to_block(data_ring, 0);
  1041. else
  1042. blk = to_block(data_ring, blk_lpos->begin);
  1043. return &blk->data[0];
  1044. }
  1045. if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring)))
  1046. return NULL;
  1047. /* The memory barrier involvement is the same as data_alloc:A. */
  1048. if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos,
  1049. next_lpos)) { /* LMM(data_realloc:A) */
  1050. return NULL;
  1051. }
  1052. blk = to_block(data_ring, blk_lpos->begin);
  1053. if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) {
  1054. struct prb_data_block *old_blk = blk;
  1055. /* Wrapping data blocks store their data at the beginning. */
  1056. blk = to_block(data_ring, 0);
  1057. /*
  1058. * Store the ID on the wrapped block for consistency.
  1059. * The printk_ringbuffer does not actually use it.
  1060. */
  1061. blk->id = id;
  1062. if (!wrapped) {
  1063. /*
  1064. * Since the allocated space is now in the newly
  1065. * created wrapping data block, copy the content
  1066. * from the old data block.
  1067. */
  1068. memcpy(&blk->data[0], &old_blk->data[0],
  1069. (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id));
  1070. }
  1071. }
  1072. blk_lpos->next = next_lpos;
  1073. return &blk->data[0];
  1074. }
  1075. /* Return the number of bytes used by a data block. */
  1076. static unsigned int space_used(struct prb_data_ring *data_ring,
  1077. struct prb_data_blk_lpos *blk_lpos)
  1078. {
  1079. /* Data-less blocks take no space. */
  1080. if (BLK_DATALESS(blk_lpos))
  1081. return 0;
  1082. if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) {
  1083. /* Data block does not wrap. */
  1084. return (DATA_INDEX(data_ring, blk_lpos->next) -
  1085. DATA_INDEX(data_ring, blk_lpos->begin));
  1086. }
  1087. /*
  1088. * For wrapping data blocks, the trailing (wasted) space is
  1089. * also counted.
  1090. */
  1091. return (DATA_INDEX(data_ring, blk_lpos->next) +
  1092. DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin));
  1093. }
  1094. /*
  1095. * Given @blk_lpos, return a pointer to the writer data from the data block
  1096. * and calculate the size of the data part. A NULL pointer is returned if
  1097. * @blk_lpos specifies values that could never be legal.
  1098. *
  1099. * This function (used by readers) performs strict validation on the lpos
  1100. * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
  1101. * triggered if an internal error is detected.
  1102. */
  1103. static const char *get_data(struct prb_data_ring *data_ring,
  1104. struct prb_data_blk_lpos *blk_lpos,
  1105. unsigned int *data_size)
  1106. {
  1107. struct prb_data_block *db;
  1108. /* Data-less data block description. */
  1109. if (BLK_DATALESS(blk_lpos)) {
  1110. if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) {
  1111. *data_size = 0;
  1112. return "";
  1113. }
  1114. return NULL;
  1115. }
  1116. /* Regular data block: @begin less than @next and in same wrap. */
  1117. if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) &&
  1118. blk_lpos->begin < blk_lpos->next) {
  1119. db = to_block(data_ring, blk_lpos->begin);
  1120. *data_size = blk_lpos->next - blk_lpos->begin;
  1121. /* Wrapping data block: @begin is one wrap behind @next. */
  1122. } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) ==
  1123. DATA_WRAPS(data_ring, blk_lpos->next)) {
  1124. db = to_block(data_ring, 0);
  1125. *data_size = DATA_INDEX(data_ring, blk_lpos->next);
  1126. /* Illegal block description. */
  1127. } else {
  1128. WARN_ON_ONCE(1);
  1129. return NULL;
  1130. }
  1131. /* A valid data block will always be aligned to the ID size. */
  1132. if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) ||
  1133. WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) {
  1134. return NULL;
  1135. }
  1136. /* A valid data block will always have at least an ID. */
  1137. if (WARN_ON_ONCE(*data_size < sizeof(db->id)))
  1138. return NULL;
  1139. /* Subtract block ID space from size to reflect data size. */
  1140. *data_size -= sizeof(db->id);
  1141. return &db->data[0];
  1142. }
  1143. /*
  1144. * Attempt to transition the newest descriptor from committed back to reserved
  1145. * so that the record can be modified by a writer again. This is only possible
  1146. * if the descriptor is not yet finalized and the provided @caller_id matches.
  1147. */
  1148. static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
  1149. u32 caller_id, unsigned long *id_out)
  1150. {
  1151. unsigned long prev_state_val;
  1152. enum desc_state d_state;
  1153. struct prb_desc desc;
  1154. struct prb_desc *d;
  1155. unsigned long id;
  1156. u32 cid;
  1157. id = atomic_long_read(&desc_ring->head_id);
  1158. /*
  1159. * To reduce unnecessarily reopening, first check if the descriptor
  1160. * state and caller ID are correct.
  1161. */
  1162. d_state = desc_read(desc_ring, id, &desc, NULL, &cid);
  1163. if (d_state != desc_committed || cid != caller_id)
  1164. return NULL;
  1165. d = to_desc(desc_ring, id);
  1166. prev_state_val = DESC_SV(id, desc_committed);
  1167. /*
  1168. * Guarantee the reserved state is stored before reading any
  1169. * record data. A full memory barrier is needed because @state_var
  1170. * modification is followed by reading. This pairs with _prb_commit:B.
  1171. *
  1172. * Memory barrier involvement:
  1173. *
  1174. * If desc_reopen_last:A reads from _prb_commit:B, then
  1175. * prb_reserve_in_last:A reads from _prb_commit:A.
  1176. *
  1177. * Relies on:
  1178. *
  1179. * WMB from _prb_commit:A to _prb_commit:B
  1180. * matching
  1181. * MB If desc_reopen_last:A to prb_reserve_in_last:A
  1182. */
  1183. if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val,
  1184. DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */
  1185. return NULL;
  1186. }
  1187. *id_out = id;
  1188. return d;
  1189. }
  1190. /**
  1191. * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer
  1192. * used by the newest record.
  1193. *
  1194. * @e: The entry structure to setup.
  1195. * @rb: The ringbuffer to re-reserve and extend data in.
  1196. * @r: The record structure to allocate buffers for.
  1197. * @caller_id: The caller ID of the caller (reserving writer).
  1198. * @max_size: Fail if the extended size would be greater than this.
  1199. *
  1200. * This is the public function available to writers to re-reserve and extend
  1201. * data.
  1202. *
  1203. * The writer specifies the text size to extend (not the new total size) by
  1204. * setting the @text_buf_size field of @r. To ensure proper initialization
  1205. * of @r, prb_rec_init_wr() should be used.
  1206. *
  1207. * This function will fail if @caller_id does not match the caller ID of the
  1208. * newest record. In that case the caller must reserve new data using
  1209. * prb_reserve().
  1210. *
  1211. * Context: Any context. Disables local interrupts on success.
  1212. * Return: true if text data could be extended, otherwise false.
  1213. *
  1214. * On success:
  1215. *
  1216. * - @r->text_buf points to the beginning of the entire text buffer.
  1217. *
  1218. * - @r->text_buf_size is set to the new total size of the buffer.
  1219. *
  1220. * - @r->info is not touched so that @r->info->text_len could be used
  1221. * to append the text.
  1222. *
  1223. * - prb_record_text_space() can be used on @e to query the new
  1224. * actually used space.
  1225. *
  1226. * Important: All @r->info fields will already be set with the current values
  1227. * for the record. I.e. @r->info->text_len will be less than
  1228. * @text_buf_size. Writers can use @r->info->text_len to know
  1229. * where concatenation begins and writers should update
  1230. * @r->info->text_len after concatenating.
  1231. */
  1232. bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
  1233. struct printk_record *r, u32 caller_id, unsigned int max_size)
  1234. {
  1235. struct prb_desc_ring *desc_ring = &rb->desc_ring;
  1236. struct printk_info *info;
  1237. unsigned int data_size;
  1238. struct prb_desc *d;
  1239. unsigned long id;
  1240. local_irq_save(e->irqflags);
  1241. /* Transition the newest descriptor back to the reserved state. */
  1242. d = desc_reopen_last(desc_ring, caller_id, &id);
  1243. if (!d) {
  1244. local_irq_restore(e->irqflags);
  1245. goto fail_reopen;
  1246. }
  1247. /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */
  1248. info = to_info(desc_ring, id);
  1249. /*
  1250. * Set the @e fields here so that prb_commit() can be used if
  1251. * anything fails from now on.
  1252. */
  1253. e->rb = rb;
  1254. e->id = id;
  1255. /*
  1256. * desc_reopen_last() checked the caller_id, but there was no
  1257. * exclusive access at that point. The descriptor may have
  1258. * changed since then.
  1259. */
  1260. if (caller_id != info->caller_id)
  1261. goto fail;
  1262. if (BLK_DATALESS(&d->text_blk_lpos)) {
  1263. if (WARN_ON_ONCE(info->text_len != 0)) {
  1264. pr_warn_once("wrong text_len value (%hu, expecting 0)\n",
  1265. info->text_len);
  1266. info->text_len = 0;
  1267. }
  1268. if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
  1269. goto fail;
  1270. if (r->text_buf_size > max_size)
  1271. goto fail;
  1272. r->text_buf = data_alloc(rb, r->text_buf_size,
  1273. &d->text_blk_lpos, id);
  1274. } else {
  1275. if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size))
  1276. goto fail;
  1277. /*
  1278. * Increase the buffer size to include the original size. If
  1279. * the meta data (@text_len) is not sane, use the full data
  1280. * block size.
  1281. */
  1282. if (WARN_ON_ONCE(info->text_len > data_size)) {
  1283. pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n",
  1284. info->text_len, data_size);
  1285. info->text_len = data_size;
  1286. }
  1287. r->text_buf_size += info->text_len;
  1288. if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
  1289. goto fail;
  1290. if (r->text_buf_size > max_size)
  1291. goto fail;
  1292. r->text_buf = data_realloc(rb, r->text_buf_size,
  1293. &d->text_blk_lpos, id);
  1294. }
  1295. if (r->text_buf_size && !r->text_buf)
  1296. goto fail;
  1297. r->info = info;
  1298. e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos);
  1299. return true;
  1300. fail:
  1301. prb_commit(e);
  1302. /* prb_commit() re-enabled interrupts. */
  1303. fail_reopen:
  1304. /* Make it clear to the caller that the re-reserve failed. */
  1305. memset(r, 0, sizeof(*r));
  1306. return false;
  1307. }
  1308. /*
  1309. * Attempt to finalize a specified descriptor. If this fails, the descriptor
  1310. * is either already final or it will finalize itself when the writer commits.
  1311. */
  1312. static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id)
  1313. {
  1314. unsigned long prev_state_val = DESC_SV(id, desc_committed);
  1315. struct prb_desc *d = to_desc(desc_ring, id);
  1316. atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val,
  1317. DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */
  1318. /* Best effort to remember the last finalized @id. */
  1319. atomic_long_set(&desc_ring->last_finalized_id, id);
  1320. }
  1321. /**
  1322. * prb_reserve() - Reserve space in the ringbuffer.
  1323. *
  1324. * @e: The entry structure to setup.
  1325. * @rb: The ringbuffer to reserve data in.
  1326. * @r: The record structure to allocate buffers for.
  1327. *
  1328. * This is the public function available to writers to reserve data.
  1329. *
  1330. * The writer specifies the text size to reserve by setting the
  1331. * @text_buf_size field of @r. To ensure proper initialization of @r,
  1332. * prb_rec_init_wr() should be used.
  1333. *
  1334. * Context: Any context. Disables local interrupts on success.
  1335. * Return: true if at least text data could be allocated, otherwise false.
  1336. *
  1337. * On success, the fields @info and @text_buf of @r will be set by this
  1338. * function and should be filled in by the writer before committing. Also
  1339. * on success, prb_record_text_space() can be used on @e to query the actual
  1340. * space used for the text data block.
  1341. *
  1342. * Important: @info->text_len needs to be set correctly by the writer in
  1343. * order for data to be readable and/or extended. Its value
  1344. * is initialized to 0.
  1345. */
  1346. bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
  1347. struct printk_record *r)
  1348. {
  1349. struct prb_desc_ring *desc_ring = &rb->desc_ring;
  1350. struct printk_info *info;
  1351. struct prb_desc *d;
  1352. unsigned long id;
  1353. u64 seq;
  1354. if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
  1355. goto fail;
  1356. /*
  1357. * Descriptors in the reserved state act as blockers to all further
  1358. * reservations once the desc_ring has fully wrapped. Disable
  1359. * interrupts during the reserve/commit window in order to minimize
  1360. * the likelihood of this happening.
  1361. */
  1362. local_irq_save(e->irqflags);
  1363. if (!desc_reserve(rb, &id)) {
  1364. /* Descriptor reservation failures are tracked. */
  1365. atomic_long_inc(&rb->fail);
  1366. local_irq_restore(e->irqflags);
  1367. goto fail;
  1368. }
  1369. d = to_desc(desc_ring, id);
  1370. info = to_info(desc_ring, id);
  1371. /*
  1372. * All @info fields (except @seq) are cleared and must be filled in
  1373. * by the writer. Save @seq before clearing because it is used to
  1374. * determine the new sequence number.
  1375. */
  1376. seq = info->seq;
  1377. memset(info, 0, sizeof(*info));
  1378. /*
  1379. * Set the @e fields here so that prb_commit() can be used if
  1380. * text data allocation fails.
  1381. */
  1382. e->rb = rb;
  1383. e->id = id;
  1384. /*
  1385. * Initialize the sequence number if it has "never been set".
  1386. * Otherwise just increment it by a full wrap.
  1387. *
  1388. * @seq is considered "never been set" if it has a value of 0,
  1389. * _except_ for @infos[0], which was specially setup by the ringbuffer
  1390. * initializer and therefore is always considered as set.
  1391. *
  1392. * See the "Bootstrap" comment block in printk_ringbuffer.h for
  1393. * details about how the initializer bootstraps the descriptors.
  1394. */
  1395. if (seq == 0 && DESC_INDEX(desc_ring, id) != 0)
  1396. info->seq = DESC_INDEX(desc_ring, id);
  1397. else
  1398. info->seq = seq + DESCS_COUNT(desc_ring);
  1399. /*
  1400. * New data is about to be reserved. Once that happens, previous
  1401. * descriptors are no longer able to be extended. Finalize the
  1402. * previous descriptor now so that it can be made available to
  1403. * readers. (For seq==0 there is no previous descriptor.)
  1404. */
  1405. if (info->seq > 0)
  1406. desc_make_final(desc_ring, DESC_ID(id - 1));
  1407. r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id);
  1408. /* If text data allocation fails, a data-less record is committed. */
  1409. if (r->text_buf_size && !r->text_buf) {
  1410. prb_commit(e);
  1411. /* prb_commit() re-enabled interrupts. */
  1412. goto fail;
  1413. }
  1414. r->info = info;
  1415. /* Record full text space used by record. */
  1416. e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos);
  1417. return true;
  1418. fail:
  1419. /* Make it clear to the caller that the reserve failed. */
  1420. memset(r, 0, sizeof(*r));
  1421. return false;
  1422. }
  1423. /* Commit the data (possibly finalizing it) and restore interrupts. */
  1424. static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val)
  1425. {
  1426. struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
  1427. struct prb_desc *d = to_desc(desc_ring, e->id);
  1428. unsigned long prev_state_val = DESC_SV(e->id, desc_reserved);
  1429. /* Now the writer has finished all writing: LMM(_prb_commit:A) */
  1430. /*
  1431. * Set the descriptor as committed. See "ABA Issues" about why
  1432. * cmpxchg() instead of set() is used.
  1433. *
  1434. * 1 Guarantee all record data is stored before the descriptor state
  1435. * is stored as committed. A write memory barrier is sufficient
  1436. * for this. This pairs with desc_read:B and desc_reopen_last:A.
  1437. *
  1438. * 2. Guarantee the descriptor state is stored as committed before
  1439. * re-checking the head ID in order to possibly finalize this
  1440. * descriptor. This pairs with desc_reserve:D.
  1441. *
  1442. * Memory barrier involvement:
  1443. *
  1444. * If prb_commit:A reads from desc_reserve:D, then
  1445. * desc_make_final:A reads from _prb_commit:B.
  1446. *
  1447. * Relies on:
  1448. *
  1449. * MB _prb_commit:B to prb_commit:A
  1450. * matching
  1451. * MB desc_reserve:D to desc_make_final:A
  1452. */
  1453. if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val,
  1454. DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */
  1455. WARN_ON_ONCE(1);
  1456. }
  1457. /* Restore interrupts, the reserve/commit window is finished. */
  1458. local_irq_restore(e->irqflags);
  1459. }
  1460. /**
  1461. * prb_commit() - Commit (previously reserved) data to the ringbuffer.
  1462. *
  1463. * @e: The entry containing the reserved data information.
  1464. *
  1465. * This is the public function available to writers to commit data.
  1466. *
  1467. * Note that the data is not yet available to readers until it is finalized.
  1468. * Finalizing happens automatically when space for the next record is
  1469. * reserved.
  1470. *
  1471. * See prb_final_commit() for a version of this function that finalizes
  1472. * immediately.
  1473. *
  1474. * Context: Any context. Enables local interrupts.
  1475. */
  1476. void prb_commit(struct prb_reserved_entry *e)
  1477. {
  1478. struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
  1479. unsigned long head_id;
  1480. _prb_commit(e, desc_committed);
  1481. /*
  1482. * If this descriptor is no longer the head (i.e. a new record has
  1483. * been allocated), extending the data for this record is no longer
  1484. * allowed and therefore it must be finalized.
  1485. */
  1486. head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */
  1487. if (head_id != e->id)
  1488. desc_make_final(desc_ring, e->id);
  1489. }
  1490. /**
  1491. * prb_final_commit() - Commit and finalize (previously reserved) data to
  1492. * the ringbuffer.
  1493. *
  1494. * @e: The entry containing the reserved data information.
  1495. *
  1496. * This is the public function available to writers to commit+finalize data.
  1497. *
  1498. * By finalizing, the data is made immediately available to readers.
  1499. *
  1500. * This function should only be used if there are no intentions of extending
  1501. * this data using prb_reserve_in_last().
  1502. *
  1503. * Context: Any context. Enables local interrupts.
  1504. */
  1505. void prb_final_commit(struct prb_reserved_entry *e)
  1506. {
  1507. struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
  1508. _prb_commit(e, desc_finalized);
  1509. /* Best effort to remember the last finalized @id. */
  1510. atomic_long_set(&desc_ring->last_finalized_id, e->id);
  1511. }
  /*
   * Count the number of lines in provided text. All text has at least 1 line
   * (even if @text_size is 0). Each '\n' processed is counted as an additional
   * line.
   *
   * Only the first @text_size bytes of @text are examined; the text does not
   * need to be NUL-terminated.
   */
  static unsigned int count_lines(const char *text, unsigned int text_size)
  {
  	unsigned int next_size = text_size;
  	unsigned int line_count = 1;
  	const char *next = text;

  	while (next_size) {
  		next = memchr(next, '\n', next_size);
  		if (!next)
  			break;
  		line_count++;
  		/* Continue the search right after the newline just found. */
  		next++;
  		next_size = text_size - (next - text);
  	}

  	return line_count;
  }
  1532. /*
  1533. * Given @blk_lpos, copy an expected @len of data into the provided buffer.
  1534. * If @line_count is provided, count the number of lines in the data.
  1535. *
  1536. * This function (used by readers) performs strict validation on the data
  1537. * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
  1538. * triggered if an internal error is detected.
  1539. */
  1540. static bool copy_data(struct prb_data_ring *data_ring,
  1541. struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf,
  1542. unsigned int buf_size, unsigned int *line_count)
  1543. {
  1544. unsigned int data_size;
  1545. const char *data;
  1546. /* Caller might not want any data. */
  1547. if ((!buf || !buf_size) && !line_count)
  1548. return true;
  1549. data = get_data(data_ring, blk_lpos, &data_size);
  1550. if (!data)
  1551. return false;
  1552. /*
  1553. * Actual cannot be less than expected. It can be more than expected
  1554. * because of the trailing alignment padding.
  1555. *
  1556. * Note that invalid @len values can occur because the caller loads
  1557. * the value during an allowed data race.
  1558. */
  1559. if (data_size < (unsigned int)len)
  1560. return false;
  1561. /* Caller interested in the line count? */
  1562. if (line_count)
  1563. *line_count = count_lines(data, len);
  1564. /* Caller interested in the data content? */
  1565. if (!buf || !buf_size)
  1566. return true;
  1567. data_size = min_t(unsigned int, buf_size, len);
  1568. memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */
  1569. return true;
  1570. }
  1571. /*
  1572. * This is an extended version of desc_read(). It gets a copy of a specified
  1573. * descriptor. However, it also verifies that the record is finalized and has
  1574. * the sequence number @seq. On success, 0 is returned.
  1575. *
  1576. * Error return values:
  1577. * -EINVAL: A finalized record with sequence number @seq does not exist.
  1578. * -ENOENT: A finalized record with sequence number @seq exists, but its data
  1579. * is not available. This is a valid record, so readers should
  1580. * continue with the next record.
  1581. */
  1582. static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring,
  1583. unsigned long id, u64 seq,
  1584. struct prb_desc *desc_out)
  1585. {
  1586. struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos;
  1587. enum desc_state d_state;
  1588. u64 s;
  1589. d_state = desc_read(desc_ring, id, desc_out, &s, NULL);
  1590. /*
  1591. * An unexpected @id (desc_miss) or @seq mismatch means the record
  1592. * does not exist. A descriptor in the reserved or committed state
  1593. * means the record does not yet exist for the reader.
  1594. */
  1595. if (d_state == desc_miss ||
  1596. d_state == desc_reserved ||
  1597. d_state == desc_committed ||
  1598. s != seq) {
  1599. return -EINVAL;
  1600. }
  1601. /*
  1602. * A descriptor in the reusable state may no longer have its data
  1603. * available; report it as existing but with lost data. Or the record
  1604. * may actually be a record with lost data.
  1605. */
  1606. if (d_state == desc_reusable ||
  1607. (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) {
  1608. return -ENOENT;
  1609. }
  1610. return 0;
  1611. }
  1612. /*
  1613. * Copy the ringbuffer data from the record with @seq to the provided
  1614. * @r buffer. On success, 0 is returned.
  1615. *
  1616. * See desc_read_finalized_seq() for error return values.
  1617. */
  1618. static int prb_read(struct printk_ringbuffer *rb, u64 seq,
  1619. struct printk_record *r, unsigned int *line_count)
  1620. {
  1621. struct prb_desc_ring *desc_ring = &rb->desc_ring;
  1622. struct printk_info *info = to_info(desc_ring, seq);
  1623. struct prb_desc *rdesc = to_desc(desc_ring, seq);
  1624. atomic_long_t *state_var = &rdesc->state_var;
  1625. struct prb_desc desc;
  1626. unsigned long id;
  1627. int err;
  1628. /* Extract the ID, used to specify the descriptor to read. */
  1629. id = DESC_ID(atomic_long_read(state_var));
  1630. /* Get a local copy of the correct descriptor (if available). */
  1631. err = desc_read_finalized_seq(desc_ring, id, seq, &desc);
  1632. /*
  1633. * If @r is NULL, the caller is only interested in the availability
  1634. * of the record.
  1635. */
  1636. if (err || !r)
  1637. return err;
  1638. /* If requested, copy meta data. */
  1639. if (r->info)
  1640. memcpy(r->info, info, sizeof(*(r->info)));
  1641. /* Copy text data. If it fails, this is a data-less record. */
  1642. if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len,
  1643. r->text_buf, r->text_buf_size, line_count)) {
  1644. return -ENOENT;
  1645. }
  1646. /* Ensure the record is still finalized and has the same @seq. */
  1647. return desc_read_finalized_seq(desc_ring, id, seq, &desc);
  1648. }
  1649. /* Get the sequence number of the tail descriptor. */
  1650. static u64 prb_first_seq(struct printk_ringbuffer *rb)
  1651. {
  1652. struct prb_desc_ring *desc_ring = &rb->desc_ring;
  1653. enum desc_state d_state;
  1654. struct prb_desc desc;
  1655. unsigned long id;
  1656. u64 seq;
  1657. for (;;) {
  1658. id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */
  1659. d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */
  1660. /*
  1661. * This loop will not be infinite because the tail is
  1662. * _always_ in the finalized or reusable state.
  1663. */
  1664. if (d_state == desc_finalized || d_state == desc_reusable)
  1665. break;
  1666. /*
  1667. * Guarantee the last state load from desc_read() is before
  1668. * reloading @tail_id in order to see a new tail in the case
  1669. * that the descriptor has been recycled. This pairs with
  1670. * desc_reserve:D.
  1671. *
  1672. * Memory barrier involvement:
  1673. *
  1674. * If prb_first_seq:B reads from desc_reserve:F, then
  1675. * prb_first_seq:A reads from desc_push_tail:B.
  1676. *
  1677. * Relies on:
  1678. *
  1679. * MB from desc_push_tail:B to desc_reserve:F
  1680. * matching
  1681. * RMB prb_first_seq:B to prb_first_seq:A
  1682. */
  1683. smp_rmb(); /* LMM(prb_first_seq:C) */
  1684. }
  1685. return seq;
  1686. }
  1687. /*
  1688. * Non-blocking read of a record. Updates @seq to the last finalized record
  1689. * (which may have no data available).
  1690. *
  1691. * See the description of prb_read_valid() and prb_read_valid_info()
  1692. * for details.
  1693. */
  1694. static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
  1695. struct printk_record *r, unsigned int *line_count)
  1696. {
  1697. u64 tail_seq;
  1698. int err;
  1699. while ((err = prb_read(rb, *seq, r, line_count))) {
  1700. tail_seq = prb_first_seq(rb);
  1701. if (*seq < tail_seq) {
  1702. /*
  1703. * Behind the tail. Catch up and try again. This
  1704. * can happen for -ENOENT and -EINVAL cases.
  1705. */
  1706. *seq = tail_seq;
  1707. } else if (err == -ENOENT) {
  1708. /* Record exists, but no data available. Skip. */
  1709. (*seq)++;
  1710. } else {
  1711. /* Non-existent/non-finalized record. Must stop. */
  1712. return false;
  1713. }
  1714. }
  1715. return true;
  1716. }
  1717. /**
  1718. * prb_read_valid() - Non-blocking read of a requested record or (if gone)
  1719. * the next available record.
  1720. *
  1721. * @rb: The ringbuffer to read from.
  1722. * @seq: The sequence number of the record to read.
  1723. * @r: A record data buffer to store the read record to.
  1724. *
  1725. * This is the public function available to readers to read a record.
  1726. *
  1727. * The reader provides the @info and @text_buf buffers of @r to be
  1728. * filled in. Any of the buffer pointers can be set to NULL if the reader
  1729. * is not interested in that data. To ensure proper initialization of @r,
  1730. * prb_rec_init_rd() should be used.
  1731. *
  1732. * Context: Any context.
  1733. * Return: true if a record was read, otherwise false.
  1734. *
  1735. * On success, the reader must check r->info.seq to see which record was
  1736. * actually read. This allows the reader to detect dropped records.
  1737. *
  1738. * Failure means @seq refers to a not yet written record.
  1739. */
  1740. bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
  1741. struct printk_record *r)
  1742. {
  1743. return _prb_read_valid(rb, &seq, r, NULL);
  1744. }
  1745. /**
  1746. * prb_read_valid_info() - Non-blocking read of meta data for a requested
  1747. * record or (if gone) the next available record.
  1748. *
  1749. * @rb: The ringbuffer to read from.
  1750. * @seq: The sequence number of the record to read.
  1751. * @info: A buffer to store the read record meta data to.
  1752. * @line_count: A buffer to store the number of lines in the record text.
  1753. *
  1754. * This is the public function available to readers to read only the
  1755. * meta data of a record.
  1756. *
  1757. * The reader provides the @info, @line_count buffers to be filled in.
  1758. * Either of the buffer pointers can be set to NULL if the reader is not
  1759. * interested in that data.
  1760. *
  1761. * Context: Any context.
  1762. * Return: true if a record's meta data was read, otherwise false.
  1763. *
  1764. * On success, the reader must check info->seq to see which record meta data
  1765. * was actually read. This allows the reader to detect dropped records.
  1766. *
  1767. * Failure means @seq refers to a not yet written record.
  1768. */
  1769. bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
  1770. struct printk_info *info, unsigned int *line_count)
  1771. {
  1772. struct printk_record r;
  1773. prb_rec_init_rd(&r, info, NULL, 0);
  1774. return _prb_read_valid(rb, &seq, &r, line_count);
  1775. }
  1776. /**
  1777. * prb_first_valid_seq() - Get the sequence number of the oldest available
  1778. * record.
  1779. *
  1780. * @rb: The ringbuffer to get the sequence number from.
  1781. *
  1782. * This is the public function available to readers to see what the
  1783. * first/oldest valid sequence number is.
  1784. *
  1785. * This provides readers a starting point to begin iterating the ringbuffer.
  1786. *
  1787. * Context: Any context.
  1788. * Return: The sequence number of the first/oldest record or, if the
  1789. * ringbuffer is empty, 0 is returned.
  1790. */
  1791. u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
  1792. {
  1793. u64 seq = 0;
  1794. if (!_prb_read_valid(rb, &seq, NULL, NULL))
  1795. return 0;
  1796. return seq;
  1797. }
  1798. /**
  1799. * prb_next_seq() - Get the sequence number after the last available record.
  1800. *
  1801. * @rb: The ringbuffer to get the sequence number from.
  1802. *
  1803. * This is the public function available to readers to see what the next
  1804. * newest sequence number available to readers will be.
  1805. *
  1806. * This provides readers a sequence number to jump to if all currently
  1807. * available records should be skipped.
  1808. *
  1809. * Context: Any context.
  1810. * Return: The sequence number of the next newest (not yet available) record
  1811. * for readers.
  1812. */
  1813. u64 prb_next_seq(struct printk_ringbuffer *rb)
  1814. {
  1815. struct prb_desc_ring *desc_ring = &rb->desc_ring;
  1816. enum desc_state d_state;
  1817. unsigned long id;
  1818. u64 seq;
  1819. /* Check if the cached @id still points to a valid @seq. */
  1820. id = atomic_long_read(&desc_ring->last_finalized_id);
  1821. d_state = desc_read(desc_ring, id, NULL, &seq, NULL);
  1822. if (d_state == desc_finalized || d_state == desc_reusable) {
  1823. /*
  1824. * Begin searching after the last finalized record.
  1825. *
  1826. * On 0, the search must begin at 0 because of hack#2
  1827. * of the bootstrapping phase it is not known if a
  1828. * record at index 0 exists.
  1829. */
  1830. if (seq != 0)
  1831. seq++;
  1832. } else {
  1833. /*
  1834. * The information about the last finalized sequence number
  1835. * has gone. It should happen only when there is a flood of
  1836. * new messages and the ringbuffer is rapidly recycled.
  1837. * Give up and start from the beginning.
  1838. */
  1839. seq = 0;
  1840. }
  1841. /*
  1842. * The information about the last finalized @seq might be inaccurate.
  1843. * Search forward to find the current one.
  1844. */
  1845. while (_prb_read_valid(rb, &seq, NULL, NULL))
  1846. seq++;
  1847. return seq;
  1848. }
  1849. /**
  1850. * prb_init() - Initialize a ringbuffer to use provided external buffers.
  1851. *
  1852. * @rb: The ringbuffer to initialize.
  1853. * @text_buf: The data buffer for text data.
  1854. * @textbits: The size of @text_buf as a power-of-2 value.
  1855. * @descs: The descriptor buffer for ringbuffer records.
  1856. * @descbits: The count of @descs items as a power-of-2 value.
  1857. * @infos: The printk_info buffer for ringbuffer records.
  1858. *
  1859. * This is the public function available to writers to setup a ringbuffer
  1860. * during runtime using provided buffers.
  1861. *
  1862. * This must match the initialization of DEFINE_PRINTKRB().
  1863. *
  1864. * Context: Any context.
  1865. */
  1866. void prb_init(struct printk_ringbuffer *rb,
  1867. char *text_buf, unsigned int textbits,
  1868. struct prb_desc *descs, unsigned int descbits,
  1869. struct printk_info *infos)
  1870. {
  1871. memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0]));
  1872. memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0]));
  1873. rb->desc_ring.count_bits = descbits;
  1874. rb->desc_ring.descs = descs;
  1875. rb->desc_ring.infos = infos;
  1876. atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));
  1877. atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits));
  1878. atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits));
  1879. rb->text_data_ring.size_bits = textbits;
  1880. rb->text_data_ring.data = text_buf;
  1881. atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits));
  1882. atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits));
  1883. atomic_long_set(&rb->fail, 0);
  1884. atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits));
  1885. descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS;
  1886. descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS;
  1887. infos[0].seq = -(u64)_DESCS_COUNT(descbits);
  1888. infos[_DESCS_COUNT(descbits) - 1].seq = 0;
  1889. }
  1890. /**
  1891. * prb_record_text_space() - Query the full actual used ringbuffer space for
  1892. * the text data of a reserved entry.
  1893. *
  1894. * @e: The successfully reserved entry to query.
  1895. *
  1896. * This is the public function available to writers to see how much actual
  1897. * space is used in the ringbuffer to store the text data of the specified
  1898. * entry.
  1899. *
  1900. * This function is only valid if @e has been successfully reserved using
  1901. * prb_reserve().
  1902. *
  1903. * Context: Any context.
  1904. * Return: The size in bytes used by the text data of the associated record.
  1905. */
  1906. unsigned int prb_record_text_space(struct prb_reserved_entry *e)
  1907. {
  1908. return e->text_space;
  1909. }