// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>
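
/*
 * Each table entry is a single u64, packed and unpacked with the
 * BB_MAKE()/BB_OFFSET()/BB_LEN()/BB_ACK() helpers. They are defined in
 * <linux/badblocks.h>; quoted here for reference only (that header is
 * authoritative):
 *
 *	#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
 *	#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
 *	#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
 *	#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
 *
 * i.e. bit 63 is the 'acknowledged' flag, bits 62..9 hold the start
 * sector (54 bits), and bits 8..0 hold the length minus one, giving
 * lengths 1..512 (BB_MAX_LEN == 512).
 */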

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb: the badblocks structure that holds all badblock information
 * @s: sector (start) at which to check for badblocks
 * @sectors: number of sectors to check for badblocks
 * @first_bad: pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64 bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 * A 'shift' can be set so that larger blocks are tracked and
 * consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint
 * (or "before the sector after the target range"),
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * In the 1 and -1 cases, the start/length of the first overlapping bad
 * section is stored via @first_bad and @bad_sectors.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
		    sector_t *first_bad, int *bad_sectors)
{
	int hi;
	int lo;
	u64 *p = bb->page;
	int rv;
	sector_t target = s + sectors;
	unsigned seq;

	if (bb->shift > 0) {
		/* round the start down, and the end up */
		s >>= bb->shift;
		target += (1<<bb->shift) - 1;
		target >>= bb->shift;
	}
	/* 'target' is now the first block after the bad range */

retry:
	seq = read_seqbegin(&bb->lock);
	lo = 0;
	rv = 0;
	hi = bb->count;

	/* Binary search between lo and hi for 'target'
	 * i.e. for the last range that starts before 'target'
	 */
	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
	 * are known not to be the last range before target.
	 * VARIANT: hi-lo is the number of possible
	 * ranges, and decreases until it reaches 1
	 */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			/* This could still be the one, earlier ranges
			 * could not.
			 */
			lo = mid;
		else
			/* This and later ranges are definitely out. */
			hi = mid;
	}
	/* 'lo' might be the last that started before target, but 'hi' isn't */
	if (hi > lo) {
		/* need to check all ranges that end after 's' to see if
		 * any are unacknowledged.
		 */
		while (lo >= 0 &&
		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
			if (BB_OFFSET(p[lo]) < target) {
				/* starts before the end, and finishes after
				 * the start, so they must overlap
				 */
				if (rv != -1 && BB_ACK(p[lo]))
					rv = 1;
				else
					rv = -1;
				*first_bad = BB_OFFSET(p[lo]);
				*bad_sectors = BB_LEN(p[lo]);
			}
			lo--;
		}
	}

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
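
/*
 * Usage sketch (hypothetical caller, not part of this file; bb,
 * req_sector and req_len are assumed names): an I/O path can probe a
 * request's range like so:
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *	int rv = badblocks_check(bb, req_sector, req_len,
 *				 &first_bad, &bad_sectors);
 *
 * rv == 0 means the range is clean; rv == 1 or rv == -1 means it hits
 * acknowledged (respectively some unacknowledged) bad blocks, in which
 * case first_bad/bad_sectors describe the first overlapping bad section.
 */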

/*
 * Re-derive the 'unacked_exist' flag: scan the table and clear the flag
 * once every entry carries the acknowledged bit.
 */
static void badblocks_update_acked(struct badblocks *bb)
{
	u64 *p = bb->page;
	int i;
	bool unacked = false;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count ; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to mark as bad
 * @sectors: number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged.  We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 * 0: success
 * 1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
		  int acknowledged)
{
	u64 *p;
	int lo, hi;
	int rv = 0;
	unsigned long flags;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s >>= bb->shift;
		next += (1<<bb->shift) - 1;
		next >>= bb->shift;
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts at-or-before 's' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a <= s)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo && BB_OFFSET(p[lo]) > s)
		hi = lo;

	if (hi > lo) {
		/* we found a range that might merge with the start
		 * of our new range
		 */
		sector_t a = BB_OFFSET(p[lo]);
		sector_t e = a + BB_LEN(p[lo]);
		int ack = BB_ACK(p[lo]);

		if (e >= s) {
			/* Yes, we can merge with a previous range */
			if (s == a && s + sectors >= e)
				/* new range covers old */
				ack = acknowledged;
			else
				ack = ack && acknowledged;

			if (e < s + sectors)
				e = s + sectors;
			if (e - a <= BB_MAX_LEN) {
				p[lo] = BB_MAKE(a, e - a, ack);
				s = e;
			} else {
				/* does not all fit in one range,
				 * make p[lo] maximal
				 */
				if (BB_LEN(p[lo]) != BB_MAX_LEN)
					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
		}
	}
	if (sectors && hi < bb->count) {
		/* 'hi' points to the first range that starts after 's'.
		 * Maybe we can merge with the start of that range
		 */
		sector_t a = BB_OFFSET(p[hi]);
		sector_t e = a + BB_LEN(p[hi]);
		int ack = BB_ACK(p[hi]);

		if (a <= s + sectors) {
			/* merging is possible */
			if (e <= s + sectors) {
				/* full overlap */
				e = s + sectors;
				ack = acknowledged;
			} else
				ack = ack && acknowledged;

			a = s;
			if (e - a <= BB_MAX_LEN) {
				p[hi] = BB_MAKE(a, e - a, ack);
				s = e;
			} else {
				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
			lo = hi;
			hi++;
		}
	}
	if (sectors == 0 && hi < bb->count) {
		/* we might be able to combine lo and hi */
		/* Note: 's' is at the end of 'lo' */
		sector_t a = BB_OFFSET(p[hi]);
		int lolen = BB_LEN(p[lo]);
		int hilen = BB_LEN(p[hi]);
		int newlen = lolen + hilen - (s - a);

		if (s >= a && newlen < BB_MAX_LEN) {
			/* yes, we can combine them */
			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
			memmove(p + hi, p + hi + 1,
				(bb->count - hi - 1) * 8);
			bb->count--;
		}
	}
	while (sectors) {
		/* didn't merge (it all).
		 * Need to add a range just before 'hi'
		 */
		if (bb->count >= MAX_BADBLOCKS) {
			/* No room for more */
			rv = 1;
			break;
		} else {
			int this_sectors = sectors;

			memmove(p + hi + 1, p + hi,
				(bb->count - hi) * 8);
			bb->count++;

			if (this_sectors > BB_MAX_LEN)
				this_sectors = BB_MAX_LEN;
			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
			sectors -= this_sectors;
			s += this_sectors;
		}
	}

	bb->changed = 1;
	if (!acknowledged)
		bb->unacked_exist = 1;
	else
		badblocks_update_acked(bb);
	write_sequnlock_irqrestore(&bb->lock, flags);

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
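
/*
 * Worked example of the shift rounding above (values chosen purely for
 * illustration): with bb->shift == 3 (8-sector blocks),
 * badblocks_set(bb, 10, 5, 0) computes next = 15, then
 * s = 10 >> 3 = 1 and next = (15 + 7) >> 3 = 2, so exactly one
 * 8-sector block (block 1, i.e. sectors 8..15) is marked bad.
 * The rounding always widens the bad range, never narrows it.
 */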

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to mark as good
 * @sectors: number of sectors to mark as good
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 * 0: success
 * -ENOSPC: failed to clear badblocks (table full when a split was needed)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	u64 *p;
	int lo, hi;
	sector_t target = s + sectors;
	int rv = 0;

	if (bb->shift > 0) {
		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		s += (1<<bb->shift) - 1;
		s >>= bb->shift;
		target >>= bb->shift;
	}

	write_seqlock_irq(&bb->lock);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts before 'target' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo) {
		/* p[lo] is the last range that could overlap the
		 * current range.  Earlier ranges could also overlap,
		 * but only this one can overlap the end of the range.
		 */
		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
		    (BB_OFFSET(p[lo]) < target)) {
			/* Partial overlap, leave the tail of this range */
			int ack = BB_ACK(p[lo]);
			sector_t a = BB_OFFSET(p[lo]);
			sector_t end = a + BB_LEN(p[lo]);

			if (a < s) {
				/* we need to split this range */
				if (bb->count >= MAX_BADBLOCKS) {
					rv = -ENOSPC;
					goto out;
				}
				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
				bb->count++;
				p[lo] = BB_MAKE(a, s-a, ack);
				lo++;
			}
			p[lo] = BB_MAKE(target, end - target, ack);
			/* there is no longer an overlap */
			hi = lo;
			lo--;
		}
		while (lo >= 0 &&
		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
		       (BB_OFFSET(p[lo]) < target)) {
			/* This range does overlap */
			if (BB_OFFSET(p[lo]) < s) {
				/* Keep the early parts of this range. */
				int ack = BB_ACK(p[lo]);
				sector_t start = BB_OFFSET(p[lo]);

				p[lo] = BB_MAKE(start, s - start, ack);
				/* now 'lo' doesn't overlap, so.. */
				break;
			}
			lo--;
		}
		/* 'lo' is strictly before, 'hi' is strictly after,
		 * anything between needs to be discarded
		 */
		if (hi - lo > 1) {
			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
			bb->count -= (hi - lo - 1);
		}
	}

	badblocks_update_acked(bb);
	bb->changed = 1;
out:
	write_sequnlock_irq(&bb->lock);

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
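
/*
 * Split example (illustrative numbers, assuming bb->shift == 0): with a
 * single entry covering sectors 100..199, badblocks_clear(bb, 140, 20)
 * leaves two entries, 100..139 and 160..199 -- the head is kept by the
 * memmove + BB_MAKE(a, s-a, ack) split above, the tail by the
 * BB_MAKE(target, end - target, ack) rewrite.  This is the one path
 * that can grow the table, hence the -ENOSPC check.
 */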

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb: the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;

	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count ; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: sysfs buffer to fill with the list
 * @unack: whether to show only unacknowledged badblocks
 *
 * Return:
 * Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);
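
/*
 * Example output as seen through sysfs (one "<start> <length>\n" pair
 * per entry, reported in device sectors after undoing bb->shift; the
 * numbers are illustrative):
 *
 *	612 8
 *	1024 16
 */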

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer received from sysfs
 * @len: length of data received from sysfs
 * @unack: whether to add the blocks as unacknowledged
 *
 * Return:
 * Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
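
/*
 * Example input (hypothetical sysfs attribute path, shown only to
 * illustrate the "<start> <length>" format parsed above): marking 8
 * sectors bad starting at sector 612 from user space might look like
 *
 *	echo "612 8" > /sys/.../badblocks
 *
 * An attribute wired to unack == 1 would record the same range as
 * unacknowledged instead.
 */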

/*
 * Common initialisation: allocate the table page (device-managed when
 * @dev is set) and set the initial shift -- 0 when accounting is
 * enabled, -1 to disable it.
 */
static int __badblocks_init(struct device *dev, struct badblocks *bb,
			    int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 * @enable: whether to enable badblocks accounting
 *
 * Return:
 * 0: success
 * -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;

	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
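
/*
 * Lifecycle sketch (hypothetical driver; my_disk, my_probe and the
 * field names are assumed, not from this file): a driver wanting
 * device-managed badblocks tracking might do
 *
 *	static int my_probe(struct device *dev)
 *	{
 *		struct my_disk *disk = dev_get_drvdata(dev);
 *		int rc = devm_init_badblocks(dev, &disk->bb);
 *
 *		if (rc)
 *			return rc;
 *		...
 *	}
 *
 * With devm_init_badblocks() the table page is devres-managed and freed
 * with the device; a caller using badblocks_init() must instead pair it
 * with an explicit badblocks_exit().
 */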