sysfs.c

// SPDX-License-Identifier: GPL-2.0
/*
 * bcache sysfs interfaces
 *
 * Copyright 2010, 2011 Kent Overstreet <[email protected]>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "sysfs.h"
#include "btree.h"
#include "request.h"
#include "writeback.h"
#include "features.h"

#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/sched/clock.h>

extern bool bcache_is_reboot;

/* Default is 0 ("writethrough") */
static const char * const bch_cache_modes[] = {
	"writethrough",
	"writeback",
	"writearound",
	"none",
	NULL
};

static const char * const bch_reada_cache_policies[] = {
	"all",
	"meta-only",
	NULL
};

/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
	"auto",
	"always",
	NULL
};

static const char * const cache_replacement_policies[] = {
	"lru",
	"fifo",
	"random",
	NULL
};

static const char * const error_actions[] = {
	"unregister",
	"panic",
	NULL
};

write_attribute(attach);
write_attribute(detach);
write_attribute(unregister);
write_attribute(stop);
write_attribute(clear_stats);
write_attribute(trigger_gc);
write_attribute(prune_cache);
write_attribute(flash_vol_create);

read_attribute(bucket_size);
read_attribute(block_size);
read_attribute(nbuckets);
read_attribute(tree_depth);
read_attribute(root_usage_percent);
read_attribute(priority_stats);
read_attribute(btree_cache_size);
read_attribute(btree_cache_max_chain);
read_attribute(cache_available_percent);
read_attribute(written);
read_attribute(btree_written);
read_attribute(metadata_written);
read_attribute(active_journal_entries);
read_attribute(backing_dev_name);
read_attribute(backing_dev_uuid);

sysfs_time_stats_attribute(btree_gc, sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us);
sysfs_time_stats_attribute(btree_sort, ms, us);
sysfs_time_stats_attribute(btree_read, ms, us);

read_attribute(btree_nodes);
read_attribute(btree_used_percent);
read_attribute(average_key_size);
read_attribute(dirty_data);
read_attribute(bset_tree_stats);
read_attribute(feature_compat);
read_attribute(feature_ro_compat);
read_attribute(feature_incompat);

read_attribute(state);
read_attribute(cache_read_races);
read_attribute(reclaim);
read_attribute(reclaimed_journal_buckets);
read_attribute(flush_write);
read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
read_attribute(congested);
read_attribute(cutoff_writeback);
read_attribute(cutoff_writeback_sync);
rw_attribute(congested_read_threshold_us);
rw_attribute(congested_write_threshold_us);

rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
rw_attribute(readahead_cache_policy);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
rw_attribute(writeback_delay);
rw_attribute(writeback_rate);
rw_attribute(writeback_consider_fragment);

rw_attribute(writeback_rate_update_seconds);
rw_attribute(writeback_rate_i_term_inverse);
rw_attribute(writeback_rate_p_term_inverse);
rw_attribute(writeback_rate_fp_term_low);
rw_attribute(writeback_rate_fp_term_mid);
rw_attribute(writeback_rate_fp_term_high);
rw_attribute(writeback_rate_minimum);
read_attribute(writeback_rate_debug);

read_attribute(stripe_size);
read_attribute(partial_stripes_expensive);

rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
rw_attribute(errors);
rw_attribute(io_error_limit);
rw_attribute(io_error_halflife);
rw_attribute(verify);
rw_attribute(bypass_torture_test);
rw_attribute(key_merging_disabled);
rw_attribute(gc_always_rewrite);
rw_attribute(expensive_debug_checks);
rw_attribute(cache_replacement_policy);
rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
rw_attribute(idle_max_writeback_rate);
rw_attribute(gc_after_writeback);
rw_attribute(size);
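
/*
 * Note: read_attribute(), write_attribute() and rw_attribute() are macros
 * from sysfs.h; each one declares a static struct attribute named
 * sysfs_<name> with the corresponding sysfs mode, which is why the
 * attribute arrays further down can take their addresses as &sysfs_<name>.
 */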

static ssize_t bch_snprint_string_list(char *buf,
				       size_t size,
				       const char * const list[],
				       size_t selected)
{
	char *out = buf;
	size_t i;

	for (i = 0; list[i]; i++)
		out += scnprintf(out, buf + size - out,
				 i == selected ? "[%s] " : "%s ", list[i]);

	out[-1] = '\n';
	return out - buf;
}
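
/*
 * SHOW() and STORE() (also macros from sysfs.h) declare the ->show() and
 * ->store() methods for a kobject; "buf" is the usual PAGE_SIZE sysfs
 * buffer, which is why bch_snprint_string_list() is bounded by PAGE_SIZE
 * at the call sites below.
 */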

SHOW(__bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	char const *states[] = { "no cache", "clean", "dirty", "inconsistent" };
	int wb = dc->writeback_running;

#define var(stat)	(dc->stat)

	if (attr == &sysfs_cache_mode)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_cache_modes,
					       BDEV_CACHE_MODE(&dc->sb));

	if (attr == &sysfs_readahead_cache_policy)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_reada_cache_policies,
					       dc->cache_readahead_policy);

	if (attr == &sysfs_stop_when_cache_set_failed)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_stop_on_failure_modes,
					       dc->stop_when_cache_set_failed);

	sysfs_printf(data_csum, "%i", dc->disk.data_csum);
	var_printf(verify, "%i");
	var_printf(bypass_torture_test, "%i");
	var_printf(writeback_metadata, "%i");
	var_printf(writeback_running, "%i");
	var_printf(writeback_consider_fragment, "%i");
	var_print(writeback_delay);
	var_print(writeback_percent);
	sysfs_hprint(writeback_rate,
		     wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
	sysfs_printf(io_errors, "%i", atomic_read(&dc->io_errors));
	sysfs_printf(io_error_limit, "%i", dc->error_limit);
	sysfs_printf(io_disable, "%i", dc->io_disable);
	var_print(writeback_rate_update_seconds);
	var_print(writeback_rate_i_term_inverse);
	var_print(writeback_rate_p_term_inverse);
	var_print(writeback_rate_fp_term_low);
	var_print(writeback_rate_fp_term_mid);
	var_print(writeback_rate_fp_term_high);
	var_print(writeback_rate_minimum);

	if (attr == &sysfs_writeback_rate_debug) {
		char rate[20];
		char dirty[20];
		char target[20];
		char proportional[20];
		char integral[20];
		char change[20];
		s64 next_io;

		/*
		 * Except for dirty and target, other values should
		 * be 0 if writeback is not running.
		 */
		bch_hprint(rate,
			   wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
			      : 0);
		bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
		bch_hprint(target, dc->writeback_rate_target << 9);
		bch_hprint(proportional,
			   wb ? dc->writeback_rate_proportional << 9 : 0);
		bch_hprint(integral,
			   wb ? dc->writeback_rate_integral_scaled << 9 : 0);
		bch_hprint(change, wb ? dc->writeback_rate_change << 9 : 0);
		next_io = wb ? div64_s64(dc->writeback_rate.next - local_clock(),
					 NSEC_PER_MSEC) : 0;

		return sprintf(buf,
			       "rate:\t\t%s/sec\n"
			       "dirty:\t\t%s\n"
			       "target:\t\t%s\n"
			       "proportional:\t%s\n"
			       "integral:\t%s\n"
			       "change:\t\t%s/sec\n"
			       "next io:\t%llims\n",
			       rate, dirty, target, proportional,
			       integral, change, next_io);
	}

	sysfs_hprint(dirty_data,
		     bcache_dev_sectors_dirty(&dc->disk) << 9);

	sysfs_hprint(stripe_size, ((uint64_t)dc->disk.stripe_size) << 9);
	var_printf(partial_stripes_expensive, "%u");

	var_hprint(sequential_cutoff);

	sysfs_print(running, atomic_read(&dc->running));
	sysfs_print(state, states[BDEV_STATE(&dc->sb)]);

	if (attr == &sysfs_label) {
		memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
		buf[SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	if (attr == &sysfs_backing_dev_name) {
		snprintf(buf, BDEVNAME_SIZE + 1, "%pg", dc->bdev);
		strcat(buf, "\n");
		return strlen(buf);
	}

	if (attr == &sysfs_backing_dev_uuid) {
		/* convert binary uuid into 36-byte string plus '\0' */
		snprintf(buf, 36 + 1, "%pU", dc->sb.uuid);
		strcat(buf, "\n");
		return strlen(buf);
	}

#undef var
	return 0;
}
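
/*
 * SHOW_LOCKED() (sysfs.h) generates bch_cached_dev_show(), which simply
 * calls __bch_cached_dev_show() above with bch_register_lock held.
 */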
SHOW_LOCKED(bch_cached_dev)

STORE(__cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	ssize_t v;
	struct cache_set *c;
	struct kobj_uevent_env *env;

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

#define d_strtoul(var)		sysfs_strtoul(var, dc->var)
#define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
#define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)

	sysfs_strtoul(data_csum, dc->disk.data_csum);
	d_strtoul(verify);
	sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test);
	sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata);
	sysfs_strtoul_bool(writeback_running, dc->writeback_running);
	sysfs_strtoul_bool(writeback_consider_fragment, dc->writeback_consider_fragment);
	sysfs_strtoul_clamp(writeback_delay, dc->writeback_delay, 0, UINT_MAX);

	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
			    0, bch_cutoff_writeback);

	if (attr == &sysfs_writeback_rate) {
		ssize_t ret;
		long int v = atomic_long_read(&dc->writeback_rate.rate);

		ret = strtoul_safe_clamp(buf, v, 1, INT_MAX);

		if (!ret) {
			atomic_long_set(&dc->writeback_rate.rate, v);
			ret = size;
		}

		return ret;
	}

	sysfs_strtoul_clamp(writeback_rate_update_seconds,
			    dc->writeback_rate_update_seconds,
			    1, WRITEBACK_RATE_UPDATE_SECS_MAX);
	sysfs_strtoul_clamp(writeback_rate_i_term_inverse,
			    dc->writeback_rate_i_term_inverse,
			    1, UINT_MAX);
	sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
			    dc->writeback_rate_p_term_inverse,
			    1, UINT_MAX);
	sysfs_strtoul_clamp(writeback_rate_fp_term_low,
			    dc->writeback_rate_fp_term_low,
			    1, dc->writeback_rate_fp_term_mid - 1);
	sysfs_strtoul_clamp(writeback_rate_fp_term_mid,
			    dc->writeback_rate_fp_term_mid,
			    dc->writeback_rate_fp_term_low + 1,
			    dc->writeback_rate_fp_term_high - 1);
	sysfs_strtoul_clamp(writeback_rate_fp_term_high,
			    dc->writeback_rate_fp_term_high,
			    dc->writeback_rate_fp_term_mid + 1, UINT_MAX);
	sysfs_strtoul_clamp(writeback_rate_minimum,
			    dc->writeback_rate_minimum,
			    1, UINT_MAX);

	sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);

	if (attr == &sysfs_io_disable) {
		int v = strtoul_or_return(buf);

		dc->io_disable = v ? 1 : 0;
	}

	sysfs_strtoul_clamp(sequential_cutoff,
			    dc->sequential_cutoff,
			    0, UINT_MAX);

	if (attr == &sysfs_clear_stats)
		bch_cache_accounting_clear(&dc->accounting);

	if (attr == &sysfs_running &&
	    strtoul_or_return(buf)) {
		v = bch_cached_dev_run(dc);
		if (v)
			return v;
	}

	if (attr == &sysfs_cache_mode) {
		v = __sysfs_match_string(bch_cache_modes, -1, buf);
		if (v < 0)
			return v;

		if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) {
			SET_BDEV_CACHE_MODE(&dc->sb, v);
			bch_write_bdev_super(dc, NULL);
		}
	}

	if (attr == &sysfs_readahead_cache_policy) {
		v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
		if (v < 0)
			return v;

		if ((unsigned int) v != dc->cache_readahead_policy)
			dc->cache_readahead_policy = v;
	}

	if (attr == &sysfs_stop_when_cache_set_failed) {
		v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
		if (v < 0)
			return v;

		dc->stop_when_cache_set_failed = v;
	}

	if (attr == &sysfs_label) {
		if (size > SB_LABEL_SIZE)
			return -EINVAL;
		memcpy(dc->sb.label, buf, size);
		if (size < SB_LABEL_SIZE)
			dc->sb.label[size] = '\0';
		if (size && dc->sb.label[size - 1] == '\n')
			dc->sb.label[size - 1] = '\0';
		bch_write_bdev_super(dc, NULL);
		if (dc->disk.c) {
			memcpy(dc->disk.c->uuids[dc->disk.id].label,
			       buf, SB_LABEL_SIZE);
			bch_uuid_write(dc->disk.c);
		}
		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
		if (!env)
			return -ENOMEM;
		add_uevent_var(env, "DRIVER=bcache");
		add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid);
		add_uevent_var(env, "CACHED_LABEL=%s", buf);
		kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
				   KOBJ_CHANGE,
				   env->envp);
		kfree(env);
	}

	if (attr == &sysfs_attach) {
		uint8_t set_uuid[16];

		if (bch_parse_uuid(buf, set_uuid) < 16)
			return -EINVAL;

		v = -ENOENT;
		list_for_each_entry(c, &bch_cache_sets, list) {
			v = bch_cached_dev_attach(dc, c, set_uuid);
			if (!v)
				return size;
		}
		if (v == -ENOENT)
			pr_err("Can't attach %s: cache set not found\n", buf);
		return v;
	}

	if (attr == &sysfs_detach && dc->disk.c)
		bch_cached_dev_detach(dc);

	if (attr == &sysfs_stop)
		bcache_device_stop(&dc->disk);

	return size;
}
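
/*
 * Unlike the *_LOCKED() wrappers used elsewhere in this file, this store
 * routine takes bch_register_lock by hand: it needs to act on the result
 * of __cached_dev_store() (kicking the writeback thread and scheduling the
 * rate-update worker) while still holding the lock.
 */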
STORE(bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	mutex_lock(&bch_register_lock);
	size = __cached_dev_store(kobj, attr, buf, size);

	if (attr == &sysfs_writeback_running) {
		/* dc->writeback_running changed in __cached_dev_store() */
		if (IS_ERR_OR_NULL(dc->writeback_thread)) {
			/*
			 * reject setting it to 1 via sysfs if the writeback
			 * kthread is not created yet.
			 */
			if (dc->writeback_running) {
				dc->writeback_running = false;
				pr_err("%s: failed to run non-existent writeback thread\n",
				       dc->disk.disk->disk_name);
			}
		} else
			/*
			 * writeback kthread will check if dc->writeback_running
			 * is true or false.
			 */
			bch_writeback_queue(dc);
	}

	/*
	 * Only set BCACHE_DEV_WB_RUNNING when the cached device is attached
	 * to a cache set; otherwise it doesn't make sense.
	 */
	if (attr == &sysfs_writeback_percent)
		if ((dc->disk.c != NULL) &&
		    (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)))
			schedule_delayed_work(&dc->writeback_rate_update,
					      dc->writeback_rate_update_seconds * HZ);

	mutex_unlock(&bch_register_lock);
	return size;
}

static struct attribute *bch_cached_dev_attrs[] = {
	&sysfs_attach,
	&sysfs_detach,
	&sysfs_stop,
#if 0
	&sysfs_data_csum,
#endif
	&sysfs_cache_mode,
	&sysfs_readahead_cache_policy,
	&sysfs_stop_when_cache_set_failed,
	&sysfs_writeback_metadata,
	&sysfs_writeback_running,
	&sysfs_writeback_delay,
	&sysfs_writeback_percent,
	&sysfs_writeback_rate,
	&sysfs_writeback_consider_fragment,
	&sysfs_writeback_rate_update_seconds,
	&sysfs_writeback_rate_i_term_inverse,
	&sysfs_writeback_rate_p_term_inverse,
	&sysfs_writeback_rate_fp_term_low,
	&sysfs_writeback_rate_fp_term_mid,
	&sysfs_writeback_rate_fp_term_high,
	&sysfs_writeback_rate_minimum,
	&sysfs_writeback_rate_debug,
	&sysfs_io_errors,
	&sysfs_io_error_limit,
	&sysfs_io_disable,
	&sysfs_dirty_data,
	&sysfs_stripe_size,
	&sysfs_partial_stripes_expensive,
	&sysfs_sequential_cutoff,
	&sysfs_clear_stats,
	&sysfs_running,
	&sysfs_state,
	&sysfs_label,
#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_verify,
	&sysfs_bypass_torture_test,
#endif
	&sysfs_backing_dev_name,
	&sysfs_backing_dev_uuid,
	NULL
};
ATTRIBUTE_GROUPS(bch_cached_dev);

KTYPE(bch_cached_dev);
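
/*
 * ATTRIBUTE_GROUPS() builds bch_cached_dev_groups from the array above,
 * and KTYPE() (sysfs.h) ties the generated show/store methods and the
 * attribute groups together into the struct kobj_type for this kobject.
 */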

SHOW(bch_flash_dev)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	struct uuid_entry *u = &d->c->uuids[d->id];

	sysfs_printf(data_csum, "%i", d->data_csum);
	sysfs_hprint(size, u->sectors << 9);

	if (attr == &sysfs_label) {
		memcpy(buf, u->label, SB_LABEL_SIZE);
		buf[SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	return 0;
}

STORE(__bch_flash_dev)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	struct uuid_entry *u = &d->c->uuids[d->id];

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	sysfs_strtoul(data_csum, d->data_csum);

	if (attr == &sysfs_size) {
		uint64_t v;

		strtoi_h_or_return(buf, v);
		u->sectors = v >> 9;
		bch_uuid_write(d->c);
		set_capacity(d->disk, u->sectors);
	}

	if (attr == &sysfs_label) {
		memcpy(u->label, buf, SB_LABEL_SIZE);
		bch_uuid_write(d->c);
	}

	if (attr == &sysfs_unregister) {
		set_bit(BCACHE_DEV_DETACHING, &d->flags);
		bcache_device_stop(d);
	}

	return size;
}
STORE_LOCKED(bch_flash_dev)

static struct attribute *bch_flash_dev_attrs[] = {
	&sysfs_unregister,
#if 0
	&sysfs_data_csum,
#endif
	&sysfs_label,
	&sysfs_size,
	NULL
};
ATTRIBUTE_GROUPS(bch_flash_dev);

KTYPE(bch_flash_dev);
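
/*
 * bset statistics are gathered by walking every btree node: bset_stats_op
 * embeds a struct btree_op so that bch_btree_map_nodes() can invoke
 * bch_btree_bset_stats() once per node and accumulate the totals printed
 * by bch_bset_print_stats().
 */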
struct bset_stats_op {
	struct btree_op op;
	size_t nodes;
	struct bset_stats stats;
};

static int bch_btree_bset_stats(struct btree_op *b_op, struct btree *b)
{
	struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);

	op->nodes++;
	bch_btree_keys_stats(&b->keys, &op->stats);

	return MAP_CONTINUE;
}

static int bch_bset_print_stats(struct cache_set *c, char *buf)
{
	struct bset_stats_op op;
	int ret;

	memset(&op, 0, sizeof(op));
	bch_btree_op_init(&op.op, -1);

	ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, bch_btree_bset_stats);
	if (ret < 0)
		return ret;

	return snprintf(buf, PAGE_SIZE,
			"btree nodes: %zu\n"
			"written sets: %zu\n"
			"unwritten sets: %zu\n"
			"written key bytes: %zu\n"
			"unwritten key bytes: %zu\n"
			"floats: %zu\n"
			"failed: %zu\n",
			op.nodes,
			op.stats.sets_written, op.stats.sets_unwritten,
			op.stats.bytes_written, op.stats.bytes_unwritten,
			op.stats.floats, op.stats.failed);
}
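
/*
 * bch_root_usage() needs a read lock on the root node, but the root can
 * be replaced (e.g. by a split) between reading c->root and acquiring the
 * lock, hence the retry loop that re-checks b == c->root after locking.
 */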
static unsigned int bch_root_usage(struct cache_set *c)
{
	unsigned int bytes = 0;
	struct bkey *k;
	struct btree *b;
	struct btree_iter iter;

	goto lock_root;

	do {
		rw_unlock(false, b);
lock_root:
		b = c->root;
		rw_lock(false, b, b->level);
	} while (b != c->root);

	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
		bytes += bkey_bytes(k);

	rw_unlock(false, b);

	return (bytes * 100) / btree_bytes(c);
}

static size_t bch_cache_size(struct cache_set *c)
{
	size_t ret = 0;
	struct btree *b;

	mutex_lock(&c->bucket_lock);
	list_for_each_entry(b, &c->btree_cache, list)
		ret += 1 << (b->keys.page_order + PAGE_SHIFT);

	mutex_unlock(&c->bucket_lock);
	return ret;
}

static unsigned int bch_cache_max_chain(struct cache_set *c)
{
	unsigned int ret = 0;
	struct hlist_head *h;

	mutex_lock(&c->bucket_lock);

	for (h = c->bucket_hash;
	     h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
	     h++) {
		unsigned int i = 0;
		struct hlist_node *p;

		hlist_for_each(p, h)
			i++;

		ret = max(ret, i);
	}

	mutex_unlock(&c->bucket_lock);
	return ret;
}

static unsigned int bch_btree_used(struct cache_set *c)
{
	return div64_u64(c->gc_stats.key_bytes * 100,
			 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
}

static unsigned int bch_average_key_size(struct cache_set *c)
{
	return c->gc_stats.nkeys
		? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
		: 0;
}

SHOW(__bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	sysfs_print(synchronous, CACHE_SYNC(&c->cache->sb));
	sysfs_print(journal_delay_ms, c->journal_delay_ms);
	sysfs_hprint(bucket_size, bucket_bytes(c->cache));
	sysfs_hprint(block_size, block_bytes(c->cache));
	sysfs_print(tree_depth, c->root->level);
	sysfs_print(root_usage_percent, bch_root_usage(c));

	sysfs_hprint(btree_cache_size, bch_cache_size(c));
	sysfs_print(btree_cache_max_chain, bch_cache_max_chain(c));
	sysfs_print(cache_available_percent, 100 - c->gc_stats.in_use);

	sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
	sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
	sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
	sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);

	sysfs_print(btree_used_percent, bch_btree_used(c));
	sysfs_print(btree_nodes, c->gc_stats.nodes);
	sysfs_hprint(average_key_size, bch_average_key_size(c));

	sysfs_print(cache_read_races,
		    atomic_long_read(&c->cache_read_races));

	sysfs_print(reclaim,
		    atomic_long_read(&c->reclaim));

	sysfs_print(reclaimed_journal_buckets,
		    atomic_long_read(&c->reclaimed_journal_buckets));

	sysfs_print(flush_write,
		    atomic_long_read(&c->flush_write));

	sysfs_print(writeback_keys_done,
		    atomic_long_read(&c->writeback_keys_done));
	sysfs_print(writeback_keys_failed,
		    atomic_long_read(&c->writeback_keys_failed));

	if (attr == &sysfs_errors)
		return bch_snprint_string_list(buf, PAGE_SIZE, error_actions,
					       c->on_error);

	/* See count_io_errors for why 88 */
	sysfs_print(io_error_halflife, c->error_decay * 88);
	sysfs_print(io_error_limit, c->error_limit);

	sysfs_hprint(congested,
		     ((uint64_t) bch_get_congested(c)) << 9);
	sysfs_print(congested_read_threshold_us,
		    c->congested_read_threshold_us);
	sysfs_print(congested_write_threshold_us,
		    c->congested_write_threshold_us);

	sysfs_print(cutoff_writeback, bch_cutoff_writeback);
	sysfs_print(cutoff_writeback_sync, bch_cutoff_writeback_sync);

	sysfs_print(active_journal_entries, fifo_used(&c->journal.pin));
	sysfs_printf(verify, "%i", c->verify);
	sysfs_printf(key_merging_disabled, "%i", c->key_merging_disabled);
	sysfs_printf(expensive_debug_checks,
		     "%i", c->expensive_debug_checks);
	sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
	sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
	sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
	sysfs_printf(idle_max_writeback_rate, "%i",
		     c->idle_max_writeback_rate_enabled);
	sysfs_printf(gc_after_writeback, "%i", c->gc_after_writeback);
	sysfs_printf(io_disable, "%i",
		     test_bit(CACHE_SET_IO_DISABLE, &c->flags));

	if (attr == &sysfs_bset_tree_stats)
		return bch_bset_print_stats(c, buf);

	if (attr == &sysfs_feature_compat)
		return bch_print_cache_set_feature_compat(c, buf, PAGE_SIZE);
	if (attr == &sysfs_feature_ro_compat)
		return bch_print_cache_set_feature_ro_compat(c, buf, PAGE_SIZE);
	if (attr == &sysfs_feature_incompat)
		return bch_print_cache_set_feature_incompat(c, buf, PAGE_SIZE);

	return 0;
}
SHOW_LOCKED(bch_cache_set)

STORE(__bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
	ssize_t v;

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	if (attr == &sysfs_unregister)
		bch_cache_set_unregister(c);

	if (attr == &sysfs_stop)
		bch_cache_set_stop(c);

	if (attr == &sysfs_synchronous) {
		bool sync = strtoul_or_return(buf);

		if (sync != CACHE_SYNC(&c->cache->sb)) {
			SET_CACHE_SYNC(&c->cache->sb, sync);
			bcache_write_super(c);
		}
	}

	if (attr == &sysfs_flash_vol_create) {
		int r;
		uint64_t v;

		strtoi_h_or_return(buf, v);

		r = bch_flash_dev_create(c, v);
		if (r)
			return r;
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&c->writeback_keys_done, 0);
		atomic_long_set(&c->writeback_keys_failed, 0);

		memset(&c->gc_stats, 0, sizeof(struct gc_stat));
		bch_cache_accounting_clear(&c->accounting);
	}

	if (attr == &sysfs_trigger_gc)
		force_wake_up_gc(c);

	if (attr == &sysfs_prune_cache) {
		struct shrink_control sc;

		sc.gfp_mask = GFP_KERNEL;
		sc.nr_to_scan = strtoul_or_return(buf);
		c->shrink.scan_objects(&c->shrink, &sc);
	}

	sysfs_strtoul_clamp(congested_read_threshold_us,
			    c->congested_read_threshold_us,
			    0, UINT_MAX);
	sysfs_strtoul_clamp(congested_write_threshold_us,
			    c->congested_write_threshold_us,
			    0, UINT_MAX);

	if (attr == &sysfs_errors) {
		v = __sysfs_match_string(error_actions, -1, buf);
		if (v < 0)
			return v;

		c->on_error = v;
	}

	sysfs_strtoul_clamp(io_error_limit, c->error_limit, 0, UINT_MAX);

	/* See count_io_errors() for why 88 */
	if (attr == &sysfs_io_error_halflife) {
		unsigned long v = 0;
		ssize_t ret;

		ret = strtoul_safe_clamp(buf, v, 0, UINT_MAX);
		if (!ret) {
			c->error_decay = v / 88;
			return size;
		}
		return ret;
	}

	if (attr == &sysfs_io_disable) {
		v = strtoul_or_return(buf);
		if (v) {
			if (test_and_set_bit(CACHE_SET_IO_DISABLE,
					     &c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already set\n");
		} else {
			if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
						&c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already cleared\n");
		}
	}

	sysfs_strtoul_clamp(journal_delay_ms,
			    c->journal_delay_ms,
			    0, USHRT_MAX);
	sysfs_strtoul_bool(verify, c->verify);
	sysfs_strtoul_bool(key_merging_disabled, c->key_merging_disabled);
	sysfs_strtoul(expensive_debug_checks, c->expensive_debug_checks);
	sysfs_strtoul_bool(gc_always_rewrite, c->gc_always_rewrite);
	sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled);
	sysfs_strtoul_bool(copy_gc_enabled, c->copy_gc_enabled);
	sysfs_strtoul_bool(idle_max_writeback_rate,
			   c->idle_max_writeback_rate_enabled);

	/*
	 * Writing gc_after_writeback here may overwrite an already-set
	 * BCH_DO_AUTO_GC; that doesn't matter, because the flag will be
	 * set again at the next opportunity.
	 */
	sysfs_strtoul_clamp(gc_after_writeback, c->gc_after_writeback, 0, 1);

	return size;
}
STORE_LOCKED(bch_cache_set)

SHOW(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);

	return bch_cache_set_show(&c->kobj, attr, buf);
}

STORE(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	return bch_cache_set_store(&c->kobj, attr, buf, size);
}
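
/*
 * The "internal" kobject is embedded in struct cache_set and lives and
 * dies with the main cache_set kobject, so there is nothing to free here.
 */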
static void bch_cache_set_internal_release(struct kobject *k)
{
}

static struct attribute *bch_cache_set_attrs[] = {
	&sysfs_unregister,
	&sysfs_stop,
	&sysfs_synchronous,
	&sysfs_journal_delay_ms,
	&sysfs_flash_vol_create,

	&sysfs_bucket_size,
	&sysfs_block_size,
	&sysfs_tree_depth,
	&sysfs_root_usage_percent,
	&sysfs_btree_cache_size,
	&sysfs_cache_available_percent,

	&sysfs_average_key_size,

	&sysfs_errors,
	&sysfs_io_error_limit,
	&sysfs_io_error_halflife,
	&sysfs_congested,
	&sysfs_congested_read_threshold_us,
	&sysfs_congested_write_threshold_us,
	&sysfs_clear_stats,
	NULL
};
ATTRIBUTE_GROUPS(bch_cache_set);

KTYPE(bch_cache_set);

static struct attribute *bch_cache_set_internal_attrs[] = {
	&sysfs_active_journal_entries,

	sysfs_time_stats_attribute_list(btree_gc, sec, ms)
	sysfs_time_stats_attribute_list(btree_split, sec, us)
	sysfs_time_stats_attribute_list(btree_sort, ms, us)
	sysfs_time_stats_attribute_list(btree_read, ms, us)

	&sysfs_btree_nodes,
	&sysfs_btree_used_percent,
	&sysfs_btree_cache_max_chain,

	&sysfs_bset_tree_stats,
	&sysfs_cache_read_races,
	&sysfs_reclaim,
	&sysfs_reclaimed_journal_buckets,
	&sysfs_flush_write,
	&sysfs_writeback_keys_done,
	&sysfs_writeback_keys_failed,

	&sysfs_trigger_gc,
	&sysfs_prune_cache,
#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_verify,
	&sysfs_key_merging_disabled,
	&sysfs_expensive_debug_checks,
#endif
	&sysfs_gc_always_rewrite,
	&sysfs_btree_shrinker_disabled,
	&sysfs_copy_gc_enabled,
	&sysfs_idle_max_writeback_rate,
	&sysfs_gc_after_writeback,
	&sysfs_io_disable,
	&sysfs_cutoff_writeback,
	&sysfs_cutoff_writeback_sync,
	&sysfs_feature_compat,
	&sysfs_feature_ro_compat,
	&sysfs_feature_incompat,
	NULL
};
ATTRIBUTE_GROUPS(bch_cache_set_internal);

KTYPE(bch_cache_set_internal);
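
/*
 * Comparator for the priority_stats sort below: orders bucket priorities
 * in descending order. The cond_resched() keeps a sort over a potentially
 * very large nbuckets array from hogging the CPU.
 */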
static int __bch_cache_cmp(const void *l, const void *r)
{
	cond_resched();
	return *((uint16_t *)r) - *((uint16_t *)l);
}

SHOW(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);

	sysfs_hprint(bucket_size, bucket_bytes(ca));
	sysfs_hprint(block_size, block_bytes(ca));
	sysfs_print(nbuckets, ca->sb.nbuckets);
	sysfs_print(discard, ca->discard);
	sysfs_hprint(written, atomic_long_read(&ca->sectors_written) << 9);
	sysfs_hprint(btree_written,
		     atomic_long_read(&ca->btree_sectors_written) << 9);
	sysfs_hprint(metadata_written,
		     (atomic_long_read(&ca->meta_sectors_written) +
		      atomic_long_read(&ca->btree_sectors_written)) << 9);

	sysfs_print(io_errors,
		    atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);

	if (attr == &sysfs_cache_replacement_policy)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       cache_replacement_policies,
					       CACHE_REPLACEMENT(&ca->sb));

	if (attr == &sysfs_priority_stats) {
		struct bucket *b;
		size_t n = ca->sb.nbuckets, i;
		size_t unused = 0, available = 0, dirty = 0, meta = 0;
		uint64_t sum = 0;
		/* Compute 31 quantiles */
		uint16_t q[31], *p, *cached;
		ssize_t ret;

		cached = p = vmalloc(array_size(sizeof(uint16_t),
						ca->sb.nbuckets));
		if (!p)
			return -ENOMEM;

		mutex_lock(&ca->set->bucket_lock);
		for_each_bucket(b, ca) {
			if (!GC_SECTORS_USED(b))
				unused++;
			if (GC_MARK(b) == GC_MARK_RECLAIMABLE)
				available++;
			if (GC_MARK(b) == GC_MARK_DIRTY)
				dirty++;
			if (GC_MARK(b) == GC_MARK_METADATA)
				meta++;
		}

		for (i = ca->sb.first_bucket; i < n; i++)
			p[i] = ca->buckets[i].prio;
		mutex_unlock(&ca->set->bucket_lock);

		sort(p, n, sizeof(uint16_t), __bch_cache_cmp, NULL);

		while (n &&
		       !cached[n - 1])
			--n;

		while (cached < p + n &&
		       *cached == BTREE_PRIO) {
			cached++;
			n--;
		}

		for (i = 0; i < n; i++)
			sum += INITIAL_PRIO - cached[i];

		if (n)
			sum = div64_u64(sum, n);

		for (i = 0; i < ARRAY_SIZE(q); i++)
			q[i] = INITIAL_PRIO - cached[n * (i + 1) /
				(ARRAY_SIZE(q) + 1)];

		vfree(p);

		ret = scnprintf(buf, PAGE_SIZE,
				"Unused: %zu%%\n"
				"Clean: %zu%%\n"
				"Dirty: %zu%%\n"
				"Metadata: %zu%%\n"
				"Average: %llu\n"
				"Sectors per Q: %zu\n"
				"Quantiles: [",
				unused * 100 / (size_t) ca->sb.nbuckets,
				available * 100 / (size_t) ca->sb.nbuckets,
				dirty * 100 / (size_t) ca->sb.nbuckets,
				meta * 100 / (size_t) ca->sb.nbuckets, sum,
				n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));

		for (i = 0; i < ARRAY_SIZE(q); i++)
			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
					 "%u ", q[i]);
		ret--;

		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");

		return ret;
	}

	return 0;
}
SHOW_LOCKED(bch_cache)

STORE(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);
	ssize_t v;

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	if (attr == &sysfs_discard) {
		bool v = strtoul_or_return(buf);

		if (bdev_max_discard_sectors(ca->bdev))
			ca->discard = v;

		if (v != CACHE_DISCARD(&ca->sb)) {
			SET_CACHE_DISCARD(&ca->sb, v);
			bcache_write_super(ca->set);
		}
	}

	if (attr == &sysfs_cache_replacement_policy) {
		v = __sysfs_match_string(cache_replacement_policies, -1, buf);
		if (v < 0)
			return v;

		if ((unsigned int) v != CACHE_REPLACEMENT(&ca->sb)) {
			mutex_lock(&ca->set->bucket_lock);
			SET_CACHE_REPLACEMENT(&ca->sb, v);
			mutex_unlock(&ca->set->bucket_lock);

			bcache_write_super(ca->set);
		}
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&ca->sectors_written, 0);
		atomic_long_set(&ca->btree_sectors_written, 0);
		atomic_long_set(&ca->meta_sectors_written, 0);
		atomic_set(&ca->io_count, 0);
		atomic_set(&ca->io_errors, 0);
	}

	return size;
}
STORE_LOCKED(bch_cache)

static struct attribute *bch_cache_attrs[] = {
	&sysfs_bucket_size,
	&sysfs_block_size,
	&sysfs_nbuckets,
	&sysfs_priority_stats,
	&sysfs_discard,
	&sysfs_written,
	&sysfs_btree_written,
	&sysfs_metadata_written,
	&sysfs_io_errors,
	&sysfs_clear_stats,
	&sysfs_cache_replacement_policy,
	NULL
};
ATTRIBUTE_GROUPS(bch_cache);

KTYPE(bch_cache);