dm-cache-target.c 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454
  1. /*
  2. * Copyright (C) 2012 Red Hat. All rights reserved.
  3. *
  4. * This file is released under the GPL.
  5. */
  6. #include "dm.h"
  7. #include "dm-bio-prison-v2.h"
  8. #include "dm-bio-record.h"
  9. #include "dm-cache-metadata.h"
  10. #include "dm-io-tracker.h"
  11. #include <linux/dm-io.h>
  12. #include <linux/dm-kcopyd.h>
  13. #include <linux/jiffies.h>
  14. #include <linux/init.h>
  15. #include <linux/mempool.h>
  16. #include <linux/module.h>
  17. #include <linux/rwsem.h>
  18. #include <linux/slab.h>
  19. #include <linux/vmalloc.h>
  20. #define DM_MSG_PREFIX "cache"
  21. DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
  22. "A percentage of time allocated for copying to and/or from cache");
  23. /*----------------------------------------------------------------*/
  24. /*
  25. * Glossary:
  26. *
  27. * oblock: index of an origin block
  28. * cblock: index of a cache block
  29. * promotion: movement of a block from origin to cache
  30. * demotion: movement of a block from cache to origin
  31. * migration: movement of a block between the origin and cache device,
  32. * either direction
  33. */
  34. /*----------------------------------------------------------------*/
  35. /*
  36. * Represents a chunk of future work. 'input' allows continuations to pass
  37. * values between themselves, typically error values.
  38. */
  39. struct continuation {
  40. struct work_struct ws;
  41. blk_status_t input;
  42. };
  43. static inline void init_continuation(struct continuation *k,
  44. void (*fn)(struct work_struct *))
  45. {
  46. INIT_WORK(&k->ws, fn);
  47. k->input = 0;
  48. }
  49. static inline void queue_continuation(struct workqueue_struct *wq,
  50. struct continuation *k)
  51. {
  52. queue_work(wq, &k->ws);
  53. }
  54. /*----------------------------------------------------------------*/
  55. /*
  56. * The batcher collects together pieces of work that need a particular
  57. * operation to occur before they can proceed (typically a commit).
  58. */
  59. struct batcher {
  60. /*
  61. * The operation that everyone is waiting for.
  62. */
  63. blk_status_t (*commit_op)(void *context);
  64. void *commit_context;
  65. /*
  66. * This is how bios should be issued once the commit op is complete
  67. * (accounted_request).
  68. */
  69. void (*issue_op)(struct bio *bio, void *context);
  70. void *issue_context;
  71. /*
  72. * Queued work gets put on here after commit.
  73. */
  74. struct workqueue_struct *wq;
  75. spinlock_t lock;
  76. struct list_head work_items;
  77. struct bio_list bios;
  78. struct work_struct commit_work;
  79. bool commit_scheduled;
  80. };
  81. static void __commit(struct work_struct *_ws)
  82. {
  83. struct batcher *b = container_of(_ws, struct batcher, commit_work);
  84. blk_status_t r;
  85. struct list_head work_items;
  86. struct work_struct *ws, *tmp;
  87. struct continuation *k;
  88. struct bio *bio;
  89. struct bio_list bios;
  90. INIT_LIST_HEAD(&work_items);
  91. bio_list_init(&bios);
  92. /*
  93. * We have to grab these before the commit_op to avoid a race
  94. * condition.
  95. */
  96. spin_lock_irq(&b->lock);
  97. list_splice_init(&b->work_items, &work_items);
  98. bio_list_merge(&bios, &b->bios);
  99. bio_list_init(&b->bios);
  100. b->commit_scheduled = false;
  101. spin_unlock_irq(&b->lock);
  102. r = b->commit_op(b->commit_context);
  103. list_for_each_entry_safe(ws, tmp, &work_items, entry) {
  104. k = container_of(ws, struct continuation, ws);
  105. k->input = r;
  106. INIT_LIST_HEAD(&ws->entry); /* to avoid a WARN_ON */
  107. queue_work(b->wq, ws);
  108. }
  109. while ((bio = bio_list_pop(&bios))) {
  110. if (r) {
  111. bio->bi_status = r;
  112. bio_endio(bio);
  113. } else
  114. b->issue_op(bio, b->issue_context);
  115. }
  116. }
  117. static void batcher_init(struct batcher *b,
  118. blk_status_t (*commit_op)(void *),
  119. void *commit_context,
  120. void (*issue_op)(struct bio *bio, void *),
  121. void *issue_context,
  122. struct workqueue_struct *wq)
  123. {
  124. b->commit_op = commit_op;
  125. b->commit_context = commit_context;
  126. b->issue_op = issue_op;
  127. b->issue_context = issue_context;
  128. b->wq = wq;
  129. spin_lock_init(&b->lock);
  130. INIT_LIST_HEAD(&b->work_items);
  131. bio_list_init(&b->bios);
  132. INIT_WORK(&b->commit_work, __commit);
  133. b->commit_scheduled = false;
  134. }
  135. static void async_commit(struct batcher *b)
  136. {
  137. queue_work(b->wq, &b->commit_work);
  138. }
  139. static void continue_after_commit(struct batcher *b, struct continuation *k)
  140. {
  141. bool commit_scheduled;
  142. spin_lock_irq(&b->lock);
  143. commit_scheduled = b->commit_scheduled;
  144. list_add_tail(&k->ws.entry, &b->work_items);
  145. spin_unlock_irq(&b->lock);
  146. if (commit_scheduled)
  147. async_commit(b);
  148. }
  149. /*
  150. * Bios are errored if commit failed.
  151. */
  152. static void issue_after_commit(struct batcher *b, struct bio *bio)
  153. {
  154. bool commit_scheduled;
  155. spin_lock_irq(&b->lock);
  156. commit_scheduled = b->commit_scheduled;
  157. bio_list_add(&b->bios, bio);
  158. spin_unlock_irq(&b->lock);
  159. if (commit_scheduled)
  160. async_commit(b);
  161. }
  162. /*
  163. * Call this if some urgent work is waiting for the commit to complete.
  164. */
  165. static void schedule_commit(struct batcher *b)
  166. {
  167. bool immediate;
  168. spin_lock_irq(&b->lock);
  169. immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
  170. b->commit_scheduled = true;
  171. spin_unlock_irq(&b->lock);
  172. if (immediate)
  173. async_commit(b);
  174. }
  175. /*
  176. * There are a couple of places where we let a bio run, but want to do some
  177. * work before calling its endio function. We do this by temporarily
  178. * changing the endio fn.
  179. */
  180. struct dm_hook_info {
  181. bio_end_io_t *bi_end_io;
  182. };
  183. static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
  184. bio_end_io_t *bi_end_io, void *bi_private)
  185. {
  186. h->bi_end_io = bio->bi_end_io;
  187. bio->bi_end_io = bi_end_io;
  188. bio->bi_private = bi_private;
  189. }
  190. static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
  191. {
  192. bio->bi_end_io = h->bi_end_io;
  193. }
  194. /*----------------------------------------------------------------*/
  #define MIGRATION_POOL_SIZE 128
  #define COMMIT_PERIOD HZ
  #define MIGRATION_COUNT_WINDOW 10

  /*
   * Limits, in sectors, on the block size of the device holding cache data:
   * it must be between 32KB and 1GB.
   */
  #define DATA_DEV_BLOCK_SIZE_MIN_SECTORS ((32 * 1024) >> SECTOR_SHIFT)
  #define DATA_DEV_BLOCK_SIZE_MAX_SECTORS ((1024 * 1024 * 1024) >> SECTOR_SHIFT)
  /* States describing whether the metadata may be modified. */
  enum cache_metadata_mode {
      /* metadata may be changed */
      CM_WRITE,

      /* metadata may not be changed */
      CM_READ_ONLY,

      CM_FAIL
  };
  /* How writes are routed between the cache and the origin. */
  enum cache_io_mode {
      /*
       * Writes go to cached blocks only, and those blocks are marked
       * dirty.  Losing the cache device means losing data.  Potential
       * performance increase for both reads and writes.
       */
      CM_IO_WRITEBACK,

      /*
       * Writes go to both the cache and the origin, so blocks are never
       * dirty.  Potential performance benefit for reads only.
       */
      CM_IO_WRITETHROUGH,

      /*
       * A degraded mode useful for various cache coherency situations
       * (eg, rolling back snapshots).  Reads and writes always go to the
       * origin.  A write to a cached oblock invalidates the cache block.
       */
      CM_IO_PASSTHROUGH
  };
  229. struct cache_features {
  230. enum cache_metadata_mode mode;
  231. enum cache_io_mode io_mode;
  232. unsigned int metadata_version;
  233. bool discard_passdown:1;
  234. };
  235. struct cache_stats {
  236. atomic_t read_hit;
  237. atomic_t read_miss;
  238. atomic_t write_hit;
  239. atomic_t write_miss;
  240. atomic_t demotion;
  241. atomic_t promotion;
  242. atomic_t writeback;
  243. atomic_t copies_avoided;
  244. atomic_t cache_cell_clash;
  245. atomic_t commit_count;
  246. atomic_t discard_count;
  247. };
  248. struct cache {
  249. struct dm_target *ti;
  250. spinlock_t lock;
  251. /*
  252. * Fields for converting from sectors to blocks.
  253. */
  254. int sectors_per_block_shift;
  255. sector_t sectors_per_block;
  256. struct dm_cache_metadata *cmd;
  257. /*
  258. * Metadata is written to this device.
  259. */
  260. struct dm_dev *metadata_dev;
  261. /*
  262. * The slower of the two data devices. Typically a spindle.
  263. */
  264. struct dm_dev *origin_dev;
  265. /*
  266. * The faster of the two data devices. Typically an SSD.
  267. */
  268. struct dm_dev *cache_dev;
  269. /*
  270. * Size of the origin device in _complete_ blocks and native sectors.
  271. */
  272. dm_oblock_t origin_blocks;
  273. sector_t origin_sectors;
  274. /*
  275. * Size of the cache device in blocks.
  276. */
  277. dm_cblock_t cache_size;
  278. /*
  279. * Invalidation fields.
  280. */
  281. spinlock_t invalidation_lock;
  282. struct list_head invalidation_requests;
  283. sector_t migration_threshold;
  284. wait_queue_head_t migration_wait;
  285. atomic_t nr_allocated_migrations;
  286. /*
  287. * The number of in flight migrations that are performing
  288. * background io. eg, promotion, writeback.
  289. */
  290. atomic_t nr_io_migrations;
  291. struct bio_list deferred_bios;
  292. struct rw_semaphore quiesce_lock;
  293. /*
  294. * origin_blocks entries, discarded if set.
  295. */
  296. dm_dblock_t discard_nr_blocks;
  297. unsigned long *discard_bitset;
  298. uint32_t discard_block_size; /* a power of 2 times sectors per block */
  299. /*
  300. * Rather than reconstructing the table line for the status we just
  301. * save it and regurgitate.
  302. */
  303. unsigned int nr_ctr_args;
  304. const char **ctr_args;
  305. struct dm_kcopyd_client *copier;
  306. struct work_struct deferred_bio_worker;
  307. struct work_struct migration_worker;
  308. struct workqueue_struct *wq;
  309. struct delayed_work waker;
  310. struct dm_bio_prison_v2 *prison;
  311. /*
  312. * cache_size entries, dirty if set
  313. */
  314. unsigned long *dirty_bitset;
  315. atomic_t nr_dirty;
  316. unsigned int policy_nr_args;
  317. struct dm_cache_policy *policy;
  318. /*
  319. * Cache features such as write-through.
  320. */
  321. struct cache_features features;
  322. struct cache_stats stats;
  323. bool need_tick_bio:1;
  324. bool sized:1;
  325. bool invalidate:1;
  326. bool commit_requested:1;
  327. bool loaded_mappings:1;
  328. bool loaded_discards:1;
  329. struct rw_semaphore background_work_lock;
  330. struct batcher committer;
  331. struct work_struct commit_ws;
  332. struct dm_io_tracker tracker;
  333. mempool_t migration_pool;
  334. struct bio_set bs;
  335. };
  336. struct per_bio_data {
  337. bool tick:1;
  338. unsigned int req_nr:2;
  339. struct dm_bio_prison_cell_v2 *cell;
  340. struct dm_hook_info hook_info;
  341. sector_t len;
  342. };
  343. struct dm_cache_migration {
  344. struct continuation k;
  345. struct cache *cache;
  346. struct policy_work *op;
  347. struct bio *overwrite_bio;
  348. struct dm_bio_prison_cell_v2 *cell;
  349. dm_cblock_t invalidate_cblock;
  350. dm_oblock_t invalidate_oblock;
  351. };
  352. /*----------------------------------------------------------------*/
  353. static bool writethrough_mode(struct cache *cache)
  354. {
  355. return cache->features.io_mode == CM_IO_WRITETHROUGH;
  356. }
  357. static bool writeback_mode(struct cache *cache)
  358. {
  359. return cache->features.io_mode == CM_IO_WRITEBACK;
  360. }
  361. static inline bool passthrough_mode(struct cache *cache)
  362. {
  363. return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
  364. }
  365. /*----------------------------------------------------------------*/
  366. static void wake_deferred_bio_worker(struct cache *cache)
  367. {
  368. queue_work(cache->wq, &cache->deferred_bio_worker);
  369. }
  370. static void wake_migration_worker(struct cache *cache)
  371. {
  372. if (passthrough_mode(cache))
  373. return;
  374. queue_work(cache->wq, &cache->migration_worker);
  375. }
  376. /*----------------------------------------------------------------*/
  377. static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
  378. {
  379. return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
  380. }
  381. static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
  382. {
  383. dm_bio_prison_free_cell_v2(cache->prison, cell);
  384. }
  385. static struct dm_cache_migration *alloc_migration(struct cache *cache)
  386. {
  387. struct dm_cache_migration *mg;
  388. mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
  389. memset(mg, 0, sizeof(*mg));
  390. mg->cache = cache;
  391. atomic_inc(&cache->nr_allocated_migrations);
  392. return mg;
  393. }
  394. static void free_migration(struct dm_cache_migration *mg)
  395. {
  396. struct cache *cache = mg->cache;
  397. if (atomic_dec_and_test(&cache->nr_allocated_migrations))
  398. wake_up(&cache->migration_wait);
  399. mempool_free(mg, &cache->migration_pool);
  400. }
  401. /*----------------------------------------------------------------*/
  402. static inline dm_oblock_t oblock_succ(dm_oblock_t b)
  403. {
  404. return to_oblock(from_oblock(b) + 1ull);
  405. }
  406. static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
  407. {
  408. key->virtual = 0;
  409. key->dev = 0;
  410. key->block_begin = from_oblock(begin);
  411. key->block_end = from_oblock(end);
  412. }
  /*
   * There are two lock levels:
   *   level 0 is used to prevent WRITEs;
   *   level 1 prevents *both* READs and WRITEs.
   */
  #define WRITE_LOCK_LEVEL 0
  #define READ_WRITE_LOCK_LEVEL 1
  419. static unsigned int lock_level(struct bio *bio)
  420. {
  421. return bio_data_dir(bio) == WRITE ?
  422. WRITE_LOCK_LEVEL :
  423. READ_WRITE_LOCK_LEVEL;
  424. }
  425. /*----------------------------------------------------------------
  426. * Per bio data
  427. *--------------------------------------------------------------*/
  428. static struct per_bio_data *get_per_bio_data(struct bio *bio)
  429. {
  430. struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
  431. BUG_ON(!pb);
  432. return pb;
  433. }
  434. static struct per_bio_data *init_per_bio_data(struct bio *bio)
  435. {
  436. struct per_bio_data *pb = get_per_bio_data(bio);
  437. pb->tick = false;
  438. pb->req_nr = dm_bio_get_target_bio_nr(bio);
  439. pb->cell = NULL;
  440. pb->len = 0;
  441. return pb;
  442. }
  443. /*----------------------------------------------------------------*/
  444. static void defer_bio(struct cache *cache, struct bio *bio)
  445. {
  446. spin_lock_irq(&cache->lock);
  447. bio_list_add(&cache->deferred_bios, bio);
  448. spin_unlock_irq(&cache->lock);
  449. wake_deferred_bio_worker(cache);
  450. }
  451. static void defer_bios(struct cache *cache, struct bio_list *bios)
  452. {
  453. spin_lock_irq(&cache->lock);
  454. bio_list_merge(&cache->deferred_bios, bios);
  455. bio_list_init(bios);
  456. spin_unlock_irq(&cache->lock);
  457. wake_deferred_bio_worker(cache);
  458. }
  459. /*----------------------------------------------------------------*/
  460. static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
  461. {
  462. bool r;
  463. struct per_bio_data *pb;
  464. struct dm_cell_key_v2 key;
  465. dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
  466. struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
  467. cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */
  468. build_key(oblock, end, &key);
  469. r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
  470. if (!r) {
  471. /*
  472. * Failed to get the lock.
  473. */
  474. free_prison_cell(cache, cell_prealloc);
  475. return r;
  476. }
  477. if (cell != cell_prealloc)
  478. free_prison_cell(cache, cell_prealloc);
  479. pb = get_per_bio_data(bio);
  480. pb->cell = cell;
  481. return r;
  482. }
  483. /*----------------------------------------------------------------*/
  484. static bool is_dirty(struct cache *cache, dm_cblock_t b)
  485. {
  486. return test_bit(from_cblock(b), cache->dirty_bitset);
  487. }
  488. static void set_dirty(struct cache *cache, dm_cblock_t cblock)
  489. {
  490. if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
  491. atomic_inc(&cache->nr_dirty);
  492. policy_set_dirty(cache->policy, cblock);
  493. }
  494. }
  495. /*
  496. * These two are called when setting after migrations to force the policy
  497. * and dirty bitset to be in sync.
  498. */
  499. static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
  500. {
  501. if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
  502. atomic_inc(&cache->nr_dirty);
  503. policy_set_dirty(cache->policy, cblock);
  504. }
  505. static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
  506. {
  507. if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
  508. if (atomic_dec_return(&cache->nr_dirty) == 0)
  509. dm_table_event(cache->ti->table);
  510. }
  511. policy_clear_dirty(cache->policy, cblock);
  512. }
  513. /*----------------------------------------------------------------*/
  514. static bool block_size_is_power_of_two(struct cache *cache)
  515. {
  516. return cache->sectors_per_block_shift >= 0;
  517. }
  518. static dm_block_t block_div(dm_block_t b, uint32_t n)
  519. {
  520. do_div(b, n);
  521. return b;
  522. }
  523. static dm_block_t oblocks_per_dblock(struct cache *cache)
  524. {
  525. dm_block_t oblocks = cache->discard_block_size;
  526. if (block_size_is_power_of_two(cache))
  527. oblocks >>= cache->sectors_per_block_shift;
  528. else
  529. oblocks = block_div(oblocks, cache->sectors_per_block);
  530. return oblocks;
  531. }
  532. static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
  533. {
  534. return to_dblock(block_div(from_oblock(oblock),
  535. oblocks_per_dblock(cache)));
  536. }
  537. static void set_discard(struct cache *cache, dm_dblock_t b)
  538. {
  539. BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
  540. atomic_inc(&cache->stats.discard_count);
  541. spin_lock_irq(&cache->lock);
  542. set_bit(from_dblock(b), cache->discard_bitset);
  543. spin_unlock_irq(&cache->lock);
  544. }
  545. static void clear_discard(struct cache *cache, dm_dblock_t b)
  546. {
  547. spin_lock_irq(&cache->lock);
  548. clear_bit(from_dblock(b), cache->discard_bitset);
  549. spin_unlock_irq(&cache->lock);
  550. }
  551. static bool is_discarded(struct cache *cache, dm_dblock_t b)
  552. {
  553. int r;
  554. spin_lock_irq(&cache->lock);
  555. r = test_bit(from_dblock(b), cache->discard_bitset);
  556. spin_unlock_irq(&cache->lock);
  557. return r;
  558. }
  559. static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
  560. {
  561. int r;
  562. spin_lock_irq(&cache->lock);
  563. r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
  564. cache->discard_bitset);
  565. spin_unlock_irq(&cache->lock);
  566. return r;
  567. }
  568. /*----------------------------------------------------------------
  569. * Remapping
  570. *--------------------------------------------------------------*/
  571. static void remap_to_origin(struct cache *cache, struct bio *bio)
  572. {
  573. bio_set_dev(bio, cache->origin_dev->bdev);
  574. }
  575. static void remap_to_cache(struct cache *cache, struct bio *bio,
  576. dm_cblock_t cblock)
  577. {
  578. sector_t bi_sector = bio->bi_iter.bi_sector;
  579. sector_t block = from_cblock(cblock);
  580. bio_set_dev(bio, cache->cache_dev->bdev);
  581. if (!block_size_is_power_of_two(cache))
  582. bio->bi_iter.bi_sector =
  583. (block * cache->sectors_per_block) +
  584. sector_div(bi_sector, cache->sectors_per_block);
  585. else
  586. bio->bi_iter.bi_sector =
  587. (block << cache->sectors_per_block_shift) |
  588. (bi_sector & (cache->sectors_per_block - 1));
  589. }
  590. static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
  591. {
  592. struct per_bio_data *pb;
  593. spin_lock_irq(&cache->lock);
  594. if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
  595. bio_op(bio) != REQ_OP_DISCARD) {
  596. pb = get_per_bio_data(bio);
  597. pb->tick = true;
  598. cache->need_tick_bio = false;
  599. }
  600. spin_unlock_irq(&cache->lock);
  601. }
  602. static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
  603. dm_oblock_t oblock)
  604. {
  605. // FIXME: check_if_tick_bio_needed() is called way too much through this interface
  606. check_if_tick_bio_needed(cache, bio);
  607. remap_to_origin(cache, bio);
  608. if (bio_data_dir(bio) == WRITE)
  609. clear_discard(cache, oblock_to_dblock(cache, oblock));
  610. }
  611. static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
  612. dm_oblock_t oblock, dm_cblock_t cblock)
  613. {
  614. check_if_tick_bio_needed(cache, bio);
  615. remap_to_cache(cache, bio, cblock);
  616. if (bio_data_dir(bio) == WRITE) {
  617. set_dirty(cache, cblock);
  618. clear_discard(cache, oblock_to_dblock(cache, oblock));
  619. }
  620. }
  621. static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
  622. {
  623. sector_t block_nr = bio->bi_iter.bi_sector;
  624. if (!block_size_is_power_of_two(cache))
  625. (void) sector_div(block_nr, cache->sectors_per_block);
  626. else
  627. block_nr >>= cache->sectors_per_block_shift;
  628. return to_oblock(block_nr);
  629. }
  630. static bool accountable_bio(struct cache *cache, struct bio *bio)
  631. {
  632. return bio_op(bio) != REQ_OP_DISCARD;
  633. }
  634. static void accounted_begin(struct cache *cache, struct bio *bio)
  635. {
  636. struct per_bio_data *pb;
  637. if (accountable_bio(cache, bio)) {
  638. pb = get_per_bio_data(bio);
  639. pb->len = bio_sectors(bio);
  640. dm_iot_io_begin(&cache->tracker, pb->len);
  641. }
  642. }
  643. static void accounted_complete(struct cache *cache, struct bio *bio)
  644. {
  645. struct per_bio_data *pb = get_per_bio_data(bio);
  646. dm_iot_io_end(&cache->tracker, pb->len);
  647. }
  648. static void accounted_request(struct cache *cache, struct bio *bio)
  649. {
  650. accounted_begin(cache, bio);
  651. dm_submit_bio_remap(bio, NULL);
  652. }
  /*
   * Callback-shaped wrapper around accounted_request(); @context is the
   * struct cache.
   */
  static void issue_op(struct bio *bio, void *context)
  {
  	accounted_request(context, bio);
  }
  658. /*
  659. * When running in writethrough mode we need to send writes to clean blocks
  660. * to both the cache and origin devices. Clone the bio and send them in parallel.
  661. */
  662. static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
  663. dm_oblock_t oblock, dm_cblock_t cblock)
  664. {
  665. struct bio *origin_bio = bio_alloc_clone(cache->origin_dev->bdev, bio,
  666. GFP_NOIO, &cache->bs);
  667. BUG_ON(!origin_bio);
  668. bio_chain(origin_bio, bio);
  669. if (bio_data_dir(origin_bio) == WRITE)
  670. clear_discard(cache, oblock_to_dblock(cache, oblock));
  671. submit_bio(origin_bio);
  672. remap_to_cache(cache, bio, cblock);
  673. }
  674. /*----------------------------------------------------------------
  675. * Failure modes
  676. *--------------------------------------------------------------*/
  677. static enum cache_metadata_mode get_cache_mode(struct cache *cache)
  678. {
  679. return cache->features.mode;
  680. }
  681. static const char *cache_device_name(struct cache *cache)
  682. {
  683. return dm_table_device_name(cache->ti->table);
  684. }
  685. static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
  686. {
  687. const char *descs[] = {
  688. "write",
  689. "read-only",
  690. "fail"
  691. };
  692. dm_table_event(cache->ti->table);
  693. DMINFO("%s: switching cache to %s mode",
  694. cache_device_name(cache), descs[(int)mode]);
  695. }
  696. static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
  697. {
  698. bool needs_check;
  699. enum cache_metadata_mode old_mode = get_cache_mode(cache);
  700. if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
  701. DMERR("%s: unable to read needs_check flag, setting failure mode.",
  702. cache_device_name(cache));
  703. new_mode = CM_FAIL;
  704. }
  705. if (new_mode == CM_WRITE && needs_check) {
  706. DMERR("%s: unable to switch cache to write mode until repaired.",
  707. cache_device_name(cache));
  708. if (old_mode != new_mode)
  709. new_mode = old_mode;
  710. else
  711. new_mode = CM_READ_ONLY;
  712. }
  713. /* Never move out of fail mode */
  714. if (old_mode == CM_FAIL)
  715. new_mode = CM_FAIL;
  716. switch (new_mode) {
  717. case CM_FAIL:
  718. case CM_READ_ONLY:
  719. dm_cache_metadata_set_read_only(cache->cmd);
  720. break;
  721. case CM_WRITE:
  722. dm_cache_metadata_set_read_write(cache->cmd);
  723. break;
  724. }
  725. cache->features.mode = new_mode;
  726. if (new_mode != old_mode)
  727. notify_mode_switch(cache, new_mode);
  728. }
  729. static void abort_transaction(struct cache *cache)
  730. {
  731. const char *dev_name = cache_device_name(cache);
  732. if (get_cache_mode(cache) >= CM_READ_ONLY)
  733. return;
  734. DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
  735. if (dm_cache_metadata_abort(cache->cmd)) {
  736. DMERR("%s: failed to abort metadata transaction", dev_name);
  737. set_cache_mode(cache, CM_FAIL);
  738. }
  739. if (dm_cache_metadata_set_needs_check(cache->cmd)) {
  740. DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
  741. set_cache_mode(cache, CM_FAIL);
  742. }
  743. }
  744. static void metadata_operation_failed(struct cache *cache, const char *op, int r)
  745. {
  746. DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
  747. cache_device_name(cache), op, r);
  748. abort_transaction(cache);
  749. set_cache_mode(cache, CM_READ_ONLY);
  750. }
  751. /*----------------------------------------------------------------*/
  752. static void load_stats(struct cache *cache)
  753. {
  754. struct dm_cache_statistics stats;
  755. dm_cache_metadata_get_stats(cache->cmd, &stats);
  756. atomic_set(&cache->stats.read_hit, stats.read_hits);
  757. atomic_set(&cache->stats.read_miss, stats.read_misses);
  758. atomic_set(&cache->stats.write_hit, stats.write_hits);
  759. atomic_set(&cache->stats.write_miss, stats.write_misses);
  760. }
  761. static void save_stats(struct cache *cache)
  762. {
  763. struct dm_cache_statistics stats;
  764. if (get_cache_mode(cache) >= CM_READ_ONLY)
  765. return;
  766. stats.read_hits = atomic_read(&cache->stats.read_hit);
  767. stats.read_misses = atomic_read(&cache->stats.read_miss);
  768. stats.write_hits = atomic_read(&cache->stats.write_hit);
  769. stats.write_misses = atomic_read(&cache->stats.write_miss);
  770. dm_cache_metadata_set_stats(cache->cmd, &stats);
  771. }
  772. static void update_stats(struct cache_stats *stats, enum policy_operation op)
  773. {
  774. switch (op) {
  775. case POLICY_PROMOTE:
  776. atomic_inc(&stats->promotion);
  777. break;
  778. case POLICY_DEMOTE:
  779. atomic_inc(&stats->demotion);
  780. break;
  781. case POLICY_WRITEBACK:
  782. atomic_inc(&stats->writeback);
  783. break;
  784. }
  785. }
  786. /*----------------------------------------------------------------
  787. * Migration processing
  788. *
  789. * Migration covers moving data from the origin device to the cache, or
  790. * vice versa.
  791. *--------------------------------------------------------------*/
  792. static void inc_io_migrations(struct cache *cache)
  793. {
  794. atomic_inc(&cache->nr_io_migrations);
  795. }
  796. static void dec_io_migrations(struct cache *cache)
  797. {
  798. atomic_dec(&cache->nr_io_migrations);
  799. }
  800. static bool discard_or_flush(struct bio *bio)
  801. {
  802. return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
  803. }
  804. static void calc_discard_block_range(struct cache *cache, struct bio *bio,
  805. dm_dblock_t *b, dm_dblock_t *e)
  806. {
  807. sector_t sb = bio->bi_iter.bi_sector;
  808. sector_t se = bio_end_sector(bio);
  809. *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
  810. if (se - sb < cache->discard_block_size)
  811. *e = *b;
  812. else
  813. *e = to_dblock(block_div(se, cache->discard_block_size));
  814. }
  815. /*----------------------------------------------------------------*/
  816. static void prevent_background_work(struct cache *cache)
  817. {
  818. lockdep_off();
  819. down_write(&cache->background_work_lock);
  820. lockdep_on();
  821. }
  822. static void allow_background_work(struct cache *cache)
  823. {
  824. lockdep_off();
  825. up_write(&cache->background_work_lock);
  826. lockdep_on();
  827. }
  828. static bool background_work_begin(struct cache *cache)
  829. {
  830. bool r;
  831. lockdep_off();
  832. r = down_read_trylock(&cache->background_work_lock);
  833. lockdep_on();
  834. return r;
  835. }
  836. static void background_work_end(struct cache *cache)
  837. {
  838. lockdep_off();
  839. up_read(&cache->background_work_lock);
  840. lockdep_on();
  841. }
  842. /*----------------------------------------------------------------*/
  843. static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
  844. {
  845. return (bio_data_dir(bio) == WRITE) &&
  846. (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
  847. }
  848. static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
  849. {
  850. return writeback_mode(cache) &&
  851. (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
  852. }
  853. static void quiesce(struct dm_cache_migration *mg,
  854. void (*continuation)(struct work_struct *))
  855. {
  856. init_continuation(&mg->k, continuation);
  857. dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws);
  858. }
  859. static struct dm_cache_migration *ws_to_mg(struct work_struct *ws)
  860. {
  861. struct continuation *k = container_of(ws, struct continuation, ws);
  862. return container_of(k, struct dm_cache_migration, k);
  863. }
  864. static void copy_complete(int read_err, unsigned long write_err, void *context)
  865. {
  866. struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
  867. if (read_err || write_err)
  868. mg->k.input = BLK_STS_IOERR;
  869. queue_continuation(mg->cache->wq, &mg->k);
  870. }
  871. static void copy(struct dm_cache_migration *mg, bool promote)
  872. {
  873. struct dm_io_region o_region, c_region;
  874. struct cache *cache = mg->cache;
  875. o_region.bdev = cache->origin_dev->bdev;
  876. o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block;
  877. o_region.count = cache->sectors_per_block;
  878. c_region.bdev = cache->cache_dev->bdev;
  879. c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block;
  880. c_region.count = cache->sectors_per_block;
  881. if (promote)
  882. dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
  883. else
  884. dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
  885. }
  886. static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
  887. {
  888. struct per_bio_data *pb = get_per_bio_data(bio);
  889. if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
  890. free_prison_cell(cache, pb->cell);
  891. pb->cell = NULL;
  892. }
  893. static void overwrite_endio(struct bio *bio)
  894. {
  895. struct dm_cache_migration *mg = bio->bi_private;
  896. struct cache *cache = mg->cache;
  897. struct per_bio_data *pb = get_per_bio_data(bio);
  898. dm_unhook_bio(&pb->hook_info, bio);
  899. if (bio->bi_status)
  900. mg->k.input = bio->bi_status;
  901. queue_continuation(cache->wq, &mg->k);
  902. }
  903. static void overwrite(struct dm_cache_migration *mg,
  904. void (*continuation)(struct work_struct *))
  905. {
  906. struct bio *bio = mg->overwrite_bio;
  907. struct per_bio_data *pb = get_per_bio_data(bio);
  908. dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
  909. /*
  910. * The overwrite bio is part of the copy operation, as such it does
  911. * not set/clear discard or dirty flags.
  912. */
  913. if (mg->op->op == POLICY_PROMOTE)
  914. remap_to_cache(mg->cache, bio, mg->op->cblock);
  915. else
  916. remap_to_origin(mg->cache, bio);
  917. init_continuation(&mg->k, continuation);
  918. accounted_request(mg->cache, bio);
  919. }
  920. /*
  921. * Migration steps:
  922. *
  923. * 1) exclusive lock preventing WRITEs
  924. * 2) quiesce
  925. * 3) copy or issue overwrite bio
  926. * 4) upgrade to exclusive lock preventing READs and WRITEs
  927. * 5) quiesce
  928. * 6) update metadata and commit
  929. * 7) unlock
  930. */
  931. static void mg_complete(struct dm_cache_migration *mg, bool success)
  932. {
  933. struct bio_list bios;
  934. struct cache *cache = mg->cache;
  935. struct policy_work *op = mg->op;
  936. dm_cblock_t cblock = op->cblock;
  937. if (success)
  938. update_stats(&cache->stats, op->op);
  939. switch (op->op) {
  940. case POLICY_PROMOTE:
  941. clear_discard(cache, oblock_to_dblock(cache, op->oblock));
  942. policy_complete_background_work(cache->policy, op, success);
  943. if (mg->overwrite_bio) {
  944. if (success)
  945. force_set_dirty(cache, cblock);
  946. else if (mg->k.input)
  947. mg->overwrite_bio->bi_status = mg->k.input;
  948. else
  949. mg->overwrite_bio->bi_status = BLK_STS_IOERR;
  950. bio_endio(mg->overwrite_bio);
  951. } else {
  952. if (success)
  953. force_clear_dirty(cache, cblock);
  954. dec_io_migrations(cache);
  955. }
  956. break;
  957. case POLICY_DEMOTE:
  958. /*
  959. * We clear dirty here to update the nr_dirty counter.
  960. */
  961. if (success)
  962. force_clear_dirty(cache, cblock);
  963. policy_complete_background_work(cache->policy, op, success);
  964. dec_io_migrations(cache);
  965. break;
  966. case POLICY_WRITEBACK:
  967. if (success)
  968. force_clear_dirty(cache, cblock);
  969. policy_complete_background_work(cache->policy, op, success);
  970. dec_io_migrations(cache);
  971. break;
  972. }
  973. bio_list_init(&bios);
  974. if (mg->cell) {
  975. if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
  976. free_prison_cell(cache, mg->cell);
  977. }
  978. free_migration(mg);
  979. defer_bios(cache, &bios);
  980. wake_migration_worker(cache);
  981. background_work_end(cache);
  982. }
  983. static void mg_success(struct work_struct *ws)
  984. {
  985. struct dm_cache_migration *mg = ws_to_mg(ws);
  986. mg_complete(mg, mg->k.input == 0);
  987. }
  988. static void mg_update_metadata(struct work_struct *ws)
  989. {
  990. int r;
  991. struct dm_cache_migration *mg = ws_to_mg(ws);
  992. struct cache *cache = mg->cache;
  993. struct policy_work *op = mg->op;
  994. switch (op->op) {
  995. case POLICY_PROMOTE:
  996. r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock);
  997. if (r) {
  998. DMERR_LIMIT("%s: migration failed; couldn't insert mapping",
  999. cache_device_name(cache));
  1000. metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
  1001. mg_complete(mg, false);
  1002. return;
  1003. }
  1004. mg_complete(mg, true);
  1005. break;
  1006. case POLICY_DEMOTE:
  1007. r = dm_cache_remove_mapping(cache->cmd, op->cblock);
  1008. if (r) {
  1009. DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata",
  1010. cache_device_name(cache));
  1011. metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
  1012. mg_complete(mg, false);
  1013. return;
  1014. }
  1015. /*
  1016. * It would be nice if we only had to commit when a REQ_FLUSH
  1017. * comes through. But there's one scenario that we have to
  1018. * look out for:
  1019. *
  1020. * - vblock x in a cache block
  1021. * - domotion occurs
  1022. * - cache block gets reallocated and over written
  1023. * - crash
  1024. *
  1025. * When we recover, because there was no commit the cache will
  1026. * rollback to having the data for vblock x in the cache block.
  1027. * But the cache block has since been overwritten, so it'll end
  1028. * up pointing to data that was never in 'x' during the history
  1029. * of the device.
  1030. *
  1031. * To avoid this issue we require a commit as part of the
  1032. * demotion operation.
  1033. */
  1034. init_continuation(&mg->k, mg_success);
  1035. continue_after_commit(&cache->committer, &mg->k);
  1036. schedule_commit(&cache->committer);
  1037. break;
  1038. case POLICY_WRITEBACK:
  1039. mg_complete(mg, true);
  1040. break;
  1041. }
  1042. }
  1043. static void mg_update_metadata_after_copy(struct work_struct *ws)
  1044. {
  1045. struct dm_cache_migration *mg = ws_to_mg(ws);
  1046. /*
  1047. * Did the copy succeed?
  1048. */
  1049. if (mg->k.input)
  1050. mg_complete(mg, false);
  1051. else
  1052. mg_update_metadata(ws);
  1053. }
  1054. static void mg_upgrade_lock(struct work_struct *ws)
  1055. {
  1056. int r;
  1057. struct dm_cache_migration *mg = ws_to_mg(ws);
  1058. /*
  1059. * Did the copy succeed?
  1060. */
  1061. if (mg->k.input)
  1062. mg_complete(mg, false);
  1063. else {
  1064. /*
  1065. * Now we want the lock to prevent both reads and writes.
  1066. */
  1067. r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell,
  1068. READ_WRITE_LOCK_LEVEL);
  1069. if (r < 0)
  1070. mg_complete(mg, false);
  1071. else if (r)
  1072. quiesce(mg, mg_update_metadata);
  1073. else
  1074. mg_update_metadata(ws);
  1075. }
  1076. }
  1077. static void mg_full_copy(struct work_struct *ws)
  1078. {
  1079. struct dm_cache_migration *mg = ws_to_mg(ws);
  1080. struct cache *cache = mg->cache;
  1081. struct policy_work *op = mg->op;
  1082. bool is_policy_promote = (op->op == POLICY_PROMOTE);
  1083. if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
  1084. is_discarded_oblock(cache, op->oblock)) {
  1085. mg_upgrade_lock(ws);
  1086. return;
  1087. }
  1088. init_continuation(&mg->k, mg_upgrade_lock);
  1089. copy(mg, is_policy_promote);
  1090. }
  1091. static void mg_copy(struct work_struct *ws)
  1092. {
  1093. struct dm_cache_migration *mg = ws_to_mg(ws);
  1094. if (mg->overwrite_bio) {
  1095. /*
  1096. * No exclusive lock was held when we last checked if the bio
  1097. * was optimisable. So we have to check again in case things
  1098. * have changed (eg, the block may no longer be discarded).
  1099. */
  1100. if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
  1101. /*
  1102. * Fallback to a real full copy after doing some tidying up.
  1103. */
  1104. bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
  1105. BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */
  1106. mg->overwrite_bio = NULL;
  1107. inc_io_migrations(mg->cache);
  1108. mg_full_copy(ws);
  1109. return;
  1110. }
  1111. /*
  1112. * It's safe to do this here, even though it's new data
  1113. * because all IO has been locked out of the block.
  1114. *
  1115. * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
  1116. * so _not_ using mg_upgrade_lock() as continutation.
  1117. */
  1118. overwrite(mg, mg_update_metadata_after_copy);
  1119. } else
  1120. mg_full_copy(ws);
  1121. }
  1122. static int mg_lock_writes(struct dm_cache_migration *mg)
  1123. {
  1124. int r;
  1125. struct dm_cell_key_v2 key;
  1126. struct cache *cache = mg->cache;
  1127. struct dm_bio_prison_cell_v2 *prealloc;
  1128. prealloc = alloc_prison_cell(cache);
  1129. /*
  1130. * Prevent writes to the block, but allow reads to continue.
  1131. * Unless we're using an overwrite bio, in which case we lock
  1132. * everything.
  1133. */
  1134. build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key);
  1135. r = dm_cell_lock_v2(cache->prison, &key,
  1136. mg->overwrite_bio ? READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL,
  1137. prealloc, &mg->cell);
  1138. if (r < 0) {
  1139. free_prison_cell(cache, prealloc);
  1140. mg_complete(mg, false);
  1141. return r;
  1142. }
  1143. if (mg->cell != prealloc)
  1144. free_prison_cell(cache, prealloc);
  1145. if (r == 0)
  1146. mg_copy(&mg->k.ws);
  1147. else
  1148. quiesce(mg, mg_copy);
  1149. return 0;
  1150. }
  1151. static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio)
  1152. {
  1153. struct dm_cache_migration *mg;
  1154. if (!background_work_begin(cache)) {
  1155. policy_complete_background_work(cache->policy, op, false);
  1156. return -EPERM;
  1157. }
  1158. mg = alloc_migration(cache);
  1159. mg->op = op;
  1160. mg->overwrite_bio = bio;
  1161. if (!bio)
  1162. inc_io_migrations(cache);
  1163. return mg_lock_writes(mg);
  1164. }
  1165. /*----------------------------------------------------------------
  1166. * invalidation processing
  1167. *--------------------------------------------------------------*/
  1168. static void invalidate_complete(struct dm_cache_migration *mg, bool success)
  1169. {
  1170. struct bio_list bios;
  1171. struct cache *cache = mg->cache;
  1172. bio_list_init(&bios);
  1173. if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
  1174. free_prison_cell(cache, mg->cell);
  1175. if (!success && mg->overwrite_bio)
  1176. bio_io_error(mg->overwrite_bio);
  1177. free_migration(mg);
  1178. defer_bios(cache, &bios);
  1179. background_work_end(cache);
  1180. }
  1181. static void invalidate_completed(struct work_struct *ws)
  1182. {
  1183. struct dm_cache_migration *mg = ws_to_mg(ws);
  1184. invalidate_complete(mg, !mg->k.input);
  1185. }
  1186. static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock)
  1187. {
  1188. int r = policy_invalidate_mapping(cache->policy, cblock);
  1189. if (!r) {
  1190. r = dm_cache_remove_mapping(cache->cmd, cblock);
  1191. if (r) {
  1192. DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
  1193. cache_device_name(cache));
  1194. metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
  1195. }
  1196. } else if (r == -ENODATA) {
  1197. /*
  1198. * Harmless, already unmapped.
  1199. */
  1200. r = 0;
  1201. } else
  1202. DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache));
  1203. return r;
  1204. }
  1205. static void invalidate_remove(struct work_struct *ws)
  1206. {
  1207. int r;
  1208. struct dm_cache_migration *mg = ws_to_mg(ws);
  1209. struct cache *cache = mg->cache;
  1210. r = invalidate_cblock(cache, mg->invalidate_cblock);
  1211. if (r) {
  1212. invalidate_complete(mg, false);
  1213. return;
  1214. }
  1215. init_continuation(&mg->k, invalidate_completed);
  1216. continue_after_commit(&cache->committer, &mg->k);
  1217. remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock);
  1218. mg->overwrite_bio = NULL;
  1219. schedule_commit(&cache->committer);
  1220. }
  1221. static int invalidate_lock(struct dm_cache_migration *mg)
  1222. {
  1223. int r;
  1224. struct dm_cell_key_v2 key;
  1225. struct cache *cache = mg->cache;
  1226. struct dm_bio_prison_cell_v2 *prealloc;
  1227. prealloc = alloc_prison_cell(cache);
  1228. build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
  1229. r = dm_cell_lock_v2(cache->prison, &key,
  1230. READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell);
  1231. if (r < 0) {
  1232. free_prison_cell(cache, prealloc);
  1233. invalidate_complete(mg, false);
  1234. return r;
  1235. }
  1236. if (mg->cell != prealloc)
  1237. free_prison_cell(cache, prealloc);
  1238. if (r)
  1239. quiesce(mg, invalidate_remove);
  1240. else {
  1241. /*
  1242. * We can't call invalidate_remove() directly here because we
  1243. * might still be in request context.
  1244. */
  1245. init_continuation(&mg->k, invalidate_remove);
  1246. queue_work(cache->wq, &mg->k.ws);
  1247. }
  1248. return 0;
  1249. }
  1250. static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
  1251. dm_oblock_t oblock, struct bio *bio)
  1252. {
  1253. struct dm_cache_migration *mg;
  1254. if (!background_work_begin(cache))
  1255. return -EPERM;
  1256. mg = alloc_migration(cache);
  1257. mg->overwrite_bio = bio;
  1258. mg->invalidate_cblock = cblock;
  1259. mg->invalidate_oblock = oblock;
  1260. return invalidate_lock(mg);
  1261. }
  1262. /*----------------------------------------------------------------
  1263. * bio processing
  1264. *--------------------------------------------------------------*/
  /*
   * Result type of spare_migration_bandwidth().
   */
  enum busy {
  	IDLE = 0,
  	BUSY = 1,
  };
  1269. static enum busy spare_migration_bandwidth(struct cache *cache)
  1270. {
  1271. bool idle = dm_iot_idle_for(&cache->tracker, HZ);
  1272. sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
  1273. cache->sectors_per_block;
  1274. if (idle && current_volume <= cache->migration_threshold)
  1275. return IDLE;
  1276. else
  1277. return BUSY;
  1278. }
  1279. static void inc_hit_counter(struct cache *cache, struct bio *bio)
  1280. {
  1281. atomic_inc(bio_data_dir(bio) == READ ?
  1282. &cache->stats.read_hit : &cache->stats.write_hit);
  1283. }
  1284. static void inc_miss_counter(struct cache *cache, struct bio *bio)
  1285. {
  1286. atomic_inc(bio_data_dir(bio) == READ ?
  1287. &cache->stats.read_miss : &cache->stats.write_miss);
  1288. }
  1289. /*----------------------------------------------------------------*/
  /*
   * Decide where @bio, which addresses origin block @block, has to go.
   *
   * Returns DM_MAPIO_REMAPPED when the bio has been remapped and the
   * caller must submit it (process_bio() does so), or DM_MAPIO_SUBMITTED
   * when the bio has been queued, handed to a migration/invalidation,
   * ended or errored here.
   *
   * *commit_needed is set to true when the bio had to be detained behind
   * a held lock or when it carries REQ_FUA; otherwise it is false.
   */
  static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
  		   bool *commit_needed)
  {
  	int r, data_dir;
  	bool rb, background_queued;
  	dm_cblock_t cblock;

  	*commit_needed = false;

  	rb = bio_detain_shared(cache, block, bio);
  	if (!rb) {
  		/*
  		 * An exclusive lock is held for this block, so we have to
  		 * wait.  We set the commit_needed flag so the current
  		 * transaction will be committed asap, allowing this lock
  		 * to be dropped.
  		 */
  		*commit_needed = true;
  		return DM_MAPIO_SUBMITTED;
  	}

  	data_dir = bio_data_dir(bio);

  	if (optimisable_bio(cache, bio, block)) {
  		struct policy_work *op = NULL;

  		r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op);
  		if (unlikely(r && r != -ENOENT)) {
  			DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d",
  				    cache_device_name(cache), r);
  			bio_io_error(bio);
  			return DM_MAPIO_SUBMITTED;
  		}

  		if (r == -ENOENT && op) {
  			/*
  			 * Miss, and the policy handed back work: the bio
  			 * becomes the overwrite bio of a promotion.
  			 *
  			 * NOTE(review): the return value of mg_start() is
  			 * ignored here; confirm the bio is still completed
  			 * when it returns an error.
  			 */
  			bio_drop_shared_lock(cache, bio);
  			BUG_ON(op->op != POLICY_PROMOTE);
  			mg_start(cache, op, bio);
  			return DM_MAPIO_SUBMITTED;
  		}
  	} else {
  		r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued);
  		if (unlikely(r && r != -ENOENT)) {
  			DMERR_LIMIT("%s: policy_lookup() failed with r = %d",
  				    cache_device_name(cache), r);
  			bio_io_error(bio);
  			return DM_MAPIO_SUBMITTED;
  		}

  		if (background_queued)
  			wake_migration_worker(cache);
  	}

  	if (r == -ENOENT) {
  		struct per_bio_data *pb = get_per_bio_data(bio);

  		/*
  		 * Miss.
  		 */
  		inc_miss_counter(cache, bio);
  		if (pb->req_nr == 0) {
  			accounted_begin(cache, bio);
  			remap_to_origin_clear_discard(cache, bio, block);
  		} else {
  			/*
  			 * This is a duplicate writethrough io that is no
  			 * longer needed because the block has been demoted.
  			 */
  			bio_endio(bio);
  			return DM_MAPIO_SUBMITTED;
  		}
  	} else {
  		/*
  		 * Hit.
  		 */
  		inc_hit_counter(cache, bio);

  		/*
  		 * Passthrough always maps to the origin, invalidating any
  		 * cache blocks that are written to.
  		 */
  		if (passthrough_mode(cache)) {
  			if (bio_data_dir(bio) == WRITE) {
  				/*
  				 * NOTE(review): the return value of
  				 * invalidate_start() is ignored, and
  				 * control still reaches the REQ_FUA check
  				 * and the DM_MAPIO_REMAPPED return below
  				 * after the bio has been handed over;
  				 * confirm this is intended.
  				 */
  				bio_drop_shared_lock(cache, bio);
  				atomic_inc(&cache->stats.demotion);
  				invalidate_start(cache, cblock, block, bio);
  			} else
  				remap_to_origin_clear_discard(cache, bio, block);
  		} else {
  			/*
  			 * A writethrough write to a clean block goes to
  			 * both devices; everything else goes to the cache
  			 * only.
  			 */
  			if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
  			    !is_dirty(cache, cblock)) {
  				remap_to_origin_and_cache(cache, bio, block, cblock);
  				accounted_begin(cache, bio);
  			} else
  				remap_to_cache_dirty(cache, bio, block, cblock);
  		}
  	}

  	/*
  	 * dm core turns FUA requests into a separate payload and FLUSH req.
  	 */
  	if (bio->bi_opf & REQ_FUA) {
  		/*
  		 * issue_after_commit will call accounted_begin a second time.  So
  		 * we call accounted_complete() to avoid double accounting.
  		 */
  		accounted_complete(cache, bio);
  		issue_after_commit(&cache->committer, bio);
  		*commit_needed = true;
  		return DM_MAPIO_SUBMITTED;
  	}

  	return DM_MAPIO_REMAPPED;
  }
  1392. static bool process_bio(struct cache *cache, struct bio *bio)
  1393. {
  1394. bool commit_needed;
  1395. if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
  1396. dm_submit_bio_remap(bio, NULL);
  1397. return commit_needed;
  1398. }
  1399. /*
  1400. * A non-zero return indicates read_only or fail_io mode.
  1401. */
  1402. static int commit(struct cache *cache, bool clean_shutdown)
  1403. {
  1404. int r;
  1405. if (get_cache_mode(cache) >= CM_READ_ONLY)
  1406. return -EINVAL;
  1407. atomic_inc(&cache->stats.commit_count);
  1408. r = dm_cache_commit(cache->cmd, clean_shutdown);
  1409. if (r)
  1410. metadata_operation_failed(cache, "dm_cache_commit", r);
  1411. return r;
  1412. }
  1413. /*
  1414. * Used by the batcher.
  1415. */
  1416. static blk_status_t commit_op(void *context)
  1417. {
  1418. struct cache *cache = context;
  1419. if (dm_cache_changed_this_transaction(cache->cmd))
  1420. return errno_to_blk_status(commit(cache, false));
  1421. return 0;
  1422. }
  1423. /*----------------------------------------------------------------*/
  1424. static bool process_flush_bio(struct cache *cache, struct bio *bio)
  1425. {
  1426. struct per_bio_data *pb = get_per_bio_data(bio);
  1427. if (!pb->req_nr)
  1428. remap_to_origin(cache, bio);
  1429. else
  1430. remap_to_cache(cache, bio, 0);
  1431. issue_after_commit(&cache->committer, bio);
  1432. return true;
  1433. }
  1434. static bool process_discard_bio(struct cache *cache, struct bio *bio)
  1435. {
  1436. dm_dblock_t b, e;
  1437. // FIXME: do we need to lock the region? Or can we just assume the
  1438. // user wont be so foolish as to issue discard concurrently with
  1439. // other IO?
  1440. calc_discard_block_range(cache, bio, &b, &e);
  1441. while (b != e) {
  1442. set_discard(cache, b);
  1443. b = to_dblock(from_dblock(b) + 1);
  1444. }
  1445. if (cache->features.discard_passdown) {
  1446. remap_to_origin(cache, bio);
  1447. dm_submit_bio_remap(bio, NULL);
  1448. } else
  1449. bio_endio(bio);
  1450. return false;
  1451. }
  1452. static void process_deferred_bios(struct work_struct *ws)
  1453. {
  1454. struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
  1455. bool commit_needed = false;
  1456. struct bio_list bios;
  1457. struct bio *bio;
  1458. bio_list_init(&bios);
  1459. spin_lock_irq(&cache->lock);
  1460. bio_list_merge(&bios, &cache->deferred_bios);
  1461. bio_list_init(&cache->deferred_bios);
  1462. spin_unlock_irq(&cache->lock);
  1463. while ((bio = bio_list_pop(&bios))) {
  1464. if (bio->bi_opf & REQ_PREFLUSH)
  1465. commit_needed = process_flush_bio(cache, bio) || commit_needed;
  1466. else if (bio_op(bio) == REQ_OP_DISCARD)
  1467. commit_needed = process_discard_bio(cache, bio) || commit_needed;
  1468. else
  1469. commit_needed = process_bio(cache, bio) || commit_needed;
  1470. cond_resched();
  1471. }
  1472. if (commit_needed)
  1473. schedule_commit(&cache->committer);
  1474. }
  1475. /*----------------------------------------------------------------
  1476. * Main worker loop
  1477. *--------------------------------------------------------------*/
  1478. static void requeue_deferred_bios(struct cache *cache)
  1479. {
  1480. struct bio *bio;
  1481. struct bio_list bios;
  1482. bio_list_init(&bios);
  1483. bio_list_merge(&bios, &cache->deferred_bios);
  1484. bio_list_init(&cache->deferred_bios);
  1485. while ((bio = bio_list_pop(&bios))) {
  1486. bio->bi_status = BLK_STS_DM_REQUEUE;
  1487. bio_endio(bio);
  1488. cond_resched();
  1489. }
  1490. }
  1491. /*
  1492. * We want to commit periodically so that not too much
  1493. * unwritten metadata builds up.
  1494. */
  1495. static void do_waker(struct work_struct *ws)
  1496. {
  1497. struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
  1498. policy_tick(cache->policy, true);
  1499. wake_migration_worker(cache);
  1500. schedule_commit(&cache->committer);
  1501. queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
  1502. }
  1503. static void check_migrations(struct work_struct *ws)
  1504. {
  1505. int r;
  1506. struct policy_work *op;
  1507. struct cache *cache = container_of(ws, struct cache, migration_worker);
  1508. enum busy b;
  1509. for (;;) {
  1510. b = spare_migration_bandwidth(cache);
  1511. r = policy_get_background_work(cache->policy, b == IDLE, &op);
  1512. if (r == -ENODATA)
  1513. break;
  1514. if (r) {
  1515. DMERR_LIMIT("%s: policy_background_work failed",
  1516. cache_device_name(cache));
  1517. break;
  1518. }
  1519. r = mg_start(cache, op, NULL);
  1520. if (r)
  1521. break;
  1522. cond_resched();
  1523. }
  1524. }
  1525. /*----------------------------------------------------------------
  1526. * Target methods
  1527. *--------------------------------------------------------------*/
  1528. /*
  1529. * This function gets called on the error paths of the constructor, so we
  1530. * have to cope with a partially initialised struct.
  1531. */
  1532. static void destroy(struct cache *cache)
  1533. {
  1534. unsigned int i;
  1535. mempool_exit(&cache->migration_pool);
  1536. if (cache->prison)
  1537. dm_bio_prison_destroy_v2(cache->prison);
  1538. cancel_delayed_work_sync(&cache->waker);
  1539. if (cache->wq)
  1540. destroy_workqueue(cache->wq);
  1541. if (cache->dirty_bitset)
  1542. free_bitset(cache->dirty_bitset);
  1543. if (cache->discard_bitset)
  1544. free_bitset(cache->discard_bitset);
  1545. if (cache->copier)
  1546. dm_kcopyd_client_destroy(cache->copier);
  1547. if (cache->cmd)
  1548. dm_cache_metadata_close(cache->cmd);
  1549. if (cache->metadata_dev)
  1550. dm_put_device(cache->ti, cache->metadata_dev);
  1551. if (cache->origin_dev)
  1552. dm_put_device(cache->ti, cache->origin_dev);
  1553. if (cache->cache_dev)
  1554. dm_put_device(cache->ti, cache->cache_dev);
  1555. if (cache->policy)
  1556. dm_cache_policy_destroy(cache->policy);
  1557. for (i = 0; i < cache->nr_ctr_args ; i++)
  1558. kfree(cache->ctr_args[i]);
  1559. kfree(cache->ctr_args);
  1560. bioset_exit(&cache->bs);
  1561. kfree(cache);
  1562. }
  1563. static void cache_dtr(struct dm_target *ti)
  1564. {
  1565. struct cache *cache = ti->private;
  1566. destroy(cache);
  1567. }
  1568. static sector_t get_dev_size(struct dm_dev *dev)
  1569. {
  1570. return bdev_nr_sectors(dev->bdev);
  1571. }
  1572. /*----------------------------------------------------------------*/
/*
 * Construct a cache device mapping.
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writeback, writethrough, passthrough, metadata2,
 *                   no_discard_passdown.  (The default is writeback.)
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy
 *                   E.g. 'sequential_threshold 1024'
 *                   See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *                   content from being different from origin block content.
 *                   Without this argument, the default behaviour is to write
 *                   back cache block contents later for performance reasons,
 *                   so they may differ from the corresponding origin blocks.
 *   passthrough   : only accepted when all cache blocks are clean;
 *                   policy migrations are disabled in this mode.
 *   metadata2     : use version 2 of the metadata format.
 *   no_discard_passdown : do not pass discards down to the origin device.
 *
 * At most one of writeback, writethrough and passthrough may be given.
 */
  1602. struct cache_args {
  1603. struct dm_target *ti;
  1604. struct dm_dev *metadata_dev;
  1605. struct dm_dev *cache_dev;
  1606. sector_t cache_sectors;
  1607. struct dm_dev *origin_dev;
  1608. sector_t origin_sectors;
  1609. uint32_t block_size;
  1610. const char *policy_name;
  1611. int policy_argc;
  1612. const char **policy_argv;
  1613. struct cache_features features;
  1614. };
  1615. static void destroy_cache_args(struct cache_args *ca)
  1616. {
  1617. if (ca->metadata_dev)
  1618. dm_put_device(ca->ti, ca->metadata_dev);
  1619. if (ca->cache_dev)
  1620. dm_put_device(ca->ti, ca->cache_dev);
  1621. if (ca->origin_dev)
  1622. dm_put_device(ca->ti, ca->origin_dev);
  1623. kfree(ca);
  1624. }
  1625. static bool at_least_one_arg(struct dm_arg_set *as, char **error)
  1626. {
  1627. if (!as->argc) {
  1628. *error = "Insufficient args";
  1629. return false;
  1630. }
  1631. return true;
  1632. }
  1633. static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
  1634. char **error)
  1635. {
  1636. int r;
  1637. sector_t metadata_dev_size;
  1638. if (!at_least_one_arg(as, error))
  1639. return -EINVAL;
  1640. r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
  1641. &ca->metadata_dev);
  1642. if (r) {
  1643. *error = "Error opening metadata device";
  1644. return r;
  1645. }
  1646. metadata_dev_size = get_dev_size(ca->metadata_dev);
  1647. if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
  1648. DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
  1649. ca->metadata_dev->bdev, THIN_METADATA_MAX_SECTORS);
  1650. return 0;
  1651. }
  1652. static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
  1653. char **error)
  1654. {
  1655. int r;
  1656. if (!at_least_one_arg(as, error))
  1657. return -EINVAL;
  1658. r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
  1659. &ca->cache_dev);
  1660. if (r) {
  1661. *error = "Error opening cache device";
  1662. return r;
  1663. }
  1664. ca->cache_sectors = get_dev_size(ca->cache_dev);
  1665. return 0;
  1666. }
  1667. static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
  1668. char **error)
  1669. {
  1670. int r;
  1671. if (!at_least_one_arg(as, error))
  1672. return -EINVAL;
  1673. r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
  1674. &ca->origin_dev);
  1675. if (r) {
  1676. *error = "Error opening origin device";
  1677. return r;
  1678. }
  1679. ca->origin_sectors = get_dev_size(ca->origin_dev);
  1680. if (ca->ti->len > ca->origin_sectors) {
  1681. *error = "Device size larger than cached device";
  1682. return -EINVAL;
  1683. }
  1684. return 0;
  1685. }
  1686. static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
  1687. char **error)
  1688. {
  1689. unsigned long block_size;
  1690. if (!at_least_one_arg(as, error))
  1691. return -EINVAL;
  1692. if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
  1693. block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
  1694. block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
  1695. block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
  1696. *error = "Invalid data block size";
  1697. return -EINVAL;
  1698. }
  1699. if (block_size > ca->cache_sectors) {
  1700. *error = "Data block size is larger than the cache device";
  1701. return -EINVAL;
  1702. }
  1703. ca->block_size = block_size;
  1704. return 0;
  1705. }
  1706. static void init_features(struct cache_features *cf)
  1707. {
  1708. cf->mode = CM_WRITE;
  1709. cf->io_mode = CM_IO_WRITEBACK;
  1710. cf->metadata_version = 1;
  1711. cf->discard_passdown = true;
  1712. }
  1713. static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
  1714. char **error)
  1715. {
  1716. static const struct dm_arg _args[] = {
  1717. {0, 3, "Invalid number of cache feature arguments"},
  1718. };
  1719. int r, mode_ctr = 0;
  1720. unsigned int argc;
  1721. const char *arg;
  1722. struct cache_features *cf = &ca->features;
  1723. init_features(cf);
  1724. r = dm_read_arg_group(_args, as, &argc, error);
  1725. if (r)
  1726. return -EINVAL;
  1727. while (argc--) {
  1728. arg = dm_shift_arg(as);
  1729. if (!strcasecmp(arg, "writeback")) {
  1730. cf->io_mode = CM_IO_WRITEBACK;
  1731. mode_ctr++;
  1732. }
  1733. else if (!strcasecmp(arg, "writethrough")) {
  1734. cf->io_mode = CM_IO_WRITETHROUGH;
  1735. mode_ctr++;
  1736. }
  1737. else if (!strcasecmp(arg, "passthrough")) {
  1738. cf->io_mode = CM_IO_PASSTHROUGH;
  1739. mode_ctr++;
  1740. }
  1741. else if (!strcasecmp(arg, "metadata2"))
  1742. cf->metadata_version = 2;
  1743. else if (!strcasecmp(arg, "no_discard_passdown"))
  1744. cf->discard_passdown = false;
  1745. else {
  1746. *error = "Unrecognised cache feature requested";
  1747. return -EINVAL;
  1748. }
  1749. }
  1750. if (mode_ctr > 1) {
  1751. *error = "Duplicate cache io_mode features requested";
  1752. return -EINVAL;
  1753. }
  1754. return 0;
  1755. }
  1756. static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
  1757. char **error)
  1758. {
  1759. static const struct dm_arg _args[] = {
  1760. {0, 1024, "Invalid number of policy arguments"},
  1761. };
  1762. int r;
  1763. if (!at_least_one_arg(as, error))
  1764. return -EINVAL;
  1765. ca->policy_name = dm_shift_arg(as);
  1766. r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
  1767. if (r)
  1768. return -EINVAL;
  1769. ca->policy_argv = (const char **)as->argv;
  1770. dm_consume_args(as, ca->policy_argc);
  1771. return 0;
  1772. }
  1773. static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
  1774. char **error)
  1775. {
  1776. int r;
  1777. struct dm_arg_set as;
  1778. as.argc = argc;
  1779. as.argv = argv;
  1780. r = parse_metadata_dev(ca, &as, error);
  1781. if (r)
  1782. return r;
  1783. r = parse_cache_dev(ca, &as, error);
  1784. if (r)
  1785. return r;
  1786. r = parse_origin_dev(ca, &as, error);
  1787. if (r)
  1788. return r;
  1789. r = parse_block_size(ca, &as, error);
  1790. if (r)
  1791. return r;
  1792. r = parse_features(ca, &as, error);
  1793. if (r)
  1794. return r;
  1795. r = parse_policy(ca, &as, error);
  1796. if (r)
  1797. return r;
  1798. return 0;
  1799. }
  1800. /*----------------------------------------------------------------*/
  1801. static struct kmem_cache *migration_cache;
  1802. #define NOT_CORE_OPTION 1
  1803. static int process_config_option(struct cache *cache, const char *key, const char *value)
  1804. {
  1805. unsigned long tmp;
  1806. if (!strcasecmp(key, "migration_threshold")) {
  1807. if (kstrtoul(value, 10, &tmp))
  1808. return -EINVAL;
  1809. cache->migration_threshold = tmp;
  1810. return 0;
  1811. }
  1812. return NOT_CORE_OPTION;
  1813. }
  1814. static int set_config_value(struct cache *cache, const char *key, const char *value)
  1815. {
  1816. int r = process_config_option(cache, key, value);
  1817. if (r == NOT_CORE_OPTION)
  1818. r = policy_set_config_value(cache->policy, key, value);
  1819. if (r)
  1820. DMWARN("bad config value for %s: %s", key, value);
  1821. return r;
  1822. }
  1823. static int set_config_values(struct cache *cache, int argc, const char **argv)
  1824. {
  1825. int r = 0;
  1826. if (argc & 1) {
  1827. DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
  1828. return -EINVAL;
  1829. }
  1830. while (argc) {
  1831. r = set_config_value(cache, argv[0], argv[1]);
  1832. if (r)
  1833. break;
  1834. argc -= 2;
  1835. argv += 2;
  1836. }
  1837. return r;
  1838. }
  1839. static int create_cache_policy(struct cache *cache, struct cache_args *ca,
  1840. char **error)
  1841. {
  1842. struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
  1843. cache->cache_size,
  1844. cache->origin_sectors,
  1845. cache->sectors_per_block);
  1846. if (IS_ERR(p)) {
  1847. *error = "Error creating cache's policy";
  1848. return PTR_ERR(p);
  1849. }
  1850. cache->policy = p;
  1851. BUG_ON(!cache->policy);
  1852. return 0;
  1853. }
  1854. /*
  1855. * We want the discard block size to be at least the size of the cache
  1856. * block size and have no more than 2^14 discard blocks across the origin.
  1857. */
  1858. #define MAX_DISCARD_BLOCKS (1 << 14)
  1859. static bool too_many_discard_blocks(sector_t discard_block_size,
  1860. sector_t origin_size)
  1861. {
  1862. (void) sector_div(origin_size, discard_block_size);
  1863. return origin_size > MAX_DISCARD_BLOCKS;
  1864. }
  1865. static sector_t calculate_discard_block_size(sector_t cache_block_size,
  1866. sector_t origin_size)
  1867. {
  1868. sector_t discard_block_size = cache_block_size;
  1869. if (origin_size)
  1870. while (too_many_discard_blocks(discard_block_size, origin_size))
  1871. discard_block_size *= 2;
  1872. return discard_block_size;
  1873. }
  1874. static void set_cache_size(struct cache *cache, dm_cblock_t size)
  1875. {
  1876. dm_block_t nr_blocks = from_cblock(size);
  1877. if (nr_blocks > (1 << 20) && cache->cache_size != size)
  1878. DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
  1879. "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
  1880. "Please consider increasing the cache block size to reduce the overall cache block count.",
  1881. (unsigned long long) nr_blocks);
  1882. cache->cache_size = size;
  1883. }
  1884. #define DEFAULT_MIGRATION_THRESHOLD 2048
  1885. static int cache_create(struct cache_args *ca, struct cache **result)
  1886. {
  1887. int r = 0;
  1888. char **error = &ca->ti->error;
  1889. struct cache *cache;
  1890. struct dm_target *ti = ca->ti;
  1891. dm_block_t origin_blocks;
  1892. struct dm_cache_metadata *cmd;
  1893. bool may_format = ca->features.mode == CM_WRITE;
  1894. cache = kzalloc(sizeof(*cache), GFP_KERNEL);
  1895. if (!cache)
  1896. return -ENOMEM;
  1897. cache->ti = ca->ti;
  1898. ti->private = cache;
  1899. ti->accounts_remapped_io = true;
  1900. ti->num_flush_bios = 2;
  1901. ti->flush_supported = true;
  1902. ti->num_discard_bios = 1;
  1903. ti->discards_supported = true;
  1904. ti->per_io_data_size = sizeof(struct per_bio_data);
  1905. cache->features = ca->features;
  1906. if (writethrough_mode(cache)) {
  1907. /* Create bioset for writethrough bios issued to origin */
  1908. r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
  1909. if (r)
  1910. goto bad;
  1911. }
  1912. cache->metadata_dev = ca->metadata_dev;
  1913. cache->origin_dev = ca->origin_dev;
  1914. cache->cache_dev = ca->cache_dev;
  1915. ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
  1916. origin_blocks = cache->origin_sectors = ca->origin_sectors;
  1917. origin_blocks = block_div(origin_blocks, ca->block_size);
  1918. cache->origin_blocks = to_oblock(origin_blocks);
  1919. cache->sectors_per_block = ca->block_size;
  1920. if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
  1921. r = -EINVAL;
  1922. goto bad;
  1923. }
  1924. if (ca->block_size & (ca->block_size - 1)) {
  1925. dm_block_t cache_size = ca->cache_sectors;
  1926. cache->sectors_per_block_shift = -1;
  1927. cache_size = block_div(cache_size, ca->block_size);
  1928. set_cache_size(cache, to_cblock(cache_size));
  1929. } else {
  1930. cache->sectors_per_block_shift = __ffs(ca->block_size);
  1931. set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
  1932. }
  1933. r = create_cache_policy(cache, ca, error);
  1934. if (r)
  1935. goto bad;
  1936. cache->policy_nr_args = ca->policy_argc;
  1937. cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
  1938. r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
  1939. if (r) {
  1940. *error = "Error setting cache policy's config values";
  1941. goto bad;
  1942. }
  1943. cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
  1944. ca->block_size, may_format,
  1945. dm_cache_policy_get_hint_size(cache->policy),
  1946. ca->features.metadata_version);
  1947. if (IS_ERR(cmd)) {
  1948. *error = "Error creating metadata object";
  1949. r = PTR_ERR(cmd);
  1950. goto bad;
  1951. }
  1952. cache->cmd = cmd;
  1953. set_cache_mode(cache, CM_WRITE);
  1954. if (get_cache_mode(cache) != CM_WRITE) {
  1955. *error = "Unable to get write access to metadata, please check/repair metadata.";
  1956. r = -EINVAL;
  1957. goto bad;
  1958. }
  1959. if (passthrough_mode(cache)) {
  1960. bool all_clean;
  1961. r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
  1962. if (r) {
  1963. *error = "dm_cache_metadata_all_clean() failed";
  1964. goto bad;
  1965. }
  1966. if (!all_clean) {
  1967. *error = "Cannot enter passthrough mode unless all blocks are clean";
  1968. r = -EINVAL;
  1969. goto bad;
  1970. }
  1971. policy_allow_migrations(cache->policy, false);
  1972. }
  1973. spin_lock_init(&cache->lock);
  1974. bio_list_init(&cache->deferred_bios);
  1975. atomic_set(&cache->nr_allocated_migrations, 0);
  1976. atomic_set(&cache->nr_io_migrations, 0);
  1977. init_waitqueue_head(&cache->migration_wait);
  1978. r = -ENOMEM;
  1979. atomic_set(&cache->nr_dirty, 0);
  1980. cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
  1981. if (!cache->dirty_bitset) {
  1982. *error = "could not allocate dirty bitset";
  1983. goto bad;
  1984. }
  1985. clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
  1986. cache->discard_block_size =
  1987. calculate_discard_block_size(cache->sectors_per_block,
  1988. cache->origin_sectors);
  1989. cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
  1990. cache->discard_block_size));
  1991. cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
  1992. if (!cache->discard_bitset) {
  1993. *error = "could not allocate discard bitset";
  1994. goto bad;
  1995. }
  1996. clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
  1997. cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
  1998. if (IS_ERR(cache->copier)) {
  1999. *error = "could not create kcopyd client";
  2000. r = PTR_ERR(cache->copier);
  2001. goto bad;
  2002. }
  2003. cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
  2004. if (!cache->wq) {
  2005. *error = "could not create workqueue for metadata object";
  2006. goto bad;
  2007. }
  2008. INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
  2009. INIT_WORK(&cache->migration_worker, check_migrations);
  2010. INIT_DELAYED_WORK(&cache->waker, do_waker);
  2011. cache->prison = dm_bio_prison_create_v2(cache->wq);
  2012. if (!cache->prison) {
  2013. *error = "could not create bio prison";
  2014. goto bad;
  2015. }
  2016. r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
  2017. migration_cache);
  2018. if (r) {
  2019. *error = "Error creating cache's migration mempool";
  2020. goto bad;
  2021. }
  2022. cache->need_tick_bio = true;
  2023. cache->sized = false;
  2024. cache->invalidate = false;
  2025. cache->commit_requested = false;
  2026. cache->loaded_mappings = false;
  2027. cache->loaded_discards = false;
  2028. load_stats(cache);
  2029. atomic_set(&cache->stats.demotion, 0);
  2030. atomic_set(&cache->stats.promotion, 0);
  2031. atomic_set(&cache->stats.copies_avoided, 0);
  2032. atomic_set(&cache->stats.cache_cell_clash, 0);
  2033. atomic_set(&cache->stats.commit_count, 0);
  2034. atomic_set(&cache->stats.discard_count, 0);
  2035. spin_lock_init(&cache->invalidation_lock);
  2036. INIT_LIST_HEAD(&cache->invalidation_requests);
  2037. batcher_init(&cache->committer, commit_op, cache,
  2038. issue_op, cache, cache->wq);
  2039. dm_iot_init(&cache->tracker);
  2040. init_rwsem(&cache->background_work_lock);
  2041. prevent_background_work(cache);
  2042. *result = cache;
  2043. return 0;
  2044. bad:
  2045. destroy(cache);
  2046. return r;
  2047. }
  2048. static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
  2049. {
  2050. unsigned int i;
  2051. const char **copy;
  2052. copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
  2053. if (!copy)
  2054. return -ENOMEM;
  2055. for (i = 0; i < argc; i++) {
  2056. copy[i] = kstrdup(argv[i], GFP_KERNEL);
  2057. if (!copy[i]) {
  2058. while (i--)
  2059. kfree(copy[i]);
  2060. kfree(copy);
  2061. return -ENOMEM;
  2062. }
  2063. }
  2064. cache->nr_ctr_args = argc;
  2065. cache->ctr_args = copy;
  2066. return 0;
  2067. }
  2068. static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
  2069. {
  2070. int r = -EINVAL;
  2071. struct cache_args *ca;
  2072. struct cache *cache = NULL;
  2073. ca = kzalloc(sizeof(*ca), GFP_KERNEL);
  2074. if (!ca) {
  2075. ti->error = "Error allocating memory for cache";
  2076. return -ENOMEM;
  2077. }
  2078. ca->ti = ti;
  2079. r = parse_cache_args(ca, argc, argv, &ti->error);
  2080. if (r)
  2081. goto out;
  2082. r = cache_create(ca, &cache);
  2083. if (r)
  2084. goto out;
  2085. r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
  2086. if (r) {
  2087. destroy(cache);
  2088. goto out;
  2089. }
  2090. ti->private = cache;
  2091. out:
  2092. destroy_cache_args(ca);
  2093. return r;
  2094. }
  2095. /*----------------------------------------------------------------*/
  2096. static int cache_map(struct dm_target *ti, struct bio *bio)
  2097. {
  2098. struct cache *cache = ti->private;
  2099. int r;
  2100. bool commit_needed;
  2101. dm_oblock_t block = get_bio_block(cache, bio);
  2102. init_per_bio_data(bio);
  2103. if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
  2104. /*
  2105. * This can only occur if the io goes to a partial block at
  2106. * the end of the origin device. We don't cache these.
  2107. * Just remap to the origin and carry on.
  2108. */
  2109. remap_to_origin(cache, bio);
  2110. accounted_begin(cache, bio);
  2111. return DM_MAPIO_REMAPPED;
  2112. }
  2113. if (discard_or_flush(bio)) {
  2114. defer_bio(cache, bio);
  2115. return DM_MAPIO_SUBMITTED;
  2116. }
  2117. r = map_bio(cache, bio, block, &commit_needed);
  2118. if (commit_needed)
  2119. schedule_commit(&cache->committer);
  2120. return r;
  2121. }
  2122. static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
  2123. {
  2124. struct cache *cache = ti->private;
  2125. unsigned long flags;
  2126. struct per_bio_data *pb = get_per_bio_data(bio);
  2127. if (pb->tick) {
  2128. policy_tick(cache->policy, false);
  2129. spin_lock_irqsave(&cache->lock, flags);
  2130. cache->need_tick_bio = true;
  2131. spin_unlock_irqrestore(&cache->lock, flags);
  2132. }
  2133. bio_drop_shared_lock(cache, bio);
  2134. accounted_complete(cache, bio);
  2135. return DM_ENDIO_DONE;
  2136. }
  2137. static int write_dirty_bitset(struct cache *cache)
  2138. {
  2139. int r;
  2140. if (get_cache_mode(cache) >= CM_READ_ONLY)
  2141. return -EINVAL;
  2142. r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
  2143. if (r)
  2144. metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
  2145. return r;
  2146. }
  2147. static int write_discard_bitset(struct cache *cache)
  2148. {
  2149. unsigned int i, r;
  2150. if (get_cache_mode(cache) >= CM_READ_ONLY)
  2151. return -EINVAL;
  2152. r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
  2153. cache->discard_nr_blocks);
  2154. if (r) {
  2155. DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
  2156. metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
  2157. return r;
  2158. }
  2159. for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
  2160. r = dm_cache_set_discard(cache->cmd, to_dblock(i),
  2161. is_discarded(cache, to_dblock(i)));
  2162. if (r) {
  2163. metadata_operation_failed(cache, "dm_cache_set_discard", r);
  2164. return r;
  2165. }
  2166. }
  2167. return 0;
  2168. }
  2169. static int write_hints(struct cache *cache)
  2170. {
  2171. int r;
  2172. if (get_cache_mode(cache) >= CM_READ_ONLY)
  2173. return -EINVAL;
  2174. r = dm_cache_write_hints(cache->cmd, cache->policy);
  2175. if (r) {
  2176. metadata_operation_failed(cache, "dm_cache_write_hints", r);
  2177. return r;
  2178. }
  2179. return 0;
  2180. }
  2181. /*
  2182. * returns true on success
  2183. */
  2184. static bool sync_metadata(struct cache *cache)
  2185. {
  2186. int r1, r2, r3, r4;
  2187. r1 = write_dirty_bitset(cache);
  2188. if (r1)
  2189. DMERR("%s: could not write dirty bitset", cache_device_name(cache));
  2190. r2 = write_discard_bitset(cache);
  2191. if (r2)
  2192. DMERR("%s: could not write discard bitset", cache_device_name(cache));
  2193. save_stats(cache);
  2194. r3 = write_hints(cache);
  2195. if (r3)
  2196. DMERR("%s: could not write hints", cache_device_name(cache));
  2197. /*
  2198. * If writing the above metadata failed, we still commit, but don't
  2199. * set the clean shutdown flag. This will effectively force every
  2200. * dirty bit to be set on reload.
  2201. */
  2202. r4 = commit(cache, !r1 && !r2 && !r3);
  2203. if (r4)
  2204. DMERR("%s: could not write cache metadata", cache_device_name(cache));
  2205. return !r1 && !r2 && !r3 && !r4;
  2206. }
  2207. static void cache_postsuspend(struct dm_target *ti)
  2208. {
  2209. struct cache *cache = ti->private;
  2210. prevent_background_work(cache);
  2211. BUG_ON(atomic_read(&cache->nr_io_migrations));
  2212. cancel_delayed_work_sync(&cache->waker);
  2213. drain_workqueue(cache->wq);
  2214. WARN_ON(cache->tracker.in_flight);
  2215. /*
  2216. * If it's a flush suspend there won't be any deferred bios, so this
  2217. * call is harmless.
  2218. */
  2219. requeue_deferred_bios(cache);
  2220. if (get_cache_mode(cache) == CM_WRITE)
  2221. (void) sync_metadata(cache);
  2222. }
  2223. static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
  2224. bool dirty, uint32_t hint, bool hint_valid)
  2225. {
  2226. struct cache *cache = context;
  2227. if (dirty) {
  2228. set_bit(from_cblock(cblock), cache->dirty_bitset);
  2229. atomic_inc(&cache->nr_dirty);
  2230. } else
  2231. clear_bit(from_cblock(cblock), cache->dirty_bitset);
  2232. return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
  2233. }
  2234. /*
  2235. * The discard block size in the on disk metadata is not
  2236. * necessarily the same as we're currently using. So we have to
  2237. * be careful to only set the discarded attribute if we know it
  2238. * covers a complete block of the new size.
  2239. */
  2240. struct discard_load_info {
  2241. struct cache *cache;
  2242. /*
  2243. * These blocks are sized using the on disk dblock size, rather
  2244. * than the current one.
  2245. */
  2246. dm_block_t block_size;
  2247. dm_block_t discard_begin, discard_end;
  2248. };
  2249. static void discard_load_info_init(struct cache *cache,
  2250. struct discard_load_info *li)
  2251. {
  2252. li->cache = cache;
  2253. li->discard_begin = li->discard_end = 0;
  2254. }
  2255. static void set_discard_range(struct discard_load_info *li)
  2256. {
  2257. sector_t b, e;
  2258. if (li->discard_begin == li->discard_end)
  2259. return;
  2260. /*
  2261. * Convert to sectors.
  2262. */
  2263. b = li->discard_begin * li->block_size;
  2264. e = li->discard_end * li->block_size;
  2265. /*
  2266. * Then convert back to the current dblock size.
  2267. */
  2268. b = dm_sector_div_up(b, li->cache->discard_block_size);
  2269. sector_div(e, li->cache->discard_block_size);
  2270. /*
  2271. * The origin may have shrunk, so we need to check we're still in
  2272. * bounds.
  2273. */
  2274. if (e > from_dblock(li->cache->discard_nr_blocks))
  2275. e = from_dblock(li->cache->discard_nr_blocks);
  2276. for (; b < e; b++)
  2277. set_discard(li->cache, to_dblock(b));
  2278. }
  2279. static int load_discard(void *context, sector_t discard_block_size,
  2280. dm_dblock_t dblock, bool discard)
  2281. {
  2282. struct discard_load_info *li = context;
  2283. li->block_size = discard_block_size;
  2284. if (discard) {
  2285. if (from_dblock(dblock) == li->discard_end)
  2286. /*
  2287. * We're already in a discard range, just extend it.
  2288. */
  2289. li->discard_end = li->discard_end + 1ULL;
  2290. else {
  2291. /*
  2292. * Emit the old range and start a new one.
  2293. */
  2294. set_discard_range(li);
  2295. li->discard_begin = from_dblock(dblock);
  2296. li->discard_end = li->discard_begin + 1ULL;
  2297. }
  2298. } else {
  2299. set_discard_range(li);
  2300. li->discard_begin = li->discard_end = 0;
  2301. }
  2302. return 0;
  2303. }
  2304. static dm_cblock_t get_cache_dev_size(struct cache *cache)
  2305. {
  2306. sector_t size = get_dev_size(cache->cache_dev);
  2307. (void) sector_div(size, cache->sectors_per_block);
  2308. return to_cblock(size);
  2309. }
  2310. static bool can_resize(struct cache *cache, dm_cblock_t new_size)
  2311. {
  2312. if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
  2313. if (cache->sized) {
  2314. DMERR("%s: unable to extend cache due to missing cache table reload",
  2315. cache_device_name(cache));
  2316. return false;
  2317. }
  2318. }
  2319. /*
  2320. * We can't drop a dirty block when shrinking the cache.
  2321. */
  2322. while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
  2323. new_size = to_cblock(from_cblock(new_size) + 1);
  2324. if (is_dirty(cache, new_size)) {
  2325. DMERR("%s: unable to shrink cache; cache block %llu is dirty",
  2326. cache_device_name(cache),
  2327. (unsigned long long) from_cblock(new_size));
  2328. return false;
  2329. }
  2330. }
  2331. return true;
  2332. }
  2333. static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
  2334. {
  2335. int r;
  2336. r = dm_cache_resize(cache->cmd, new_size);
  2337. if (r) {
  2338. DMERR("%s: could not resize cache metadata", cache_device_name(cache));
  2339. metadata_operation_failed(cache, "dm_cache_resize", r);
  2340. return r;
  2341. }
  2342. set_cache_size(cache, new_size);
  2343. return 0;
  2344. }
  2345. static int cache_preresume(struct dm_target *ti)
  2346. {
  2347. int r = 0;
  2348. struct cache *cache = ti->private;
  2349. dm_cblock_t csize = get_cache_dev_size(cache);
  2350. /*
  2351. * Check to see if the cache has resized.
  2352. */
  2353. if (!cache->sized) {
  2354. r = resize_cache_dev(cache, csize);
  2355. if (r)
  2356. return r;
  2357. cache->sized = true;
  2358. } else if (csize != cache->cache_size) {
  2359. if (!can_resize(cache, csize))
  2360. return -EINVAL;
  2361. r = resize_cache_dev(cache, csize);
  2362. if (r)
  2363. return r;
  2364. }
  2365. if (!cache->loaded_mappings) {
  2366. r = dm_cache_load_mappings(cache->cmd, cache->policy,
  2367. load_mapping, cache);
  2368. if (r) {
  2369. DMERR("%s: could not load cache mappings", cache_device_name(cache));
  2370. metadata_operation_failed(cache, "dm_cache_load_mappings", r);
  2371. return r;
  2372. }
  2373. cache->loaded_mappings = true;
  2374. }
  2375. if (!cache->loaded_discards) {
  2376. struct discard_load_info li;
  2377. /*
  2378. * The discard bitset could have been resized, or the
  2379. * discard block size changed. To be safe we start by
  2380. * setting every dblock to not discarded.
  2381. */
  2382. clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
  2383. discard_load_info_init(cache, &li);
  2384. r = dm_cache_load_discards(cache->cmd, load_discard, &li);
  2385. if (r) {
  2386. DMERR("%s: could not load origin discards", cache_device_name(cache));
  2387. metadata_operation_failed(cache, "dm_cache_load_discards", r);
  2388. return r;
  2389. }
  2390. set_discard_range(&li);
  2391. cache->loaded_discards = true;
  2392. }
  2393. return r;
  2394. }
  2395. static void cache_resume(struct dm_target *ti)
  2396. {
  2397. struct cache *cache = ti->private;
  2398. cache->need_tick_bio = true;
  2399. allow_background_work(cache);
  2400. do_waker(&cache->waker.work);
  2401. }
  2402. static void emit_flags(struct cache *cache, char *result,
  2403. unsigned int maxlen, ssize_t *sz_ptr)
  2404. {
  2405. ssize_t sz = *sz_ptr;
  2406. struct cache_features *cf = &cache->features;
  2407. unsigned int count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;
  2408. DMEMIT("%u ", count);
  2409. if (cf->metadata_version == 2)
  2410. DMEMIT("metadata2 ");
  2411. if (writethrough_mode(cache))
  2412. DMEMIT("writethrough ");
  2413. else if (passthrough_mode(cache))
  2414. DMEMIT("passthrough ");
  2415. else if (writeback_mode(cache))
  2416. DMEMIT("writeback ");
  2417. else {
  2418. DMEMIT("unknown ");
  2419. DMERR("%s: internal error: unknown io mode: %d",
  2420. cache_device_name(cache), (int) cf->io_mode);
  2421. }
  2422. if (!cf->discard_passdown)
  2423. DMEMIT("no_discard_passdown ");
  2424. *sz_ptr = sz;
  2425. }
  2426. /*
  2427. * Status format:
  2428. *
  2429. * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
  2430. * <cache block size> <#used cache blocks>/<#total cache blocks>
  2431. * <#read hits> <#read misses> <#write hits> <#write misses>
  2432. * <#demotions> <#promotions> <#dirty>
  2433. * <#features> <features>*
  2434. * <#core args> <core args>
  2435. * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
  2436. */
  2437. static void cache_status(struct dm_target *ti, status_type_t type,
  2438. unsigned int status_flags, char *result, unsigned int maxlen)
  2439. {
  2440. int r = 0;
  2441. unsigned int i;
  2442. ssize_t sz = 0;
  2443. dm_block_t nr_free_blocks_metadata = 0;
  2444. dm_block_t nr_blocks_metadata = 0;
  2445. char buf[BDEVNAME_SIZE];
  2446. struct cache *cache = ti->private;
  2447. dm_cblock_t residency;
  2448. bool needs_check;
  2449. switch (type) {
  2450. case STATUSTYPE_INFO:
  2451. if (get_cache_mode(cache) == CM_FAIL) {
  2452. DMEMIT("Fail");
  2453. break;
  2454. }
  2455. /* Commit to ensure statistics aren't out-of-date */
  2456. if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
  2457. (void) commit(cache, false);
  2458. r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
  2459. if (r) {
  2460. DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
  2461. cache_device_name(cache), r);
  2462. goto err;
  2463. }
  2464. r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
  2465. if (r) {
  2466. DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
  2467. cache_device_name(cache), r);
  2468. goto err;
  2469. }
  2470. residency = policy_residency(cache->policy);
  2471. DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
  2472. (unsigned int)DM_CACHE_METADATA_BLOCK_SIZE,
  2473. (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
  2474. (unsigned long long)nr_blocks_metadata,
  2475. (unsigned long long)cache->sectors_per_block,
  2476. (unsigned long long) from_cblock(residency),
  2477. (unsigned long long) from_cblock(cache->cache_size),
  2478. (unsigned int) atomic_read(&cache->stats.read_hit),
  2479. (unsigned int) atomic_read(&cache->stats.read_miss),
  2480. (unsigned int) atomic_read(&cache->stats.write_hit),
  2481. (unsigned int) atomic_read(&cache->stats.write_miss),
  2482. (unsigned int) atomic_read(&cache->stats.demotion),
  2483. (unsigned int) atomic_read(&cache->stats.promotion),
  2484. (unsigned long) atomic_read(&cache->nr_dirty));
  2485. emit_flags(cache, result, maxlen, &sz);
  2486. DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
  2487. DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
  2488. if (sz < maxlen) {
  2489. r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
  2490. if (r)
  2491. DMERR("%s: policy_emit_config_values returned %d",
  2492. cache_device_name(cache), r);
  2493. }
  2494. if (get_cache_mode(cache) == CM_READ_ONLY)
  2495. DMEMIT("ro ");
  2496. else
  2497. DMEMIT("rw ");
  2498. r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
  2499. if (r || needs_check)
  2500. DMEMIT("needs_check ");
  2501. else
  2502. DMEMIT("- ");
  2503. break;
  2504. case STATUSTYPE_TABLE:
  2505. format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
  2506. DMEMIT("%s ", buf);
  2507. format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
  2508. DMEMIT("%s ", buf);
  2509. format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
  2510. DMEMIT("%s", buf);
  2511. for (i = 0; i < cache->nr_ctr_args - 1; i++)
  2512. DMEMIT(" %s", cache->ctr_args[i]);
  2513. if (cache->nr_ctr_args)
  2514. DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
  2515. break;
  2516. case STATUSTYPE_IMA:
  2517. DMEMIT_TARGET_NAME_VERSION(ti->type);
  2518. if (get_cache_mode(cache) == CM_FAIL)
  2519. DMEMIT(",metadata_mode=fail");
  2520. else if (get_cache_mode(cache) == CM_READ_ONLY)
  2521. DMEMIT(",metadata_mode=ro");
  2522. else
  2523. DMEMIT(",metadata_mode=rw");
  2524. format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
  2525. DMEMIT(",cache_metadata_device=%s", buf);
  2526. format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
  2527. DMEMIT(",cache_device=%s", buf);
  2528. format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
  2529. DMEMIT(",cache_origin_device=%s", buf);
  2530. DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n');
  2531. DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n');
  2532. DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n');
  2533. DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n');
  2534. DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ? 'n' : 'y');
  2535. DMEMIT(";");
  2536. break;
  2537. }
  2538. return;
  2539. err:
  2540. DMEMIT("Error");
  2541. }
  2542. /*
  2543. * Defines a range of cblocks, begin to (end - 1) are in the range. end is
  2544. * the one-past-the-end value.
  2545. */
  2546. struct cblock_range {
  2547. dm_cblock_t begin;
  2548. dm_cblock_t end;
  2549. };
  2550. /*
  2551. * A cache block range can take two forms:
  2552. *
  2553. * i) A single cblock, eg. '3456'
  2554. * ii) A begin and end cblock with a dash between, eg. 123-234
  2555. */
  2556. static int parse_cblock_range(struct cache *cache, const char *str,
  2557. struct cblock_range *result)
  2558. {
  2559. char dummy;
  2560. uint64_t b, e;
  2561. int r;
  2562. /*
  2563. * Try and parse form (ii) first.
  2564. */
  2565. r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
  2566. if (r < 0)
  2567. return r;
  2568. if (r == 2) {
  2569. result->begin = to_cblock(b);
  2570. result->end = to_cblock(e);
  2571. return 0;
  2572. }
  2573. /*
  2574. * That didn't work, try form (i).
  2575. */
  2576. r = sscanf(str, "%llu%c", &b, &dummy);
  2577. if (r < 0)
  2578. return r;
  2579. if (r == 1) {
  2580. result->begin = to_cblock(b);
  2581. result->end = to_cblock(from_cblock(result->begin) + 1u);
  2582. return 0;
  2583. }
  2584. DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
  2585. return -EINVAL;
  2586. }
  2587. static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
  2588. {
  2589. uint64_t b = from_cblock(range->begin);
  2590. uint64_t e = from_cblock(range->end);
  2591. uint64_t n = from_cblock(cache->cache_size);
  2592. if (b >= n) {
  2593. DMERR("%s: begin cblock out of range: %llu >= %llu",
  2594. cache_device_name(cache), b, n);
  2595. return -EINVAL;
  2596. }
  2597. if (e > n) {
  2598. DMERR("%s: end cblock out of range: %llu > %llu",
  2599. cache_device_name(cache), e, n);
  2600. return -EINVAL;
  2601. }
  2602. if (b >= e) {
  2603. DMERR("%s: invalid cblock range: %llu >= %llu",
  2604. cache_device_name(cache), b, e);
  2605. return -EINVAL;
  2606. }
  2607. return 0;
  2608. }
  2609. static inline dm_cblock_t cblock_succ(dm_cblock_t b)
  2610. {
  2611. return to_cblock(from_cblock(b) + 1);
  2612. }
  2613. static int request_invalidation(struct cache *cache, struct cblock_range *range)
  2614. {
  2615. int r = 0;
  2616. /*
  2617. * We don't need to do any locking here because we know we're in
  2618. * passthrough mode. There's is potential for a race between an
  2619. * invalidation triggered by an io and an invalidation message. This
  2620. * is harmless, we must not worry if the policy call fails.
  2621. */
  2622. while (range->begin != range->end) {
  2623. r = invalidate_cblock(cache, range->begin);
  2624. if (r)
  2625. return r;
  2626. range->begin = cblock_succ(range->begin);
  2627. }
  2628. cache->commit_requested = true;
  2629. return r;
  2630. }
  2631. static int process_invalidate_cblocks_message(struct cache *cache, unsigned int count,
  2632. const char **cblock_ranges)
  2633. {
  2634. int r = 0;
  2635. unsigned int i;
  2636. struct cblock_range range;
  2637. if (!passthrough_mode(cache)) {
  2638. DMERR("%s: cache has to be in passthrough mode for invalidation",
  2639. cache_device_name(cache));
  2640. return -EPERM;
  2641. }
  2642. for (i = 0; i < count; i++) {
  2643. r = parse_cblock_range(cache, cblock_ranges[i], &range);
  2644. if (r)
  2645. break;
  2646. r = validate_cblock_range(cache, &range);
  2647. if (r)
  2648. break;
  2649. /*
  2650. * Pass begin and end origin blocks to the worker and wake it.
  2651. */
  2652. r = request_invalidation(cache, &range);
  2653. if (r)
  2654. break;
  2655. }
  2656. return r;
  2657. }
  2658. /*
  2659. * Supports
  2660. * "<key> <value>"
  2661. * and
  2662. * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]*
  2663. *
  2664. * The key migration_threshold is supported by the cache target core.
  2665. */
  2666. static int cache_message(struct dm_target *ti, unsigned int argc, char **argv,
  2667. char *result, unsigned int maxlen)
  2668. {
  2669. struct cache *cache = ti->private;
  2670. if (!argc)
  2671. return -EINVAL;
  2672. if (get_cache_mode(cache) >= CM_READ_ONLY) {
  2673. DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
  2674. cache_device_name(cache));
  2675. return -EOPNOTSUPP;
  2676. }
  2677. if (!strcasecmp(argv[0], "invalidate_cblocks"))
  2678. return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
  2679. if (argc != 2)
  2680. return -EINVAL;
  2681. return set_config_value(cache, argv[0], argv[1]);
  2682. }
  2683. static int cache_iterate_devices(struct dm_target *ti,
  2684. iterate_devices_callout_fn fn, void *data)
  2685. {
  2686. int r = 0;
  2687. struct cache *cache = ti->private;
  2688. r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
  2689. if (!r)
  2690. r = fn(ti, cache->origin_dev, 0, ti->len, data);
  2691. return r;
  2692. }
  2693. /*
  2694. * If discard_passdown was enabled verify that the origin device
  2695. * supports discards. Disable discard_passdown if not.
  2696. */
  2697. static void disable_passdown_if_not_supported(struct cache *cache)
  2698. {
  2699. struct block_device *origin_bdev = cache->origin_dev->bdev;
  2700. struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
  2701. const char *reason = NULL;
  2702. if (!cache->features.discard_passdown)
  2703. return;
  2704. if (!bdev_max_discard_sectors(origin_bdev))
  2705. reason = "discard unsupported";
  2706. else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
  2707. reason = "max discard sectors smaller than a block";
  2708. if (reason) {
  2709. DMWARN("Origin device (%pg) %s: Disabling discard passdown.",
  2710. origin_bdev, reason);
  2711. cache->features.discard_passdown = false;
  2712. }
  2713. }
  2714. static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
  2715. {
  2716. struct block_device *origin_bdev = cache->origin_dev->bdev;
  2717. struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
  2718. if (!cache->features.discard_passdown) {
  2719. /* No passdown is done so setting own virtual limits */
  2720. limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
  2721. cache->origin_sectors);
  2722. limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
  2723. return;
  2724. }
  2725. /*
  2726. * cache_iterate_devices() is stacking both origin and fast device limits
  2727. * but discards aren't passed to fast device, so inherit origin's limits.
  2728. */
  2729. limits->max_discard_sectors = origin_limits->max_discard_sectors;
  2730. limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
  2731. limits->discard_granularity = origin_limits->discard_granularity;
  2732. limits->discard_alignment = origin_limits->discard_alignment;
  2733. limits->discard_misaligned = origin_limits->discard_misaligned;
  2734. }
  2735. static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
  2736. {
  2737. struct cache *cache = ti->private;
  2738. uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
  2739. /*
  2740. * If the system-determined stacked limits are compatible with the
  2741. * cache's blocksize (io_opt is a factor) do not override them.
  2742. */
  2743. if (io_opt_sectors < cache->sectors_per_block ||
  2744. do_div(io_opt_sectors, cache->sectors_per_block)) {
  2745. blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
  2746. blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
  2747. }
  2748. disable_passdown_if_not_supported(cache);
  2749. set_discard_limits(cache, limits);
  2750. }
  2751. /*----------------------------------------------------------------*/
  2752. static struct target_type cache_target = {
  2753. .name = "cache",
  2754. .version = {2, 2, 0},
  2755. .module = THIS_MODULE,
  2756. .ctr = cache_ctr,
  2757. .dtr = cache_dtr,
  2758. .map = cache_map,
  2759. .end_io = cache_end_io,
  2760. .postsuspend = cache_postsuspend,
  2761. .preresume = cache_preresume,
  2762. .resume = cache_resume,
  2763. .status = cache_status,
  2764. .message = cache_message,
  2765. .iterate_devices = cache_iterate_devices,
  2766. .io_hints = cache_io_hints,
  2767. };
  2768. static int __init dm_cache_init(void)
  2769. {
  2770. int r;
  2771. migration_cache = KMEM_CACHE(dm_cache_migration, 0);
  2772. if (!migration_cache)
  2773. return -ENOMEM;
  2774. r = dm_register_target(&cache_target);
  2775. if (r) {
  2776. DMERR("cache target registration failed: %d", r);
  2777. kmem_cache_destroy(migration_cache);
  2778. return r;
  2779. }
  2780. return 0;
  2781. }
  2782. static void __exit dm_cache_exit(void)
  2783. {
  2784. dm_unregister_target(&cache_target);
  2785. kmem_cache_destroy(migration_cache);
  2786. }
  2787. module_init(dm_cache_init);
  2788. module_exit(dm_cache_exit);
  2789. MODULE_DESCRIPTION(DM_NAME " cache target");
  2790. MODULE_AUTHOR("Joe Thornber <[email protected]>");
  2791. MODULE_LICENSE("GPL");