// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *revoke_entry_slab;

static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

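/*
 * Example: f2fs_set_bit(0, bitmap) sets the MSB of byte 0, so
 * __reverse_ulong() maps that word to 0x80...0 and __reverse_ffs()
 * below reports bit position 0 again.
 */
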
/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be integral times of unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

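/*
 * Decide whether blocks should be allocated via SSR: never in LFS mode,
 * always under urgent GC or while checkpointing is disabled, otherwise
 * only when free sections run short of the dirty node/dentry/imeta
 * demand plus the reserved and minimum-SSR margins.
 */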
bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (f2fs_lfs_mode(sbi))
		return false;
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

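/*
 * Drop an in-flight atomic write: release its block budget, clear the
 * atomic inode flags and, on a clean abort, truncate the file back to
 * its original size.
 */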
void f2fs_abort_atomic_write(struct inode *inode, bool clean)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	if (!f2fs_is_atomic_file(inode))
		return;

	release_atomic_write_cnt(inode);
	clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
	clear_inode_flag(inode, FI_ATOMIC_REPLACE);
	clear_inode_flag(inode, FI_ATOMIC_FILE);
	stat_dec_atomic_inode(inode);

	F2FS_I(inode)->atomic_write_task = NULL;

	if (clean) {
		truncate_inode_pages_final(inode->i_mapping);
		f2fs_i_size_write(inode, fi->original_i_size);
		fi->original_i_size = 0;
	}
	/* avoid stale dirty inode during eviction */
	sync_inode_metadata(inode, 0);
}

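/*
 * Replace one data block of @inode at @index with @new_addr. On commit
 * (@recover == false) the previous address is returned via @old_addr so
 * it can be restored later; on revoke (@recover == true) that saved
 * address is written back.
 */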
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
			block_t new_addr, block_t *old_addr, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct node_info ni;
	int err;

retry:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
			goto retry;
		}
		return err;
	}

	err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	if (recover) {
		/* dn.data_blkaddr is always valid */
		if (!__is_valid_data_blkaddr(new_addr)) {
			if (new_addr == NULL_ADDR)
				dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
			f2fs_update_data_blkaddr(&dn, new_addr);
		} else {
			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
				new_addr, ni.version, true, true);
		}
	} else {
		blkcnt_t count = 1;

		err = inc_valid_block_count(sbi, inode, &count, true);
		if (err) {
			f2fs_put_dnode(&dn);
			return err;
		}
		*old_addr = dn.data_blkaddr;
		f2fs_truncate_data_blocks_range(&dn, 1);
		dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
		f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
					ni.version, true, false);
	}

	f2fs_put_dnode(&dn);

	trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
			index, old_addr ? *old_addr : 0, new_addr, recover);
	return 0;
}

static void __complete_revoke_list(struct inode *inode, struct list_head *head,
					bool revoke)
{
	struct revoke_entry *cur, *tmp;
	pgoff_t start_index = 0;
	bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);

	list_for_each_entry_safe(cur, tmp, head, list) {
		if (revoke) {
			__replace_atomic_write_block(inode, cur->index,
					cur->old_addr, NULL, true);
		} else if (truncate) {
			f2fs_truncate_hole(inode, start_index, cur->index);
			start_index = cur->index + 1;
		}

		list_del(&cur->list);
		kmem_cache_free(revoke_entry_slab, cur);
	}

	if (!revoke && truncate)
		f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
}

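/*
 * Walk every data block of the COW inode, move the written blocks into
 * the original inode and record the replaced addresses in a revoke list
 * so a failed commit can be rolled back.
 */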
static int __f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inode *cow_inode = fi->cow_inode;
	struct revoke_entry *new;
	struct list_head revoke_list;
	block_t blkaddr;
	struct dnode_of_data dn;
	pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	pgoff_t off = 0, blen, index;
	int ret = 0, i;

	INIT_LIST_HEAD(&revoke_list);

	while (len) {
		blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);

		set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
		if (ret && ret != -ENOENT) {
			goto out;
		} else if (ret == -ENOENT) {
			ret = 0;
			if (dn.max_level == 0)
				goto out;
			goto next;
		}

		blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
				len);
		index = off;
		for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
			blkaddr = f2fs_data_blkaddr(&dn);

			if (!__is_valid_data_blkaddr(blkaddr)) {
				continue;
			} else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
					DATA_GENERIC_ENHANCE)) {
				f2fs_put_dnode(&dn);
				ret = -EFSCORRUPTED;
				f2fs_handle_error(sbi,
						ERROR_INVALID_BLKADDR);
				goto out;
			}

			new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
							true, NULL);

			ret = __replace_atomic_write_block(inode, index, blkaddr,
							&new->old_addr, false);
			if (ret) {
				f2fs_put_dnode(&dn);
				kmem_cache_free(revoke_entry_slab, new);
				goto out;
			}

			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			new->index = index;
			list_add_tail(&new->list, &revoke_list);
		}

		f2fs_put_dnode(&dn);
next:
		off += blen;
		len -= blen;
	}

out:
	if (ret) {
		sbi->revoked_atomic_block += fi->atomic_write_cnt;
	} else {
		sbi->committed_atomic_block += fi->atomic_write_cnt;
		set_inode_flag(inode, FI_ATOMIC_COMMITTED);
	}

	__complete_revoke_list(inode, &revoke_list, ret ? true : false);

	return ret;
}

int f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (err)
		return err;

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	f2fs_lock_op(sbi);

	err = __f2fs_commit_atomic_write(inode);

	f2fs_unlock_op(sbi);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	if (time_to_inject(sbi, FAULT_CHECKPOINT))
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);

	/* balance_fs_bg is able to be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi, false);

	if (!f2fs_is_checkpoint_ready(sbi))
		return;

	/*
	 * We should do GC or end up with checkpoint, if there are too many
	 * dirty dir/node pages without enough free segments.
	 */
	if (has_enough_free_secs(sbi, 0, 0))
		return;

	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
				sbi->gc_thread->f2fs_gc_task) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
					TASK_UNINTERRUPTIBLE);
		wake_up(&sbi->gc_thread->gc_wait_queue_head);
		io_schedule();
		finish_wait(&sbi->gc_thread->fggc_wq, &wait);
	} else {
		struct f2fs_gc_control gc_control = {
			.victim_segno = NULL_SEGNO,
			.init_gc_type = BG_GC,
			.no_bg_gc = true,
			.should_migrate_blocks = false,
			.err_gc_skipped = false,
			.nr_free_secs = 1 };
		f2fs_down_write(&sbi->gc_lock);
		f2fs_gc(sbi, &gc_control);
	}
}

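/*
 * Decide whether dirty dentry/quota/node/meta/imeta pages have piled up
 * enough to force a checkpoint from f2fs_balance_fs_bg(): each type is
 * checked against a per-type limit that scales with the segment size
 * (a larger factor applies while cp_rwsem is held), and the sum of all
 * types is checked against a global limit.
 */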
static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
{
	int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
	unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
	unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
	unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
	unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
	unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
	unsigned int threshold = sbi->blocks_per_seg * factor *
					DEFAULT_DIRTY_THRESHOLD;
	unsigned int global_threshold = threshold * 3 / 2;

	if (dents >= threshold || qdata >= threshold ||
		nodes >= threshold || meta >= threshold ||
		imeta >= threshold)
		return true;
	return dents + qdata + nodes + meta + imeta > global_threshold;
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
		f2fs_shrink_read_extent_tree(sbi,
				READ_EXTENT_CACHE_SHRINK_NUMBER);

	/* try to shrink age extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
		f2fs_shrink_age_extent_tree(sbi,
				AGE_EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
		excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
		goto do_sync;

	/* there is background inflight IO or foreground operation recently */
	if (is_inflight_io(sbi, REQ_TIME) ||
		(!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
		return;

	/* exceed periodical checkpoint timeout threshold */
	if (f2fs_time_over(sbi, CP_TIME))
		goto do_sync;

	/* checkpoint is the only way to shrink partial cached entries */
	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
		f2fs_available_free_memory(sbi, INO_ENTRIES))
		return;

do_sync:
	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
		struct blk_plug plug;

		mutex_lock(&sbi->flush_lock);

		blk_start_plug(&plug);
		f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
		blk_finish_plug(&plug);

		mutex_unlock(&sbi->flush_lock);
	}
	f2fs_sync_fs(sbi->sb, 1);
	stat_inc_bg_cp_count(sbi->stat_info);
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	int ret = blkdev_issue_flush(bdev);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	if (!ret)
		f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
	return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!f2fs_is_multi_device(sbi))
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

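/*
 * Flush merge daemon: drain the lock-less issue_list, submit a single
 * flush on behalf of all queued waiters and propagate the result to
 * each of them.
 */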
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
	    f2fs_is_multi_device(sbi)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/*
	 * update issue_list before we wake up the issue_flush thread; this
	 * smp_mb() pairs with another barrier in ___wait_event(), see the
	 * comments of waitqueue_active() for more details.
	 */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return 0;
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->queued_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	if (!test_opt(sbi, FLUSH_MERGE))
		return 0;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		int err = PTR_ERR(fcc->f2fs_issue_flush);

		fcc->f2fs_issue_flush = NULL;
		return err;
	}

	return 0;
}

void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		int count = DEFAULT_RETRY_IO_COUNT;

		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;

		do {
			ret = __submit_flush_wait(sbi, FDEV(i).bdev);
			if (ret)
				f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
		} while (ret && --count);

		if (ret) {
			f2fs_stop_checkpoint(sbi, false,
					STOP_CP_REASON_FLUSH_FAIL);
			break;
		}

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

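/*
 * Mark @segno in the dirty_segmap of the given @dirty_type. For DIRTY,
 * the per-segment-type bitmap is updated too, and for large sections the
 * owning section is flagged in dirty_secmap.
 */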
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;

		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
			block_t valid_blocks =
				get_valid_blocks(sbi, segno, true);

			f2fs_bug_on(sbi, unlikely(!valid_blocks ||
					valid_blocks == CAP_BLKS_PER_SEC(sbi)));

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t valid_blocks;

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		valid_blocks = get_valid_blocks(sbi, segno, true);
		if (valid_blocks == 0) {
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
		}
		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);

			if (!valid_blocks ||
					valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
				clear_bit(secno, dirty_i->dirty_secmap);
				return;
			}

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

/*
 * Errors such as -ENOMEM should not occur here.
 * Adding a dirty entry into the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;
	unsigned int usable_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);

	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
		ckpt_valid_blocks == usable_blocks)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < usable_blocks) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}

block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	block_t unusable;
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
		else
			holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	unusable = max(holes[DATA], holes[NODE]);
	if (unusable > ovp_holes)
		return unusable - ovp_holes;
	return 0;
}

int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));

	if (unusable > F2FS_OPTION(sbi).unusable_cap)
		return -EAGAIN;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
		dirty_segments(sbi) > ovp_hole_segs)
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno, false))
			continue;
		mutex_unlock(&dirty_i->seglist_lock);
		return segno;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return NULL_SEGNO;
}

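/*
 * Allocate a discard command covering @len blocks at logical @lstart
 * (physical @start on @bdev), queue it on the length-keyed pending list
 * and account it in the discard control structure.
 */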
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->di.lstart = lstart;
	dc->di.start = start;
	dc->di.len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->queued = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}

static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node *cur = rb_first_cached(&dcc->root), *next;
	struct discard_cmd *cur_dc, *next_dc;

	while (cur) {
		next = rb_next(cur);
		if (!next)
			return true;

		cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
		next_dc = rb_entry(next, struct discard_cmd, rb_node);

		if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
			f2fs_info(sbi, "broken discard_rbtree, "
				"cur(%u, %u) next(%u, %u)",
				cur_dc->di.lstart, cur_dc->di.len,
				next_dc->di.lstart, next_dc->di.len);
			return false;
		}
		cur = next;
	}
#endif
	return true;
}

static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
						block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node *node = dcc->root.rb_root.rb_node;
	struct discard_cmd *dc;

	while (node) {
		dc = rb_entry(node, struct discard_cmd, rb_node);

		if (blkaddr < dc->di.lstart)
			node = node->rb_left;
		else if (blkaddr >= dc->di.lstart + dc->di.len)
			node = node->rb_right;
		else
			return dc;
	}
	return NULL;
}

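/*
 * Like __lookup_discard_cmd(), but also report the neighbouring commands
 * and, when no command covers @blkaddr, the rb-tree slot where a new one
 * should be inserted.
 */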
static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
				block_t blkaddr,
				struct discard_cmd **prev_entry,
				struct discard_cmd **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct discard_cmd *dc;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	while (*pnode) {
		parent = *pnode;
		dc = rb_entry(*pnode, struct discard_cmd, rb_node);

		if (blkaddr < dc->di.lstart)
			pnode = &(*pnode)->rb_left;
		else if (blkaddr >= dc->di.lstart + dc->di.len)
			pnode = &(*pnode)->rb_right;
		else
			goto lookup_neighbors;
	}

	*insert_p = pnode;
	*insert_parent = parent;

	dc = rb_entry(parent, struct discard_cmd, rb_node);
	tmp_node = parent;
	if (parent && blkaddr > dc->di.lstart)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);

	tmp_node = parent;
	if (parent && blkaddr < dc->di.lstart)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
	return NULL;

lookup_neighbors:
	/* lookup prev node for merging backward later */
	tmp_node = rb_prev(&dc->rb_node);
	*prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);

	/* lookup next node for merging frontward later */
	tmp_node = rb_next(&dc->rb_node);
	*next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
	return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_sub(dc->queued, &dcc->queued_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->di.len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		printk_ratelimited(
			"%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
			KERN_INFO, sbi->sb->s_id,
			dc->di.lstart, dc->di.start, dc->di.len, dc->error);
	__detach_discard_cmd(dcc, dc);
}

static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&dc->lock, flags);
	if (!dc->error)
		dc->error = blk_status_to_errno(bio->bi_status);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
	bio_put(bio);
}

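/*
 * Debug-only (CONFIG_F2FS_CHECK_FS) sanity check: no block in
 * [start, end) may still be marked valid in the SIT bitmap when it is
 * about to be discarded.
 */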
static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct seg_entry *sentry;
	unsigned int segno;
	block_t blk = start;
	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
	unsigned long *map;

	while (blk < end) {
		segno = GET_SEGNO(sbi, blk);
		sentry = get_seg_entry(sbi, segno);
		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

		if (end < START_BLOCK(sbi, segno + 1))
			size = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			size = max_blocks;
		map = (unsigned long *)(sentry->cur_valid_map);
		offset = __find_rev_next_bit(map, size, offset);
		f2fs_bug_on(sbi, offset != size);
		blk = START_BLOCK(sbi, segno + 1);
	}
#endif
}

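/*
 * Fill a discard_policy for the given issuer: background issuing is
 * io-aware, non-sync and ordered, DPOLICY_FORCE/FSTRIM ignore
 * io-awareness, and DPOLICY_UMOUNT issues everything (finest
 * granularity, with a timeout) so CP_TRIMMED_FLAG can be kept.
 */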
  960. static void __init_discard_policy(struct f2fs_sb_info *sbi,
  961. struct discard_policy *dpolicy,
  962. int discard_type, unsigned int granularity)
  963. {
  964. struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
  965. /* common policy */
  966. dpolicy->type = discard_type;
  967. dpolicy->sync = true;
  968. dpolicy->ordered = false;
  969. dpolicy->granularity = granularity;
  970. dpolicy->max_requests = dcc->max_discard_request;
  971. dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
  972. dpolicy->timeout = false;
  973. if (discard_type == DPOLICY_BG) {
  974. dpolicy->min_interval = dcc->min_discard_issue_time;
  975. dpolicy->mid_interval = dcc->mid_discard_issue_time;
  976. dpolicy->max_interval = dcc->max_discard_issue_time;
  977. dpolicy->io_aware = true;
  978. dpolicy->sync = false;
  979. dpolicy->ordered = true;
  980. if (utilization(sbi) > dcc->discard_urgent_util) {
  981. dpolicy->granularity = MIN_DISCARD_GRANULARITY;
  982. if (atomic_read(&dcc->discard_cmd_cnt))
  983. dpolicy->max_interval =
  984. dcc->min_discard_issue_time;
  985. }
  986. } else if (discard_type == DPOLICY_FORCE) {
  987. dpolicy->min_interval = dcc->min_discard_issue_time;
  988. dpolicy->mid_interval = dcc->mid_discard_issue_time;
  989. dpolicy->max_interval = dcc->max_discard_issue_time;
  990. dpolicy->io_aware = false;
  991. } else if (discard_type == DPOLICY_FSTRIM) {
  992. dpolicy->io_aware = false;
  993. } else if (discard_type == DPOLICY_UMOUNT) {
  994. dpolicy->io_aware = false;
  995. /* we need to issue all to keep CP_TRIMMED_FLAG */
  996. dpolicy->granularity = MIN_DISCARD_GRANULARITY;
  997. dpolicy->timeout = true;
  998. }
  999. }
  1000. static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
  1001. struct block_device *bdev, block_t lstart,
  1002. block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				struct discard_cmd *dc, int *issued)
{
	struct block_device *bdev = dc->bdev;
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
	block_t lstart, start, len, total_len;
	int err = 0;

	if (dc->state != D_PREP)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return 0;

	trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);

	lstart = dc->di.lstart;
	start = dc->di.start;
	len = dc->di.len;
	total_len = len;

	dc->di.len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->di.len += len;

		if (time_to_inject(sbi, FAULT_DISCARD)) {
			err = -EIO;
		} else {
			err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, &bio);
		}
		if (err) {
			spin_lock_irqsave(&dc->lock, flags);
			if (dc->state == D_PARTIAL)
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

			break;
		}

		f2fs_bug_on(sbi, !bio);

		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);

		atomic_inc(&dcc->queued_discard);
		dc->queued++;
		list_move_tail(&dc->list, wait_list);

		/* sanity check on discard range */
		__check_sit_bitmap(sbi, lstart, lstart + len);

		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
	}

	if (!err && len) {
		dcc->undiscard_blks -= len;
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
	}
	return err;
}
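/*
 * __insert_discard_cmd() links a new command into dcc->root, the rb tree of
 * pending discards keyed by logical start address; a range that overlaps an
 * existing node is silently skipped.
 */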
static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p = &dcc->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct discard_cmd *dc;
	bool leftmost = true;

	/* look up rb tree to find parent node */
	while (*p) {
		parent = *p;
		dc = rb_entry(parent, struct discard_cmd, rb_node);

		if (lstart < dc->di.lstart) {
			p = &(*p)->rb_left;
		} else if (lstart >= dc->di.lstart + dc->di.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			/* Let's skip to add, if exists */
			return;
		}
	}

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
}
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
				struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
}
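/*
 * __punch_discard_cmd() removes a single block (blkaddr) from a still-pending
 * command: the command is trimmed at the front or back, and if the punched
 * block falls in the middle, the remainder past it is re-inserted as a new
 * command.
 */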
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool modified = false;

	if (dc->state == D_DONE || dc->di.len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	dcc->undiscard_blks -= di.len;

	if (blkaddr > di.lstart) {
		dc->di.len = blkaddr - dc->di.lstart;
		dcc->undiscard_blks += dc->di.len;
		__relocate_discard_cmd(dcc, dc);
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr);
		} else {
			dc->di.lstart++;
			dc->di.len--;
			dc->di.start++;
			dcc->undiscard_blks += dc->di.len;
			__relocate_discard_cmd(dcc, dc);
		}
	}
}
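/*
 * __update_discard_tree_range() adds [lstart, lstart + len) to the discard
 * tree, merging with an adjacent D_PREP command on the same bdev (front or
 * back) whenever the merged size stays within max_discard_blocks; any part
 * that cannot be merged is inserted as a new command.
 */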
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
	block_t end = lstart + len;

	dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
				&prev_dc, &next_dc, &insert_p, &insert_parent);
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->di.lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->di.lstart + prev_dc->di.len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->di.lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->di.lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
		}

		if (!merged)
			__insert_discard_cmd(sbi, bdev,
						di.lstart, di.start, di.len);
next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}
static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

	if (!f2fs_bdev_support_discard(bdev))
		return;

	trace_f2fs_queue_discard(bdev, blkstart, blklen);

	if (f2fs_is_multi_device(sbi)) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
}
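/*
 * __issue_discard_cmd_orderly() walks the discard tree in LBA order starting
 * from dcc->next_pos, so background discards are sent roughly sequentially;
 * it stops early when the policy is io_aware and the device is no longer
 * idle, reporting that via *issued = -1.
 */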
static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
		struct discard_policy *dpolicy, int *issued)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	bool io_interrupted = false;

	mutex_lock(&dcc->cmd_lock);
	dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
				&prev_dc, &next_dc, &insert_p, &insert_parent);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
		int err = 0;

		if (dc->state != D_PREP)
			goto next;

		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->di.lstart + dc->di.len;
		err = __submit_discard_cmd(sbi, dpolicy, dc, issued);

		if (*issued >= dpolicy->max_requests)
			break;
next:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!(*issued) && io_interrupted)
		*issued = -1;
}
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy);
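/*
 * __issue_discard_cmd() scans the pending lists from the largest length
 * bucket down to the policy's granularity cutoff.  In ordered (background)
 * mode the small buckets are handed to __issue_discard_cmd_orderly(); the
 * umount policy keeps retrying, waiting for in-flight commands, until
 * everything is issued or UMOUNT_DISCARD_TIMEOUT expires.
 */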
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
	int i, issued;
	bool io_interrupted = false;

	if (dpolicy->timeout)
		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);

retry:
	issued = 0;
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
			break;

		if (i + 1 < dpolicy->granularity)
			break;

		if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
			__issue_discard_cmd_orderly(sbi, dpolicy, &issued);
			return issued;
		}

		pend_list = &dcc->pend_list[i];

		mutex_lock(&dcc->cmd_lock);
		if (list_empty(pend_list))
			goto next;
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
		blk_start_plug(&plug);
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

			if (dpolicy->timeout &&
					f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
				break;

			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
						!is_idle(sbi, DISCARD_TIME)) {
				io_interrupted = true;
				break;
			}

			__submit_discard_cmd(sbi, dpolicy, dc, &issued);

			if (issued >= dpolicy->max_requests)
				break;
		}
		blk_finish_plug(&plug);
next:
		mutex_unlock(&dcc->cmd_lock);

		if (issued >= dpolicy->max_requests || io_interrupted)
			break;
	}

	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
		__wait_all_discard_cmd(sbi, dpolicy);
		goto retry;
	}

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}
static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
	bool dropped = false;

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
			dropped = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	return dropped;
}

void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	__drop_discard_cmd(sbi);
}
static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned int len = 0;

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
	if (!dc->ref) {
		if (!dc->error)
			len = dc->di.len;
		__remove_discard_cmd(sbi, dc);
	}
	mutex_unlock(&dcc->cmd_lock);

	return len;
}
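/*
 * __wait_discard_cmd_range() waits for every in-flight command that overlaps
 * [start, end) and meets the policy granularity, removing finished commands
 * and returning the number of blocks actually trimmed.
 */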
static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						block_t start, block_t end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	struct discard_cmd *dc = NULL, *iter, *tmp;
	unsigned int trimmed = 0;

next:
	dc = NULL;

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(iter, tmp, wait_list, list) {
		if (iter->di.lstart + iter->di.len <= start ||
					end <= iter->di.lstart)
			continue;
		if (iter->di.len < dpolicy->granularity)
			continue;
		if (iter->state == D_DONE && !iter->ref) {
			wait_for_completion_io(&iter->wait);
			if (!iter->error)
				trimmed += iter->di.len;
			__remove_discard_cmd(sbi, iter);
		} else {
			iter->ref++;
			dc = iter;
			break;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (dc) {
		trimmed += __wait_one_discard_bio(sbi, dc);
		goto next;
	}

	return trimmed;
}
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy)
{
	struct discard_policy dp;
	unsigned int discard_blks;

	if (dpolicy)
		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);

	/* wait all */
	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);

	return discard_blks;
}
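/*
 * f2fs_wait_discard_bio() makes sure blkaddr is safe to reuse: a discard that
 * is still D_PREP is punched so the block is excluded, while a command that
 * is already in flight is waited on before returning.
 */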
/* This should be covered by global mutex, &sit_i->sentry_lock */
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
	bool need_wait = false;

	mutex_lock(&dcc->cmd_lock);
	dc = __lookup_discard_cmd(sbi, blkaddr);
	if (dc) {
		if (dc->state == D_PREP) {
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (need_wait)
		__wait_one_discard_bio(sbi, dc);
}
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dcc && dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
	}
}
/**
 * f2fs_issue_discard_timeout() - Issue all discard commands within UMOUNT_DISCARD_TIMEOUT
 * @sbi: the f2fs_sb_info data for discard cmd to issue
 *
 * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands will be dropped
 *
 * Return true if all discard commands were issued or none needed to be issued,
 * otherwise return false.
 */
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_policy dpolicy;
	bool dropped;

	if (!atomic_read(&dcc->discard_cmd_cnt))
		return true;

	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
					dcc->discard_granularity);
	__issue_discard_cmd(sbi, &dpolicy);
	dropped = __drop_discard_cmd(sbi);

	/* just to make sure there are no pending discard commands */
	__wait_all_discard_cmd(sbi, NULL);

	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
	return !dropped;
}
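/*
 * issue_discard_thread() is the background worker started by
 * f2fs_start_discard_thread().  Each round it picks a policy (DPOLICY_FORCE
 * under GC or memory pressure, otherwise DPOLICY_BG), issues pending
 * commands, and then sleeps for an interval derived from the result:
 * min_interval after successful issuing, the remaining idle time (falling
 * back to mid_interval) when it was interrupted by other I/O, and
 * max_interval when there was nothing to do.
 */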
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
	struct discard_policy dpolicy;
	unsigned int wait_ms = dcc->min_discard_issue_time;
	int issued;

	set_freezable();

	do {
		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));

		if (sbi->gc_mode == GC_URGENT_HIGH ||
			!f2fs_available_free_memory(sbi, DISCARD_CACHE))
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
						MIN_DISCARD_GRANULARITY);
		else
			__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
						dcc->discard_granularity);

		if (dcc->discard_wake)
			dcc->discard_wake = false;

		/* clean up pending candidates before going to sleep */
		if (atomic_read(&dcc->queued_discard))
			__wait_all_discard_cmd(sbi, NULL);

		if (try_to_freeze())
			continue;
		if (f2fs_readonly(sbi->sb))
			continue;
		if (kthread_should_stop())
			return 0;
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
			!atomic_read(&dcc->discard_cmd_cnt)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}

		sb_start_intwrite(sbi->sb);

		issued = __issue_discard_cmd(sbi, &dpolicy);
		if (issued > 0) {
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
		} else if (issued == -1) {
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
				wait_ms = dpolicy.mid_interval;
		} else {
			wait_ms = dpolicy.max_interval;
		}
		if (!atomic_read(&dcc->discard_cmd_cnt))
			wait_ms = dpolicy.max_interval;

		sb_end_intwrite(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}
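/*
 * On zoned block devices, discarding a whole sequential zone is translated
 * into a zone reset (REQ_OP_ZONE_RESET) by __f2fs_issue_discard_zone() below,
 * and an unaligned reset attempt is rejected with -EIO; conventional zones
 * fall back to the regular async discard path via __queue_discard_cmd().
 */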
  1540. #ifdef CONFIG_BLK_DEV_ZONED
  1541. static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
  1542. struct block_device *bdev, block_t blkstart, block_t blklen)
  1543. {
  1544. sector_t sector, nr_sects;
  1545. block_t lblkstart = blkstart;
  1546. int devi = 0;
  1547. u64 remainder = 0;
  1548. if (f2fs_is_multi_device(sbi)) {
  1549. devi = f2fs_target_device_index(sbi, blkstart);
  1550. if (blkstart < FDEV(devi).start_blk ||
  1551. blkstart > FDEV(devi).end_blk) {
  1552. f2fs_err(sbi, "Invalid block %x", blkstart);
  1553. return -EIO;
  1554. }
  1555. blkstart -= FDEV(devi).start_blk;
  1556. }
  1557. /* For sequential zones, reset the zone write pointer */
  1558. if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
  1559. sector = SECTOR_FROM_BLOCK(blkstart);
  1560. nr_sects = SECTOR_FROM_BLOCK(blklen);
  1561. div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
  1562. if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
  1563. f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
  1564. devi, sbi->s_ndevs ? FDEV(devi).path : "",
  1565. blkstart, blklen);
  1566. return -EIO;
  1567. }
  1568. trace_f2fs_issue_reset_zone(bdev, blkstart);
  1569. return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
  1570. sector, nr_sects, GFP_NOFS);
  1571. }
  1572. /* For conventional zones, use regular discard if supported */
  1573. __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
  1574. return 0;
  1575. }
  1576. #endif
  1577. static int __issue_discard_async(struct f2fs_sb_info *sbi,
  1578. struct block_device *bdev, block_t blkstart, block_t blklen)
  1579. {
  1580. #ifdef CONFIG_BLK_DEV_ZONED
  1581. if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
  1582. return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
  1583. #endif
  1584. __queue_discard_cmd(sbi, bdev, blkstart, blklen);
  1585. return 0;
  1586. }
  1587. static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
  1588. block_t blkstart, block_t blklen)
  1589. {
  1590. sector_t start = blkstart, len = 0;
  1591. struct block_device *bdev;
  1592. struct seg_entry *se;
  1593. unsigned int offset;
  1594. block_t i;
  1595. int err = 0;
  1596. bdev = f2fs_target_device(sbi, blkstart, NULL);
  1597. for (i = blkstart; i < blkstart + blklen; i++, len++) {
  1598. if (i != start) {
  1599. struct block_device *bdev2 =
  1600. f2fs_target_device(sbi, i, NULL);
  1601. if (bdev2 != bdev) {
  1602. err = __issue_discard_async(sbi, bdev,
  1603. start, len);
  1604. if (err)
  1605. return err;
  1606. bdev = bdev2;
  1607. start = i;
  1608. len = 0;
  1609. }
  1610. }
  1611. se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
  1612. offset = GET_BLKOFF_FROM_SEG0(sbi, i);
  1613. if (f2fs_block_unit_discard(sbi) &&
  1614. !f2fs_test_and_set_bit(offset, se->discard_map))
  1615. sbi->discard_blks--;
  1616. }
  1617. if (len)
  1618. err = __issue_discard_async(sbi, bdev, start, len);
  1619. return err;
  1620. }
  1621. static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
  1622. bool check_only)
  1623. {
  1624. int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
  1625. int max_blocks = sbi->blocks_per_seg;
  1626. struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
  1627. unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
  1628. unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
  1629. unsigned long *discard_map = (unsigned long *)se->discard_map;
  1630. unsigned long *dmap = SIT_I(sbi)->tmp_map;
  1631. unsigned int start = 0, end = -1;
  1632. bool force = (cpc->reason & CP_DISCARD);
  1633. struct discard_entry *de = NULL;
  1634. struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
  1635. int i;
  1636. if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
  1637. !f2fs_block_unit_discard(sbi))
  1638. return false;
  1639. if (!force) {
  1640. if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
  1641. SM_I(sbi)->dcc_info->nr_discards >=
  1642. SM_I(sbi)->dcc_info->max_discards)
  1643. return false;
  1644. }
  1645. /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
  1646. for (i = 0; i < entries; i++)
  1647. dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
  1648. (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
  1649. while (force || SM_I(sbi)->dcc_info->nr_discards <=
  1650. SM_I(sbi)->dcc_info->max_discards) {
  1651. start = __find_rev_next_bit(dmap, max_blocks, end + 1);
  1652. if (start >= max_blocks)
  1653. break;
  1654. end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
  1655. if (force && start && end != max_blocks
  1656. && (end - start) < cpc->trim_minlen)
  1657. continue;
  1658. if (check_only)
  1659. return true;
  1660. if (!de) {
  1661. de = f2fs_kmem_cache_alloc(discard_entry_slab,
  1662. GFP_F2FS_ZERO, true, NULL);
  1663. de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
  1664. list_add_tail(&de->list, head);
  1665. }
  1666. for (i = start; i < end; i++)
  1667. __set_bit_le(i, (void *)de->discard_map);
  1668. SM_I(sbi)->dcc_info->nr_discards += end - start;
  1669. }
  1670. return false;
  1671. }
  1672. static void release_discard_addr(struct discard_entry *entry)
  1673. {
  1674. list_del(&entry->list);
  1675. kmem_cache_free(discard_entry_slab, entry);
  1676. }
  1677. void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
  1678. {
  1679. struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
  1680. struct discard_entry *entry, *this;
  1681. /* drop caches */
  1682. list_for_each_entry_safe(entry, this, head, list)
  1683. release_discard_addr(entry);
  1684. }
  1685. /*
  1686. * Should call f2fs_clear_prefree_segments after checkpoint is done.
  1687. */
  1688. static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
  1689. {
  1690. struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
  1691. unsigned int segno;
  1692. mutex_lock(&dirty_i->seglist_lock);
  1693. for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
  1694. __set_test_and_free(sbi, segno, false);
  1695. mutex_unlock(&dirty_i->seglist_lock);
  1696. }
  1697. void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
  1698. struct cp_control *cpc)
  1699. {
  1700. struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
  1701. struct list_head *head = &dcc->entry_list;
  1702. struct discard_entry *entry, *this;
  1703. struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
  1704. unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
  1705. unsigned int start = 0, end = -1;
  1706. unsigned int secno, start_segno;
  1707. bool force = (cpc->reason & CP_DISCARD);
  1708. bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
  1709. DISCARD_UNIT_SECTION;
  1710. if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
  1711. section_alignment = true;
  1712. mutex_lock(&dirty_i->seglist_lock);
  1713. while (1) {
  1714. int i;
  1715. if (section_alignment && end != -1)
  1716. end--;
  1717. start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
  1718. if (start >= MAIN_SEGS(sbi))
  1719. break;
  1720. end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
  1721. start + 1);
  1722. if (section_alignment) {
  1723. start = rounddown(start, sbi->segs_per_sec);
  1724. end = roundup(end, sbi->segs_per_sec);
  1725. }
  1726. for (i = start; i < end; i++) {
  1727. if (test_and_clear_bit(i, prefree_map))
  1728. dirty_i->nr_dirty[PRE]--;
  1729. }
  1730. if (!f2fs_realtime_discard_enable(sbi))
  1731. continue;
  1732. if (force && start >= cpc->trim_start &&
  1733. (end - 1) <= cpc->trim_end)
  1734. continue;
  1735. /* Should cover 2MB zoned device for zone-based reset */
  1736. if (!f2fs_sb_has_blkzoned(sbi) &&
  1737. (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
  1738. f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
  1739. (end - start) << sbi->log_blocks_per_seg);
  1740. continue;
  1741. }
  1742. next:
  1743. secno = GET_SEC_FROM_SEG(sbi, start);
  1744. start_segno = GET_SEG_FROM_SEC(sbi, secno);
  1745. if (!IS_CURSEC(sbi, secno) &&
  1746. !get_valid_blocks(sbi, start, true))
  1747. f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
  1748. sbi->segs_per_sec << sbi->log_blocks_per_seg);
  1749. start = start_segno + sbi->segs_per_sec;
  1750. if (start < end)
  1751. goto next;
  1752. else
  1753. end = start - 1;
  1754. }
  1755. mutex_unlock(&dirty_i->seglist_lock);
  1756. if (!f2fs_block_unit_discard(sbi))
  1757. goto wakeup;
  1758. /* send small discards */
  1759. list_for_each_entry_safe(entry, this, head, list) {
  1760. unsigned int cur_pos = 0, next_pos, len, total_len = 0;
  1761. bool is_valid = test_bit_le(0, entry->discard_map);
  1762. find_next:
  1763. if (is_valid) {
  1764. next_pos = find_next_zero_bit_le(entry->discard_map,
  1765. sbi->blocks_per_seg, cur_pos);
  1766. len = next_pos - cur_pos;
  1767. if (f2fs_sb_has_blkzoned(sbi) ||
  1768. (force && len < cpc->trim_minlen))
  1769. goto skip;
  1770. f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
  1771. len);
  1772. total_len += len;
  1773. } else {
  1774. next_pos = find_next_bit_le(entry->discard_map,
  1775. sbi->blocks_per_seg, cur_pos);
  1776. }
  1777. skip:
  1778. cur_pos = next_pos;
  1779. is_valid = !is_valid;
  1780. if (cur_pos < sbi->blocks_per_seg)
  1781. goto find_next;
  1782. release_discard_addr(entry);
  1783. dcc->nr_discards -= total_len;
  1784. }
  1785. wakeup:
  1786. wake_up_discard_thread(sbi, false);
  1787. }
  1788. int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
  1789. {
  1790. dev_t dev = sbi->sb->s_bdev->bd_dev;
  1791. struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
  1792. int err = 0;
  1793. if (!f2fs_realtime_discard_enable(sbi))
  1794. return 0;
  1795. dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
  1796. "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
  1797. if (IS_ERR(dcc->f2fs_issue_discard)) {
  1798. err = PTR_ERR(dcc->f2fs_issue_discard);
  1799. dcc->f2fs_issue_discard = NULL;
  1800. }
  1801. return err;
  1802. }
  1803. static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
  1804. {
  1805. struct discard_cmd_control *dcc;
  1806. int err = 0, i;
  1807. if (SM_I(sbi)->dcc_info) {
  1808. dcc = SM_I(sbi)->dcc_info;
  1809. goto init_thread;
  1810. }
  1811. dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
  1812. if (!dcc)
  1813. return -ENOMEM;
  1814. dcc->discard_io_aware_gran = MAX_PLIST_NUM;
  1815. dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
  1816. dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
  1817. if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
  1818. dcc->discard_granularity = sbi->blocks_per_seg;
  1819. else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
  1820. dcc->discard_granularity = BLKS_PER_SEC(sbi);
  1821. INIT_LIST_HEAD(&dcc->entry_list);
  1822. for (i = 0; i < MAX_PLIST_NUM; i++)
  1823. INIT_LIST_HEAD(&dcc->pend_list[i]);
  1824. INIT_LIST_HEAD(&dcc->wait_list);
  1825. INIT_LIST_HEAD(&dcc->fstrim_list);
  1826. mutex_init(&dcc->cmd_lock);
  1827. atomic_set(&dcc->issued_discard, 0);
  1828. atomic_set(&dcc->queued_discard, 0);
  1829. atomic_set(&dcc->discard_cmd_cnt, 0);
  1830. dcc->nr_discards = 0;
  1831. dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
  1832. dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
  1833. dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
  1834. dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
  1835. dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
  1836. dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
  1837. dcc->undiscard_blks = 0;
  1838. dcc->next_pos = 0;
  1839. dcc->root = RB_ROOT_CACHED;
  1840. dcc->rbtree_check = false;
  1841. init_waitqueue_head(&dcc->discard_wait_queue);
  1842. SM_I(sbi)->dcc_info = dcc;
  1843. init_thread:
  1844. err = f2fs_start_discard_thread(sbi);
  1845. if (err) {
  1846. kfree(dcc);
  1847. SM_I(sbi)->dcc_info = NULL;
  1848. }
  1849. return err;
  1850. }
  1851. static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
  1852. {
  1853. struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
  1854. if (!dcc)
  1855. return;
  1856. f2fs_stop_discard_thread(sbi);
  1857. /*
  1858. * Recovery can cache discard commands, so in error path of
  1859. * fill_super(), it needs to give a chance to handle them.
  1860. */
  1861. f2fs_issue_discard_timeout(sbi);
  1862. kfree(dcc);
  1863. SM_I(sbi)->dcc_info = NULL;
  1864. }
  1865. static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
  1866. {
  1867. struct sit_info *sit_i = SIT_I(sbi);
  1868. if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
  1869. sit_i->dirty_sentries++;
  1870. return false;
  1871. }
  1872. return true;
  1873. }
  1874. static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
  1875. unsigned int segno, int modified)
  1876. {
  1877. struct seg_entry *se = get_seg_entry(sbi, segno);
  1878. se->type = type;
  1879. if (modified)
  1880. __mark_sit_entry_dirty(sbi, segno);
  1881. }
  1882. static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
  1883. block_t blkaddr)
  1884. {
  1885. unsigned int segno = GET_SEGNO(sbi, blkaddr);
  1886. if (segno == NULL_SEGNO)
  1887. return 0;
  1888. return get_seg_entry(sbi, segno)->mtime;
  1889. }
  1890. static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
  1891. unsigned long long old_mtime)
  1892. {
  1893. struct seg_entry *se;
  1894. unsigned int segno = GET_SEGNO(sbi, blkaddr);
  1895. unsigned long long ctime = get_mtime(sbi, false);
  1896. unsigned long long mtime = old_mtime ? old_mtime : ctime;
  1897. if (segno == NULL_SEGNO)
  1898. return;
  1899. se = get_seg_entry(sbi, segno);
  1900. if (!se->mtime)
  1901. se->mtime = mtime;
  1902. else
  1903. se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
  1904. se->valid_blocks + 1);
  1905. if (ctime > SIT_I(sbi)->max_mtime)
  1906. SIT_I(sbi)->max_mtime = ctime;
  1907. }
  1908. static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
  1909. {
  1910. struct seg_entry *se;
  1911. unsigned int segno, offset;
  1912. long int new_vblocks;
  1913. bool exist;
  1914. #ifdef CONFIG_F2FS_CHECK_FS
  1915. bool mir_exist;
  1916. #endif
  1917. segno = GET_SEGNO(sbi, blkaddr);
  1918. se = get_seg_entry(sbi, segno);
  1919. new_vblocks = se->valid_blocks + del;
  1920. offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
  1921. f2fs_bug_on(sbi, (new_vblocks < 0 ||
  1922. (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
  1923. se->valid_blocks = new_vblocks;
  1924. /* Update valid block bitmap */
  1925. if (del > 0) {
  1926. exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
  1927. #ifdef CONFIG_F2FS_CHECK_FS
  1928. mir_exist = f2fs_test_and_set_bit(offset,
  1929. se->cur_valid_map_mir);
  1930. if (unlikely(exist != mir_exist)) {
  1931. f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
  1932. blkaddr, exist);
  1933. f2fs_bug_on(sbi, 1);
  1934. }
  1935. #endif
  1936. if (unlikely(exist)) {
  1937. f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
  1938. blkaddr);
  1939. f2fs_bug_on(sbi, 1);
  1940. se->valid_blocks--;
  1941. del = 0;
  1942. }
  1943. if (f2fs_block_unit_discard(sbi) &&
  1944. !f2fs_test_and_set_bit(offset, se->discard_map))
  1945. sbi->discard_blks--;
  1946. /*
  1947. * SSR should never reuse block which is checkpointed
  1948. * or newly invalidated.
  1949. */
  1950. if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
  1951. if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
  1952. se->ckpt_valid_blocks++;
  1953. }
  1954. } else {
  1955. exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
  1956. #ifdef CONFIG_F2FS_CHECK_FS
  1957. mir_exist = f2fs_test_and_clear_bit(offset,
  1958. se->cur_valid_map_mir);
  1959. if (unlikely(exist != mir_exist)) {
  1960. f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
  1961. blkaddr, exist);
  1962. f2fs_bug_on(sbi, 1);
  1963. }
  1964. #endif
  1965. if (unlikely(!exist)) {
  1966. f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
  1967. blkaddr);
  1968. f2fs_bug_on(sbi, 1);
  1969. se->valid_blocks++;
  1970. del = 0;
  1971. } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
  1972. /*
  1973. * If checkpoints are off, we must not reuse data that
  1974. * was used in the previous checkpoint. If it was used
  1975. * before, we must track that to know how much space we
  1976. * really have.
  1977. */
  1978. if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
  1979. spin_lock(&sbi->stat_lock);
  1980. sbi->unusable_block_count++;
  1981. spin_unlock(&sbi->stat_lock);
  1982. }
  1983. }
  1984. if (f2fs_block_unit_discard(sbi) &&
  1985. f2fs_test_and_clear_bit(offset, se->discard_map))
  1986. sbi->discard_blks++;
  1987. }
  1988. if (!f2fs_test_bit(offset, se->ckpt_valid_map))
  1989. se->ckpt_valid_blocks += del;
  1990. __mark_sit_entry_dirty(sbi, segno);
  1991. /* update total number of valid blocks to be written in ckpt area */
  1992. SIT_I(sbi)->written_valid_blocks += del;
  1993. if (__is_large_section(sbi))
  1994. get_sec_entry(sbi, segno)->valid_blocks += del;
  1995. }
  1996. void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
  1997. {
  1998. unsigned int segno = GET_SEGNO(sbi, addr);
  1999. struct sit_info *sit_i = SIT_I(sbi);
  2000. f2fs_bug_on(sbi, addr == NULL_ADDR);
  2001. if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
  2002. return;
  2003. invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
  2004. f2fs_invalidate_compress_page(sbi, addr);
  2005. /* add it into sit main buffer */
  2006. down_write(&sit_i->sentry_lock);
  2007. update_segment_mtime(sbi, addr, 0);
  2008. update_sit_entry(sbi, addr, -1);
  2009. /* add it into dirty seglist */
  2010. locate_dirty_segment(sbi, segno);
  2011. up_write(&sit_i->sentry_lock);
  2012. }
  2013. bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
  2014. {
  2015. struct sit_info *sit_i = SIT_I(sbi);
  2016. unsigned int segno, offset;
  2017. struct seg_entry *se;
  2018. bool is_cp = false;
  2019. if (!__is_valid_data_blkaddr(blkaddr))
  2020. return true;
  2021. down_read(&sit_i->sentry_lock);
  2022. segno = GET_SEGNO(sbi, blkaddr);
  2023. se = get_seg_entry(sbi, segno);
  2024. offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
  2025. if (f2fs_test_bit(offset, se->ckpt_valid_map))
  2026. is_cp = true;
  2027. up_read(&sit_i->sentry_lock);
  2028. return is_cp;
  2029. }
static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (sbi->ckpt->alloc_type[type] == SSR)
		return sbi->blocks_per_seg;
	return curseg->next_blkoff;
}

/*
 * Calculate the number of current summary pages for writing
 */
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
			valid_sum_count +=
				le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
		else
			valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
	}

	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}
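/*
 * Rough worked example for f2fs_npages_for_summary_flush() above (the exact
 * constants live in the f2fs headers and are only assumed here): with 4KB
 * pages, SUMMARY_SIZE of 7 bytes, SUM_FOOTER_SIZE of 5 bytes and
 * SUM_JOURNAL_SIZE of 253 bytes, the first summary page holds
 *	(4096 - 2 * 253 - 5) / 7 = 512
 * entries, i.e. one default-sized segment's worth, and each further page
 * holds
 *	(4096 - 5) / 7 = 584
 * entries.  That is why the three data segments checked above always fit in
 * at most three pages.
 */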
  2060. /*
  2061. * Caller should put this summary page
  2062. */
  2063. struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
  2064. {
  2065. if (unlikely(f2fs_cp_error(sbi)))
  2066. return ERR_PTR(-EIO);
  2067. return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
  2068. }
  2069. void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
  2070. void *src, block_t blk_addr)
  2071. {
  2072. struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
  2073. memcpy(page_address(page), src, PAGE_SIZE);
  2074. set_page_dirty(page);
  2075. f2fs_put_page(page, 1);
  2076. }
  2077. static void write_sum_page(struct f2fs_sb_info *sbi,
  2078. struct f2fs_summary_block *sum_blk, block_t blk_addr)
  2079. {
  2080. f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
  2081. }
  2082. static void write_current_sum_page(struct f2fs_sb_info *sbi,
  2083. int type, block_t blk_addr)
  2084. {
  2085. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2086. struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
  2087. struct f2fs_summary_block *src = curseg->sum_blk;
  2088. struct f2fs_summary_block *dst;
  2089. dst = (struct f2fs_summary_block *)page_address(page);
  2090. memset(dst, 0, PAGE_SIZE);
  2091. mutex_lock(&curseg->curseg_mutex);
  2092. down_read(&curseg->journal_rwsem);
  2093. memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
  2094. up_read(&curseg->journal_rwsem);
  2095. memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
  2096. memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
  2097. mutex_unlock(&curseg->curseg_mutex);
  2098. set_page_dirty(page);
  2099. f2fs_put_page(page, 1);
  2100. }
  2101. static int is_next_segment_free(struct f2fs_sb_info *sbi,
  2102. struct curseg_info *curseg, int type)
  2103. {
  2104. unsigned int segno = curseg->segno + 1;
  2105. struct free_segmap_info *free_i = FREE_I(sbi);
  2106. if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
  2107. return !test_bit(segno, free_i->free_segmap);
  2108. return 0;
  2109. }
/*
 * Find a new segment in the free segment bitmap, following the allocation
 * direction.  This function must succeed; failing to find a segment is a BUG.
 */
  2114. static void get_new_segment(struct f2fs_sb_info *sbi,
  2115. unsigned int *newseg, bool new_sec, int dir)
  2116. {
  2117. struct free_segmap_info *free_i = FREE_I(sbi);
  2118. unsigned int segno, secno, zoneno;
  2119. unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
  2120. unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
  2121. unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
  2122. unsigned int left_start = hint;
  2123. bool init = true;
  2124. int go_left = 0;
  2125. int i;
  2126. spin_lock(&free_i->segmap_lock);
  2127. if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
  2128. segno = find_next_zero_bit(free_i->free_segmap,
  2129. GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
  2130. if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
  2131. goto got_it;
  2132. }
  2133. find_other_zone:
  2134. secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
  2135. if (secno >= MAIN_SECS(sbi)) {
  2136. if (dir == ALLOC_RIGHT) {
  2137. secno = find_first_zero_bit(free_i->free_secmap,
  2138. MAIN_SECS(sbi));
  2139. f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
  2140. } else {
  2141. go_left = 1;
  2142. left_start = hint - 1;
  2143. }
  2144. }
  2145. if (go_left == 0)
  2146. goto skip_left;
  2147. while (test_bit(left_start, free_i->free_secmap)) {
  2148. if (left_start > 0) {
  2149. left_start--;
  2150. continue;
  2151. }
  2152. left_start = find_first_zero_bit(free_i->free_secmap,
  2153. MAIN_SECS(sbi));
  2154. f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
  2155. break;
  2156. }
  2157. secno = left_start;
  2158. skip_left:
  2159. segno = GET_SEG_FROM_SEC(sbi, secno);
  2160. zoneno = GET_ZONE_FROM_SEC(sbi, secno);
  2161. /* give up on finding another zone */
  2162. if (!init)
  2163. goto got_it;
  2164. if (sbi->secs_per_zone == 1)
  2165. goto got_it;
  2166. if (zoneno == old_zoneno)
  2167. goto got_it;
  2168. if (dir == ALLOC_LEFT) {
  2169. if (!go_left && zoneno + 1 >= total_zones)
  2170. goto got_it;
  2171. if (go_left && zoneno == 0)
  2172. goto got_it;
  2173. }
  2174. for (i = 0; i < NR_CURSEG_TYPE; i++)
  2175. if (CURSEG_I(sbi, i)->zone == zoneno)
  2176. break;
  2177. if (i < NR_CURSEG_TYPE) {
  2178. /* zone is in user, try another */
  2179. if (go_left)
  2180. hint = zoneno * sbi->secs_per_zone - 1;
  2181. else if (zoneno + 1 >= total_zones)
  2182. hint = 0;
  2183. else
  2184. hint = (zoneno + 1) * sbi->secs_per_zone;
  2185. init = false;
  2186. goto find_other_zone;
  2187. }
  2188. got_it:
  2189. /* set it as dirty segment in free segmap */
  2190. f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
  2191. __set_inuse(sbi, segno);
  2192. *newseg = segno;
  2193. spin_unlock(&free_i->segmap_lock);
  2194. }
  2195. static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
  2196. {
  2197. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2198. struct summary_footer *sum_footer;
  2199. unsigned short seg_type = curseg->seg_type;
  2200. curseg->inited = true;
  2201. curseg->segno = curseg->next_segno;
  2202. curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
  2203. curseg->next_blkoff = 0;
  2204. curseg->next_segno = NULL_SEGNO;
  2205. sum_footer = &(curseg->sum_blk->footer);
  2206. memset(sum_footer, 0, sizeof(struct summary_footer));
  2207. sanity_check_seg_type(sbi, seg_type);
  2208. if (IS_DATASEG(seg_type))
  2209. SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
  2210. if (IS_NODESEG(seg_type))
  2211. SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
  2212. __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
  2213. }
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned short seg_type = curseg->seg_type;

	sanity_check_seg_type(sbi, seg_type);
	if (f2fs_need_rand_seg(sbi))
		return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec);

	/* if segs_per_sec is larger than 1, we need to keep the original policy. */
	if (__is_large_section(sbi))
		return curseg->segno;

	/* inmem log may not locate on any segment after mount */
	if (!curseg->inited)
		return 0;

	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return 0;

	if (test_opt(sbi, NOHEAP) &&
		(seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
		return 0;

	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
		return SIT_I(sbi)->last_victim[ALLOC_NEXT];

	/* find segments from 0 to reuse freed segments */
	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
		return 0;

	return curseg->segno;
}
  2239. /*
  2240. * Allocate a current working segment.
  2241. * This function always allocates a free segment in LFS manner.
  2242. */
  2243. static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
  2244. {
  2245. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2246. unsigned short seg_type = curseg->seg_type;
  2247. unsigned int segno = curseg->segno;
  2248. int dir = ALLOC_LEFT;
  2249. if (curseg->inited)
  2250. write_sum_page(sbi, curseg->sum_blk,
  2251. GET_SUM_BLOCK(sbi, segno));
  2252. if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
  2253. dir = ALLOC_RIGHT;
  2254. if (test_opt(sbi, NOHEAP))
  2255. dir = ALLOC_RIGHT;
  2256. segno = __get_next_segno(sbi, type);
  2257. get_new_segment(sbi, &segno, new_sec, dir);
  2258. curseg->next_segno = segno;
  2259. reset_curseg(sbi, type, 1);
  2260. curseg->alloc_type = LFS;
  2261. if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
  2262. curseg->fragment_remained_chunk =
  2263. prandom_u32_max(sbi->max_fragment_chunk) + 1;
  2264. }
  2265. static int __next_free_blkoff(struct f2fs_sb_info *sbi,
  2266. int segno, block_t start)
  2267. {
  2268. struct seg_entry *se = get_seg_entry(sbi, segno);
  2269. int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
  2270. unsigned long *target_map = SIT_I(sbi)->tmp_map;
  2271. unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
  2272. unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
  2273. int i;
  2274. for (i = 0; i < entries; i++)
  2275. target_map[i] = ckpt_map[i] | cur_map[i];
  2276. return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
  2277. }
  2278. static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
  2279. struct curseg_info *seg)
  2280. {
  2281. return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
  2282. }
  2283. bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
  2284. {
  2285. return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
  2286. }
  2287. /*
  2288. * This function always allocates a used segment(from dirty seglist) by SSR
  2289. * manner, so it should recover the existing segment information of valid blocks
  2290. */
  2291. static void change_curseg(struct f2fs_sb_info *sbi, int type)
  2292. {
  2293. struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
  2294. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2295. unsigned int new_segno = curseg->next_segno;
  2296. struct f2fs_summary_block *sum_node;
  2297. struct page *sum_page;
  2298. write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
  2299. __set_test_and_inuse(sbi, new_segno);
  2300. mutex_lock(&dirty_i->seglist_lock);
  2301. __remove_dirty_segment(sbi, new_segno, PRE);
  2302. __remove_dirty_segment(sbi, new_segno, DIRTY);
  2303. mutex_unlock(&dirty_i->seglist_lock);
  2304. reset_curseg(sbi, type, 1);
  2305. curseg->alloc_type = SSR;
  2306. curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
  2307. sum_page = f2fs_get_sum_page(sbi, new_segno);
  2308. if (IS_ERR(sum_page)) {
  2309. /* GC won't be able to use stale summary pages by cp_error */
  2310. memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
  2311. return;
  2312. }
  2313. sum_node = (struct f2fs_summary_block *)page_address(sum_page);
  2314. memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
  2315. f2fs_put_page(sum_page, 1);
  2316. }
  2317. static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
  2318. int alloc_mode, unsigned long long age);
  2319. static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
  2320. int target_type, int alloc_mode,
  2321. unsigned long long age)
  2322. {
  2323. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2324. curseg->seg_type = target_type;
  2325. if (get_ssr_segment(sbi, type, alloc_mode, age)) {
  2326. struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
  2327. curseg->seg_type = se->type;
  2328. change_curseg(sbi, type);
  2329. } else {
  2330. /* allocate cold segment by default */
  2331. curseg->seg_type = CURSEG_COLD_DATA;
  2332. new_curseg(sbi, type, true);
  2333. }
  2334. stat_inc_seg_type(sbi, curseg);
  2335. }
  2336. static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
  2337. {
  2338. struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
  2339. if (!sbi->am.atgc_enabled)
  2340. return;
  2341. f2fs_down_read(&SM_I(sbi)->curseg_lock);
  2342. mutex_lock(&curseg->curseg_mutex);
  2343. down_write(&SIT_I(sbi)->sentry_lock);
  2344. get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
  2345. up_write(&SIT_I(sbi)->sentry_lock);
  2346. mutex_unlock(&curseg->curseg_mutex);
  2347. f2fs_up_read(&SM_I(sbi)->curseg_lock);
  2348. }
  2349. void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
  2350. {
  2351. __f2fs_init_atgc_curseg(sbi);
  2352. }
  2353. static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
  2354. {
  2355. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2356. mutex_lock(&curseg->curseg_mutex);
  2357. if (!curseg->inited)
  2358. goto out;
  2359. if (get_valid_blocks(sbi, curseg->segno, false)) {
  2360. write_sum_page(sbi, curseg->sum_blk,
  2361. GET_SUM_BLOCK(sbi, curseg->segno));
  2362. } else {
  2363. mutex_lock(&DIRTY_I(sbi)->seglist_lock);
  2364. __set_test_and_free(sbi, curseg->segno, true);
  2365. mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
  2366. }
  2367. out:
  2368. mutex_unlock(&curseg->curseg_mutex);
  2369. }
  2370. void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
  2371. {
  2372. __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
  2373. if (sbi->am.atgc_enabled)
  2374. __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
  2375. }
  2376. static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
  2377. {
  2378. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2379. mutex_lock(&curseg->curseg_mutex);
  2380. if (!curseg->inited)
  2381. goto out;
  2382. if (get_valid_blocks(sbi, curseg->segno, false))
  2383. goto out;
  2384. mutex_lock(&DIRTY_I(sbi)->seglist_lock);
  2385. __set_test_and_inuse(sbi, curseg->segno);
  2386. mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
  2387. out:
  2388. mutex_unlock(&curseg->curseg_mutex);
  2389. }
  2390. void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
  2391. {
  2392. __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
  2393. if (sbi->am.atgc_enabled)
  2394. __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
  2395. }
  2396. static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
  2397. int alloc_mode, unsigned long long age)
  2398. {
  2399. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2400. unsigned segno = NULL_SEGNO;
  2401. unsigned short seg_type = curseg->seg_type;
  2402. int i, cnt;
  2403. bool reversed = false;
  2404. sanity_check_seg_type(sbi, seg_type);
  2405. /* f2fs_need_SSR() already forces to do this */
  2406. if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
  2407. curseg->next_segno = segno;
  2408. return 1;
  2409. }
  2410. /* For node segments, let's do SSR more intensively */
  2411. if (IS_NODESEG(seg_type)) {
  2412. if (seg_type >= CURSEG_WARM_NODE) {
  2413. reversed = true;
  2414. i = CURSEG_COLD_NODE;
  2415. } else {
  2416. i = CURSEG_HOT_NODE;
  2417. }
  2418. cnt = NR_CURSEG_NODE_TYPE;
  2419. } else {
  2420. if (seg_type >= CURSEG_WARM_DATA) {
  2421. reversed = true;
  2422. i = CURSEG_COLD_DATA;
  2423. } else {
  2424. i = CURSEG_HOT_DATA;
  2425. }
  2426. cnt = NR_CURSEG_DATA_TYPE;
  2427. }
  2428. for (; cnt-- > 0; reversed ? i-- : i++) {
  2429. if (i == seg_type)
  2430. continue;
  2431. if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
  2432. curseg->next_segno = segno;
  2433. return 1;
  2434. }
  2435. }
  2436. /* find valid_blocks=0 in dirty list */
  2437. if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
  2438. segno = get_free_segment(sbi);
  2439. if (segno != NULL_SEGNO) {
  2440. curseg->next_segno = segno;
  2441. return 1;
  2442. }
  2443. }
  2444. return 0;
  2445. }
  2446. static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
  2447. {
  2448. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2449. if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
  2450. curseg->seg_type == CURSEG_WARM_NODE)
  2451. return true;
  2452. if (curseg->alloc_type == LFS &&
  2453. is_next_segment_free(sbi, curseg, type) &&
  2454. likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
  2455. return true;
  2456. if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
  2457. return true;
  2458. return false;
  2459. }
  2460. void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
  2461. unsigned int start, unsigned int end)
  2462. {
  2463. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2464. unsigned int segno;
  2465. f2fs_down_read(&SM_I(sbi)->curseg_lock);
  2466. mutex_lock(&curseg->curseg_mutex);
  2467. down_write(&SIT_I(sbi)->sentry_lock);
  2468. segno = CURSEG_I(sbi, type)->segno;
  2469. if (segno < start || segno > end)
  2470. goto unlock;
  2471. if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
  2472. change_curseg(sbi, type);
  2473. else
  2474. new_curseg(sbi, type, true);
  2475. stat_inc_seg_type(sbi, curseg);
  2476. locate_dirty_segment(sbi, segno);
  2477. unlock:
  2478. up_write(&SIT_I(sbi)->sentry_lock);
  2479. if (segno != curseg->segno)
  2480. f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
  2481. type, segno, curseg->segno);
  2482. mutex_unlock(&curseg->curseg_mutex);
  2483. f2fs_up_read(&SM_I(sbi)->curseg_lock);
  2484. }
  2485. static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
  2486. bool new_sec, bool force)
  2487. {
  2488. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2489. unsigned int old_segno;
  2490. if (!force && curseg->inited &&
  2491. !curseg->next_blkoff &&
  2492. !get_valid_blocks(sbi, curseg->segno, new_sec) &&
  2493. !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
  2494. return;
  2495. old_segno = curseg->segno;
  2496. new_curseg(sbi, type, true);
  2497. stat_inc_seg_type(sbi, curseg);
  2498. locate_dirty_segment(sbi, old_segno);
  2499. }
  2500. void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
  2501. {
  2502. f2fs_down_read(&SM_I(sbi)->curseg_lock);
  2503. down_write(&SIT_I(sbi)->sentry_lock);
  2504. __allocate_new_segment(sbi, type, true, force);
  2505. up_write(&SIT_I(sbi)->sentry_lock);
  2506. f2fs_up_read(&SM_I(sbi)->curseg_lock);
  2507. }
  2508. void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
  2509. {
  2510. int i;
  2511. f2fs_down_read(&SM_I(sbi)->curseg_lock);
  2512. down_write(&SIT_I(sbi)->sentry_lock);
  2513. for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
  2514. __allocate_new_segment(sbi, i, false, false);
  2515. up_write(&SIT_I(sbi)->sentry_lock);
  2516. f2fs_up_read(&SM_I(sbi)->curseg_lock);
  2517. }
  2518. bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
  2519. struct cp_control *cpc)
  2520. {
  2521. __u64 trim_start = cpc->trim_start;
  2522. bool has_candidate = false;
  2523. down_write(&SIT_I(sbi)->sentry_lock);
  2524. for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
  2525. if (add_discard_addrs(sbi, cpc, true)) {
  2526. has_candidate = true;
  2527. break;
  2528. }
  2529. }
  2530. up_write(&SIT_I(sbi)->sentry_lock);
  2531. cpc->trim_start = trim_start;
  2532. return has_candidate;
  2533. }
  2534. static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
  2535. struct discard_policy *dpolicy,
  2536. unsigned int start, unsigned int end)
  2537. {
  2538. struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
  2539. struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
  2540. struct rb_node **insert_p = NULL, *insert_parent = NULL;
  2541. struct discard_cmd *dc;
  2542. struct blk_plug plug;
  2543. int issued;
  2544. unsigned int trimmed = 0;
  2545. next:
  2546. issued = 0;
  2547. mutex_lock(&dcc->cmd_lock);
  2548. if (unlikely(dcc->rbtree_check))
  2549. f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
  2550. dc = __lookup_discard_cmd_ret(&dcc->root, start,
  2551. &prev_dc, &next_dc, &insert_p, &insert_parent);
  2552. if (!dc)
  2553. dc = next_dc;
  2554. blk_start_plug(&plug);
  2555. while (dc && dc->di.lstart <= end) {
  2556. struct rb_node *node;
  2557. int err = 0;
  2558. if (dc->di.len < dpolicy->granularity)
  2559. goto skip;
  2560. if (dc->state != D_PREP) {
  2561. list_move_tail(&dc->list, &dcc->fstrim_list);
  2562. goto skip;
  2563. }
  2564. err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
  2565. if (issued >= dpolicy->max_requests) {
  2566. start = dc->di.lstart + dc->di.len;
  2567. if (err)
  2568. __remove_discard_cmd(sbi, dc);
  2569. blk_finish_plug(&plug);
  2570. mutex_unlock(&dcc->cmd_lock);
  2571. trimmed += __wait_all_discard_cmd(sbi, NULL);
  2572. f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
  2573. goto next;
  2574. }
  2575. skip:
  2576. node = rb_next(&dc->rb_node);
  2577. if (err)
  2578. __remove_discard_cmd(sbi, dc);
  2579. dc = rb_entry_safe(node, struct discard_cmd, rb_node);
  2580. if (fatal_signal_pending(current))
  2581. break;
  2582. }
  2583. blk_finish_plug(&plug);
  2584. mutex_unlock(&dcc->cmd_lock);
  2585. return trimmed;
  2586. }
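/*
 * FITRIM implementation: translate the byte range into segments, write
 * a CP_DISCARD checkpoint to collect discard candidates and, when
 * runtime discard is not enabled, synchronously issue and wait for the
 * range, returning the number of trimmed bytes through @range->len.
 */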
  2587. int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
  2588. {
  2589. __u64 start = F2FS_BYTES_TO_BLK(range->start);
  2590. __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
  2591. unsigned int start_segno, end_segno;
  2592. block_t start_block, end_block;
  2593. struct cp_control cpc;
  2594. struct discard_policy dpolicy;
  2595. unsigned long long trimmed = 0;
  2596. int err = 0;
  2597. bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
  2598. if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
  2599. return -EINVAL;
  2600. if (end < MAIN_BLKADDR(sbi))
  2601. goto out;
  2602. if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
  2603. f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
  2604. return -EFSCORRUPTED;
  2605. }
  2606. /* start/end segment number in main_area */
  2607. start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
  2608. end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
  2609. GET_SEGNO(sbi, end);
  2610. if (need_align) {
  2611. start_segno = rounddown(start_segno, sbi->segs_per_sec);
  2612. end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
  2613. }
  2614. cpc.reason = CP_DISCARD;
  2615. cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
  2616. cpc.trim_start = start_segno;
  2617. cpc.trim_end = end_segno;
  2618. if (sbi->discard_blks == 0)
  2619. goto out;
  2620. f2fs_down_write(&sbi->gc_lock);
  2621. err = f2fs_write_checkpoint(sbi, &cpc);
  2622. f2fs_up_write(&sbi->gc_lock);
  2623. if (err)
  2624. goto out;
2625. /*
2626. * We queued discard candidates, but we don't need to wait for all of
2627. * them here: with the runtime discard option they will be issued
2628. * during idle time. Such a configuration relies on runtime discard
2629. * or periodic fstrim rather than this synchronous wait.
2630. */
  2631. if (f2fs_realtime_discard_enable(sbi))
  2632. goto out;
  2633. start_block = START_BLOCK(sbi, start_segno);
  2634. end_block = START_BLOCK(sbi, end_segno + 1);
  2635. __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
  2636. trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
  2637. start_block, end_block);
  2638. trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
  2639. start_block, end_block);
  2640. out:
  2641. if (!err)
  2642. range->len = F2FS_BLK_TO_BYTES(trimmed);
  2643. return err;
  2644. }
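/*
 * Map a write-lifetime hint onto a data log temperature:
 * WRITE_LIFE_SHORT -> hot, WRITE_LIFE_EXTREME -> cold, anything else
 * (including WRITE_LIFE_NOT_SET) -> warm.
 */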
  2645. int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
  2646. {
  2647. switch (hint) {
  2648. case WRITE_LIFE_SHORT:
  2649. return CURSEG_HOT_DATA;
  2650. case WRITE_LIFE_EXTREME:
  2651. return CURSEG_COLD_DATA;
  2652. default:
  2653. return CURSEG_WARM_DATA;
  2654. }
  2655. }
  2656. static int __get_segment_type_2(struct f2fs_io_info *fio)
  2657. {
  2658. if (fio->type == DATA)
  2659. return CURSEG_HOT_DATA;
  2660. else
  2661. return CURSEG_HOT_NODE;
  2662. }
  2663. static int __get_segment_type_4(struct f2fs_io_info *fio)
  2664. {
  2665. if (fio->type == DATA) {
  2666. struct inode *inode = fio->page->mapping->host;
  2667. if (S_ISDIR(inode->i_mode))
  2668. return CURSEG_HOT_DATA;
  2669. else
  2670. return CURSEG_COLD_DATA;
  2671. } else {
  2672. if (IS_DNODE(fio->page) && is_cold_node(fio->page))
  2673. return CURSEG_WARM_NODE;
  2674. else
  2675. return CURSEG_COLD_NODE;
  2676. }
  2677. }
  2678. static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
  2679. {
  2680. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  2681. struct extent_info ei = {};
  2682. if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
  2683. if (!ei.age)
  2684. return NO_CHECK_TYPE;
  2685. if (ei.age <= sbi->hot_data_age_threshold)
  2686. return CURSEG_HOT_DATA;
  2687. if (ei.age <= sbi->warm_data_age_threshold)
  2688. return CURSEG_WARM_DATA;
  2689. return CURSEG_COLD_DATA;
  2690. }
  2691. return NO_CHECK_TYPE;
  2692. }
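/*
 * Temperature policy for six active logs: aligned/pinned writes use the
 * pinned cold log; GC'ed data goes to ATGC when enabled (cold
 * otherwise); cold or compressed files stay cold; the age extent cache
 * and hot-file flags may promote data to hot or warm; the write hint
 * decides the rest. Direct node pages of cold files go to the warm node
 * log, other direct nodes to hot, and indirect nodes to cold.
 */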
  2693. static int __get_segment_type_6(struct f2fs_io_info *fio)
  2694. {
  2695. if (fio->type == DATA) {
  2696. struct inode *inode = fio->page->mapping->host;
  2697. int type;
  2698. if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
  2699. return CURSEG_COLD_DATA_PINNED;
  2700. if (page_private_gcing(fio->page)) {
  2701. if (fio->sbi->am.atgc_enabled &&
  2702. (fio->io_type == FS_DATA_IO) &&
  2703. (fio->sbi->gc_mode != GC_URGENT_HIGH))
  2704. return CURSEG_ALL_DATA_ATGC;
  2705. else
  2706. return CURSEG_COLD_DATA;
  2707. }
  2708. if (file_is_cold(inode) || f2fs_need_compress_data(inode))
  2709. return CURSEG_COLD_DATA;
  2710. type = __get_age_segment_type(inode, fio->page->index);
  2711. if (type != NO_CHECK_TYPE)
  2712. return type;
  2713. if (file_is_hot(inode) ||
  2714. is_inode_flag_set(inode, FI_HOT_DATA) ||
  2715. f2fs_is_cow_file(inode))
  2716. return CURSEG_HOT_DATA;
  2717. return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
  2718. } else {
  2719. if (IS_DNODE(fio->page))
  2720. return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
  2721. CURSEG_HOT_NODE;
  2722. return CURSEG_COLD_NODE;
  2723. }
  2724. }
  2725. static int __get_segment_type(struct f2fs_io_info *fio)
  2726. {
  2727. int type = 0;
  2728. switch (F2FS_OPTION(fio->sbi).active_logs) {
  2729. case 2:
  2730. type = __get_segment_type_2(fio);
  2731. break;
  2732. case 4:
  2733. type = __get_segment_type_4(fio);
  2734. break;
  2735. case 6:
  2736. type = __get_segment_type_6(fio);
  2737. break;
  2738. default:
  2739. f2fs_bug_on(fio->sbi, true);
  2740. }
  2741. if (IS_HOT(type))
  2742. fio->temp = HOT;
  2743. else if (IS_WARM(type))
  2744. fio->temp = WARM;
  2745. else
  2746. fio->temp = COLD;
  2747. return type;
  2748. }
  2749. static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
  2750. struct curseg_info *seg)
  2751. {
2752. /* To allocate block chunks in different sizes, use a random number */
  2753. if (--seg->fragment_remained_chunk > 0)
  2754. return;
  2755. seg->fragment_remained_chunk =
  2756. prandom_u32_max(sbi->max_fragment_chunk) + 1;
  2757. seg->next_blkoff +=
  2758. prandom_u32_max(sbi->max_fragment_hole) + 1;
  2759. }
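/*
 * Core block allocator: reserve the next free block of the chosen log,
 * record its summary entry, update mtime and SIT counts for both the
 * old and new block addresses, and switch to a new or SSR segment once
 * the current one is full (for ATGC the victim segment's type and mtime
 * pick the replacement). When @fio is supplied, the request is queued
 * on the per-temperature write list for in-order submission.
 */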
  2760. void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
  2761. block_t old_blkaddr, block_t *new_blkaddr,
  2762. struct f2fs_summary *sum, int type,
  2763. struct f2fs_io_info *fio)
  2764. {
  2765. struct sit_info *sit_i = SIT_I(sbi);
  2766. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2767. unsigned long long old_mtime;
  2768. bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
  2769. struct seg_entry *se = NULL;
  2770. bool segment_full = false;
  2771. f2fs_down_read(&SM_I(sbi)->curseg_lock);
  2772. mutex_lock(&curseg->curseg_mutex);
  2773. down_write(&sit_i->sentry_lock);
  2774. if (from_gc) {
  2775. f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
  2776. se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
  2777. sanity_check_seg_type(sbi, se->type);
  2778. f2fs_bug_on(sbi, IS_NODESEG(se->type));
  2779. }
  2780. *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
  2781. f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
  2782. f2fs_wait_discard_bio(sbi, *new_blkaddr);
  2783. curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
  2784. if (curseg->alloc_type == SSR) {
  2785. curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
  2786. } else {
  2787. curseg->next_blkoff++;
  2788. if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
  2789. f2fs_randomize_chunk(sbi, curseg);
  2790. }
  2791. if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
  2792. segment_full = true;
  2793. stat_inc_block_count(sbi, curseg);
  2794. if (from_gc) {
  2795. old_mtime = get_segment_mtime(sbi, old_blkaddr);
  2796. } else {
  2797. update_segment_mtime(sbi, old_blkaddr, 0);
  2798. old_mtime = 0;
  2799. }
  2800. update_segment_mtime(sbi, *new_blkaddr, old_mtime);
  2801. /*
  2802. * SIT information should be updated before segment allocation,
2803. * since SSR needs the latest valid block information.
  2804. */
  2805. update_sit_entry(sbi, *new_blkaddr, 1);
  2806. if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
  2807. update_sit_entry(sbi, old_blkaddr, -1);
  2808. /*
  2809. * If the current segment is full, flush it out and replace it with a
  2810. * new segment.
  2811. */
  2812. if (segment_full) {
  2813. if (from_gc) {
  2814. get_atssr_segment(sbi, type, se->type,
  2815. AT_SSR, se->mtime);
  2816. } else {
  2817. if (need_new_seg(sbi, type))
  2818. new_curseg(sbi, type, false);
  2819. else
  2820. change_curseg(sbi, type);
  2821. stat_inc_seg_type(sbi, curseg);
  2822. }
  2823. }
  2824. /*
2825. * Segment dirty status should be updated after segment allocation,
2826. * so we only need to update it once, after the previous segment has
2827. * been closed.
  2828. */
  2829. locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
  2830. locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
  2831. if (IS_DATASEG(type))
  2832. atomic64_inc(&sbi->allocated_data_blocks);
  2833. up_write(&sit_i->sentry_lock);
  2834. if (page && IS_NODESEG(type)) {
  2835. fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
  2836. f2fs_inode_chksum_set(sbi, page);
  2837. }
  2838. if (fio) {
  2839. struct f2fs_bio_info *io;
  2840. if (F2FS_IO_ALIGNED(sbi))
  2841. fio->retry = 0;
  2842. INIT_LIST_HEAD(&fio->list);
  2843. fio->in_list = 1;
  2844. io = sbi->write_io[fio->type] + fio->temp;
  2845. spin_lock(&io->io_lock);
  2846. list_add_tail(&fio->list, &io->io_list);
  2847. spin_unlock(&io->io_lock);
  2848. }
  2849. mutex_unlock(&curseg->curseg_mutex);
  2850. f2fs_up_read(&SM_I(sbi)->curseg_lock);
  2851. }
  2852. void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
  2853. block_t blkaddr, unsigned int blkcnt)
  2854. {
  2855. if (!f2fs_is_multi_device(sbi))
  2856. return;
  2857. while (1) {
  2858. unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
  2859. unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
  2860. /* update device state for fsync */
  2861. f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
  2862. /* update device state for checkpoint */
  2863. if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
  2864. spin_lock(&sbi->dev_lock);
  2865. f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
  2866. spin_unlock(&sbi->dev_lock);
  2867. }
  2868. if (blkcnt <= blks)
  2869. break;
  2870. blkcnt -= blks;
  2871. blkaddr += blks;
  2872. }
  2873. }
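/*
 * Allocate a new block for @fio according to its computed temperature
 * and submit the page. In LFS mode, cold data keeps submission order
 * via io_order_lock, and a retried submission re-allocates starting
 * from the block that was just reserved.
 */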
  2874. static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
  2875. {
  2876. int type = __get_segment_type(fio);
  2877. bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
  2878. if (keep_order)
  2879. f2fs_down_read(&fio->sbi->io_order_lock);
  2880. reallocate:
  2881. f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
  2882. &fio->new_blkaddr, sum, type, fio);
  2883. if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) {
  2884. invalidate_mapping_pages(META_MAPPING(fio->sbi),
  2885. fio->old_blkaddr, fio->old_blkaddr);
  2886. f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr);
  2887. }
2888. /* write out the dirty page to the bdev */
  2889. f2fs_submit_page_write(fio);
  2890. if (fio->retry) {
  2891. fio->old_blkaddr = fio->new_blkaddr;
  2892. goto reallocate;
  2893. }
  2894. f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
  2895. if (keep_order)
  2896. f2fs_up_read(&fio->sbi->io_order_lock);
  2897. }
  2898. void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
  2899. enum iostat_type io_type)
  2900. {
  2901. struct f2fs_io_info fio = {
  2902. .sbi = sbi,
  2903. .type = META,
  2904. .temp = HOT,
  2905. .op = REQ_OP_WRITE,
  2906. .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
  2907. .old_blkaddr = page->index,
  2908. .new_blkaddr = page->index,
  2909. .page = page,
  2910. .encrypted_page = NULL,
  2911. .in_list = 0,
  2912. };
  2913. if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
  2914. fio.op_flags &= ~REQ_META;
  2915. set_page_writeback(page);
  2916. f2fs_submit_page_write(&fio);
  2917. stat_inc_meta_count(sbi, page->index);
  2918. f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
  2919. }
  2920. void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
  2921. {
  2922. struct f2fs_summary sum;
  2923. set_summary(&sum, nid, 0, 0);
  2924. do_write_page(&sum, fio);
  2925. f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
  2926. }
  2927. void f2fs_outplace_write_data(struct dnode_of_data *dn,
  2928. struct f2fs_io_info *fio)
  2929. {
  2930. struct f2fs_sb_info *sbi = fio->sbi;
  2931. struct f2fs_summary sum;
  2932. f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
  2933. if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
  2934. f2fs_update_age_extent_cache(dn);
  2935. set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
  2936. do_write_page(&sum, fio);
  2937. f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
  2938. f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
  2939. }
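/*
 * In-place update path: rewrite the page at its existing block address.
 * The segment type is recomputed only to derive the write temperature,
 * and the target segment must still be a data segment; otherwise the
 * filesystem is flagged for fsck and the cached bio is failed.
 */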
  2940. int f2fs_inplace_write_data(struct f2fs_io_info *fio)
  2941. {
  2942. int err;
  2943. struct f2fs_sb_info *sbi = fio->sbi;
  2944. unsigned int segno;
  2945. fio->new_blkaddr = fio->old_blkaddr;
  2946. /* i/o temperature is needed for passing down write hints */
  2947. __get_segment_type(fio);
  2948. segno = GET_SEGNO(sbi, fio->new_blkaddr);
  2949. if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
  2950. set_sbi_flag(sbi, SBI_NEED_FSCK);
  2951. f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
  2952. __func__, segno);
  2953. err = -EFSCORRUPTED;
  2954. f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
  2955. goto drop_bio;
  2956. }
  2957. if (f2fs_cp_error(sbi)) {
  2958. err = -EIO;
  2959. goto drop_bio;
  2960. }
  2961. if (fio->post_read)
  2962. invalidate_mapping_pages(META_MAPPING(sbi),
  2963. fio->new_blkaddr, fio->new_blkaddr);
  2964. stat_inc_inplace_blocks(fio->sbi);
  2965. if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
  2966. err = f2fs_merge_page_bio(fio);
  2967. else
  2968. err = f2fs_submit_page_bio(fio);
  2969. if (!err) {
  2970. f2fs_update_device_state(fio->sbi, fio->ino,
  2971. fio->new_blkaddr, 1);
  2972. f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
  2973. fio->io_type, F2FS_BLKSIZE);
  2974. }
  2975. return err;
  2976. drop_bio:
  2977. if (fio->bio && *(fio->bio)) {
  2978. struct bio *bio = *(fio->bio);
  2979. bio->bi_status = BLK_STS_IOERR;
  2980. bio_endio(bio);
  2981. *(fio->bio) = NULL;
  2982. }
  2983. return err;
  2984. }
  2985. static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
  2986. unsigned int segno)
  2987. {
  2988. int i;
  2989. for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
  2990. if (CURSEG_I(sbi, i)->segno == segno)
  2991. break;
  2992. }
  2993. return i;
  2994. }
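/*
 * Rewrite the summary for @new_blkaddr in the appropriate current
 * segment, used by recovery and block-migration paths. The target log
 * is derived from the on-disk segment type (or from the live curseg
 * when the segment is active), SIT entries for both addresses are
 * updated, and the original curseg position is restored when
 * @recover_curseg is set.
 */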
  2995. void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
  2996. block_t old_blkaddr, block_t new_blkaddr,
  2997. bool recover_curseg, bool recover_newaddr,
  2998. bool from_gc)
  2999. {
  3000. struct sit_info *sit_i = SIT_I(sbi);
  3001. struct curseg_info *curseg;
  3002. unsigned int segno, old_cursegno;
  3003. struct seg_entry *se;
  3004. int type;
  3005. unsigned short old_blkoff;
  3006. unsigned char old_alloc_type;
  3007. segno = GET_SEGNO(sbi, new_blkaddr);
  3008. se = get_seg_entry(sbi, segno);
  3009. type = se->type;
  3010. f2fs_down_write(&SM_I(sbi)->curseg_lock);
  3011. if (!recover_curseg) {
  3012. /* for recovery flow */
  3013. if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
  3014. if (old_blkaddr == NULL_ADDR)
  3015. type = CURSEG_COLD_DATA;
  3016. else
  3017. type = CURSEG_WARM_DATA;
  3018. }
  3019. } else {
  3020. if (IS_CURSEG(sbi, segno)) {
3021. /* se->type may have changed due to SSR allocation */
  3022. type = __f2fs_get_curseg(sbi, segno);
  3023. f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
  3024. } else {
  3025. type = CURSEG_WARM_DATA;
  3026. }
  3027. }
  3028. f2fs_bug_on(sbi, !IS_DATASEG(type));
  3029. curseg = CURSEG_I(sbi, type);
  3030. mutex_lock(&curseg->curseg_mutex);
  3031. down_write(&sit_i->sentry_lock);
  3032. old_cursegno = curseg->segno;
  3033. old_blkoff = curseg->next_blkoff;
  3034. old_alloc_type = curseg->alloc_type;
  3035. /* change the current segment */
  3036. if (segno != curseg->segno) {
  3037. curseg->next_segno = segno;
  3038. change_curseg(sbi, type);
  3039. }
  3040. curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
  3041. curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
  3042. if (!recover_curseg || recover_newaddr) {
  3043. if (!from_gc)
  3044. update_segment_mtime(sbi, new_blkaddr, 0);
  3045. update_sit_entry(sbi, new_blkaddr, 1);
  3046. }
  3047. if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
  3048. invalidate_mapping_pages(META_MAPPING(sbi),
  3049. old_blkaddr, old_blkaddr);
  3050. f2fs_invalidate_compress_page(sbi, old_blkaddr);
  3051. if (!from_gc)
  3052. update_segment_mtime(sbi, old_blkaddr, 0);
  3053. update_sit_entry(sbi, old_blkaddr, -1);
  3054. }
  3055. locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
  3056. locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
  3057. locate_dirty_segment(sbi, old_cursegno);
  3058. if (recover_curseg) {
  3059. if (old_cursegno != curseg->segno) {
  3060. curseg->next_segno = old_cursegno;
  3061. change_curseg(sbi, type);
  3062. }
  3063. curseg->next_blkoff = old_blkoff;
  3064. curseg->alloc_type = old_alloc_type;
  3065. }
  3066. up_write(&sit_i->sentry_lock);
  3067. mutex_unlock(&curseg->curseg_mutex);
  3068. f2fs_up_write(&SM_I(sbi)->curseg_lock);
  3069. }
  3070. void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
  3071. block_t old_addr, block_t new_addr,
  3072. unsigned char version, bool recover_curseg,
  3073. bool recover_newaddr)
  3074. {
  3075. struct f2fs_summary sum;
  3076. set_summary(&sum, dn->nid, dn->ofs_in_node, version);
  3077. f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
  3078. recover_curseg, recover_newaddr, false);
  3079. f2fs_update_data_blkaddr(dn, new_addr);
  3080. }
  3081. void f2fs_wait_on_page_writeback(struct page *page,
  3082. enum page_type type, bool ordered, bool locked)
  3083. {
  3084. if (PageWriteback(page)) {
  3085. struct f2fs_sb_info *sbi = F2FS_P_SB(page);
  3086. /* submit cached LFS IO */
  3087. f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
  3088. /* submit cached IPU IO */
  3089. f2fs_submit_merged_ipu_write(sbi, NULL, page);
  3090. if (ordered) {
  3091. wait_on_page_writeback(page);
  3092. f2fs_bug_on(sbi, locked && PageWriteback(page));
  3093. } else {
  3094. wait_for_stable_page(page);
  3095. }
  3096. }
  3097. }
  3098. void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
  3099. {
  3100. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3101. struct page *cpage;
  3102. if (!f2fs_post_read_required(inode))
  3103. return;
  3104. if (!__is_valid_data_blkaddr(blkaddr))
  3105. return;
  3106. cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
  3107. if (cpage) {
  3108. f2fs_wait_on_page_writeback(cpage, DATA, true, true);
  3109. f2fs_put_page(cpage, 1);
  3110. }
  3111. }
  3112. void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
  3113. block_t len)
  3114. {
  3115. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3116. block_t i;
  3117. if (!f2fs_post_read_required(inode))
  3118. return;
  3119. for (i = 0; i < len; i++)
  3120. f2fs_wait_on_block_writeback(inode, blkaddr + i);
  3121. invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
  3122. }
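/*
 * Restore the compacted summary area written at checkpoint time: the
 * first block holds the NAT and SIT journals, followed by the packed
 * summary entries of the three data logs, which may spill over into
 * additional meta pages.
 */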
  3123. static int read_compacted_summaries(struct f2fs_sb_info *sbi)
  3124. {
  3125. struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
  3126. struct curseg_info *seg_i;
  3127. unsigned char *kaddr;
  3128. struct page *page;
  3129. block_t start;
  3130. int i, j, offset;
  3131. start = start_sum_block(sbi);
  3132. page = f2fs_get_meta_page(sbi, start++);
  3133. if (IS_ERR(page))
  3134. return PTR_ERR(page);
  3135. kaddr = (unsigned char *)page_address(page);
  3136. /* Step 1: restore nat cache */
  3137. seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
  3138. memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
  3139. /* Step 2: restore sit cache */
  3140. seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
  3141. memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
  3142. offset = 2 * SUM_JOURNAL_SIZE;
  3143. /* Step 3: restore summary entries */
  3144. for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
  3145. unsigned short blk_off;
  3146. unsigned int segno;
  3147. seg_i = CURSEG_I(sbi, i);
  3148. segno = le32_to_cpu(ckpt->cur_data_segno[i]);
  3149. blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
  3150. seg_i->next_segno = segno;
  3151. reset_curseg(sbi, i, 0);
  3152. seg_i->alloc_type = ckpt->alloc_type[i];
  3153. seg_i->next_blkoff = blk_off;
  3154. if (seg_i->alloc_type == SSR)
  3155. blk_off = sbi->blocks_per_seg;
  3156. for (j = 0; j < blk_off; j++) {
  3157. struct f2fs_summary *s;
  3158. s = (struct f2fs_summary *)(kaddr + offset);
  3159. seg_i->sum_blk->entries[j] = *s;
  3160. offset += SUMMARY_SIZE;
  3161. if (offset + SUMMARY_SIZE <= PAGE_SIZE -
  3162. SUM_FOOTER_SIZE)
  3163. continue;
  3164. f2fs_put_page(page, 1);
  3165. page = NULL;
  3166. page = f2fs_get_meta_page(sbi, start++);
  3167. if (IS_ERR(page))
  3168. return PTR_ERR(page);
  3169. kaddr = (unsigned char *)page_address(page);
  3170. offset = 0;
  3171. }
  3172. }
  3173. f2fs_put_page(page, 1);
  3174. return 0;
  3175. }
  3176. static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
  3177. {
  3178. struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
  3179. struct f2fs_summary_block *sum;
  3180. struct curseg_info *curseg;
  3181. struct page *new;
  3182. unsigned short blk_off;
  3183. unsigned int segno = 0;
  3184. block_t blk_addr = 0;
  3185. int err = 0;
  3186. /* get segment number and block addr */
  3187. if (IS_DATASEG(type)) {
  3188. segno = le32_to_cpu(ckpt->cur_data_segno[type]);
  3189. blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
  3190. CURSEG_HOT_DATA]);
  3191. if (__exist_node_summaries(sbi))
  3192. blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
  3193. else
  3194. blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
  3195. } else {
  3196. segno = le32_to_cpu(ckpt->cur_node_segno[type -
  3197. CURSEG_HOT_NODE]);
  3198. blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
  3199. CURSEG_HOT_NODE]);
  3200. if (__exist_node_summaries(sbi))
  3201. blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
  3202. type - CURSEG_HOT_NODE);
  3203. else
  3204. blk_addr = GET_SUM_BLOCK(sbi, segno);
  3205. }
  3206. new = f2fs_get_meta_page(sbi, blk_addr);
  3207. if (IS_ERR(new))
  3208. return PTR_ERR(new);
  3209. sum = (struct f2fs_summary_block *)page_address(new);
  3210. if (IS_NODESEG(type)) {
  3211. if (__exist_node_summaries(sbi)) {
  3212. struct f2fs_summary *ns = &sum->entries[0];
  3213. int i;
  3214. for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
  3215. ns->version = 0;
  3216. ns->ofs_in_node = 0;
  3217. }
  3218. } else {
  3219. err = f2fs_restore_node_summary(sbi, segno, sum);
  3220. if (err)
  3221. goto out;
  3222. }
  3223. }
3224. /* set the uncompleted segment as the curseg */
  3225. curseg = CURSEG_I(sbi, type);
  3226. mutex_lock(&curseg->curseg_mutex);
  3227. /* update journal info */
  3228. down_write(&curseg->journal_rwsem);
  3229. memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
  3230. up_write(&curseg->journal_rwsem);
  3231. memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
  3232. memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
  3233. curseg->next_segno = segno;
  3234. reset_curseg(sbi, type, 0);
  3235. curseg->alloc_type = ckpt->alloc_type[type];
  3236. curseg->next_blkoff = blk_off;
  3237. mutex_unlock(&curseg->curseg_mutex);
  3238. out:
  3239. f2fs_put_page(new, 1);
  3240. return err;
  3241. }
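/*
 * Restore all current segments from the checkpoint: compacted data
 * summaries first when CP_COMPACT_SUM_FLAG is set, then the normal
 * per-log summary blocks, finishing with a sanity check on the number
 * of NAT/SIT journal entries.
 */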
  3242. static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
  3243. {
  3244. struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
  3245. struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
  3246. int type = CURSEG_HOT_DATA;
  3247. int err;
  3248. if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
  3249. int npages = f2fs_npages_for_summary_flush(sbi, true);
  3250. if (npages >= 2)
  3251. f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
  3252. META_CP, true);
  3253. /* restore for compacted data summary */
  3254. err = read_compacted_summaries(sbi);
  3255. if (err)
  3256. return err;
  3257. type = CURSEG_HOT_NODE;
  3258. }
  3259. if (__exist_node_summaries(sbi))
  3260. f2fs_ra_meta_pages(sbi,
  3261. sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
  3262. NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
  3263. for (; type <= CURSEG_COLD_NODE; type++) {
  3264. err = read_normal_summaries(sbi, type);
  3265. if (err)
  3266. return err;
  3267. }
  3268. /* sanity check for summary blocks */
  3269. if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
  3270. sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
  3271. f2fs_err(sbi, "invalid journal entries nats %u sits %u",
  3272. nats_in_cursum(nat_j), sits_in_cursum(sit_j));
  3273. return -EINVAL;
  3274. }
  3275. return 0;
  3276. }
  3277. static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
  3278. {
  3279. struct page *page;
  3280. unsigned char *kaddr;
  3281. struct f2fs_summary *summary;
  3282. struct curseg_info *seg_i;
  3283. int written_size = 0;
  3284. int i, j;
  3285. page = f2fs_grab_meta_page(sbi, blkaddr++);
  3286. kaddr = (unsigned char *)page_address(page);
  3287. memset(kaddr, 0, PAGE_SIZE);
  3288. /* Step 1: write nat cache */
  3289. seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
  3290. memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
  3291. written_size += SUM_JOURNAL_SIZE;
  3292. /* Step 2: write sit cache */
  3293. seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
  3294. memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
  3295. written_size += SUM_JOURNAL_SIZE;
  3296. /* Step 3: write summary entries */
  3297. for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
  3298. seg_i = CURSEG_I(sbi, i);
  3299. for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
  3300. if (!page) {
  3301. page = f2fs_grab_meta_page(sbi, blkaddr++);
  3302. kaddr = (unsigned char *)page_address(page);
  3303. memset(kaddr, 0, PAGE_SIZE);
  3304. written_size = 0;
  3305. }
  3306. summary = (struct f2fs_summary *)(kaddr + written_size);
  3307. *summary = seg_i->sum_blk->entries[j];
  3308. written_size += SUMMARY_SIZE;
  3309. if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
  3310. SUM_FOOTER_SIZE)
  3311. continue;
  3312. set_page_dirty(page);
  3313. f2fs_put_page(page, 1);
  3314. page = NULL;
  3315. }
  3316. }
  3317. if (page) {
  3318. set_page_dirty(page);
  3319. f2fs_put_page(page, 1);
  3320. }
  3321. }
  3322. static void write_normal_summaries(struct f2fs_sb_info *sbi,
  3323. block_t blkaddr, int type)
  3324. {
  3325. int i, end;
  3326. if (IS_DATASEG(type))
  3327. end = type + NR_CURSEG_DATA_TYPE;
  3328. else
  3329. end = type + NR_CURSEG_NODE_TYPE;
  3330. for (i = type; i < end; i++)
  3331. write_current_sum_page(sbi, i, blkaddr + (i - type));
  3332. }
  3333. void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
  3334. {
  3335. if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
  3336. write_compacted_summaries(sbi, start_blk);
  3337. else
  3338. write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
  3339. }
  3340. void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
  3341. {
  3342. write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
  3343. }
  3344. int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
  3345. unsigned int val, int alloc)
  3346. {
  3347. int i;
  3348. if (type == NAT_JOURNAL) {
  3349. for (i = 0; i < nats_in_cursum(journal); i++) {
  3350. if (le32_to_cpu(nid_in_journal(journal, i)) == val)
  3351. return i;
  3352. }
  3353. if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
  3354. return update_nats_in_cursum(journal, 1);
  3355. } else if (type == SIT_JOURNAL) {
  3356. for (i = 0; i < sits_in_cursum(journal); i++)
  3357. if (le32_to_cpu(segno_in_journal(journal, i)) == val)
  3358. return i;
  3359. if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
  3360. return update_sits_in_cursum(journal, 1);
  3361. }
  3362. return -1;
  3363. }
  3364. static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
  3365. unsigned int segno)
  3366. {
  3367. return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
  3368. }
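/*
 * SIT blocks are kept in two copies: write the in-memory entries of the
 * set starting at @start into the "next" on-disk location, mark the
 * page dirty and flip the bitmap bit so future lookups use the new
 * copy.
 */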
  3369. static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
  3370. unsigned int start)
  3371. {
  3372. struct sit_info *sit_i = SIT_I(sbi);
  3373. struct page *page;
  3374. pgoff_t src_off, dst_off;
  3375. src_off = current_sit_addr(sbi, start);
  3376. dst_off = next_sit_addr(sbi, src_off);
  3377. page = f2fs_grab_meta_page(sbi, dst_off);
  3378. seg_info_to_sit_page(sbi, page, start);
  3379. set_page_dirty(page);
  3380. set_to_next_sit(sit_i, start);
  3381. return page;
  3382. }
  3383. static struct sit_entry_set *grab_sit_entry_set(void)
  3384. {
  3385. struct sit_entry_set *ses =
  3386. f2fs_kmem_cache_alloc(sit_entry_set_slab,
  3387. GFP_NOFS, true, NULL);
  3388. ses->entry_cnt = 0;
  3389. INIT_LIST_HEAD(&ses->set_list);
  3390. return ses;
  3391. }
  3392. static void release_sit_entry_set(struct sit_entry_set *ses)
  3393. {
  3394. list_del(&ses->set_list);
  3395. kmem_cache_free(sit_entry_set_slab, ses);
  3396. }
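/*
 * Keep the dirty-set list ordered by ascending entry count, so that
 * small sets are flushed to the SIT journal first while space remains
 * and the larger ones go straight to SIT pages.
 */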
  3397. static void adjust_sit_entry_set(struct sit_entry_set *ses,
  3398. struct list_head *head)
  3399. {
  3400. struct sit_entry_set *next = ses;
  3401. if (list_is_last(&ses->set_list, head))
  3402. return;
  3403. list_for_each_entry_continue(next, head, set_list)
  3404. if (ses->entry_cnt <= next->entry_cnt) {
  3405. list_move_tail(&ses->set_list, &next->set_list);
  3406. return;
  3407. }
  3408. list_move_tail(&ses->set_list, head);
  3409. }
  3410. static void add_sit_entry(unsigned int segno, struct list_head *head)
  3411. {
  3412. struct sit_entry_set *ses;
  3413. unsigned int start_segno = START_SEGNO(segno);
  3414. list_for_each_entry(ses, head, set_list) {
  3415. if (ses->start_segno == start_segno) {
  3416. ses->entry_cnt++;
  3417. adjust_sit_entry_set(ses, head);
  3418. return;
  3419. }
  3420. }
  3421. ses = grab_sit_entry_set();
  3422. ses->start_segno = start_segno;
  3423. ses->entry_cnt++;
  3424. list_add(&ses->set_list, head);
  3425. }
  3426. static void add_sits_in_set(struct f2fs_sb_info *sbi)
  3427. {
  3428. struct f2fs_sm_info *sm_info = SM_I(sbi);
  3429. struct list_head *set_list = &sm_info->sit_entry_set;
  3430. unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
  3431. unsigned int segno;
  3432. for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
  3433. add_sit_entry(segno, set_list);
  3434. }
  3435. static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
  3436. {
  3437. struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
  3438. struct f2fs_journal *journal = curseg->journal;
  3439. int i;
  3440. down_write(&curseg->journal_rwsem);
  3441. for (i = 0; i < sits_in_cursum(journal); i++) {
  3442. unsigned int segno;
  3443. bool dirtied;
  3444. segno = le32_to_cpu(segno_in_journal(journal, i));
  3445. dirtied = __mark_sit_entry_dirty(sbi, segno);
  3446. if (!dirtied)
  3447. add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
  3448. }
  3449. update_sits_in_cursum(journal, -i);
  3450. up_write(&curseg->journal_rwsem);
  3451. }
  3452. /*
3453. * Called during checkpoint (CP): flushes SIT entries, including the SIT
3454. * journal, and moves prefree segments to free segments.
  3455. */
  3456. void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  3457. {
  3458. struct sit_info *sit_i = SIT_I(sbi);
  3459. unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
  3460. struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
  3461. struct f2fs_journal *journal = curseg->journal;
  3462. struct sit_entry_set *ses, *tmp;
  3463. struct list_head *head = &SM_I(sbi)->sit_entry_set;
  3464. bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
  3465. struct seg_entry *se;
  3466. down_write(&sit_i->sentry_lock);
  3467. if (!sit_i->dirty_sentries)
  3468. goto out;
  3469. /*
3470. * temporarily add and account the sit entries from the dirty bitmap
3471. * in the sit entry set
  3472. */
  3473. add_sits_in_set(sbi);
  3474. /*
3475. * if there is not enough space in the journal to store all dirty sit
3476. * entries, remove every entry from the journal and account them in
3477. * the sit entry set instead.
  3478. */
  3479. if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
  3480. !to_journal)
  3481. remove_sits_in_journal(sbi);
  3482. /*
  3483. * there are two steps to flush sit entries:
  3484. * #1, flush sit entries to journal in current cold data summary block.
  3485. * #2, flush sit entries to sit page.
  3486. */
  3487. list_for_each_entry_safe(ses, tmp, head, set_list) {
  3488. struct page *page = NULL;
  3489. struct f2fs_sit_block *raw_sit = NULL;
  3490. unsigned int start_segno = ses->start_segno;
  3491. unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
  3492. (unsigned long)MAIN_SEGS(sbi));
  3493. unsigned int segno = start_segno;
  3494. if (to_journal &&
  3495. !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
  3496. to_journal = false;
  3497. if (to_journal) {
  3498. down_write(&curseg->journal_rwsem);
  3499. } else {
  3500. page = get_next_sit_page(sbi, start_segno);
  3501. raw_sit = page_address(page);
  3502. }
  3503. /* flush dirty sit entries in region of current sit set */
  3504. for_each_set_bit_from(segno, bitmap, end) {
  3505. int offset, sit_offset;
  3506. se = get_seg_entry(sbi, segno);
  3507. #ifdef CONFIG_F2FS_CHECK_FS
  3508. if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
  3509. SIT_VBLOCK_MAP_SIZE))
  3510. f2fs_bug_on(sbi, 1);
  3511. #endif
  3512. /* add discard candidates */
  3513. if (!(cpc->reason & CP_DISCARD)) {
  3514. cpc->trim_start = segno;
  3515. add_discard_addrs(sbi, cpc, false);
  3516. }
  3517. if (to_journal) {
  3518. offset = f2fs_lookup_journal_in_cursum(journal,
  3519. SIT_JOURNAL, segno, 1);
  3520. f2fs_bug_on(sbi, offset < 0);
  3521. segno_in_journal(journal, offset) =
  3522. cpu_to_le32(segno);
  3523. seg_info_to_raw_sit(se,
  3524. &sit_in_journal(journal, offset));
  3525. check_block_count(sbi, segno,
  3526. &sit_in_journal(journal, offset));
  3527. } else {
  3528. sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
  3529. seg_info_to_raw_sit(se,
  3530. &raw_sit->entries[sit_offset]);
  3531. check_block_count(sbi, segno,
  3532. &raw_sit->entries[sit_offset]);
  3533. }
  3534. __clear_bit(segno, bitmap);
  3535. sit_i->dirty_sentries--;
  3536. ses->entry_cnt--;
  3537. }
  3538. if (to_journal)
  3539. up_write(&curseg->journal_rwsem);
  3540. else
  3541. f2fs_put_page(page, 1);
  3542. f2fs_bug_on(sbi, ses->entry_cnt);
  3543. release_sit_entry_set(ses);
  3544. }
  3545. f2fs_bug_on(sbi, !list_empty(head));
  3546. f2fs_bug_on(sbi, sit_i->dirty_sentries);
  3547. out:
  3548. if (cpc->reason & CP_DISCARD) {
  3549. __u64 trim_start = cpc->trim_start;
  3550. for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
  3551. add_discard_addrs(sbi, cpc, false);
  3552. cpc->trim_start = trim_start;
  3553. }
  3554. up_write(&sit_i->sentry_lock);
  3555. set_prefree_as_free_segments(sbi);
  3556. }
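/*
 * Allocate and initialize the in-memory SIT: per-segment entries whose
 * validity/checkpoint (and optional discard) bitmaps are carved out of
 * one large allocation, per-section counters for large sections, and a
 * copy of the on-disk SIT bitmap taken from the checkpoint.
 */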
  3557. static int build_sit_info(struct f2fs_sb_info *sbi)
  3558. {
  3559. struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
  3560. struct sit_info *sit_i;
  3561. unsigned int sit_segs, start;
  3562. char *src_bitmap, *bitmap;
  3563. unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
  3564. unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
  3565. /* allocate memory for SIT information */
  3566. sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
  3567. if (!sit_i)
  3568. return -ENOMEM;
  3569. SM_I(sbi)->sit_info = sit_i;
  3570. sit_i->sentries =
  3571. f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
  3572. MAIN_SEGS(sbi)),
  3573. GFP_KERNEL);
  3574. if (!sit_i->sentries)
  3575. return -ENOMEM;
  3576. main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
  3577. sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
  3578. GFP_KERNEL);
  3579. if (!sit_i->dirty_sentries_bitmap)
  3580. return -ENOMEM;
  3581. #ifdef CONFIG_F2FS_CHECK_FS
  3582. bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
  3583. #else
  3584. bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
  3585. #endif
  3586. sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
  3587. if (!sit_i->bitmap)
  3588. return -ENOMEM;
  3589. bitmap = sit_i->bitmap;
  3590. for (start = 0; start < MAIN_SEGS(sbi); start++) {
  3591. sit_i->sentries[start].cur_valid_map = bitmap;
  3592. bitmap += SIT_VBLOCK_MAP_SIZE;
  3593. sit_i->sentries[start].ckpt_valid_map = bitmap;
  3594. bitmap += SIT_VBLOCK_MAP_SIZE;
  3595. #ifdef CONFIG_F2FS_CHECK_FS
  3596. sit_i->sentries[start].cur_valid_map_mir = bitmap;
  3597. bitmap += SIT_VBLOCK_MAP_SIZE;
  3598. #endif
  3599. if (discard_map) {
  3600. sit_i->sentries[start].discard_map = bitmap;
  3601. bitmap += SIT_VBLOCK_MAP_SIZE;
  3602. }
  3603. }
  3604. sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
  3605. if (!sit_i->tmp_map)
  3606. return -ENOMEM;
  3607. if (__is_large_section(sbi)) {
  3608. sit_i->sec_entries =
  3609. f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
  3610. MAIN_SECS(sbi)),
  3611. GFP_KERNEL);
  3612. if (!sit_i->sec_entries)
  3613. return -ENOMEM;
  3614. }
3615. /* get information related to SIT */
  3616. sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
3617. /* set up the SIT bitmap from the checkpoint pack */
  3618. sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
  3619. src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
  3620. sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
  3621. if (!sit_i->sit_bitmap)
  3622. return -ENOMEM;
  3623. #ifdef CONFIG_F2FS_CHECK_FS
  3624. sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
  3625. sit_bitmap_size, GFP_KERNEL);
  3626. if (!sit_i->sit_bitmap_mir)
  3627. return -ENOMEM;
  3628. sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
  3629. main_bitmap_size, GFP_KERNEL);
  3630. if (!sit_i->invalid_segmap)
  3631. return -ENOMEM;
  3632. #endif
  3633. sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
  3634. sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
  3635. sit_i->written_valid_blocks = 0;
  3636. sit_i->bitmap_size = sit_bitmap_size;
  3637. sit_i->dirty_sentries = 0;
  3638. sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
  3639. sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
  3640. sit_i->mounted_time = ktime_get_boottime_seconds();
  3641. init_rwsem(&sit_i->sentry_lock);
  3642. return 0;
  3643. }
  3644. static int build_free_segmap(struct f2fs_sb_info *sbi)
  3645. {
  3646. struct free_segmap_info *free_i;
  3647. unsigned int bitmap_size, sec_bitmap_size;
  3648. /* allocate memory for free segmap information */
  3649. free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
  3650. if (!free_i)
  3651. return -ENOMEM;
  3652. SM_I(sbi)->free_info = free_i;
  3653. bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
  3654. free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
  3655. if (!free_i->free_segmap)
  3656. return -ENOMEM;
  3657. sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
  3658. free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
  3659. if (!free_i->free_secmap)
  3660. return -ENOMEM;
  3661. /* set all segments as dirty temporarily */
  3662. memset(free_i->free_segmap, 0xff, bitmap_size);
  3663. memset(free_i->free_secmap, 0xff, sec_bitmap_size);
  3664. /* init free segmap information */
  3665. free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
  3666. free_i->free_segments = 0;
  3667. free_i->free_sections = 0;
  3668. spin_lock_init(&free_i->segmap_lock);
  3669. return 0;
  3670. }
  3671. static int build_curseg(struct f2fs_sb_info *sbi)
  3672. {
  3673. struct curseg_info *array;
  3674. int i;
  3675. array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
  3676. sizeof(*array)), GFP_KERNEL);
  3677. if (!array)
  3678. return -ENOMEM;
  3679. SM_I(sbi)->curseg_array = array;
  3680. for (i = 0; i < NO_CHECK_TYPE; i++) {
  3681. mutex_init(&array[i].curseg_mutex);
  3682. array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
  3683. if (!array[i].sum_blk)
  3684. return -ENOMEM;
  3685. init_rwsem(&array[i].journal_rwsem);
  3686. array[i].journal = f2fs_kzalloc(sbi,
  3687. sizeof(struct f2fs_journal), GFP_KERNEL);
  3688. if (!array[i].journal)
  3689. return -ENOMEM;
  3690. if (i < NR_PERSISTENT_LOG)
  3691. array[i].seg_type = CURSEG_HOT_DATA + i;
  3692. else if (i == CURSEG_COLD_DATA_PINNED)
  3693. array[i].seg_type = CURSEG_COLD_DATA;
  3694. else if (i == CURSEG_ALL_DATA_ATGC)
  3695. array[i].seg_type = CURSEG_COLD_DATA;
  3696. array[i].segno = NULL_SEGNO;
  3697. array[i].next_blkoff = 0;
  3698. array[i].inited = false;
  3699. }
  3700. return restore_curseg_summaries(sbi);
  3701. }
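/*
 * Populate the in-memory SIT at mount: read raw SIT blocks with
 * readahead, overlay newer entries from the SIT journal, rebuild the
 * discard maps, and cross-check the accumulated node/data block counts
 * against the checkpoint totals.
 */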
  3702. static int build_sit_entries(struct f2fs_sb_info *sbi)
  3703. {
  3704. struct sit_info *sit_i = SIT_I(sbi);
  3705. struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
  3706. struct f2fs_journal *journal = curseg->journal;
  3707. struct seg_entry *se;
  3708. struct f2fs_sit_entry sit;
  3709. int sit_blk_cnt = SIT_BLK_CNT(sbi);
  3710. unsigned int i, start, end;
  3711. unsigned int readed, start_blk = 0;
  3712. int err = 0;
  3713. block_t sit_valid_blocks[2] = {0, 0};
  3714. do {
  3715. readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
  3716. META_SIT, true);
  3717. start = start_blk * sit_i->sents_per_block;
  3718. end = (start_blk + readed) * sit_i->sents_per_block;
  3719. for (; start < end && start < MAIN_SEGS(sbi); start++) {
  3720. struct f2fs_sit_block *sit_blk;
  3721. struct page *page;
  3722. se = &sit_i->sentries[start];
  3723. page = get_current_sit_page(sbi, start);
  3724. if (IS_ERR(page))
  3725. return PTR_ERR(page);
  3726. sit_blk = (struct f2fs_sit_block *)page_address(page);
  3727. sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
  3728. f2fs_put_page(page, 1);
  3729. err = check_block_count(sbi, start, &sit);
  3730. if (err)
  3731. return err;
  3732. seg_info_from_raw_sit(se, &sit);
  3733. if (se->type >= NR_PERSISTENT_LOG) {
  3734. f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
  3735. se->type, start);
  3736. f2fs_handle_error(sbi,
  3737. ERROR_INCONSISTENT_SUM_TYPE);
  3738. return -EFSCORRUPTED;
  3739. }
  3740. sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
  3741. if (f2fs_block_unit_discard(sbi)) {
3742. /* build the discard map only once */
  3743. if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
  3744. memset(se->discard_map, 0xff,
  3745. SIT_VBLOCK_MAP_SIZE);
  3746. } else {
  3747. memcpy(se->discard_map,
  3748. se->cur_valid_map,
  3749. SIT_VBLOCK_MAP_SIZE);
  3750. sbi->discard_blks +=
  3751. sbi->blocks_per_seg -
  3752. se->valid_blocks;
  3753. }
  3754. }
  3755. if (__is_large_section(sbi))
  3756. get_sec_entry(sbi, start)->valid_blocks +=
  3757. se->valid_blocks;
  3758. }
  3759. start_blk += readed;
  3760. } while (start_blk < sit_blk_cnt);
  3761. down_read(&curseg->journal_rwsem);
  3762. for (i = 0; i < sits_in_cursum(journal); i++) {
  3763. unsigned int old_valid_blocks;
  3764. start = le32_to_cpu(segno_in_journal(journal, i));
  3765. if (start >= MAIN_SEGS(sbi)) {
  3766. f2fs_err(sbi, "Wrong journal entry on segno %u",
  3767. start);
  3768. err = -EFSCORRUPTED;
  3769. f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
  3770. break;
  3771. }
  3772. se = &sit_i->sentries[start];
  3773. sit = sit_in_journal(journal, i);
  3774. old_valid_blocks = se->valid_blocks;
  3775. sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
  3776. err = check_block_count(sbi, start, &sit);
  3777. if (err)
  3778. break;
  3779. seg_info_from_raw_sit(se, &sit);
  3780. if (se->type >= NR_PERSISTENT_LOG) {
  3781. f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
  3782. se->type, start);
  3783. err = -EFSCORRUPTED;
  3784. f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
  3785. break;
  3786. }
  3787. sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
  3788. if (f2fs_block_unit_discard(sbi)) {
  3789. if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
  3790. memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
  3791. } else {
  3792. memcpy(se->discard_map, se->cur_valid_map,
  3793. SIT_VBLOCK_MAP_SIZE);
  3794. sbi->discard_blks += old_valid_blocks;
  3795. sbi->discard_blks -= se->valid_blocks;
  3796. }
  3797. }
  3798. if (__is_large_section(sbi)) {
  3799. get_sec_entry(sbi, start)->valid_blocks +=
  3800. se->valid_blocks;
  3801. get_sec_entry(sbi, start)->valid_blocks -=
  3802. old_valid_blocks;
  3803. }
  3804. }
  3805. up_read(&curseg->journal_rwsem);
  3806. if (err)
  3807. return err;
  3808. if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
  3809. f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
  3810. sit_valid_blocks[NODE], valid_node_count(sbi));
  3811. f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
  3812. return -EFSCORRUPTED;
  3813. }
  3814. if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
  3815. valid_user_blocks(sbi)) {
  3816. f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
  3817. sit_valid_blocks[DATA], sit_valid_blocks[NODE],
  3818. valid_user_blocks(sbi));
  3819. f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
  3820. return -EFSCORRUPTED;
  3821. }
  3822. return 0;
  3823. }
  3824. static void init_free_segmap(struct f2fs_sb_info *sbi)
  3825. {
  3826. unsigned int start;
  3827. int type;
  3828. struct seg_entry *sentry;
  3829. for (start = 0; start < MAIN_SEGS(sbi); start++) {
  3830. if (f2fs_usable_blks_in_seg(sbi, start) == 0)
  3831. continue;
  3832. sentry = get_seg_entry(sbi, start);
  3833. if (!sentry->valid_blocks)
  3834. __set_free(sbi, start);
  3835. else
  3836. SIT_I(sbi)->written_valid_blocks +=
  3837. sentry->valid_blocks;
  3838. }
3839. /* mark the current segments as in use */
  3840. for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
  3841. struct curseg_info *curseg_t = CURSEG_I(sbi, type);
  3842. __set_test_and_inuse(sbi, curseg_t->segno);
  3843. }
  3844. }
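/*
 * Scan in-use segments and mark partially valid ones dirty; with large
 * sections, also record partially valid sections that are not currently
 * open for allocation in dirty_secmap.
 */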
  3845. static void init_dirty_segmap(struct f2fs_sb_info *sbi)
  3846. {
  3847. struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
  3848. struct free_segmap_info *free_i = FREE_I(sbi);
  3849. unsigned int segno = 0, offset = 0, secno;
  3850. block_t valid_blocks, usable_blks_in_seg;
  3851. while (1) {
  3852. /* find dirty segment based on free segmap */
  3853. segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
  3854. if (segno >= MAIN_SEGS(sbi))
  3855. break;
  3856. offset = segno + 1;
  3857. valid_blocks = get_valid_blocks(sbi, segno, false);
  3858. usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
  3859. if (valid_blocks == usable_blks_in_seg || !valid_blocks)
  3860. continue;
  3861. if (valid_blocks > usable_blks_in_seg) {
  3862. f2fs_bug_on(sbi, 1);
  3863. continue;
  3864. }
  3865. mutex_lock(&dirty_i->seglist_lock);
  3866. __locate_dirty_segment(sbi, segno, DIRTY);
  3867. mutex_unlock(&dirty_i->seglist_lock);
  3868. }
  3869. if (!__is_large_section(sbi))
  3870. return;
  3871. mutex_lock(&dirty_i->seglist_lock);
  3872. for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
  3873. valid_blocks = get_valid_blocks(sbi, segno, true);
  3874. secno = GET_SEC_FROM_SEG(sbi, segno);
  3875. if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
  3876. continue;
  3877. if (IS_CURSEC(sbi, secno))
  3878. continue;
  3879. set_bit(secno, dirty_i->dirty_secmap);
  3880. }
  3881. mutex_unlock(&dirty_i->seglist_lock);
  3882. }
  3883. static int init_victim_secmap(struct f2fs_sb_info *sbi)
  3884. {
  3885. struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
  3886. unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
  3887. dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
  3888. if (!dirty_i->victim_secmap)
  3889. return -ENOMEM;
  3890. dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
  3891. if (!dirty_i->pinned_secmap)
  3892. return -ENOMEM;
  3893. dirty_i->pinned_secmap_cnt = 0;
  3894. dirty_i->enable_pin_section = true;
  3895. return 0;
  3896. }
  3897. static int build_dirty_segmap(struct f2fs_sb_info *sbi)
  3898. {
  3899. struct dirty_seglist_info *dirty_i;
  3900. unsigned int bitmap_size, i;
  3901. /* allocate memory for dirty segments list information */
  3902. dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
  3903. GFP_KERNEL);
  3904. if (!dirty_i)
  3905. return -ENOMEM;
  3906. SM_I(sbi)->dirty_info = dirty_i;
  3907. mutex_init(&dirty_i->seglist_lock);
  3908. bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
  3909. for (i = 0; i < NR_DIRTY_TYPE; i++) {
  3910. dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
  3911. GFP_KERNEL);
  3912. if (!dirty_i->dirty_segmap[i])
  3913. return -ENOMEM;
  3914. }
  3915. if (__is_large_section(sbi)) {
  3916. bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
  3917. dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
  3918. bitmap_size, GFP_KERNEL);
  3919. if (!dirty_i->dirty_secmap)
  3920. return -ENOMEM;
  3921. }
  3922. init_dirty_segmap(sbi);
  3923. return init_victim_secmap(sbi);
  3924. }
  3925. static int sanity_check_curseg(struct f2fs_sb_info *sbi)
  3926. {
  3927. int i;
  3928. /*
  3929. * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
  3930. * In LFS curseg, all blkaddr after .next_blkoff should be unused.
  3931. */
  3932. for (i = 0; i < NR_PERSISTENT_LOG; i++) {
  3933. struct curseg_info *curseg = CURSEG_I(sbi, i);
  3934. struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
  3935. unsigned int blkofs = curseg->next_blkoff;
  3936. if (f2fs_sb_has_readonly(sbi) &&
  3937. i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
  3938. continue;
  3939. sanity_check_seg_type(sbi, curseg->seg_type);
  3940. if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
  3941. f2fs_err(sbi,
  3942. "Current segment has invalid alloc_type:%d",
  3943. curseg->alloc_type);
  3944. f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
  3945. return -EFSCORRUPTED;
  3946. }
  3947. if (f2fs_test_bit(blkofs, se->cur_valid_map))
  3948. goto out;
  3949. if (curseg->alloc_type == SSR)
  3950. continue;
  3951. for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
  3952. if (!f2fs_test_bit(blkofs, se->cur_valid_map))
  3953. continue;
  3954. out:
  3955. f2fs_err(sbi,
  3956. "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
  3957. i, curseg->segno, curseg->alloc_type,
  3958. curseg->next_blkoff, blkofs);
  3959. f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
  3960. return -EFSCORRUPTED;
  3961. }
  3962. }
  3963. return 0;
  3964. }
  3965. #ifdef CONFIG_BLK_DEV_ZONED
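/*
 * For a sequential zone that no curseg points to, compare the device
 * write pointer with the last block marked valid in the SIT. Zones
 * holding no valid blocks are reset via discard; otherwise mismatched
 * zones are finished (or zero-filled to the end when zone finish is
 * unsupported) so they are not written again until discarded. The
 * mismatch is only reported when the previous unmount was clean.
 */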
  3966. static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
  3967. struct f2fs_dev_info *fdev,
  3968. struct blk_zone *zone)
  3969. {
  3970. unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
  3971. block_t zone_block, wp_block, last_valid_block;
  3972. unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
  3973. int i, s, b, ret;
  3974. struct seg_entry *se;
  3975. if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
  3976. return 0;
  3977. wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
  3978. wp_segno = GET_SEGNO(sbi, wp_block);
  3979. wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
  3980. zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
  3981. zone_segno = GET_SEGNO(sbi, zone_block);
  3982. zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
  3983. if (zone_segno >= MAIN_SEGS(sbi))
  3984. return 0;
  3985. /*
3986. * Skip checking zones that cursegs point to, since
  3987. * fix_curseg_write_pointer() checks them.
  3988. */
  3989. for (i = 0; i < NO_CHECK_TYPE; i++)
  3990. if (zone_secno == GET_SEC_FROM_SEG(sbi,
  3991. CURSEG_I(sbi, i)->segno))
  3992. return 0;
  3993. /*
  3994. * Get last valid block of the zone.
  3995. */
  3996. last_valid_block = zone_block - 1;
  3997. for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
  3998. segno = zone_segno + s;
  3999. se = get_seg_entry(sbi, segno);
  4000. for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
  4001. if (f2fs_test_bit(b, se->cur_valid_map)) {
  4002. last_valid_block = START_BLOCK(sbi, segno) + b;
  4003. break;
  4004. }
  4005. if (last_valid_block >= zone_block)
  4006. break;
  4007. }
  4008. /*
  4009. * When safely unmounted in the previous mount, we can trust write
  4010. * pointers. Otherwise, finish zones.
  4011. */
  4012. if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
  4013. /*
4014. * The write pointer matches the valid blocks or
  4015. * already points to the end of the zone.
  4016. */
  4017. if ((last_valid_block + 1 == wp_block) ||
  4018. (zone->wp == zone->start + zone->len))
  4019. return 0;
  4020. }
  4021. if (last_valid_block + 1 == zone_block) {
  4022. if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
  4023. /*
  4024. * If there is no valid block in the zone and if write
  4025. * pointer is not at zone start, reset the write
  4026. * pointer.
  4027. */
  4028. f2fs_notice(sbi,
  4029. "Zone without valid block has non-zero write "
  4030. "pointer. Reset the write pointer: wp[0x%x,0x%x]",
  4031. wp_segno, wp_blkoff);
  4032. }
  4033. ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
  4034. zone->len >> log_sectors_per_block);
  4035. if (ret)
  4036. f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
  4037. fdev->path, ret);
  4038. return ret;
  4039. }
  4040. if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
  4041. /*
4042. * If there are valid blocks and the write pointer does not match
4043. * them, report the inconsistency and fill the zone to the end to
4044. * close it. This inconsistency does not cause a write error,
4045. * because the zone will not be selected for writing until it gets
4046. * discarded.
  4047. */
  4048. f2fs_notice(sbi, "Valid blocks are not aligned with write "
  4049. "pointer: valid block[0x%x,0x%x] wp[0x%x,0x%x]",
  4050. GET_SEGNO(sbi, last_valid_block),
  4051. GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
  4052. wp_segno, wp_blkoff);
  4053. }
  4054. ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
  4055. zone->start, zone->len, GFP_NOFS);
  4056. if (ret == -EOPNOTSUPP) {
  4057. ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
  4058. zone->len - (zone->wp - zone->start),
  4059. GFP_NOFS, 0);
  4060. if (ret)
  4061. f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
  4062. fdev->path, ret);
  4063. } else if (ret) {
  4064. f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)",
  4065. fdev->path, ret);
  4066. }
  4067. return ret;
  4068. }
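
/*
 * Return the zoned device that contains @zone_blkaddr, or NULL if the
 * address does not fall on a zoned device.
 */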
static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
						  block_t zone_blkaddr)
{
	int i;

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!bdev_is_zoned(FDEV(i).bdev))
			continue;
		if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
				zone_blkaddr <= FDEV(i).end_blk))
			return &FDEV(i);
	}

	return NULL;
}
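
/* Copy the single reported zone into the caller-provided buffer. */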
static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
			      void *data)
{
	memcpy(data, zone, sizeof(struct blk_zone));
	return 0;
}
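
/*
 * Align the current segment of log @type with the write pointer of the
 * zone it points to.  If they disagree, or the previous mount did not
 * unmount cleanly, allocate a new section for the curseg and make sure
 * the newly assigned zone starts from a reset write pointer.
 */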
static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *cs = CURSEG_I(sbi, type);
	struct f2fs_dev_info *zbd;
	struct blk_zone zone;
	unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
	block_t cs_zone_block, wp_block;
	unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
	sector_t zone_sector;
	int err;

	cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
	cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));

	zbd = get_target_zoned_dev(sbi, cs_zone_block);
	if (!zbd)
		return 0;

	/* report zone for the sector the curseg points to */
	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
		<< log_sectors_per_block;
	err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
				  report_one_zone_cb, &zone);
	if (err != 1) {
		f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
			 zbd->path, err);
		return err;
	}

	if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
		return 0;

	/*
	 * When safely unmounted in the previous mount, we could use current
	 * segments. Otherwise, allocate new sections.
	 */
	if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
		wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
		wp_segno = GET_SEGNO(sbi, wp_block);
		wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
		wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);

		if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
		    wp_sector_off == 0)
			return 0;

		f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
			    "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
			    cs->next_blkoff, wp_segno, wp_blkoff);
	} else {
		f2fs_notice(sbi, "Not successfully unmounted in the previous "
			    "mount");
	}

	f2fs_notice(sbi, "Assign new section to curseg[%d]: "
		    "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);

	f2fs_allocate_new_section(sbi, type, true);

	/* check consistency of the zone curseg pointed to */
	if (check_zone_write_pointer(sbi, zbd, &zone))
		return -EIO;

	/* check newly assigned zone */
	cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
	cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));

	zbd = get_target_zoned_dev(sbi, cs_zone_block);
	if (!zbd)
		return 0;

	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
		<< log_sectors_per_block;
	err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
				  report_one_zone_cb, &zone);
	if (err != 1) {
		f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
			 zbd->path, err);
		return err;
	}

	if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
		return 0;

	if (zone.wp != zone.start) {
		f2fs_notice(sbi,
			    "New zone for curseg[%d] is not yet discarded. "
			    "Reset the zone: curseg[0x%x,0x%x]",
			    type, cs->segno, cs->next_blkoff);
		err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
						zone.len >> log_sectors_per_block);
		if (err) {
			f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
				 zbd->path, err);
			return err;
		}
	}

	return 0;
}
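
/* Fix up the write pointer alignment for every persistent log type. */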
int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
	int i, ret;

	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
		ret = fix_curseg_write_pointer(sbi, i);
		if (ret)
			return ret;
	}

	return 0;
}
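
/* Per-device context passed to the zone-report callback below. */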
struct check_zone_write_pointer_args {
	struct f2fs_sb_info *sbi;
	struct f2fs_dev_info *fdev;
};

static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
				       void *data)
{
	struct check_zone_write_pointer_args *args;

	args = (struct check_zone_write_pointer_args *)data;

	return check_zone_write_pointer(args->sbi, args->fdev, zone);
}
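
/*
 * Walk all zones of every zoned device and verify each write pointer
 * against the on-disk valid block information.
 */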
int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
{
	int i, ret;
	struct check_zone_write_pointer_args args;

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!bdev_is_zoned(FDEV(i).bdev))
			continue;

		args.sbi = sbi;
		args.fdev = &FDEV(i);
		ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
					  check_zone_write_pointer_cb, &args);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/*
 * Return the number of usable blocks in a segment. The number of blocks
 * returned is always equal to the number of blocks in a segment for
 * segments fully contained within a sequential zone capacity or a
 * conventional zone. For segments partially contained in a sequential
 * zone capacity, the number of usable blocks up to the zone capacity
 * is returned. 0 is returned in all other cases.
 */
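/*
 * Illustrative example (numbers are hypothetical, not taken from any
 * particular device): with 2 MiB segments (512 blocks of 4 KiB), four
 * segments per section and a zone capacity of 7 MiB (1792 blocks),
 * segments 0-2 of a section are fully usable (512 blocks each), segment 3
 * straddles the capacity boundary and gets 1792 - 3 * 512 = 256 usable
 * blocks, and a segment starting at or beyond the capacity would get 0.
 */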
static inline unsigned int f2fs_usable_zone_blks_in_seg(
			struct f2fs_sb_info *sbi, unsigned int segno)
{
	block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
	unsigned int secno;

	if (!sbi->unusable_blocks_per_sec)
		return sbi->blocks_per_seg;

	secno = GET_SEC_FROM_SEG(sbi, segno);
	seg_start = START_BLOCK(sbi, segno);
	sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
	sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);

	/*
	 * If segment starts before zone capacity and spans beyond
	 * zone capacity, then usable blocks are from seg start to
	 * zone capacity. If the segment starts after the zone capacity,
	 * then there are no usable blocks.
	 */
	if (seg_start >= sec_cap_blkaddr)
		return 0;
	if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
		return sec_cap_blkaddr - seg_start;

	return sbi->blocks_per_seg;
}
#else
int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
	return 0;
}

int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
{
	return 0;
}

static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
							unsigned int segno)
{
	return 0;
}
#endif
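
/*
 * Number of blocks in @segno that f2fs can actually allocate, taking the
 * zone capacity into account on zoned block devices.
 */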
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
				     unsigned int segno)
{
	if (f2fs_sb_has_blkzoned(sbi))
		return f2fs_usable_zone_blks_in_seg(sbi, segno);

	return sbi->blocks_per_seg;
}

unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
				     unsigned int segno)
{
	if (f2fs_sb_has_blkzoned(sbi))
		return CAP_SEGS_PER_SEC(sbi);

	return sbi->segs_per_sec;
}

/*
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno;

	down_write(&sit_i->sentry_lock);

	sit_i->min_mtime = ULLONG_MAX;

	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
		unsigned int i;
		unsigned long long mtime = 0;

		for (i = 0; i < sbi->segs_per_sec; i++)
			mtime += get_seg_entry(sbi, segno + i)->mtime;

		mtime = div_u64(mtime, sbi->segs_per_sec);

		if (sit_i->min_mtime > mtime)
			sit_i->min_mtime = mtime;
	}
	sit_i->max_mtime = get_mtime(sbi, false);
	sit_i->dirty_max_mtime = 0;
	up_write(&sit_i->sentry_lock);
}
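
/*
 * Build the in-memory segment manager at mount time: allocate sm_info,
 * read geometry from the superblock and checkpoint, construct the SIT,
 * free/dirty segment maps and current segments, then sanity check the
 * cursegs and seed the mtime range used by GC.
 */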
int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_sm_info *sm_info;
	int err;

	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
	if (!sm_info)
		return -ENOMEM;

	/* init sm info */
	sbi->sm_info = sm_info;
	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
	sm_info->rec_prefree_segments = sm_info->main_segments *
					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;

	if (!f2fs_lfs_mode(sbi))
		sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
	sm_info->min_seq_blocks = sbi->blocks_per_seg;
	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
	sm_info->min_ssr_sections = reserved_sections(sbi);

	INIT_LIST_HEAD(&sm_info->sit_entry_set);

	init_f2fs_rwsem(&sm_info->curseg_lock);

	err = f2fs_create_flush_cmd_control(sbi);
	if (err)
		return err;

	err = create_discard_cmd_control(sbi);
	if (err)
		return err;

	err = build_sit_info(sbi);
	if (err)
		return err;
	err = build_free_segmap(sbi);
	if (err)
		return err;
	err = build_curseg(sbi);
	if (err)
		return err;

	/* reinit free segmap based on SIT */
	err = build_sit_entries(sbi);
	if (err)
		return err;

	init_free_segmap(sbi);
	err = build_dirty_segmap(sbi);
	if (err)
		return err;

	err = sanity_check_curseg(sbi);
	if (err)
		return err;

	init_min_max_mtime(sbi);
	return 0;
}
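
/* Teardown helpers used by f2fs_destroy_segment_manager() below. */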
static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
				 enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	mutex_lock(&dirty_i->seglist_lock);
	kvfree(dirty_i->dirty_segmap[dirty_type]);
	dirty_i->nr_dirty[dirty_type] = 0;
	mutex_unlock(&dirty_i->seglist_lock);
}

static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	kvfree(dirty_i->pinned_secmap);
	kvfree(dirty_i->victim_secmap);
}

static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	int i;

	if (!dirty_i)
		return;

	/* discard pre-free/dirty segments list */
	for (i = 0; i < NR_DIRTY_TYPE; i++)
		discard_dirty_segmap(sbi, i);

	if (__is_large_section(sbi)) {
		mutex_lock(&dirty_i->seglist_lock);
		kvfree(dirty_i->dirty_secmap);
		mutex_unlock(&dirty_i->seglist_lock);
	}

	destroy_victim_secmap(sbi);
	SM_I(sbi)->dirty_info = NULL;
	kfree(dirty_i);
}

static void destroy_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array = SM_I(sbi)->curseg_array;
	int i;

	if (!array)
		return;
	SM_I(sbi)->curseg_array = NULL;
	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		kfree(array[i].sum_blk);
		kfree(array[i].journal);
	}
	kfree(array);
}

static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i = SM_I(sbi)->free_info;

	if (!free_i)
		return;
	SM_I(sbi)->free_info = NULL;
	kvfree(free_i->free_segmap);
	kvfree(free_i->free_secmap);
	kfree(free_i);
}

static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!sit_i)
		return;

	if (sit_i->sentries)
		kvfree(sit_i->bitmap);
	kfree(sit_i->tmp_map);

	kvfree(sit_i->sentries);
	kvfree(sit_i->sec_entries);
	kvfree(sit_i->dirty_sentries_bitmap);

	SM_I(sbi)->sit_info = NULL;
	kvfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
	kvfree(sit_i->sit_bitmap_mir);
	kvfree(sit_i->invalid_segmap);
#endif
	kfree(sit_i);
}
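
/*
 * Tear down the segment manager, releasing the structures built by
 * f2fs_build_segment_manager().
 */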
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);

	if (!sm_info)
		return;
	f2fs_destroy_flush_cmd_control(sbi, true);
	destroy_discard_cmd_control(sbi);
	destroy_dirty_segmap(sbi);
	destroy_curseg(sbi);
	destroy_free_segmap(sbi);
	destroy_sit_info(sbi);
	sbi->sm_info = NULL;
	kfree(sm_info);
}
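
/*
 * Create the slab caches used by the segment manager (discard entries and
 * commands, SIT entry sets, revoke entries), unwinding on failure.
 */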
int __init f2fs_create_segment_manager_caches(void)
{
	discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
						    sizeof(struct discard_entry));
	if (!discard_entry_slab)
		goto fail;

	discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
						  sizeof(struct discard_cmd));
	if (!discard_cmd_slab)
		goto destroy_discard_entry;

	sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
						    sizeof(struct sit_entry_set));
	if (!sit_entry_set_slab)
		goto destroy_discard_cmd;

	revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
						   sizeof(struct revoke_entry));
	if (!revoke_entry_slab)
		goto destroy_sit_entry_set;

	return 0;

destroy_sit_entry_set:
	kmem_cache_destroy(sit_entry_set_slab);
destroy_discard_cmd:
	kmem_cache_destroy(discard_cmd_slab);
destroy_discard_entry:
	kmem_cache_destroy(discard_entry_slab);
fail:
	return -ENOMEM;
}

void f2fs_destroy_segment_manager_caches(void)
{
	kmem_cache_destroy(sit_entry_set_slab);
	kmem_cache_destroy(discard_cmd_slab);
	kmem_cache_destroy(discard_entry_slab);
	kmem_cache_destroy(revoke_entry_slab);
}