  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) International Business Machines Corp., 2000-2005
  4. */
  5. /*
  6. * jfs_xtree.c: extent allocation descriptor B+-tree manager
  7. */
  8. #include <linux/fs.h>
  9. #include <linux/module.h>
  10. #include <linux/quotaops.h>
  11. #include <linux/seq_file.h>
  12. #include "jfs_incore.h"
  13. #include "jfs_filsys.h"
  14. #include "jfs_metapage.h"
  15. #include "jfs_dmap.h"
  16. #include "jfs_dinode.h"
  17. #include "jfs_superblock.h"
  18. #include "jfs_debug.h"
/*
 *	xtree local flag
 */
/* search performed on behalf of an insertion: xtSearch() will also
 * record in btstack->nsplit how many pages along the traversed path
 * are full and would split to absorb the new entry.
 */
#define XT_INSERT 0x00000001

/*
 * xtree key/entry comparison: extent offset
 *
 * return:
 * -1: k < start of extent
 * 0: start_of_extent <= k <= end_of_extent
 * 1: k > end_of_extent
 *
 * OFFSET64 is a caller-supplied s64 scratch variable used to cache
 * offsetXAD(X) so the xad offset is decoded only once.
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
{\
	OFFSET64 = offsetXAD(X);\
	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
		((K) < OFFSET64) ? -1 : 0;\
}

/* write a xad entry: fill flag, offset, length and address fields */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
{\
	(XAD)->flag = (FLAG);\
	XADoffset((XAD), (OFF));\
	XADlength((XAD), (LEN));\
	XADaddress((XAD), (ADDR));\
}

/* map a pinned metapage to its xtree page (root is kept in i_xtroot) */
#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)

/* get page buffer for specified block address */
/* ToDo: Replace this ugly macro with a function */
/*
 * On a successful read (RC == 0) the page header is sanity-checked:
 * nextindex must lie within [XTENTRYSTART, maxentry], and maxentry
 * must not exceed the page's slot capacity (XTROOTMAXSLOT for the
 * root at BN 0, PSIZE >> L2XTSLOTSIZE otherwise).  A corrupt page is
 * reported via jfs_error(), unpinned, MP is cleared and RC set to -EIO.
 */
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \
do { \
	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \
	if (!(RC)) { \
		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
		    (le16_to_cpu((P)->header.nextindex) > \
		     le16_to_cpu((P)->header.maxentry)) || \
		    (le16_to_cpu((P)->header.maxentry) > \
		     (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
			jfs_error((IP)->i_sb, \
				  "XT_GETPAGE: xtree page corrupt\n"); \
			BT_PUTPAGE(MP); \
			MP = NULL; \
			RC = -EIO; \
		} \
	} \
} while (0)

/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

/* unpack (bn, mp, p, index) of the leaf from the top btstack frame */
#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
/* xtree entry parameter descriptor:
 * describes the entry to insert and the page it goes into, handed to
 * the split routines when the target page is full.
 */
struct xtsplit {
	struct metapage *mp;	/* pinned page to insert into / split */
	s16 index;		/* entry slot at which to insert */
	u8 flag;		/* xad flag (e.g. XAD_NEW, XAD_NOTRECORDED) */
	s64 off;		/* extent offset (in fs blocks) */
	s64 addr;		/* extent block address */
	int len;		/* extent length (in fs blocks) */
	struct pxdlist *pxdlist;	/* preallocated pxds for new index
					 * pages, or NULL to allocate
					 * inside xtSplitUp() */
};

/*
 *	statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;		/* total xtSearch() invocations */
	uint fastSearch;	/* searches satisfied by the sequential
				 * access heuristic */
	uint split;		/* page splits performed */
} xtStat;
#endif

/*
 * forward references
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
		    struct btstack * btstack, int flag);

static int xtSplitUp(tid_t tid,
		     struct inode *ip,
		     struct xtsplit * split, struct btstack * btstack);

static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
		       struct metapage ** rmpp, s64 * rbnp);

static int xtSplitRoot(tid_t tid, struct inode *ip,
		       struct xtsplit * split, struct metapage ** rmpp);
  101. /*
  102. * xtLookup()
  103. *
  104. * function: map a single page into a physical extent;
  105. */
  106. int xtLookup(struct inode *ip, s64 lstart,
  107. s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
  108. {
  109. int rc = 0;
  110. struct btstack btstack;
  111. int cmp;
  112. s64 bn;
  113. struct metapage *mp;
  114. xtpage_t *p;
  115. int index;
  116. xad_t *xad;
  117. s64 next, size, xoff, xend;
  118. int xlen;
  119. s64 xaddr;
  120. *paddr = 0;
  121. *plen = llen;
  122. if (!no_check) {
  123. /* is lookup offset beyond eof ? */
  124. size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
  125. JFS_SBI(ip->i_sb)->l2bsize;
  126. if (lstart >= size)
  127. return 0;
  128. }
  129. /*
  130. * search for the xad entry covering the logical extent
  131. */
  132. //search:
  133. if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
  134. jfs_err("xtLookup: xtSearch returned %d", rc);
  135. return rc;
  136. }
  137. /*
  138. * compute the physical extent covering logical extent
  139. *
  140. * N.B. search may have failed (e.g., hole in sparse file),
  141. * and returned the index of the next entry.
  142. */
  143. /* retrieve search result */
  144. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  145. /* is xad found covering start of logical extent ?
  146. * lstart is a page start address,
  147. * i.e., lstart cannot start in a hole;
  148. */
  149. if (cmp) {
  150. if (next)
  151. *plen = min(next - lstart, llen);
  152. goto out;
  153. }
  154. /*
  155. * lxd covered by xad
  156. */
  157. xad = &p->xad[index];
  158. xoff = offsetXAD(xad);
  159. xlen = lengthXAD(xad);
  160. xend = xoff + xlen;
  161. xaddr = addressXAD(xad);
  162. /* initialize new pxd */
  163. *pflag = xad->flag;
  164. *paddr = xaddr + (lstart - xoff);
  165. /* a page must be fully covered by an xad */
  166. *plen = min(xend - lstart, llen);
  167. out:
  168. XT_PUTPAGE(mp);
  169. return rc;
  170. }
/*
 *	xtSearch()
 *
 * function:	search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *	cmpp	- comparison result:
 *	btstack - traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 *
 * NOTE(review): on a binary-search leaf *hit* the function returns
 * without writing *nextp; callers should only consult *nextp when
 * *cmpp != 0 (all current callers do).
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;		/* offset of next extent for a miss */

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 *	search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					/* hit within the previously
					 * accessed entry */
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss:	key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				/* raw comparison is safe: both fields
				 * are stored little-endian on disk */
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *	search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *	search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split:
		 * nsplit counts the run of full pages ending at this
		 * level; a non-full page resets it, since it can absorb
		 * the router entry and stop the split propagation.
		 */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		if (BT_STACK_FULL(btstack)) {
			jfs_error(ip->i_sb, "stack overrun!\n");
			XT_PUTPAGE(mp);
			return -EIO;
		}
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
/*
 *	xtInsert()
 *
 * function:
 *	insert the extent (xoff, xlen) at *xaddrp into the file's xtree,
 *	allocating the data extent first when the caller did not
 *	(*xaddrp == 0), and splitting the leaf (and ancestors) if full.
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 *	0 on success; -EEXIST if the range overlaps an existing extent;
 *	otherwise a negative errno from search, quota, allocation or split.
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 *	search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	/* exact hit, or new extent would run into the next one: overlap */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		/* charge quota before allocating; release it on failure */
		if ((rc = dquot_alloc_block(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			dquot_free_block(ip, xlen);
			goto out;
		}
	}

	/*
	 *	insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *	if the leaf page is full, split the page and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation (only if we
			 * allocated it above, i.e. caller passed 0) */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				dquot_free_block(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 *	insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		/* grow the logged range [lwm.offset, nextindex) to
		 * cover the slot just written */
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
  570. /*
  571. * xtSplitUp()
  572. *
  573. * function:
  574. * split full pages as propagating insertion up the tree
  575. *
  576. * parameter:
  577. * tid - transaction id;
  578. * ip - file object;
  579. * split - entry parameter descriptor;
  580. * btstack - traverse stack from xtSearch()
  581. *
  582. * return:
  583. */
  584. static int
  585. xtSplitUp(tid_t tid,
  586. struct inode *ip, struct xtsplit * split, struct btstack * btstack)
  587. {
  588. int rc = 0;
  589. struct metapage *smp;
  590. xtpage_t *sp; /* split page */
  591. struct metapage *rmp;
  592. s64 rbn; /* new right page block number */
  593. struct metapage *rcmp;
  594. xtpage_t *rcp; /* right child page */
  595. s64 rcbn; /* right child page block number */
  596. int skip; /* index of entry of insertion */
  597. int nextindex; /* next available entry index of p */
  598. struct btframe *parent; /* parent page entry on traverse stack */
  599. xad_t *xad;
  600. s64 xaddr;
  601. int xlen;
  602. int nsplit; /* number of pages split */
  603. struct pxdlist pxdlist;
  604. pxd_t *pxd;
  605. struct tlock *tlck;
  606. struct xtlock *xtlck;
  607. smp = split->mp;
  608. sp = XT_PAGE(ip, smp);
  609. /* is inode xtree root extension/inline EA area free ? */
  610. if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
  611. (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
  612. (JFS_IP(ip)->mode2 & INLINEEA)) {
  613. sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
  614. JFS_IP(ip)->mode2 &= ~INLINEEA;
  615. BT_MARK_DIRTY(smp, ip);
  616. /*
  617. * acquire a transaction lock on the leaf page;
  618. *
  619. * action: xad insertion/extension;
  620. */
  621. /* if insert into middle, shift right remaining entries. */
  622. skip = split->index;
  623. nextindex = le16_to_cpu(sp->header.nextindex);
  624. if (skip < nextindex)
  625. memmove(&sp->xad[skip + 1], &sp->xad[skip],
  626. (nextindex - skip) * sizeof(xad_t));
  627. /* insert the new entry: mark the entry NEW */
  628. xad = &sp->xad[skip];
  629. XT_PUTENTRY(xad, split->flag, split->off, split->len,
  630. split->addr);
  631. /* advance next available entry index */
  632. le16_add_cpu(&sp->header.nextindex, 1);
  633. /* Don't log it if there are no links to the file */
  634. if (!test_cflag(COMMIT_Nolink, ip)) {
  635. tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
  636. xtlck = (struct xtlock *) & tlck->lock;
  637. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  638. min(skip, (int)xtlck->lwm.offset) : skip;
  639. xtlck->lwm.length =
  640. le16_to_cpu(sp->header.nextindex) -
  641. xtlck->lwm.offset;
  642. }
  643. return 0;
  644. }
  645. /*
  646. * allocate new index blocks to cover index page split(s)
  647. *
  648. * allocation hint: ?
  649. */
  650. if (split->pxdlist == NULL) {
  651. nsplit = btstack->nsplit;
  652. split->pxdlist = &pxdlist;
  653. pxdlist.maxnpxd = pxdlist.npxd = 0;
  654. pxd = &pxdlist.pxd[0];
  655. xlen = JFS_SBI(ip->i_sb)->nbperpage;
  656. for (; nsplit > 0; nsplit--, pxd++) {
  657. if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
  658. == 0) {
  659. PXDaddress(pxd, xaddr);
  660. PXDlength(pxd, xlen);
  661. pxdlist.maxnpxd++;
  662. continue;
  663. }
  664. /* undo allocation */
  665. XT_PUTPAGE(smp);
  666. return rc;
  667. }
  668. }
  669. /*
  670. * Split leaf page <sp> into <sp> and a new right page <rp>.
  671. *
  672. * The split routines insert the new entry into the leaf page,
  673. * and acquire txLock as appropriate.
  674. * return <rp> pinned and its block number <rpbn>.
  675. */
  676. rc = (sp->header.flag & BT_ROOT) ?
  677. xtSplitRoot(tid, ip, split, &rmp) :
  678. xtSplitPage(tid, ip, split, &rmp, &rbn);
  679. XT_PUTPAGE(smp);
  680. if (rc)
  681. return -EIO;
  682. /*
  683. * propagate up the router entry for the leaf page just split
  684. *
  685. * insert a router entry for the new page into the parent page,
  686. * propagate the insert/split up the tree by walking back the stack
  687. * of (bn of parent page, index of child page entry in parent page)
  688. * that were traversed during the search for the page that split.
  689. *
  690. * the propagation of insert/split up the tree stops if the root
  691. * splits or the page inserted into doesn't have to split to hold
  692. * the new entry.
  693. *
  694. * the parent entry for the split page remains the same, and
  695. * a new entry is inserted at its right with the first key and
  696. * block number of the new right page.
  697. *
  698. * There are a maximum of 3 pages pinned at any time:
  699. * right child, left parent and right parent (when the parent splits)
  700. * to keep the child page pinned while working on the parent.
  701. * make sure that all pins are released at exit.
  702. */
  703. while ((parent = BT_POP(btstack)) != NULL) {
  704. /* parent page specified by stack frame <parent> */
  705. /* keep current child pages <rcp> pinned */
  706. rcmp = rmp;
  707. rcbn = rbn;
  708. rcp = XT_PAGE(ip, rcmp);
  709. /*
  710. * insert router entry in parent for new right child page <rp>
  711. */
  712. /* get/pin the parent page <sp> */
  713. XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
  714. if (rc) {
  715. XT_PUTPAGE(rcmp);
  716. return rc;
  717. }
  718. /*
  719. * The new key entry goes ONE AFTER the index of parent entry,
  720. * because the split was to the right.
  721. */
  722. skip = parent->index + 1;
  723. /*
  724. * split or shift right remaining entries of the parent page
  725. */
  726. nextindex = le16_to_cpu(sp->header.nextindex);
  727. /*
  728. * parent page is full - split the parent page
  729. */
  730. if (nextindex == le16_to_cpu(sp->header.maxentry)) {
  731. /* init for parent page split */
  732. split->mp = smp;
  733. split->index = skip; /* index at insert */
  734. split->flag = XAD_NEW;
  735. split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
  736. split->len = JFS_SBI(ip->i_sb)->nbperpage;
  737. split->addr = rcbn;
  738. /* unpin previous right child page */
  739. XT_PUTPAGE(rcmp);
  740. /* The split routines insert the new entry,
  741. * and acquire txLock as appropriate.
  742. * return <rp> pinned and its block number <rpbn>.
  743. */
  744. rc = (sp->header.flag & BT_ROOT) ?
  745. xtSplitRoot(tid, ip, split, &rmp) :
  746. xtSplitPage(tid, ip, split, &rmp, &rbn);
  747. if (rc) {
  748. XT_PUTPAGE(smp);
  749. return rc;
  750. }
  751. XT_PUTPAGE(smp);
  752. /* keep new child page <rp> pinned */
  753. }
  754. /*
  755. * parent page is not full - insert in parent page
  756. */
  757. else {
  758. /*
  759. * insert router entry in parent for the right child
  760. * page from the first entry of the right child page:
  761. */
  762. /*
  763. * acquire a transaction lock on the parent page;
  764. *
  765. * action: router xad insertion;
  766. */
  767. BT_MARK_DIRTY(smp, ip);
  768. /*
  769. * if insert into middle, shift right remaining entries
  770. */
  771. if (skip < nextindex)
  772. memmove(&sp->xad[skip + 1], &sp->xad[skip],
  773. (nextindex -
  774. skip) << L2XTSLOTSIZE);
  775. /* insert the router entry */
  776. xad = &sp->xad[skip];
  777. XT_PUTENTRY(xad, XAD_NEW,
  778. offsetXAD(&rcp->xad[XTENTRYSTART]),
  779. JFS_SBI(ip->i_sb)->nbperpage, rcbn);
  780. /* advance next available entry index. */
  781. le16_add_cpu(&sp->header.nextindex, 1);
  782. /* Don't log it if there are no links to the file */
  783. if (!test_cflag(COMMIT_Nolink, ip)) {
  784. tlck = txLock(tid, ip, smp,
  785. tlckXTREE | tlckGROW);
  786. xtlck = (struct xtlock *) & tlck->lock;
  787. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  788. min(skip, (int)xtlck->lwm.offset) : skip;
  789. xtlck->lwm.length =
  790. le16_to_cpu(sp->header.nextindex) -
  791. xtlck->lwm.offset;
  792. }
  793. /* unpin parent page */
  794. XT_PUTPAGE(smp);
  795. /* exit propagate up */
  796. break;
  797. }
  798. }
  799. /* unpin current right page */
  800. XT_PUTPAGE(rmp);
  801. return 0;
  802. }
  803. /*
  804. * xtSplitPage()
  805. *
  806. * function:
  807. * split a full non-root page into
  808. * original/split/left page and new right page
  809. * i.e., the original/split page remains as left page.
  810. *
  811. * parameter:
  812. * int tid,
  813. * struct inode *ip,
  814. * struct xtsplit *split,
  815. * struct metapage **rmpp,
  816. * u64 *rbnp,
  817. *
 * return:
 *	0 for success, or a negative errno on failure; on success the new
 *	right page is returned pinned in *rmpp and its block number in *rbnp.
  820. */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split (left) page */
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
	int quota_allocation = 0;	/* blocks charged to quota so far */

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	/* take the next pre-allocated extent from the caller's pxdlist;
	 * it becomes the disk home of the new right page.
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);

	/* Allocate blocks to quota. */
	rc = dquot_alloc_block(ip, lengthPXD(pxd));
	if (rc)
		goto clean_up;

	quota_allocation += lengthPXD(pxd);

	/*
	 * allocate the new right page for the split
	 */
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL) {
		rc = -EIO;
		goto clean_up;
	}

	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */
	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 * sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * a entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		/* return the new right page pinned; left page stays pinned */
		*rmpp = rmp;
		*rbnp = rbn;

		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
		return 0;
	}

	/*
	 * non-sequential insert (at possibly middle page)
	 */

	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			goto clean_up;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action:sibling pointer update;
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(middle - skip) << L2XTSLOTSIZE);

		/* insert new entry */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle + 1);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(skip, (int)sxtlck->lwm.offset) : skip;
		}

		rp->header.nextindex =
		    cpu_to_le16(XTENTRYSTART + righthalf);
	}
	/*
	 * skip index in new right page - insert into right page:
	 */
	else {
		/* move left head of right half to right page */
		n = skip - middle;
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			n << L2XTSLOTSIZE);

		/* insert new entry */
		n += XTENTRYSTART;
		xad = &rp->xad[n];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* move right tail of right half to right page */
		if (skip < maxentry)
			memmove(&rp->xad[n + 1], &sp->xad[skip],
				(maxentry - skip) << L2XTSLOTSIZE);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/* return the new right page pinned; caller unpins both pages */
	*rmpp = rmp;
	*rbnp = rbn;

	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
	return rc;

      clean_up:

	/* Rollback quota allocation. */
	if (quota_allocation)
		dquot_free_block(ip, quota_allocation);

	return (rc);
}
  1026. /*
  1027. * xtSplitRoot()
  1028. *
  1029. * function:
  1030. * split the full root page into original/root/split page and new
  1031. * right page
  1032. * i.e., root remains fixed in tree anchor (inode) and the root is
  1033. * copied to a single new right child page since root page <<
  1034. * non-root page, and the split root page contains a single entry
  1035. * for the new right child page.
  1036. *
  1037. * parameter:
  1038. * int tid,
  1039. * struct inode *ip,
  1040. * struct xtsplit *split,
  1041. * struct metapage **rmpp)
  1042. *
 * return:
 *	0 for success, or a negative errno on failure; on success the new
 *	pinned right child page is returned in *rmpp.
  1045. */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;		/* in-inode root page */
	struct metapage *rmp;
	xtpage_t *rp;		/* new right child page */
	s64 rbn;		/* new right child block number */
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int rc;

	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 * allocate a single (right) child page
	 */
	/* extent was pre-allocated by the caller; consume the next pxd */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	rc = dquot_alloc_block(ip, lengthPXD(pxd));
	if (rc) {
		release_metapage(rmp);
		return rc;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	rp = (xtpage_t *) rmp->data;
	/* child keeps the root's leaf/internal type */
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 * reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which force the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	/* hand the pinned child page back to the caller */
	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}
  1150. /*
  1151. * xtExtend()
  1152. *
  1153. * function: extend in-place;
  1154. *
  1155. * note: existing extent may or may not have been committed.
  1156. * caller is responsible for pager buffer cache update, and
  1157. * working block allocation map update;
  1158. * update pmap: alloc whole extended extent;
  1159. */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)		/* NOTE(review): unused in this body - confirm */
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist extent to be extended */
	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtSearch did not find extent\n");
		return -EIO;
	}

	/* extension must be contiguous */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "extension is not contiguous\n");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ? */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 * extent overflow: insert entry for new extent
	 */
	/* the old extent is capped at MAXXLEN; the overflow of <len>
	 * blocks becomes a new XAD_NEW entry right after it.
	 */
//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 * if the leaf page is full, insert the new entry and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 * insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old entry */
	xad = &p->xad[index];
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
  1290. /*
  1291. * xtUpdate()
  1292. *
  1293. * function: update XAD;
  1294. *
  1295. * update extent for allocated_but_not_recorded or
  1296. * compressed extent;
  1297. *
  1298. * parameter:
  1299. * nxad - new XAD;
  1300. * logical extent of the specified XAD must be completely
  1301. * contained by an existing XAD;
  1302. */
  1303. int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
  1304. { /* new XAD */
  1305. int rc = 0;
  1306. int cmp;
  1307. struct metapage *mp; /* meta-page buffer */
  1308. xtpage_t *p; /* base B+-tree index page */
  1309. s64 bn;
  1310. int index0, index, newindex, nextindex;
  1311. struct btstack btstack; /* traverse stack */
  1312. struct xtsplit split; /* split information */
  1313. xad_t *xad, *lxad, *rxad;
  1314. int xflag;
  1315. s64 nxoff, xoff;
  1316. int nxlen, xlen, lxlen, rxlen;
  1317. s64 nxaddr, xaddr;
  1318. struct tlock *tlck;
  1319. struct xtlock *xtlck = NULL;
  1320. int newpage = 0;
  1321. /* there must exist extent to be tailgated */
  1322. nxoff = offsetXAD(nxad);
  1323. nxlen = lengthXAD(nxad);
  1324. nxaddr = addressXAD(nxad);
  1325. if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
  1326. return rc;
  1327. /* retrieve search result */
  1328. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
  1329. if (cmp != 0) {
  1330. XT_PUTPAGE(mp);
  1331. jfs_error(ip->i_sb, "Could not find extent\n");
  1332. return -EIO;
  1333. }
  1334. BT_MARK_DIRTY(mp, ip);
  1335. /*
  1336. * acquire tlock of the leaf page containing original entry
  1337. */
  1338. if (!test_cflag(COMMIT_Nolink, ip)) {
  1339. tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  1340. xtlck = (struct xtlock *) & tlck->lock;
  1341. }
  1342. xad = &p->xad[index0];
  1343. xflag = xad->flag;
  1344. xoff = offsetXAD(xad);
  1345. xlen = lengthXAD(xad);
  1346. xaddr = addressXAD(xad);
  1347. /* nXAD must be completely contained within XAD */
  1348. if ((xoff > nxoff) ||
  1349. (nxoff + nxlen > xoff + xlen)) {
  1350. XT_PUTPAGE(mp);
  1351. jfs_error(ip->i_sb,
  1352. "nXAD in not completely contained within XAD\n");
  1353. return -EIO;
  1354. }
  1355. index = index0;
  1356. newindex = index + 1;
  1357. nextindex = le16_to_cpu(p->header.nextindex);
  1358. if (xoff < nxoff)
  1359. goto coalesceRight;
  1360. /*
  1361. * coalesce with left XAD
  1362. */
  1363. /* is XAD first entry of page ? */
  1364. if (index == XTENTRYSTART)
  1365. goto replace;
  1366. /* is nXAD logically and physically contiguous with lXAD ? */
  1367. lxad = &p->xad[index - 1];
  1368. lxlen = lengthXAD(lxad);
  1369. if (!(lxad->flag & XAD_NOTRECORDED) &&
  1370. (nxoff == offsetXAD(lxad) + lxlen) &&
  1371. (nxaddr == addressXAD(lxad) + lxlen) &&
  1372. (lxlen + nxlen < MAXXLEN)) {
  1373. /* extend right lXAD */
  1374. index0 = index - 1;
  1375. XADlength(lxad, lxlen + nxlen);
  1376. /* If we just merged two extents together, need to make sure the
  1377. * right extent gets logged. If the left one is marked XAD_NEW,
  1378. * then we know it will be logged. Otherwise, mark as
  1379. * XAD_EXTENDED
  1380. */
  1381. if (!(lxad->flag & XAD_NEW))
  1382. lxad->flag |= XAD_EXTENDED;
  1383. if (xlen > nxlen) {
  1384. /* truncate XAD */
  1385. XADoffset(xad, xoff + nxlen);
  1386. XADlength(xad, xlen - nxlen);
  1387. XADaddress(xad, xaddr + nxlen);
  1388. goto out;
  1389. } else { /* (xlen == nxlen) */
  1390. /* remove XAD */
  1391. if (index < nextindex - 1)
  1392. memmove(&p->xad[index], &p->xad[index + 1],
  1393. (nextindex - index -
  1394. 1) << L2XTSLOTSIZE);
  1395. p->header.nextindex =
  1396. cpu_to_le16(le16_to_cpu(p->header.nextindex) -
  1397. 1);
  1398. index = index0;
  1399. newindex = index + 1;
  1400. nextindex = le16_to_cpu(p->header.nextindex);
  1401. xoff = nxoff = offsetXAD(lxad);
  1402. xlen = nxlen = lxlen + nxlen;
  1403. xaddr = nxaddr = addressXAD(lxad);
  1404. goto coalesceRight;
  1405. }
  1406. }
  1407. /*
  1408. * replace XAD with nXAD
  1409. */
  1410. replace: /* (nxoff == xoff) */
  1411. if (nxlen == xlen) {
  1412. /* replace XAD with nXAD:recorded */
  1413. *xad = *nxad;
  1414. xad->flag = xflag & ~XAD_NOTRECORDED;
  1415. goto coalesceRight;
  1416. } else /* (nxlen < xlen) */
  1417. goto updateLeft;
  1418. /*
  1419. * coalesce with right XAD
  1420. */
  1421. coalesceRight: /* (xoff <= nxoff) */
  1422. /* is XAD last entry of page ? */
  1423. if (newindex == nextindex) {
  1424. if (xoff == nxoff)
  1425. goto out;
  1426. goto updateRight;
  1427. }
  1428. /* is nXAD logically and physically contiguous with rXAD ? */
  1429. rxad = &p->xad[index + 1];
  1430. rxlen = lengthXAD(rxad);
  1431. if (!(rxad->flag & XAD_NOTRECORDED) &&
  1432. (nxoff + nxlen == offsetXAD(rxad)) &&
  1433. (nxaddr + nxlen == addressXAD(rxad)) &&
  1434. (rxlen + nxlen < MAXXLEN)) {
  1435. /* extend left rXAD */
  1436. XADoffset(rxad, nxoff);
  1437. XADlength(rxad, rxlen + nxlen);
  1438. XADaddress(rxad, nxaddr);
  1439. /* If we just merged two extents together, need to make sure
  1440. * the left extent gets logged. If the right one is marked
  1441. * XAD_NEW, then we know it will be logged. Otherwise, mark as
  1442. * XAD_EXTENDED
  1443. */
  1444. if (!(rxad->flag & XAD_NEW))
  1445. rxad->flag |= XAD_EXTENDED;
  1446. if (xlen > nxlen)
  1447. /* truncate XAD */
  1448. XADlength(xad, xlen - nxlen);
  1449. else { /* (xlen == nxlen) */
  1450. /* remove XAD */
  1451. memmove(&p->xad[index], &p->xad[index + 1],
  1452. (nextindex - index - 1) << L2XTSLOTSIZE);
  1453. p->header.nextindex =
  1454. cpu_to_le16(le16_to_cpu(p->header.nextindex) -
  1455. 1);
  1456. }
  1457. goto out;
  1458. } else if (xoff == nxoff)
  1459. goto out;
  1460. if (xoff >= nxoff) {
  1461. XT_PUTPAGE(mp);
  1462. jfs_error(ip->i_sb, "xoff >= nxoff\n");
  1463. return -EIO;
  1464. }
  1465. /*
  1466. * split XAD into (lXAD, nXAD):
  1467. *
  1468. * |---nXAD--->
  1469. * --|----------XAD----------|--
  1470. * |-lXAD-|
  1471. */
  1472. updateRight: /* (xoff < nxoff) */
  1473. /* truncate old XAD as lXAD:not_recorded */
  1474. xad = &p->xad[index];
  1475. XADlength(xad, nxoff - xoff);
  1476. /* insert nXAD:recorded */
  1477. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  1478. /* xtSpliUp() unpins leaf pages */
  1479. split.mp = mp;
  1480. split.index = newindex;
  1481. split.flag = xflag & ~XAD_NOTRECORDED;
  1482. split.off = nxoff;
  1483. split.len = nxlen;
  1484. split.addr = nxaddr;
  1485. split.pxdlist = NULL;
  1486. if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
  1487. return rc;
  1488. /* get back old page */
  1489. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  1490. if (rc)
  1491. return rc;
  1492. /*
  1493. * if leaf root has been split, original root has been
  1494. * copied to new child page, i.e., original entry now
  1495. * resides on the new child page;
  1496. */
  1497. if (p->header.flag & BT_INTERNAL) {
  1498. ASSERT(p->header.nextindex ==
  1499. cpu_to_le16(XTENTRYSTART + 1));
  1500. xad = &p->xad[XTENTRYSTART];
  1501. bn = addressXAD(xad);
  1502. XT_PUTPAGE(mp);
  1503. /* get new child page */
  1504. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  1505. if (rc)
  1506. return rc;
  1507. BT_MARK_DIRTY(mp, ip);
  1508. if (!test_cflag(COMMIT_Nolink, ip)) {
  1509. tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
  1510. xtlck = (struct xtlock *) & tlck->lock;
  1511. }
  1512. } else {
  1513. /* is nXAD on new page ? */
  1514. if (newindex >
  1515. (le16_to_cpu(p->header.maxentry) >> 1)) {
  1516. newindex =
  1517. newindex -
  1518. le16_to_cpu(p->header.nextindex) +
  1519. XTENTRYSTART;
  1520. newpage = 1;
  1521. }
  1522. }
  1523. } else {
  1524. /* if insert into middle, shift right remaining entries */
  1525. if (newindex < nextindex)
  1526. memmove(&p->xad[newindex + 1], &p->xad[newindex],
  1527. (nextindex - newindex) << L2XTSLOTSIZE);
  1528. /* insert the entry */
  1529. xad = &p->xad[newindex];
  1530. *xad = *nxad;
  1531. xad->flag = xflag & ~XAD_NOTRECORDED;
  1532. /* advance next available entry index. */
  1533. p->header.nextindex =
  1534. cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
  1535. }
  1536. /*
  1537. * does nXAD force 3-way split ?
  1538. *
  1539. * |---nXAD--->|
  1540. * --|----------XAD-------------|--
  1541. * |-lXAD-| |-rXAD -|
  1542. */
  1543. if (nxoff + nxlen == xoff + xlen)
  1544. goto out;
  1545. /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
  1546. if (newpage) {
  1547. /* close out old page */
  1548. if (!test_cflag(COMMIT_Nolink, ip)) {
  1549. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  1550. min(index0, (int)xtlck->lwm.offset) : index0;
  1551. xtlck->lwm.length =
  1552. le16_to_cpu(p->header.nextindex) -
  1553. xtlck->lwm.offset;
  1554. }
  1555. bn = le64_to_cpu(p->header.next);
  1556. XT_PUTPAGE(mp);
  1557. /* get new right page */
  1558. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  1559. if (rc)
  1560. return rc;
  1561. BT_MARK_DIRTY(mp, ip);
  1562. if (!test_cflag(COMMIT_Nolink, ip)) {
  1563. tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  1564. xtlck = (struct xtlock *) & tlck->lock;
  1565. }
  1566. index0 = index = newindex;
  1567. } else
  1568. index++;
  1569. newindex = index + 1;
  1570. nextindex = le16_to_cpu(p->header.nextindex);
  1571. xlen = xlen - (nxoff - xoff);
  1572. xoff = nxoff;
  1573. xaddr = nxaddr;
  1574. /* recompute split pages */
  1575. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  1576. XT_PUTPAGE(mp);
  1577. if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
  1578. return rc;
  1579. /* retrieve search result */
  1580. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
  1581. if (cmp != 0) {
  1582. XT_PUTPAGE(mp);
  1583. jfs_error(ip->i_sb, "xtSearch failed\n");
  1584. return -EIO;
  1585. }
  1586. if (index0 != index) {
  1587. XT_PUTPAGE(mp);
  1588. jfs_error(ip->i_sb, "unexpected value of index\n");
  1589. return -EIO;
  1590. }
  1591. }
  1592. /*
  1593. * split XAD into (nXAD, rXAD)
  1594. *
  1595. * ---nXAD---|
  1596. * --|----------XAD----------|--
  1597. * |-rXAD-|
  1598. */
  1599. updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */
  1600. /* update old XAD with nXAD:recorded */
  1601. xad = &p->xad[index];
  1602. *xad = *nxad;
  1603. xad->flag = xflag & ~XAD_NOTRECORDED;
  1604. /* insert rXAD:not_recorded */
  1605. xoff = xoff + nxlen;
  1606. xlen = xlen - nxlen;
  1607. xaddr = xaddr + nxlen;
  1608. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  1609. /*
  1610. printf("xtUpdate.updateLeft.split p:0x%p\n", p);
  1611. */
  1612. /* xtSpliUp() unpins leaf pages */
  1613. split.mp = mp;
  1614. split.index = newindex;
  1615. split.flag = xflag;
  1616. split.off = xoff;
  1617. split.len = xlen;
  1618. split.addr = xaddr;
  1619. split.pxdlist = NULL;
  1620. if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
  1621. return rc;
  1622. /* get back old page */
  1623. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  1624. if (rc)
  1625. return rc;
  1626. /*
  1627. * if leaf root has been split, original root has been
  1628. * copied to new child page, i.e., original entry now
  1629. * resides on the new child page;
  1630. */
  1631. if (p->header.flag & BT_INTERNAL) {
  1632. ASSERT(p->header.nextindex ==
  1633. cpu_to_le16(XTENTRYSTART + 1));
  1634. xad = &p->xad[XTENTRYSTART];
  1635. bn = addressXAD(xad);
  1636. XT_PUTPAGE(mp);
  1637. /* get new child page */
  1638. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  1639. if (rc)
  1640. return rc;
  1641. BT_MARK_DIRTY(mp, ip);
  1642. if (!test_cflag(COMMIT_Nolink, ip)) {
  1643. tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
  1644. xtlck = (struct xtlock *) & tlck->lock;
  1645. }
  1646. }
  1647. } else {
  1648. /* if insert into middle, shift right remaining entries */
  1649. if (newindex < nextindex)
  1650. memmove(&p->xad[newindex + 1], &p->xad[newindex],
  1651. (nextindex - newindex) << L2XTSLOTSIZE);
  1652. /* insert the entry */
  1653. xad = &p->xad[newindex];
  1654. XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
  1655. /* advance next available entry index. */
  1656. p->header.nextindex =
  1657. cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
  1658. }
  1659. out:
  1660. if (!test_cflag(COMMIT_Nolink, ip)) {
  1661. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  1662. min(index0, (int)xtlck->lwm.offset) : index0;
  1663. xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
  1664. xtlck->lwm.offset;
  1665. }
  1666. /* unpin the leaf page */
  1667. XT_PUTPAGE(mp);
  1668. return rc;
  1669. }
/*
 *	xtAppend()
 *
 * function: grow in append mode from contiguous region specified;
 *	inserts a single new extent at the end of the xtree, splitting
 *	index pages bottom-up as needed using pre-allocated blocks.
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag;
 *	xoff	- extent offset;
 *	maxblocks - max extent length (bounds data + index allocation);
 *	xlenp	- extent length (in/out);
 *	xaddrp	- extent address pointer (in/out);
 *	flag	- unused;
 *
 * return: 0 on success; -errno on failure (-EEXIST if an extent
 *	already maps xoff);
 */
int xtAppend(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
	     s32 * xlenp,	/* (in/out) */
	     s64 * xaddrp,	/* (in/out) */
	     int flag)
{
	int rc = 0;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn, xaddr;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int nsplit, nblocks, xlen;
	struct pxdlist pxdlist;
	pxd_t *pxd;
	s64 next;

	xaddr = *xaddrp;
	xlen = *xlenp;
	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result: leaf page pinned in mp/p */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* an exact match means xoff is already mapped */
	if (cmp == 0) {
		rc = -EEXIST;
		goto out;
	}

	/* clamp length so the new extent does not overlap the next one */
	if (next)
		xlen = min(xlen, (int)(next - xoff));
//insert:
	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex < le16_to_cpu(p->header.maxentry))
		goto insertLeaf;

	/*
	 * allocate new index blocks to cover index page split(s):
	 * one page worth of blocks per anticipated split level,
	 * carved from the front of the caller's contiguous region.
	 */
	nsplit = btstack.nsplit;
	split.pxdlist = &pxdlist;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	pxd = &pxdlist.pxd[0];
	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, nblocks);
			pxdlist.maxnpxd++;
			continue;
		}

		/* undo allocation */
		/* NOTE(review): blocks allocated by earlier loop
		 * iterations are not freed on this path — TODO confirm
		 * whether the caller/commit path reclaims them.
		 */
		goto out;
	}

	/* data extent is limited to what remains after index blocks */
	xlen = min(xlen, maxblocks);

	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	split.mp = mp;
	split.index = index;
	split.flag = xflag;
	split.off = xoff;
	split.len = xlen;
	split.addr = xaddr;
	/* xtSplitUp() inserts the entry and unpins the leaf page */
	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
		/* undo data extent allocation */
		dbFree(ip, *xaddrp, (s64) * xlenp);

		return rc;
	}

	*xaddrp = xaddr;
	*xlenp = xlen;
	return 0;

	/*
	 * insert the new entry into the leaf page
	 */
      insertLeaf:
	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) & tlck->lock;

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* extend the logged (low-water-mark) range to cover the insert */
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
	    xtlck->lwm.offset;

	*xaddrp = xaddr;
	*xlenp = xlen;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
  1815. /*
  1816. * xtInitRoot()
  1817. *
  1818. * initialize file root (inline in inode)
  1819. */
  1820. void xtInitRoot(tid_t tid, struct inode *ip)
  1821. {
  1822. xtpage_t *p;
  1823. /*
  1824. * acquire a transaction lock on the root
  1825. *
  1826. * action:
  1827. */
  1828. txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
  1829. tlckXTREE | tlckNEW);
  1830. p = &JFS_IP(ip)->i_xtroot;
  1831. p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
  1832. p->header.nextindex = cpu_to_le16(XTENTRYSTART);
  1833. if (S_ISDIR(ip->i_mode))
  1834. p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
  1835. else {
  1836. p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
  1837. ip->i_size = 0;
  1838. }
  1839. return;
  1840. }
/*
 * We can run into a deadlock truncating a file with a large number of
 * xtree pages (large fragmented file).  A robust fix would entail a
 * reservation system where we would reserve a number of metadata pages
 * and tlocks which we would be guaranteed without a deadlock.  Without
 * this, a partial fix is to limit number of metadata pages we will lock
 * in a single transaction.  Currently we will truncate the file so that
 * no more than 50 leaf pages will be locked.  The caller of xtTruncate
 * will be responsible for ensuring that the current transaction gets
 * committed, and that subsequent transactions are created to truncate
 * the file further if needed.
 */
#define MAX_TRUNCATE_LEAVES 50

/*
 *	xtTruncate()
 *
 * function:
 *	traverse for truncation logging backward bottom up;
 *	terminate at the last extent entry at the current subtree
 *	root page covering new down size.
 *	truncation may occur within the last extent entry.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	s64		newsize,
 *	int		type)	{PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
 *
 * return:
 *	new size (may be larger than newsize if the transaction had to
 *	be cut short at MAX_TRUNCATE_LEAVES); -errno on failure;
 *
 * note:
 *	PWMAP:
 *	 1. truncate (non-COMMIT_NOLINK file)
 *	    by jfs_truncate() or jfs_open(O_TRUNC):
 *	    xtree is updated;
 *	 2. truncate index table of directory when last entry removed
 *	    map update via tlock at commit time;
 *	PMAP:
 *	 Call xtTruncate_pmap instead
 *	WMAP:
 *	 1. remove (free zero link count) on last reference release
 *	    (pmap has been freed at commit zero link count);
 *	 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
 *	    xtree is updated;
 *	    map update directly at truncation time;
 *
 *	if (DELETE)
 *		no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
 *	else if (TRUNCATE)
 *		must write LOG_NOREDOPAGE for deleted index page;
 *
 *	pages may already have been tlocked by anonymous transactions
 *	during file growth (i.e., write) before truncation;
 *
 *	except last truncated entry, deleted entries remains as is
 *	in the page (nextindex is updated) for other use
 *	(e.g., log/update allocation map): this avoids copying the page
 *	info but delays free of pages;
 */
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
{
	int rc = 0;
	s64 teof;		/* truncation eof, in blocks */
	struct metapage *mp;
	xtpage_t *p;
	s64 bn;
	int index, nextindex;
	xad_t *xad;
	s64 xoff, xaddr;
	int xlen, len, freexlen;
	struct btstack btstack;
	struct btframe *parent;
	struct tblock *tblk = NULL;
	struct tlock *tlck = NULL;
	struct xtlock *xtlck = NULL;
	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
	struct pxd_lock *pxdlock;	/* maplock for COMMIT_WMAP */
	s64 nfreed;		/* total blocks freed, for quota */
	int freed, log;
	int locked_leaves = 0;

	/* save object truncation type */
	if (tid) {
		tblk = tid_to_tblock(tid);
		tblk->xflag |= flag;
	}

	nfreed = 0;

	flag &= COMMIT_MAP;
	assert(flag != COMMIT_PMAP);

	/* log == 1: COMMIT_PWMAP, free via tlocks at commit time;
	 * log == 0: COMMIT_WMAP, free the working map directly here.
	 */
	if (flag == COMMIT_PWMAP)
		log = 1;
	else {
		log = 0;
		xadlock.flag = mlckFREEXADLIST;
		xadlock.index = 1;
	}

	/*
	 * if the newsize is not an integral number of pages,
	 * the file between newsize and next page boundary will
	 * be cleared.
	 * if truncating into a file hole, it will cause
	 * a full block to be allocated for the logical block.
	 */

	/*
	 * release page blocks of truncated region <teof, eof>
	 *
	 * free the data blocks from the leaf index blocks.
	 * delete the parent index entries corresponding to
	 * the freed child data/index blocks.
	 * free the index blocks themselves which aren't needed
	 * in new sized file.
	 *
	 * index blocks are updated only if the blocks are to be
	 * retained in the new sized file.
	 * if type is PMAP, the data and index pages are NOT
	 * freed, and the data and index blocks are NOT freed
	 * from working map.
	 * (this will allow continued access of data/index of
	 * temporary file (zerolink count file truncated to zero-length)).
	 */
	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/* clear stack */
	BT_CLR(&btstack);

	/*
	 * start with root
	 *
	 * root resides in the inode
	 */
	bn = 0;

	/*
	 * first access of each page:
	 */
      getPage:
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;


	/* Since this is the rightmost page at this level, and we may have
	 * already freed a page that was formerly to the right, let's make
	 * sure that the next pointer is zero.
	 */
	if (p->header.next) {
		if (log)
			/*
			 * Make sure this change to the header is logged.
			 * If we really truncate this leaf, the flag
			 * will be changed to tlckTRUNCATE
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
		BT_MARK_DIRTY(mp, ip);
		p->header.next = 0;
	}

	if (p->header.flag & BT_INTERNAL)
		goto getChild;

	/*
	 *	leaf page
	 */
	freed = 0;

	/* does region covered by leaf page precede Teof ?
	 * if so, nothing on this leaf is truncated: keep it whole.
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	if (teof >= xoff + xlen) {
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/* (re)acquire tlock of the leaf page */
	if (log) {
		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
			/*
			 * We need to limit the size of the transaction
			 * to avoid exhausting pagecache & tlocks:
			 * stop here and report the size actually reached.
			 */
			XT_PUTPAGE(mp);
			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
			goto getParent;
		}
		tlck = txLock(tid, ip, mp, tlckXTREE);
		tlck->type = tlckXTREE | tlckTRUNCATE;
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
	}
	BT_MARK_DIRTY(mp, ip);

	/*
	 *	scan backward leaf page entries
	 */
	for (; index >= XTENTRYSTART; index--) {
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		xaddr = addressXAD(xad);

		/*
		 * The "data" for a directory is indexed by the block
		 * device's address space.  This metadata must be invalidated
		 * here
		 */
		if (S_ISDIR(ip->i_mode) && (teof == 0))
			invalidate_xad_metapages(ip, *xad);
		/*
		 * entry beyond eof: continue scan of current page
		 *          xad
		 * ---|---=======------->
		 *   eof
		 */
		if (teof < xoff) {
			nfreed += xlen;
			continue;
		}

		/*
		 * (xoff <= teof): last entry to be deleted from page;
		 * If other entries remain in page: keep and update the page.
		 */

		/*
		 * eof == entry_start: delete the entry
		 *           xad
		 * -------|=======------->
		 *       eof
		 *
		 */
		if (teof == xoff) {
			nfreed += xlen;

			/* nothing left on the page: page becomes empty */
			if (index == XTENTRYSTART)
				break;

			nextindex = index;
		}
		/*
		 * eof within the entry: truncate the entry.
		 *          xad
		 * -------===|===------->
		 *          eof
		 */
		else if (teof < xoff + xlen) {
			/* update truncated entry */
			len = teof - xoff;
			freexlen = xlen - len;
			XADlength(xad, len);

			/* save pxd of truncated extent in tlck */
			xaddr += len;
			if (log) {	/* COMMIT_PWMAP */
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(index, (int)xtlck->lwm.offset) : index;
				xtlck->lwm.length = index + 1 -
				    xtlck->lwm.offset;
				xtlck->twm.offset = index;
				pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
			}
			/* free truncated extent */
			else {	/* COMMIT_WMAP */
				pxdlock = (struct pxd_lock *) & xadlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
				txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);

				/* reset map lock */
				xadlock.flag = mlckFREEXADLIST;
			}

			/* current entry is new last entry; */
			nextindex = index + 1;

			nfreed += freexlen;
		}
		/*
		 * eof beyond the entry:
		 *          xad
		 * -------=======---|--->
		 *                 eof
		 */
		else {		/* (xoff + xlen < teof) */

			nextindex = index + 1;
		}

		/* if deleting entries, shrink nextindex; the deleted
		 * XADs stay in the page for the free-map logging below.
		 */
		if (nextindex < le16_to_cpu(p->header.nextindex)) {
			if (!log) {	/* COMMIT_WMAP */
				xadlock.xdlist = &p->xad[nextindex];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    nextindex;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			p->header.nextindex = cpu_to_le16(nextindex);
		}

		XT_PUTPAGE(mp);

		/* assert(freed == 0); */
		goto getParent;
	}			/* end scan of leaf page entries */

	freed = 1;

	/*
	 * leaf page become empty: free the page if type != PMAP
	 */
	if (log) {		/* COMMIT_PWMAP */
		/* txCommit() with tlckFREE:
		 * free data extents covered by leaf [XTENTRYSTART:hwm);
		 * invalidate leaf if COMMIT_PWMAP;
		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
		 */
		tlck->type = tlckXTREE | tlckFREE;
	} else {		/* COMMIT_WMAP */

		/* free data extents covered by leaf */
		xadlock.xdlist = &p->xad[XTENTRYSTART];
		xadlock.count =
		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
		txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
	}

	if (p->header.flag & BT_ROOT) {
		/* empty root: reset it to an empty leaf and finish */
		p->header.flag &= ~BT_INTERNAL;
		p->header.flag |= BT_LEAF;
		p->header.nextindex = cpu_to_le16(XTENTRYSTART);

		XT_PUTPAGE(mp);	/* debug */
		goto out;
	} else {
		if (log) {	/* COMMIT_PWMAP */
			/* page will be invalidated at tx completion
			 */
			XT_PUTPAGE(mp);
		} else {	/* COMMIT_WMAP */

			if (mp->lid)
				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;

			/* invalidate empty leaf page */
			discard_metapage(mp);
		}
	}

	/*
	 * the leaf page become empty: delete the parent entry
	 * for the leaf page if the parent page is to be kept
	 * in the new sized file.
	 */

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * child page was not empty:
	 */
	if (freed == 0) {
		/* has any entry deleted from parent ? */
		if (index < le16_to_cpu(p->header.nextindex) - 1) {
			/* (re)acquire tlock on the parent page */
			if (log) {	/* COMMIT_PWMAP */
				/* txCommit() with tlckTRUNCATE:
				 * free child extents covered by parent [);
				 */
				tlck = txLock(tid, ip, mp, tlckXTREE);
				xtlck = (struct xtlock *) & tlck->lock;
				if (!(tlck->type & tlckTRUNCATE)) {
					xtlck->hwm.offset =
					    le16_to_cpu(p->header.
							nextindex) - 1;
					tlck->type =
					    tlckXTREE | tlckTRUNCATE;
				}
			} else {	/* COMMIT_WMAP */

				/* free child extents covered by parent */
				xadlock.xdlist = &p->xad[index + 1];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    index - 1;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			BT_MARK_DIRTY(mp, ip);

			p->header.nextindex = cpu_to_le16(index + 1);
		}
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/*
	 * child page was empty:
	 */
	nfreed += lengthXAD(&p->xad[index]);

	/*
	 * During working map update, child page's tlock must be handled
	 * before parent's.  This is because the parent's tlock will cause
	 * the child's disk space to be marked available in the wmap, so
	 * it's important that the child page be released by that time.
	 *
	 * ToDo: tlocks should be on doubly-linked list, so we can
	 * quickly remove it and add it to the end.
	 */

	/*
	 * Move parent page's tlock to the end of the tid's tlock list
	 */
	if (log && mp->lid && (tblk->last != mp->lid) &&
	    lid_to_tlock(mp->lid)->tid) {
		lid_t lid = mp->lid;
		struct tlock *prev;

		tlck = lid_to_tlock(lid);

		/* unlink from the singly-linked tlock list ... */
		if (tblk->next == lid)
			tblk->next = tlck->next;
		else {
			for (prev = lid_to_tlock(tblk->next);
			     prev->next != lid;
			     prev = lid_to_tlock(prev->next)) {
				assert(prev->next);
			}
			prev->next = tlck->next;
		}
		/* ... and re-append at the tail */
		lid_to_tlock(tblk->last)->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}

	/*
	 * parent page become empty: free the page
	 */
	if (index == XTENTRYSTART) {
		if (log) {	/* COMMIT_PWMAP */
			/* txCommit() with tlckFREE:
			 * free child extents covered by parent;
			 * invalidate parent if COMMIT_PWMAP;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->hwm.offset =
			    le16_to_cpu(p->header.nextindex) - 1;
			tlck->type = tlckXTREE | tlckFREE;
		} else {	/* COMMIT_WMAP */

			/* free child extents covered by parent */
			xadlock.xdlist = &p->xad[XTENTRYSTART];
			xadlock.count =
			    le16_to_cpu(p->header.nextindex) -
			    XTENTRYSTART;
			txFreeMap(ip, (struct maplock *) & xadlock, NULL,
				  COMMIT_WMAP);
		}
		BT_MARK_DIRTY(mp, ip);

		if (p->header.flag & BT_ROOT) {
			/* empty root: reset it to an empty leaf */
			p->header.flag &= ~BT_INTERNAL;
			p->header.flag |= BT_LEAF;
			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
				/*
				 * Shrink root down to allow inline
				 * EA (otherwise fsck complains)
				 */
				p->header.maxentry =
				    cpu_to_le16(XTROOTINITSLOT);
				JFS_IP(ip)->mode2 |= INLINEEA;
			}

			XT_PUTPAGE(mp);	/* debug */
			goto out;
		} else {
			if (log) {	/* COMMIT_PWMAP */
				/* page will be invalidated at tx completion
				 */
				XT_PUTPAGE(mp);
			} else {	/* COMMIT_WMAP */

				if (mp->lid)
					lid_to_tlock(mp->lid)->flag |=
						tlckFREELOCK;

				/* invalidate parent page */
				discard_metapage(mp);
			}

			/* parent has become empty and freed:
			 * go back up to its parent page
			 */
			/* freed = 1; */
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else {
		/* try truncate region covered by preceding entry
		 * (process backward)
		 */
		index--;

		/* go back down to the child page corresponding
		 * to the entry
		 */
		goto getChild;
	}

	/*
	 *	internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		jfs_error(ip->i_sb, "stack overrun!\n");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:
	/*
	 * update file resource stat
	 */
	/* set size
	 */
	if (S_ISDIR(ip->i_mode) && !newsize)
		ip->i_size = 1;	/* fsck hates zero-length directories */
	else
		ip->i_size = newsize;

	/* update quota allocation to reflect freed blocks */
	dquot_free_block(ip, nfreed);

	/*
	 * free tlock of invalidated pages
	 */
	if (flag == COMMIT_WMAP)
		txFreelock(ip);

	return newsize;
}
  2366. /*
  2367. * xtTruncate_pmap()
  2368. *
  2369. * function:
  2370. * Perform truncate to zero length for deleted file, leaving the
  2371. * xtree and working map untouched. This allows the file to
  2372. * be accessed via open file handles, while the delete of the file
  2373. * is committed to disk.
  2374. *
  2375. * parameter:
  2376. * tid_t tid,
  2377. * struct inode *ip,
  2378. * s64 committed_size)
  2379. *
  2380. * return: new committed size
  2381. *
  2382. * note:
  2383. *
  2384. * To avoid deadlock by holding too many transaction locks, the
  2385. * truncation may be broken up into multiple transactions.
  2386. * The committed_size keeps track of part of the file has been
  2387. * freed from the pmaps.
  2388. */
  2389. s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
  2390. {
  2391. s64 bn;
  2392. struct btstack btstack;
  2393. int cmp;
  2394. int index;
  2395. int locked_leaves = 0;
  2396. struct metapage *mp;
  2397. xtpage_t *p;
  2398. struct btframe *parent;
  2399. int rc;
  2400. struct tblock *tblk;
  2401. struct tlock *tlck = NULL;
  2402. xad_t *xad;
  2403. int xlen;
  2404. s64 xoff;
  2405. struct xtlock *xtlck = NULL;
  2406. /* save object truncation type */
  2407. tblk = tid_to_tblock(tid);
  2408. tblk->xflag |= COMMIT_PMAP;
  2409. /* clear stack */
  2410. BT_CLR(&btstack);
  2411. if (committed_size) {
  2412. xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
  2413. rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
  2414. if (rc)
  2415. return rc;
  2416. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  2417. if (cmp != 0) {
  2418. XT_PUTPAGE(mp);
  2419. jfs_error(ip->i_sb, "did not find extent\n");
  2420. return -EIO;
  2421. }
  2422. } else {
  2423. /*
  2424. * start with root
  2425. *
  2426. * root resides in the inode
  2427. */
  2428. bn = 0;
  2429. /*
  2430. * first access of each page:
  2431. */
  2432. getPage:
  2433. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  2434. if (rc)
  2435. return rc;
  2436. /* process entries backward from last index */
  2437. index = le16_to_cpu(p->header.nextindex) - 1;
  2438. if (p->header.flag & BT_INTERNAL)
  2439. goto getChild;
  2440. }
  2441. /*
  2442. * leaf page
  2443. */
  2444. if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
  2445. /*
  2446. * We need to limit the size of the transaction
  2447. * to avoid exhausting pagecache & tlocks
  2448. */
  2449. xad = &p->xad[index];
  2450. xoff = offsetXAD(xad);
  2451. xlen = lengthXAD(xad);
  2452. XT_PUTPAGE(mp);
  2453. return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
  2454. }
  2455. tlck = txLock(tid, ip, mp, tlckXTREE);
  2456. tlck->type = tlckXTREE | tlckFREE;
  2457. xtlck = (struct xtlock *) & tlck->lock;
  2458. xtlck->hwm.offset = index;
  2459. XT_PUTPAGE(mp);
  2460. /*
  2461. * go back up to the parent page
  2462. */
  2463. getParent:
  2464. /* pop/restore parent entry for the current child page */
  2465. if ((parent = BT_POP(&btstack)) == NULL)
  2466. /* current page must have been root */
  2467. goto out;
  2468. /* get back the parent page */
  2469. bn = parent->bn;
  2470. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  2471. if (rc)
  2472. return rc;
  2473. index = parent->index;
  2474. /*
  2475. * parent page become empty: free the page
  2476. */
  2477. if (index == XTENTRYSTART) {
  2478. /* txCommit() with tlckFREE:
  2479. * free child extents covered by parent;
  2480. * invalidate parent if COMMIT_PWMAP;
  2481. */
  2482. tlck = txLock(tid, ip, mp, tlckXTREE);
  2483. xtlck = (struct xtlock *) & tlck->lock;
  2484. xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
  2485. tlck->type = tlckXTREE | tlckFREE;
  2486. XT_PUTPAGE(mp);
  2487. if (p->header.flag & BT_ROOT) {
  2488. goto out;
  2489. } else {
  2490. goto getParent;
  2491. }
  2492. }
  2493. /*
  2494. * parent page still has entries for front region;
  2495. */
  2496. else
  2497. index--;
  2498. /*
  2499. * internal page: go down to child page of current entry
  2500. */
  2501. getChild:
  2502. /* save current parent entry for the child page */
  2503. if (BT_STACK_FULL(&btstack)) {
  2504. jfs_error(ip->i_sb, "stack overrun!\n");
  2505. XT_PUTPAGE(mp);
  2506. return -EIO;
  2507. }
  2508. BT_PUSH(&btstack, bn, index);
  2509. /* get child page */
  2510. xad = &p->xad[index];
  2511. bn = addressXAD(xad);
  2512. /*
  2513. * first access of each internal entry:
  2514. */
  2515. /* release parent page */
  2516. XT_PUTPAGE(mp);
  2517. /* process the child page */
  2518. goto getPage;
  2519. out:
  2520. return 0;
  2521. }
  2522. #ifdef CONFIG_JFS_STATISTICS
/*
 * jfs_xtstat_proc_show - report xtree statistics through a seq_file
 * @m: seq_file to print into (the /proc read buffer)
 * @v: seq_file iterator cookie; unused here
 *
 * Dumps the global xtStat counters (searches, fast searches, splits)
 * accumulated by the xtree code.  Always returns 0.
 */
int jfs_xtstat_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		   "JFS Xtree statistics\n"
		   "====================\n"
		   "searches = %d\n"
		   "fast searches = %d\n"
		   "splits = %d\n",
		   xtStat.search,
		   xtStat.fastSearch,
		   xtStat.split);
	return 0;
}
  2536. #endif