stree.c 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281
  1. /*
  2. * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  3. */
  4. /*
  5. * Written by Anatoly P. Pinchuk pap@namesys.botik.ru
  6. * Programm System Institute
  7. * Pereslavl-Zalessky Russia
  8. */
  9. #include <linux/time.h>
  10. #include <linux/string.h>
  11. #include <linux/pagemap.h>
  12. #include <linux/bio.h>
  13. #include "reiserfs.h"
  14. #include <linux/buffer_head.h>
  15. #include <linux/quotaops.h>
  16. /* Does the buffer contain a disk block which is in the tree. */
  17. inline int B_IS_IN_TREE(const struct buffer_head *bh)
  18. {
  19. RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
  20. "PAP-1010: block (%b) has too big level (%z)", bh, bh);
  21. return (B_LEVEL(bh) != FREE_LEVEL);
  22. }
  23. /* to get item head in le form */
  24. inline void copy_item_head(struct item_head *to,
  25. const struct item_head *from)
  26. {
  27. memcpy(to, from, IH_SIZE);
  28. }
  29. /*
  30. * k1 is pointer to on-disk structure which is stored in little-endian
  31. * form. k2 is pointer to cpu variable. For key of items of the same
  32. * object this returns 0.
  33. * Returns: -1 if key1 < key2
  34. * 0 if key1 == key2
  35. * 1 if key1 > key2
  36. */
  37. inline int comp_short_keys(const struct reiserfs_key *le_key,
  38. const struct cpu_key *cpu_key)
  39. {
  40. __u32 n;
  41. n = le32_to_cpu(le_key->k_dir_id);
  42. if (n < cpu_key->on_disk_key.k_dir_id)
  43. return -1;
  44. if (n > cpu_key->on_disk_key.k_dir_id)
  45. return 1;
  46. n = le32_to_cpu(le_key->k_objectid);
  47. if (n < cpu_key->on_disk_key.k_objectid)
  48. return -1;
  49. if (n > cpu_key->on_disk_key.k_objectid)
  50. return 1;
  51. return 0;
  52. }
  53. /*
  54. * k1 is pointer to on-disk structure which is stored in little-endian
  55. * form. k2 is pointer to cpu variable.
  56. * Compare keys using all 4 key fields.
  57. * Returns: -1 if key1 < key2 0
  58. * if key1 = key2 1 if key1 > key2
  59. */
  60. static inline int comp_keys(const struct reiserfs_key *le_key,
  61. const struct cpu_key *cpu_key)
  62. {
  63. int retval;
  64. retval = comp_short_keys(le_key, cpu_key);
  65. if (retval)
  66. return retval;
  67. if (le_key_k_offset(le_key_version(le_key), le_key) <
  68. cpu_key_k_offset(cpu_key))
  69. return -1;
  70. if (le_key_k_offset(le_key_version(le_key), le_key) >
  71. cpu_key_k_offset(cpu_key))
  72. return 1;
  73. if (cpu_key->key_length == 3)
  74. return 0;
  75. /* this part is needed only when tail conversion is in progress */
  76. if (le_key_k_type(le_key_version(le_key), le_key) <
  77. cpu_key_k_type(cpu_key))
  78. return -1;
  79. if (le_key_k_type(le_key_version(le_key), le_key) >
  80. cpu_key_k_type(cpu_key))
  81. return 1;
  82. return 0;
  83. }
  84. inline int comp_short_le_keys(const struct reiserfs_key *key1,
  85. const struct reiserfs_key *key2)
  86. {
  87. __u32 *k1_u32, *k2_u32;
  88. int key_length = REISERFS_SHORT_KEY_LEN;
  89. k1_u32 = (__u32 *) key1;
  90. k2_u32 = (__u32 *) key2;
  91. for (; key_length--; ++k1_u32, ++k2_u32) {
  92. if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
  93. return -1;
  94. if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
  95. return 1;
  96. }
  97. return 0;
  98. }
  99. inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
  100. {
  101. int version;
  102. to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
  103. to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
  104. /* find out version of the key */
  105. version = le_key_version(from);
  106. to->version = version;
  107. to->on_disk_key.k_offset = le_key_k_offset(version, from);
  108. to->on_disk_key.k_type = le_key_k_type(version, from);
  109. }
  110. /*
  111. * this does not say which one is bigger, it only returns 1 if keys
  112. * are not equal, 0 otherwise
  113. */
  114. inline int comp_le_keys(const struct reiserfs_key *k1,
  115. const struct reiserfs_key *k2)
  116. {
  117. return memcmp(k1, k2, sizeof(struct reiserfs_key));
  118. }
  119. /**************************************************************************
  120. * Binary search toolkit function *
  121. * Search for an item in the array by the item key *
  122. * Returns: 1 if found, 0 if not found; *
  123. * *pos = number of the searched element if found, else the *
  124. * number of the first element that is larger than key. *
  125. **************************************************************************/
  126. /*
  127. * For those not familiar with binary search: lbound is the leftmost item
  128. * that it could be, rbound the rightmost item that it could be. We examine
  129. * the item halfway between lbound and rbound, and that tells us either
  130. * that we can increase lbound, or decrease rbound, or that we have found it,
  131. * or if lbound <= rbound that there are no possible items, and we have not
  132. * found it. With each examination we cut the number of possible items it
  133. * could be by one more than half rounded down, or we find it.
  134. */
  135. static inline int bin_search(const void *key, /* Key to search for. */
  136. const void *base, /* First item in the array. */
  137. int num, /* Number of items in the array. */
  138. /*
  139. * Item size in the array. searched. Lest the
  140. * reader be confused, note that this is crafted
  141. * as a general function, and when it is applied
  142. * specifically to the array of item headers in a
  143. * node, width is actually the item header size
  144. * not the item size.
  145. */
  146. int width,
  147. int *pos /* Number of the searched for element. */
  148. )
  149. {
  150. int rbound, lbound, j;
  151. for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
  152. lbound <= rbound; j = (rbound + lbound) / 2)
  153. switch (comp_keys
  154. ((struct reiserfs_key *)((char *)base + j * width),
  155. (struct cpu_key *)key)) {
  156. case -1:
  157. lbound = j + 1;
  158. continue;
  159. case 1:
  160. rbound = j - 1;
  161. continue;
  162. case 0:
  163. *pos = j;
  164. return ITEM_FOUND; /* Key found in the array. */
  165. }
  166. /*
  167. * bin_search did not find given key, it returns position of key,
  168. * that is minimal and greater than the given one.
  169. */
  170. *pos = lbound;
  171. return ITEM_NOT_FOUND;
  172. }
  173. /* Minimal possible key. It is never in the tree. */
  174. const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
  175. /* Maximal possible key. It is never in the tree. */
  176. static const struct reiserfs_key MAX_KEY = {
  177. cpu_to_le32(0xffffffff),
  178. cpu_to_le32(0xffffffff),
  179. {{cpu_to_le32(0xffffffff),
  180. cpu_to_le32(0xffffffff)},}
  181. };
  182. /*
  183. * Get delimiting key of the buffer by looking for it in the buffers in the
  184. * path, starting from the bottom of the path, and going upwards. We must
  185. * check the path's validity at each step. If the key is not in the path,
  186. * there is no delimiting key in the tree (buffer is first or last buffer
  187. * in tree), and in this case we return a special key, either MIN_KEY or
  188. * MAX_KEY.
  189. */
  190. static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
  191. const struct super_block *sb)
  192. {
  193. int position, path_offset = chk_path->path_length;
  194. struct buffer_head *parent;
  195. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
  196. "PAP-5010: invalid offset in the path");
  197. /* While not higher in path than first element. */
  198. while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
  199. RFALSE(!buffer_uptodate
  200. (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
  201. "PAP-5020: parent is not uptodate");
  202. /* Parent at the path is not in the tree now. */
  203. if (!B_IS_IN_TREE
  204. (parent =
  205. PATH_OFFSET_PBUFFER(chk_path, path_offset)))
  206. return &MAX_KEY;
  207. /* Check whether position in the parent is correct. */
  208. if ((position =
  209. PATH_OFFSET_POSITION(chk_path,
  210. path_offset)) >
  211. B_NR_ITEMS(parent))
  212. return &MAX_KEY;
  213. /* Check whether parent at the path really points to the child. */
  214. if (B_N_CHILD_NUM(parent, position) !=
  215. PATH_OFFSET_PBUFFER(chk_path,
  216. path_offset + 1)->b_blocknr)
  217. return &MAX_KEY;
  218. /*
  219. * Return delimiting key if position in the parent
  220. * is not equal to zero.
  221. */
  222. if (position)
  223. return internal_key(parent, position - 1);
  224. }
  225. /* Return MIN_KEY if we are in the root of the buffer tree. */
  226. if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
  227. b_blocknr == SB_ROOT_BLOCK(sb))
  228. return &MIN_KEY;
  229. return &MAX_KEY;
  230. }
  231. /* Get delimiting key of the buffer at the path and its right neighbor. */
  232. inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
  233. const struct super_block *sb)
  234. {
  235. int position, path_offset = chk_path->path_length;
  236. struct buffer_head *parent;
  237. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
  238. "PAP-5030: invalid offset in the path");
  239. while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
  240. RFALSE(!buffer_uptodate
  241. (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
  242. "PAP-5040: parent is not uptodate");
  243. /* Parent at the path is not in the tree now. */
  244. if (!B_IS_IN_TREE
  245. (parent =
  246. PATH_OFFSET_PBUFFER(chk_path, path_offset)))
  247. return &MIN_KEY;
  248. /* Check whether position in the parent is correct. */
  249. if ((position =
  250. PATH_OFFSET_POSITION(chk_path,
  251. path_offset)) >
  252. B_NR_ITEMS(parent))
  253. return &MIN_KEY;
  254. /*
  255. * Check whether parent at the path really points
  256. * to the child.
  257. */
  258. if (B_N_CHILD_NUM(parent, position) !=
  259. PATH_OFFSET_PBUFFER(chk_path,
  260. path_offset + 1)->b_blocknr)
  261. return &MIN_KEY;
  262. /*
  263. * Return delimiting key if position in the parent
  264. * is not the last one.
  265. */
  266. if (position != B_NR_ITEMS(parent))
  267. return internal_key(parent, position);
  268. }
  269. /* Return MAX_KEY if we are in the root of the buffer tree. */
  270. if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
  271. b_blocknr == SB_ROOT_BLOCK(sb))
  272. return &MAX_KEY;
  273. return &MIN_KEY;
  274. }
  275. /*
  276. * Check whether a key is contained in the tree rooted from a buffer at a path.
  277. * This works by looking at the left and right delimiting keys for the buffer
  278. * in the last path_element in the path. These delimiting keys are stored
  279. * at least one level above that buffer in the tree. If the buffer is the
  280. * first or last node in the tree order then one of the delimiting keys may
  281. * be absent, and in this case get_lkey and get_rkey return a special key
  282. * which is MIN_KEY or MAX_KEY.
  283. */
  284. static inline int key_in_buffer(
  285. /* Path which should be checked. */
  286. struct treepath *chk_path,
  287. /* Key which should be checked. */
  288. const struct cpu_key *key,
  289. struct super_block *sb
  290. )
  291. {
  292. RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
  293. || chk_path->path_length > MAX_HEIGHT,
  294. "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
  295. key, chk_path->path_length);
  296. RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
  297. "PAP-5060: device must not be NODEV");
  298. if (comp_keys(get_lkey(chk_path, sb), key) == 1)
  299. /* left delimiting key is bigger, that the key we look for */
  300. return 0;
  301. /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
  302. if (comp_keys(get_rkey(chk_path, sb), key) != 1)
  303. /* key must be less than right delimitiing key */
  304. return 0;
  305. return 1;
  306. }
  307. int reiserfs_check_path(struct treepath *p)
  308. {
  309. RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
  310. "path not properly relsed");
  311. return 0;
  312. }
  313. /*
  314. * Drop the reference to each buffer in a path and restore
  315. * dirty bits clean when preparing the buffer for the log.
  316. * This version should only be called from fix_nodes()
  317. */
  318. void pathrelse_and_restore(struct super_block *sb,
  319. struct treepath *search_path)
  320. {
  321. int path_offset = search_path->path_length;
  322. RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
  323. "clm-4000: invalid path offset");
  324. while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
  325. struct buffer_head *bh;
  326. bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
  327. reiserfs_restore_prepared_buffer(sb, bh);
  328. brelse(bh);
  329. }
  330. search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
  331. }
  332. /* Drop the reference to each buffer in a path */
  333. void pathrelse(struct treepath *search_path)
  334. {
  335. int path_offset = search_path->path_length;
  336. RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
  337. "PAP-5090: invalid path offset");
  338. while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
  339. brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
  340. search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
  341. }
  342. static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
  343. {
  344. struct reiserfs_de_head *deh;
  345. int i;
  346. deh = B_I_DEH(bh, ih);
  347. for (i = 0; i < ih_entry_count(ih); i++) {
  348. if (deh_location(&deh[i]) > ih_item_len(ih)) {
  349. reiserfs_warning(NULL, "reiserfs-5094",
  350. "directory entry location seems wrong %h",
  351. &deh[i]);
  352. return 0;
  353. }
  354. }
  355. return 1;
  356. }
  357. static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
  358. {
  359. struct block_head *blkh;
  360. struct item_head *ih;
  361. int used_space;
  362. int prev_location;
  363. int i;
  364. int nr;
  365. blkh = (struct block_head *)buf;
  366. if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
  367. reiserfs_warning(NULL, "reiserfs-5080",
  368. "this should be caught earlier");
  369. return 0;
  370. }
  371. nr = blkh_nr_item(blkh);
  372. if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
  373. /* item number is too big or too small */
  374. reiserfs_warning(NULL, "reiserfs-5081",
  375. "nr_item seems wrong: %z", bh);
  376. return 0;
  377. }
  378. ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
  379. used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
  380. /* free space does not match to calculated amount of use space */
  381. if (used_space != blocksize - blkh_free_space(blkh)) {
  382. reiserfs_warning(NULL, "reiserfs-5082",
  383. "free space seems wrong: %z", bh);
  384. return 0;
  385. }
  386. /*
  387. * FIXME: it is_leaf will hit performance too much - we may have
  388. * return 1 here
  389. */
  390. /* check tables of item heads */
  391. ih = (struct item_head *)(buf + BLKH_SIZE);
  392. prev_location = blocksize;
  393. for (i = 0; i < nr; i++, ih++) {
  394. if (le_ih_k_type(ih) == TYPE_ANY) {
  395. reiserfs_warning(NULL, "reiserfs-5083",
  396. "wrong item type for item %h",
  397. ih);
  398. return 0;
  399. }
  400. if (ih_location(ih) >= blocksize
  401. || ih_location(ih) < IH_SIZE * nr) {
  402. reiserfs_warning(NULL, "reiserfs-5084",
  403. "item location seems wrong: %h",
  404. ih);
  405. return 0;
  406. }
  407. if (ih_item_len(ih) < 1
  408. || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
  409. reiserfs_warning(NULL, "reiserfs-5085",
  410. "item length seems wrong: %h",
  411. ih);
  412. return 0;
  413. }
  414. if (prev_location - ih_location(ih) != ih_item_len(ih)) {
  415. reiserfs_warning(NULL, "reiserfs-5086",
  416. "item location seems wrong "
  417. "(second one): %h", ih);
  418. return 0;
  419. }
  420. if (is_direntry_le_ih(ih)) {
  421. if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
  422. reiserfs_warning(NULL, "reiserfs-5093",
  423. "item entry count seems wrong %h",
  424. ih);
  425. return 0;
  426. }
  427. return has_valid_deh_location(bh, ih);
  428. }
  429. prev_location = ih_location(ih);
  430. }
  431. /* one may imagine many more checks */
  432. return 1;
  433. }
  434. /* returns 1 if buf looks like an internal node, 0 otherwise */
  435. static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
  436. {
  437. struct block_head *blkh;
  438. int nr;
  439. int used_space;
  440. blkh = (struct block_head *)buf;
  441. nr = blkh_level(blkh);
  442. if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
  443. /* this level is not possible for internal nodes */
  444. reiserfs_warning(NULL, "reiserfs-5087",
  445. "this should be caught earlier");
  446. return 0;
  447. }
  448. nr = blkh_nr_item(blkh);
  449. /* for internal which is not root we might check min number of keys */
  450. if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
  451. reiserfs_warning(NULL, "reiserfs-5088",
  452. "number of key seems wrong: %z", bh);
  453. return 0;
  454. }
  455. used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
  456. if (used_space != blocksize - blkh_free_space(blkh)) {
  457. reiserfs_warning(NULL, "reiserfs-5089",
  458. "free space seems wrong: %z", bh);
  459. return 0;
  460. }
  461. /* one may imagine many more checks */
  462. return 1;
  463. }
  464. /*
  465. * make sure that bh contains formatted node of reiserfs tree of
  466. * 'level'-th level
  467. */
  468. static int is_tree_node(struct buffer_head *bh, int level)
  469. {
  470. if (B_LEVEL(bh) != level) {
  471. reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
  472. "not match to the expected one %d",
  473. B_LEVEL(bh), level);
  474. return 0;
  475. }
  476. if (level == DISK_LEAF_NODE_LEVEL)
  477. return is_leaf(bh->b_data, bh->b_size, bh);
  478. return is_internal(bh->b_data, bh->b_size, bh);
  479. }
  480. #define SEARCH_BY_KEY_READA 16
  481. /*
  482. * The function is NOT SCHEDULE-SAFE!
  483. * It might unlock the write lock if we needed to wait for a block
  484. * to be read. Note that in this case it won't recover the lock to avoid
  485. * high contention resulting from too much lock requests, especially
  486. * the caller (search_by_key) will perform other schedule-unsafe
  487. * operations just after calling this function.
  488. *
  489. * @return depth of lock to be restored after read completes
  490. */
  491. static int search_by_key_reada(struct super_block *s,
  492. struct buffer_head **bh,
  493. b_blocknr_t *b, int num)
  494. {
  495. int i, j;
  496. int depth = -1;
  497. for (i = 0; i < num; i++) {
  498. bh[i] = sb_getblk(s, b[i]);
  499. }
  500. /*
  501. * We are going to read some blocks on which we
  502. * have a reference. It's safe, though we might be
  503. * reading blocks concurrently changed if we release
  504. * the lock. But it's still fine because we check later
  505. * if the tree changed
  506. */
  507. for (j = 0; j < i; j++) {
  508. /*
  509. * note, this needs attention if we are getting rid of the BKL
  510. * you have to make sure the prepared bit isn't set on this
  511. * buffer
  512. */
  513. if (!buffer_uptodate(bh[j])) {
  514. if (depth == -1)
  515. depth = reiserfs_write_unlock_nested(s);
  516. bh_readahead(bh[j], REQ_RAHEAD);
  517. }
  518. brelse(bh[j]);
  519. }
  520. return depth;
  521. }
  522. /*
  523. * This function fills up the path from the root to the leaf as it
  524. * descends the tree looking for the key. It uses reiserfs_bread to
  525. * try to find buffers in the cache given their block number. If it
  526. * does not find them in the cache it reads them from disk. For each
  527. * node search_by_key finds using reiserfs_bread it then uses
  528. * bin_search to look through that node. bin_search will find the
  529. * position of the block_number of the next node if it is looking
  530. * through an internal node. If it is looking through a leaf node
  531. * bin_search will find the position of the item which has key either
  532. * equal to given key, or which is the maximal key less than the given
  533. * key. search_by_key returns a path that must be checked for the
  534. * correctness of the top of the path but need not be checked for the
  535. * correctness of the bottom of the path
  536. */
  537. /*
  538. * search_by_key - search for key (and item) in stree
  539. * @sb: superblock
  540. * @key: pointer to key to search for
  541. * @search_path: Allocated and initialized struct treepath; Returned filled
  542. * on success.
  543. * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
  544. * stop at leaf level.
  545. *
  546. * The function is NOT SCHEDULE-SAFE!
  547. */
  548. int search_by_key(struct super_block *sb, const struct cpu_key *key,
  549. struct treepath *search_path, int stop_level)
  550. {
  551. b_blocknr_t block_number;
  552. int expected_level;
  553. struct buffer_head *bh;
  554. struct path_element *last_element;
  555. int node_level, retval;
  556. int fs_gen;
  557. struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
  558. b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
  559. int reada_count = 0;
  560. #ifdef CONFIG_REISERFS_CHECK
  561. int repeat_counter = 0;
  562. #endif
  563. PROC_INFO_INC(sb, search_by_key);
  564. /*
  565. * As we add each node to a path we increase its count. This means
  566. * that we must be careful to release all nodes in a path before we
  567. * either discard the path struct or re-use the path struct, as we
  568. * do here.
  569. */
  570. pathrelse(search_path);
  571. /*
  572. * With each iteration of this loop we search through the items in the
  573. * current node, and calculate the next current node(next path element)
  574. * for the next iteration of this loop..
  575. */
  576. block_number = SB_ROOT_BLOCK(sb);
  577. expected_level = -1;
  578. while (1) {
  579. #ifdef CONFIG_REISERFS_CHECK
  580. if (!(++repeat_counter % 50000))
  581. reiserfs_warning(sb, "PAP-5100",
  582. "%s: there were %d iterations of "
  583. "while loop looking for key %K",
  584. current->comm, repeat_counter,
  585. key);
  586. #endif
  587. /* prep path to have another element added to it. */
  588. last_element =
  589. PATH_OFFSET_PELEMENT(search_path,
  590. ++search_path->path_length);
  591. fs_gen = get_generation(sb);
  592. /*
  593. * Read the next tree node, and set the last element
  594. * in the path to have a pointer to it.
  595. */
  596. if ((bh = last_element->pe_buffer =
  597. sb_getblk(sb, block_number))) {
  598. /*
  599. * We'll need to drop the lock if we encounter any
  600. * buffers that need to be read. If all of them are
  601. * already up to date, we don't need to drop the lock.
  602. */
  603. int depth = -1;
  604. if (!buffer_uptodate(bh) && reada_count > 1)
  605. depth = search_by_key_reada(sb, reada_bh,
  606. reada_blocks, reada_count);
  607. if (!buffer_uptodate(bh) && depth == -1)
  608. depth = reiserfs_write_unlock_nested(sb);
  609. bh_read_nowait(bh, 0);
  610. wait_on_buffer(bh);
  611. if (depth != -1)
  612. reiserfs_write_lock_nested(sb, depth);
  613. if (!buffer_uptodate(bh))
  614. goto io_error;
  615. } else {
  616. io_error:
  617. search_path->path_length--;
  618. pathrelse(search_path);
  619. return IO_ERROR;
  620. }
  621. reada_count = 0;
  622. if (expected_level == -1)
  623. expected_level = SB_TREE_HEIGHT(sb);
  624. expected_level--;
  625. /*
  626. * It is possible that schedule occurred. We must check
  627. * whether the key to search is still in the tree rooted
  628. * from the current buffer. If not then repeat search
  629. * from the root.
  630. */
  631. if (fs_changed(fs_gen, sb) &&
  632. (!B_IS_IN_TREE(bh) ||
  633. B_LEVEL(bh) != expected_level ||
  634. !key_in_buffer(search_path, key, sb))) {
  635. PROC_INFO_INC(sb, search_by_key_fs_changed);
  636. PROC_INFO_INC(sb, search_by_key_restarted);
  637. PROC_INFO_INC(sb,
  638. sbk_restarted[expected_level - 1]);
  639. pathrelse(search_path);
  640. /*
  641. * Get the root block number so that we can
  642. * repeat the search starting from the root.
  643. */
  644. block_number = SB_ROOT_BLOCK(sb);
  645. expected_level = -1;
  646. /* repeat search from the root */
  647. continue;
  648. }
  649. /*
  650. * only check that the key is in the buffer if key is not
  651. * equal to the MAX_KEY. Latter case is only possible in
  652. * "finish_unfinished()" processing during mount.
  653. */
  654. RFALSE(comp_keys(&MAX_KEY, key) &&
  655. !key_in_buffer(search_path, key, sb),
  656. "PAP-5130: key is not in the buffer");
  657. #ifdef CONFIG_REISERFS_CHECK
  658. if (REISERFS_SB(sb)->cur_tb) {
  659. print_cur_tb("5140");
  660. reiserfs_panic(sb, "PAP-5140",
  661. "schedule occurred in do_balance!");
  662. }
  663. #endif
  664. /*
  665. * make sure, that the node contents look like a node of
  666. * certain level
  667. */
  668. if (!is_tree_node(bh, expected_level)) {
  669. reiserfs_error(sb, "vs-5150",
  670. "invalid format found in block %ld. "
  671. "Fsck?", bh->b_blocknr);
  672. pathrelse(search_path);
  673. return IO_ERROR;
  674. }
  675. /* ok, we have acquired next formatted node in the tree */
  676. node_level = B_LEVEL(bh);
  677. PROC_INFO_BH_STAT(sb, bh, node_level - 1);
  678. RFALSE(node_level < stop_level,
  679. "vs-5152: tree level (%d) is less than stop level (%d)",
  680. node_level, stop_level);
  681. retval = bin_search(key, item_head(bh, 0),
  682. B_NR_ITEMS(bh),
  683. (node_level ==
  684. DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
  685. KEY_SIZE,
  686. &last_element->pe_position);
  687. if (node_level == stop_level) {
  688. return retval;
  689. }
  690. /* we are not in the stop level */
  691. /*
  692. * item has been found, so we choose the pointer which
  693. * is to the right of the found one
  694. */
  695. if (retval == ITEM_FOUND)
  696. last_element->pe_position++;
  697. /*
  698. * if item was not found we choose the position which is to
  699. * the left of the found item. This requires no code,
  700. * bin_search did it already.
  701. */
  702. /*
  703. * So we have chosen a position in the current node which is
  704. * an internal node. Now we calculate child block number by
  705. * position in the node.
  706. */
  707. block_number =
  708. B_N_CHILD_NUM(bh, last_element->pe_position);
  709. /*
  710. * if we are going to read leaf nodes, try for read
  711. * ahead as well
  712. */
  713. if ((search_path->reada & PATH_READA) &&
  714. node_level == DISK_LEAF_NODE_LEVEL + 1) {
  715. int pos = last_element->pe_position;
  716. int limit = B_NR_ITEMS(bh);
  717. struct reiserfs_key *le_key;
  718. if (search_path->reada & PATH_READA_BACK)
  719. limit = 0;
  720. while (reada_count < SEARCH_BY_KEY_READA) {
  721. if (pos == limit)
  722. break;
  723. reada_blocks[reada_count++] =
  724. B_N_CHILD_NUM(bh, pos);
  725. if (search_path->reada & PATH_READA_BACK)
  726. pos--;
  727. else
  728. pos++;
  729. /*
  730. * check to make sure we're in the same object
  731. */
  732. le_key = internal_key(bh, pos);
  733. if (le32_to_cpu(le_key->k_objectid) !=
  734. key->on_disk_key.k_objectid) {
  735. break;
  736. }
  737. }
  738. }
  739. }
  740. }
  741. /*
  742. * Form the path to an item and position in this item which contains
  743. * file byte defined by key. If there is no such item
  744. * corresponding to the key, we point the path to the item with
  745. * maximal key less than key, and *pos_in_item is set to one
  746. * past the last entry/byte in the item. If searching for entry in a
  747. * directory item, and it is not found, *pos_in_item is set to one
  748. * entry more than the entry with maximal key which is less than the
  749. * sought key.
  750. *
  751. * Note that if there is no entry in this same node which is one more,
  752. * then we point to an imaginary entry. for direct items, the
  753. * position is in units of bytes, for indirect items the position is
  754. * in units of blocknr entries, for directory items the position is in
  755. * units of directory entries.
  756. */
  757. /* The function is NOT SCHEDULE-SAFE! */
  758. int search_for_position_by_key(struct super_block *sb,
  759. /* Key to search (cpu variable) */
  760. const struct cpu_key *p_cpu_key,
  761. /* Filled up by this function. */
  762. struct treepath *search_path)
  763. {
  764. struct item_head *p_le_ih; /* pointer to on-disk structure */
  765. int blk_size;
  766. loff_t item_offset, offset;
  767. struct reiserfs_dir_entry de;
  768. int retval;
  769. /* If searching for directory entry. */
  770. if (is_direntry_cpu_key(p_cpu_key))
  771. return search_by_entry_key(sb, p_cpu_key, search_path,
  772. &de);
  773. /* If not searching for directory entry. */
  774. /* If item is found. */
  775. retval = search_item(sb, p_cpu_key, search_path);
  776. if (retval == IO_ERROR)
  777. return retval;
  778. if (retval == ITEM_FOUND) {
  779. RFALSE(!ih_item_len
  780. (item_head
  781. (PATH_PLAST_BUFFER(search_path),
  782. PATH_LAST_POSITION(search_path))),
  783. "PAP-5165: item length equals zero");
  784. pos_in_item(search_path) = 0;
  785. return POSITION_FOUND;
  786. }
  787. RFALSE(!PATH_LAST_POSITION(search_path),
  788. "PAP-5170: position equals zero");
  789. /* Item is not found. Set path to the previous item. */
  790. p_le_ih =
  791. item_head(PATH_PLAST_BUFFER(search_path),
  792. --PATH_LAST_POSITION(search_path));
  793. blk_size = sb->s_blocksize;
  794. if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
  795. return FILE_NOT_FOUND;
  796. /* FIXME: quite ugly this far */
  797. item_offset = le_ih_k_offset(p_le_ih);
  798. offset = cpu_key_k_offset(p_cpu_key);
  799. /* Needed byte is contained in the item pointed to by the path. */
  800. if (item_offset <= offset &&
  801. item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
  802. pos_in_item(search_path) = offset - item_offset;
  803. if (is_indirect_le_ih(p_le_ih)) {
  804. pos_in_item(search_path) /= blk_size;
  805. }
  806. return POSITION_FOUND;
  807. }
  808. /*
  809. * Needed byte is not contained in the item pointed to by the
  810. * path. Set pos_in_item out of the item.
  811. */
  812. if (is_indirect_le_ih(p_le_ih))
  813. pos_in_item(search_path) =
  814. ih_item_len(p_le_ih) / UNFM_P_SIZE;
  815. else
  816. pos_in_item(search_path) = ih_item_len(p_le_ih);
  817. return POSITION_NOT_FOUND;
  818. }
  819. /* Compare given item and item pointed to by the path. */
  820. int comp_items(const struct item_head *stored_ih, const struct treepath *path)
  821. {
  822. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  823. struct item_head *ih;
  824. /* Last buffer at the path is not in the tree. */
  825. if (!B_IS_IN_TREE(bh))
  826. return 1;
  827. /* Last path position is invalid. */
  828. if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
  829. return 1;
  830. /* we need only to know, whether it is the same item */
  831. ih = tp_item_head(path);
  832. return memcmp(stored_ih, ih, IH_SIZE);
  833. }
  834. /* prepare for delete or cut of direct item */
  835. static inline int prepare_for_direct_item(struct treepath *path,
  836. struct item_head *le_ih,
  837. struct inode *inode,
  838. loff_t new_file_length, int *cut_size)
  839. {
  840. loff_t round_len;
  841. if (new_file_length == max_reiserfs_offset(inode)) {
  842. /* item has to be deleted */
  843. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  844. return M_DELETE;
  845. }
  846. /* new file gets truncated */
  847. if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
  848. round_len = ROUND_UP(new_file_length);
  849. /* this was new_file_length < le_ih ... */
  850. if (round_len < le_ih_k_offset(le_ih)) {
  851. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  852. return M_DELETE; /* Delete this item. */
  853. }
  854. /* Calculate first position and size for cutting from item. */
  855. pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
  856. *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
  857. return M_CUT; /* Cut from this item. */
  858. }
  859. /* old file: items may have any length */
  860. if (new_file_length < le_ih_k_offset(le_ih)) {
  861. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  862. return M_DELETE; /* Delete this item. */
  863. }
  864. /* Calculate first position and size for cutting from item. */
  865. *cut_size = -(ih_item_len(le_ih) -
  866. (pos_in_item(path) =
  867. new_file_length + 1 - le_ih_k_offset(le_ih)));
  868. return M_CUT; /* Cut from this item. */
  869. }
  870. static inline int prepare_for_direntry_item(struct treepath *path,
  871. struct item_head *le_ih,
  872. struct inode *inode,
  873. loff_t new_file_length,
  874. int *cut_size)
  875. {
  876. if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
  877. new_file_length == max_reiserfs_offset(inode)) {
  878. RFALSE(ih_entry_count(le_ih) != 2,
  879. "PAP-5220: incorrect empty directory item (%h)", le_ih);
  880. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  881. /* Delete the directory item containing "." and ".." entry. */
  882. return M_DELETE;
  883. }
  884. if (ih_entry_count(le_ih) == 1) {
  885. /*
  886. * Delete the directory item such as there is one record only
  887. * in this item
  888. */
  889. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  890. return M_DELETE;
  891. }
  892. /* Cut one record from the directory item. */
  893. *cut_size =
  894. -(DEH_SIZE +
  895. entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
  896. return M_CUT;
  897. }
  898. #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
  899. /*
  900. * If the path points to a directory or direct item, calculate mode
  901. * and the size cut, for balance.
  902. * If the path points to an indirect item, remove some number of its
  903. * unformatted nodes.
  904. * In case of file truncate calculate whether this item must be
  905. * deleted/truncated or last unformatted node of this item will be
  906. * converted to a direct item.
  907. * This function returns a determination of what balance mode the
  908. * calling function should employ.
  909. */
  910. static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
  911. struct inode *inode,
  912. struct treepath *path,
  913. const struct cpu_key *item_key,
  914. /*
  915. * Number of unformatted nodes
  916. * which were removed from end
  917. * of the file.
  918. */
  919. int *removed,
  920. int *cut_size,
  921. /* MAX_KEY_OFFSET in case of delete. */
  922. unsigned long long new_file_length
  923. )
  924. {
  925. struct super_block *sb = inode->i_sb;
  926. struct item_head *p_le_ih = tp_item_head(path);
  927. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  928. BUG_ON(!th->t_trans_id);
  929. /* Stat_data item. */
  930. if (is_statdata_le_ih(p_le_ih)) {
  931. RFALSE(new_file_length != max_reiserfs_offset(inode),
  932. "PAP-5210: mode must be M_DELETE");
  933. *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
  934. return M_DELETE;
  935. }
  936. /* Directory item. */
  937. if (is_direntry_le_ih(p_le_ih))
  938. return prepare_for_direntry_item(path, p_le_ih, inode,
  939. new_file_length,
  940. cut_size);
  941. /* Direct item. */
  942. if (is_direct_le_ih(p_le_ih))
  943. return prepare_for_direct_item(path, p_le_ih, inode,
  944. new_file_length, cut_size);
  945. /* Case of an indirect item. */
  946. {
  947. int blk_size = sb->s_blocksize;
  948. struct item_head s_ih;
  949. int need_re_search;
  950. int delete = 0;
  951. int result = M_CUT;
  952. int pos = 0;
  953. if ( new_file_length == max_reiserfs_offset (inode) ) {
  954. /*
  955. * prepare_for_delete_or_cut() is called by
  956. * reiserfs_delete_item()
  957. */
  958. new_file_length = 0;
  959. delete = 1;
  960. }
  961. do {
  962. need_re_search = 0;
  963. *cut_size = 0;
  964. bh = PATH_PLAST_BUFFER(path);
  965. copy_item_head(&s_ih, tp_item_head(path));
  966. pos = I_UNFM_NUM(&s_ih);
  967. while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
  968. __le32 *unfm;
  969. __u32 block;
  970. /*
  971. * Each unformatted block deletion may involve
  972. * one additional bitmap block into the transaction,
  973. * thereby the initial journal space reservation
  974. * might not be enough.
  975. */
  976. if (!delete && (*cut_size) != 0 &&
  977. reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
  978. break;
  979. unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
  980. block = get_block_num(unfm, 0);
  981. if (block != 0) {
  982. reiserfs_prepare_for_journal(sb, bh, 1);
  983. put_block_num(unfm, 0, 0);
  984. journal_mark_dirty(th, bh);
  985. reiserfs_free_block(th, inode, block, 1);
  986. }
  987. reiserfs_cond_resched(sb);
  988. if (item_moved (&s_ih, path)) {
  989. need_re_search = 1;
  990. break;
  991. }
  992. pos --;
  993. (*removed)++;
  994. (*cut_size) -= UNFM_P_SIZE;
  995. if (pos == 0) {
  996. (*cut_size) -= IH_SIZE;
  997. result = M_DELETE;
  998. break;
  999. }
  1000. }
  1001. /*
  1002. * a trick. If the buffer has been logged, this will
  1003. * do nothing. If we've broken the loop without logging
  1004. * it, it will restore the buffer
  1005. */
  1006. reiserfs_restore_prepared_buffer(sb, bh);
  1007. } while (need_re_search &&
  1008. search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
  1009. pos_in_item(path) = pos * UNFM_P_SIZE;
  1010. if (*cut_size == 0) {
  1011. /*
  1012. * Nothing was cut. maybe convert last unformatted node to the
  1013. * direct item?
  1014. */
  1015. result = M_CONVERT;
  1016. }
  1017. return result;
  1018. }
  1019. }
  1020. /* Calculate number of bytes which will be deleted or cut during balance */
  1021. static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
  1022. {
  1023. int del_size;
  1024. struct item_head *p_le_ih = tp_item_head(tb->tb_path);
  1025. if (is_statdata_le_ih(p_le_ih))
  1026. return 0;
  1027. del_size =
  1028. (mode ==
  1029. M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
  1030. if (is_direntry_le_ih(p_le_ih)) {
  1031. /*
  1032. * return EMPTY_DIR_SIZE; We delete emty directories only.
  1033. * we can't use EMPTY_DIR_SIZE, as old format dirs have a
  1034. * different empty size. ick. FIXME, is this right?
  1035. */
  1036. return del_size;
  1037. }
  1038. if (is_indirect_le_ih(p_le_ih))
  1039. del_size = (del_size / UNFM_P_SIZE) *
  1040. (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
  1041. return del_size;
  1042. }
  1043. static void init_tb_struct(struct reiserfs_transaction_handle *th,
  1044. struct tree_balance *tb,
  1045. struct super_block *sb,
  1046. struct treepath *path, int size)
  1047. {
  1048. BUG_ON(!th->t_trans_id);
  1049. memset(tb, '\0', sizeof(struct tree_balance));
  1050. tb->transaction_handle = th;
  1051. tb->tb_sb = sb;
  1052. tb->tb_path = path;
  1053. PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
  1054. PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
  1055. tb->insert_size[0] = size;
  1056. }
  1057. void padd_item(char *item, int total_length, int length)
  1058. {
  1059. int i;
  1060. for (i = total_length; i > length;)
  1061. item[--i] = 0;
  1062. }
  1063. #ifdef REISERQUOTA_DEBUG
  1064. char key2type(struct reiserfs_key *ih)
  1065. {
  1066. if (is_direntry_le_key(2, ih))
  1067. return 'd';
  1068. if (is_direct_le_key(2, ih))
  1069. return 'D';
  1070. if (is_indirect_le_key(2, ih))
  1071. return 'i';
  1072. if (is_statdata_le_key(2, ih))
  1073. return 's';
  1074. return 'u';
  1075. }
  1076. char head2type(struct item_head *ih)
  1077. {
  1078. if (is_direntry_le_ih(ih))
  1079. return 'd';
  1080. if (is_direct_le_ih(ih))
  1081. return 'D';
  1082. if (is_indirect_le_ih(ih))
  1083. return 'i';
  1084. if (is_statdata_le_ih(ih))
  1085. return 's';
  1086. return 'u';
  1087. }
  1088. #endif
  1089. /*
  1090. * Delete object item.
  1091. * th - active transaction handle
  1092. * path - path to the deleted item
  1093. * item_key - key to search for the deleted item
  1094. * indode - used for updating i_blocks and quotas
  1095. * un_bh - NULL or unformatted node pointer
  1096. */
  1097. int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
  1098. struct treepath *path, const struct cpu_key *item_key,
  1099. struct inode *inode, struct buffer_head *un_bh)
  1100. {
  1101. struct super_block *sb = inode->i_sb;
  1102. struct tree_balance s_del_balance;
  1103. struct item_head s_ih;
  1104. struct item_head *q_ih;
  1105. int quota_cut_bytes;
  1106. int ret_value, del_size, removed;
  1107. int depth;
  1108. #ifdef CONFIG_REISERFS_CHECK
  1109. char mode;
  1110. int iter = 0;
  1111. #endif
  1112. BUG_ON(!th->t_trans_id);
  1113. init_tb_struct(th, &s_del_balance, sb, path,
  1114. 0 /*size is unknown */ );
  1115. while (1) {
  1116. removed = 0;
  1117. #ifdef CONFIG_REISERFS_CHECK
  1118. iter++;
  1119. mode =
  1120. #endif
  1121. prepare_for_delete_or_cut(th, inode, path,
  1122. item_key, &removed,
  1123. &del_size,
  1124. max_reiserfs_offset(inode));
  1125. RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
  1126. copy_item_head(&s_ih, tp_item_head(path));
  1127. s_del_balance.insert_size[0] = del_size;
  1128. ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
  1129. if (ret_value != REPEAT_SEARCH)
  1130. break;
  1131. PROC_INFO_INC(sb, delete_item_restarted);
  1132. /* file system changed, repeat search */
  1133. ret_value =
  1134. search_for_position_by_key(sb, item_key, path);
  1135. if (ret_value == IO_ERROR)
  1136. break;
  1137. if (ret_value == FILE_NOT_FOUND) {
  1138. reiserfs_warning(sb, "vs-5340",
  1139. "no items of the file %K found",
  1140. item_key);
  1141. break;
  1142. }
  1143. } /* while (1) */
  1144. if (ret_value != CARRY_ON) {
  1145. unfix_nodes(&s_del_balance);
  1146. return 0;
  1147. }
  1148. /* reiserfs_delete_item returns item length when success */
  1149. ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
  1150. q_ih = tp_item_head(path);
  1151. quota_cut_bytes = ih_item_len(q_ih);
  1152. /*
  1153. * hack so the quota code doesn't have to guess if the file has a
  1154. * tail. On tail insert, we allocate quota for 1 unformatted node.
  1155. * We test the offset because the tail might have been
  1156. * split into multiple items, and we only want to decrement for
  1157. * the unfm node once
  1158. */
  1159. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
  1160. if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
  1161. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1162. } else {
  1163. quota_cut_bytes = 0;
  1164. }
  1165. }
  1166. if (un_bh) {
  1167. int off;
  1168. char *data;
  1169. /*
  1170. * We are in direct2indirect conversion, so move tail contents
  1171. * to the unformatted node
  1172. */
  1173. /*
  1174. * note, we do the copy before preparing the buffer because we
  1175. * don't care about the contents of the unformatted node yet.
  1176. * the only thing we really care about is the direct item's
  1177. * data is in the unformatted node.
  1178. *
  1179. * Otherwise, we would have to call
  1180. * reiserfs_prepare_for_journal on the unformatted node,
  1181. * which might schedule, meaning we'd have to loop all the
  1182. * way back up to the start of the while loop.
  1183. *
  1184. * The unformatted node must be dirtied later on. We can't be
  1185. * sure here if the entire tail has been deleted yet.
  1186. *
  1187. * un_bh is from the page cache (all unformatted nodes are
  1188. * from the page cache) and might be a highmem page. So, we
  1189. * can't use un_bh->b_data.
  1190. * -clm
  1191. */
  1192. data = kmap_atomic(un_bh->b_page);
  1193. off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
  1194. memcpy(data + off,
  1195. ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
  1196. ret_value);
  1197. kunmap_atomic(data);
  1198. }
  1199. /* Perform balancing after all resources have been collected at once. */
  1200. do_balance(&s_del_balance, NULL, NULL, M_DELETE);
  1201. #ifdef REISERQUOTA_DEBUG
  1202. reiserfs_debug(sb, REISERFS_DEBUG_CODE,
  1203. "reiserquota delete_item(): freeing %u, id=%u type=%c",
  1204. quota_cut_bytes, inode->i_uid, head2type(&s_ih));
  1205. #endif
  1206. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1207. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1208. reiserfs_write_lock_nested(inode->i_sb, depth);
  1209. /* Return deleted body length */
  1210. return ret_value;
  1211. }
  1212. /*
  1213. * Summary Of Mechanisms For Handling Collisions Between Processes:
  1214. *
  1215. * deletion of the body of the object is performed by iput(), with the
  1216. * result that if multiple processes are operating on a file, the
  1217. * deletion of the body of the file is deferred until the last process
  1218. * that has an open inode performs its iput().
  1219. *
  1220. * writes and truncates are protected from collisions by use of
  1221. * semaphores.
  1222. *
  1223. * creates, linking, and mknod are protected from collisions with other
  1224. * processes by making the reiserfs_add_entry() the last step in the
  1225. * creation, and then rolling back all changes if there was a collision.
  1226. * - Hans
  1227. */
  1228. /* this deletes item which never gets split */
  1229. void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
  1230. struct inode *inode, struct reiserfs_key *key)
  1231. {
  1232. struct super_block *sb = th->t_super;
  1233. struct tree_balance tb;
  1234. INITIALIZE_PATH(path);
  1235. int item_len = 0;
  1236. int tb_init = 0;
  1237. struct cpu_key cpu_key;
  1238. int retval;
  1239. int quota_cut_bytes = 0;
  1240. BUG_ON(!th->t_trans_id);
  1241. le_key2cpu_key(&cpu_key, key);
  1242. while (1) {
  1243. retval = search_item(th->t_super, &cpu_key, &path);
  1244. if (retval == IO_ERROR) {
  1245. reiserfs_error(th->t_super, "vs-5350",
  1246. "i/o failure occurred trying "
  1247. "to delete %K", &cpu_key);
  1248. break;
  1249. }
  1250. if (retval != ITEM_FOUND) {
  1251. pathrelse(&path);
  1252. /*
  1253. * No need for a warning, if there is just no free
  1254. * space to insert '..' item into the
  1255. * newly-created subdir
  1256. */
  1257. if (!
  1258. ((unsigned long long)
  1259. GET_HASH_VALUE(le_key_k_offset
  1260. (le_key_version(key), key)) == 0
  1261. && (unsigned long long)
  1262. GET_GENERATION_NUMBER(le_key_k_offset
  1263. (le_key_version(key),
  1264. key)) == 1))
  1265. reiserfs_warning(th->t_super, "vs-5355",
  1266. "%k not found", key);
  1267. break;
  1268. }
  1269. if (!tb_init) {
  1270. tb_init = 1;
  1271. item_len = ih_item_len(tp_item_head(&path));
  1272. init_tb_struct(th, &tb, th->t_super, &path,
  1273. -(IH_SIZE + item_len));
  1274. }
  1275. quota_cut_bytes = ih_item_len(tp_item_head(&path));
  1276. retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
  1277. if (retval == REPEAT_SEARCH) {
  1278. PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
  1279. continue;
  1280. }
  1281. if (retval == CARRY_ON) {
  1282. do_balance(&tb, NULL, NULL, M_DELETE);
  1283. /*
  1284. * Should we count quota for item? (we don't
  1285. * count quotas for save-links)
  1286. */
  1287. if (inode) {
  1288. int depth;
  1289. #ifdef REISERQUOTA_DEBUG
  1290. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  1291. "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
  1292. quota_cut_bytes, inode->i_uid,
  1293. key2type(key));
  1294. #endif
  1295. depth = reiserfs_write_unlock_nested(sb);
  1296. dquot_free_space_nodirty(inode,
  1297. quota_cut_bytes);
  1298. reiserfs_write_lock_nested(sb, depth);
  1299. }
  1300. break;
  1301. }
  1302. /* IO_ERROR, NO_DISK_SPACE, etc */
  1303. reiserfs_warning(th->t_super, "vs-5360",
  1304. "could not delete %K due to fix_nodes failure",
  1305. &cpu_key);
  1306. unfix_nodes(&tb);
  1307. break;
  1308. }
  1309. reiserfs_check_path(&path);
  1310. }
  1311. int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
  1312. struct inode *inode)
  1313. {
  1314. int err;
  1315. inode->i_size = 0;
  1316. BUG_ON(!th->t_trans_id);
  1317. /* for directory this deletes item containing "." and ".." */
  1318. err =
  1319. reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
  1320. if (err)
  1321. return err;
  1322. #if defined( USE_INODE_GENERATION_COUNTER )
  1323. if (!old_format_only(th->t_super)) {
  1324. __le32 *inode_generation;
  1325. inode_generation =
  1326. &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
  1327. le32_add_cpu(inode_generation, 1);
  1328. }
  1329. /* USE_INODE_GENERATION_COUNTER */
  1330. #endif
  1331. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1332. return err;
  1333. }
  1334. static void unmap_buffers(struct page *page, loff_t pos)
  1335. {
  1336. struct buffer_head *bh;
  1337. struct buffer_head *head;
  1338. struct buffer_head *next;
  1339. unsigned long tail_index;
  1340. unsigned long cur_index;
  1341. if (page) {
  1342. if (page_has_buffers(page)) {
  1343. tail_index = pos & (PAGE_SIZE - 1);
  1344. cur_index = 0;
  1345. head = page_buffers(page);
  1346. bh = head;
  1347. do {
  1348. next = bh->b_this_page;
  1349. /*
  1350. * we want to unmap the buffers that contain
  1351. * the tail, and all the buffers after it
  1352. * (since the tail must be at the end of the
  1353. * file). We don't want to unmap file data
  1354. * before the tail, since it might be dirty
  1355. * and waiting to reach disk
  1356. */
  1357. cur_index += bh->b_size;
  1358. if (cur_index > tail_index) {
  1359. reiserfs_unmap_buffer(bh);
  1360. }
  1361. bh = next;
  1362. } while (bh != head);
  1363. }
  1364. }
  1365. }
  1366. static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
  1367. struct inode *inode,
  1368. struct page *page,
  1369. struct treepath *path,
  1370. const struct cpu_key *item_key,
  1371. loff_t new_file_size, char *mode)
  1372. {
  1373. struct super_block *sb = inode->i_sb;
  1374. int block_size = sb->s_blocksize;
  1375. int cut_bytes;
  1376. BUG_ON(!th->t_trans_id);
  1377. BUG_ON(new_file_size != inode->i_size);
  1378. /*
  1379. * the page being sent in could be NULL if there was an i/o error
  1380. * reading in the last block. The user will hit problems trying to
  1381. * read the file, but for now we just skip the indirect2direct
  1382. */
  1383. if (atomic_read(&inode->i_count) > 1 ||
  1384. !tail_has_to_be_packed(inode) ||
  1385. !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
  1386. /* leave tail in an unformatted node */
  1387. *mode = M_SKIP_BALANCING;
  1388. cut_bytes =
  1389. block_size - (new_file_size & (block_size - 1));
  1390. pathrelse(path);
  1391. return cut_bytes;
  1392. }
  1393. /* Perform the conversion to a direct_item. */
  1394. return indirect2direct(th, inode, page, path, item_key,
  1395. new_file_size, mode);
  1396. }
  1397. /*
  1398. * we did indirect_to_direct conversion. And we have inserted direct
  1399. * item successesfully, but there were no disk space to cut unfm
  1400. * pointer being converted. Therefore we have to delete inserted
  1401. * direct item(s)
  1402. */
  1403. static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
  1404. struct inode *inode, struct treepath *path)
  1405. {
  1406. struct cpu_key tail_key;
  1407. int tail_len;
  1408. int removed;
  1409. BUG_ON(!th->t_trans_id);
  1410. make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
  1411. tail_key.key_length = 4;
  1412. tail_len =
  1413. (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
  1414. while (tail_len) {
  1415. /* look for the last byte of the tail */
  1416. if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
  1417. POSITION_NOT_FOUND)
  1418. reiserfs_panic(inode->i_sb, "vs-5615",
  1419. "found invalid item");
  1420. RFALSE(path->pos_in_item !=
  1421. ih_item_len(tp_item_head(path)) - 1,
  1422. "vs-5616: appended bytes found");
  1423. PATH_LAST_POSITION(path)--;
  1424. removed =
  1425. reiserfs_delete_item(th, path, &tail_key, inode,
  1426. NULL /*unbh not needed */ );
  1427. RFALSE(removed <= 0
  1428. || removed > tail_len,
  1429. "vs-5617: there was tail %d bytes, removed item length %d bytes",
  1430. tail_len, removed);
  1431. tail_len -= removed;
  1432. set_cpu_key_k_offset(&tail_key,
  1433. cpu_key_k_offset(&tail_key) - removed);
  1434. }
  1435. reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
  1436. "conversion has been rolled back due to "
  1437. "lack of disk space");
  1438. mark_inode_dirty(inode);
  1439. }
  1440. /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
  1441. int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
  1442. struct treepath *path,
  1443. struct cpu_key *item_key,
  1444. struct inode *inode,
  1445. struct page *page, loff_t new_file_size)
  1446. {
  1447. struct super_block *sb = inode->i_sb;
  1448. /*
  1449. * Every function which is going to call do_balance must first
  1450. * create a tree_balance structure. Then it must fill up this
  1451. * structure by using the init_tb_struct and fix_nodes functions.
  1452. * After that we can make tree balancing.
  1453. */
  1454. struct tree_balance s_cut_balance;
  1455. struct item_head *p_le_ih;
  1456. int cut_size = 0; /* Amount to be cut. */
  1457. int ret_value = CARRY_ON;
  1458. int removed = 0; /* Number of the removed unformatted nodes. */
  1459. int is_inode_locked = 0;
  1460. char mode; /* Mode of the balance. */
  1461. int retval2 = -1;
  1462. int quota_cut_bytes;
  1463. loff_t tail_pos = 0;
  1464. int depth;
  1465. BUG_ON(!th->t_trans_id);
  1466. init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
  1467. cut_size);
  1468. /*
  1469. * Repeat this loop until we either cut the item without needing
  1470. * to balance, or we fix_nodes without schedule occurring
  1471. */
  1472. while (1) {
  1473. /*
  1474. * Determine the balance mode, position of the first byte to
  1475. * be cut, and size to be cut. In case of the indirect item
  1476. * free unformatted nodes which are pointed to by the cut
  1477. * pointers.
  1478. */
  1479. mode =
  1480. prepare_for_delete_or_cut(th, inode, path,
  1481. item_key, &removed,
  1482. &cut_size, new_file_size);
  1483. if (mode == M_CONVERT) {
  1484. /*
  1485. * convert last unformatted node to direct item or
  1486. * leave tail in the unformatted node
  1487. */
  1488. RFALSE(ret_value != CARRY_ON,
  1489. "PAP-5570: can not convert twice");
  1490. ret_value =
  1491. maybe_indirect_to_direct(th, inode, page,
  1492. path, item_key,
  1493. new_file_size, &mode);
  1494. if (mode == M_SKIP_BALANCING)
  1495. /* tail has been left in the unformatted node */
  1496. return ret_value;
  1497. is_inode_locked = 1;
  1498. /*
  1499. * removing of last unformatted node will
  1500. * change value we have to return to truncate.
  1501. * Save it
  1502. */
  1503. retval2 = ret_value;
  1504. /*
  1505. * So, we have performed the first part of the
  1506. * conversion:
  1507. * inserting the new direct item. Now we are
  1508. * removing the last unformatted node pointer.
  1509. * Set key to search for it.
  1510. */
  1511. set_cpu_key_k_type(item_key, TYPE_INDIRECT);
  1512. item_key->key_length = 4;
  1513. new_file_size -=
  1514. (new_file_size & (sb->s_blocksize - 1));
  1515. tail_pos = new_file_size;
  1516. set_cpu_key_k_offset(item_key, new_file_size + 1);
  1517. if (search_for_position_by_key
  1518. (sb, item_key,
  1519. path) == POSITION_NOT_FOUND) {
  1520. print_block(PATH_PLAST_BUFFER(path), 3,
  1521. PATH_LAST_POSITION(path) - 1,
  1522. PATH_LAST_POSITION(path) + 1);
  1523. reiserfs_panic(sb, "PAP-5580", "item to "
  1524. "convert does not exist (%K)",
  1525. item_key);
  1526. }
  1527. continue;
  1528. }
  1529. if (cut_size == 0) {
  1530. pathrelse(path);
  1531. return 0;
  1532. }
  1533. s_cut_balance.insert_size[0] = cut_size;
  1534. ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
  1535. if (ret_value != REPEAT_SEARCH)
  1536. break;
  1537. PROC_INFO_INC(sb, cut_from_item_restarted);
  1538. ret_value =
  1539. search_for_position_by_key(sb, item_key, path);
  1540. if (ret_value == POSITION_FOUND)
  1541. continue;
  1542. reiserfs_warning(sb, "PAP-5610", "item %K not found",
  1543. item_key);
  1544. unfix_nodes(&s_cut_balance);
  1545. return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
  1546. } /* while */
  1547. /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
  1548. if (ret_value != CARRY_ON) {
  1549. if (is_inode_locked) {
  1550. /*
  1551. * FIXME: this seems to be not needed: we are always
  1552. * able to cut item
  1553. */
  1554. indirect_to_direct_roll_back(th, inode, path);
  1555. }
  1556. if (ret_value == NO_DISK_SPACE)
  1557. reiserfs_warning(sb, "reiserfs-5092",
  1558. "NO_DISK_SPACE");
  1559. unfix_nodes(&s_cut_balance);
  1560. return -EIO;
  1561. }
  1562. /* go ahead and perform balancing */
  1563. RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
  1564. /* Calculate number of bytes that need to be cut from the item. */
  1565. quota_cut_bytes =
  1566. (mode ==
  1567. M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
  1568. insert_size[0];
  1569. if (retval2 == -1)
  1570. ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
  1571. else
  1572. ret_value = retval2;
  1573. /*
  1574. * For direct items, we only change the quota when deleting the last
  1575. * item.
  1576. */
  1577. p_le_ih = tp_item_head(s_cut_balance.tb_path);
  1578. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
  1579. if (mode == M_DELETE &&
  1580. (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
  1581. 1) {
  1582. /* FIXME: this is to keep 3.5 happy */
  1583. REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
  1584. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1585. } else {
  1586. quota_cut_bytes = 0;
  1587. }
  1588. }
  1589. #ifdef CONFIG_REISERFS_CHECK
  1590. if (is_inode_locked) {
  1591. struct item_head *le_ih =
  1592. tp_item_head(s_cut_balance.tb_path);
  1593. /*
  1594. * we are going to complete indirect2direct conversion. Make
  1595. * sure, that we exactly remove last unformatted node pointer
  1596. * of the item
  1597. */
  1598. if (!is_indirect_le_ih(le_ih))
  1599. reiserfs_panic(sb, "vs-5652",
  1600. "item must be indirect %h", le_ih);
  1601. if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
  1602. reiserfs_panic(sb, "vs-5653", "completing "
  1603. "indirect2direct conversion indirect "
  1604. "item %h being deleted must be of "
  1605. "4 byte long", le_ih);
  1606. if (mode == M_CUT
  1607. && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
  1608. reiserfs_panic(sb, "vs-5654", "can not complete "
  1609. "indirect2direct conversion of %h "
  1610. "(CUT, insert_size==%d)",
  1611. le_ih, s_cut_balance.insert_size[0]);
  1612. }
  1613. /*
  1614. * it would be useful to make sure, that right neighboring
  1615. * item is direct item of this file
  1616. */
  1617. }
  1618. #endif
  1619. do_balance(&s_cut_balance, NULL, NULL, mode);
  1620. if (is_inode_locked) {
  1621. /*
  1622. * we've done an indirect->direct conversion. when the
  1623. * data block was freed, it was removed from the list of
  1624. * blocks that must be flushed before the transaction
  1625. * commits, make sure to unmap and invalidate it
  1626. */
  1627. unmap_buffers(page, tail_pos);
  1628. REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
  1629. }
  1630. #ifdef REISERQUOTA_DEBUG
  1631. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1632. "reiserquota cut_from_item(): freeing %u id=%u type=%c",
  1633. quota_cut_bytes, inode->i_uid, '?');
  1634. #endif
  1635. depth = reiserfs_write_unlock_nested(sb);
  1636. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1637. reiserfs_write_lock_nested(sb, depth);
  1638. return ret_value;
  1639. }
  1640. static void truncate_directory(struct reiserfs_transaction_handle *th,
  1641. struct inode *inode)
  1642. {
  1643. BUG_ON(!th->t_trans_id);
  1644. if (inode->i_nlink)
  1645. reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
  1646. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
  1647. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
  1648. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1649. reiserfs_update_sd(th, inode);
  1650. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
  1651. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
  1652. }
  1653. /*
  1654. * Truncate file to the new size. Note, this must be called with a
  1655. * transaction already started
  1656. */
  1657. int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
  1658. struct inode *inode, /* ->i_size contains new size */
  1659. struct page *page, /* up to date for last block */
  1660. /*
  1661. * when it is called by file_release to convert
  1662. * the tail - no timestamps should be updated
  1663. */
  1664. int update_timestamps
  1665. )
  1666. {
  1667. INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
  1668. struct item_head *p_le_ih; /* Pointer to an item header. */
  1669. /* Key to search for a previous file item. */
  1670. struct cpu_key s_item_key;
  1671. loff_t file_size, /* Old file size. */
  1672. new_file_size; /* New file size. */
  1673. int deleted; /* Number of deleted or truncated bytes. */
  1674. int retval;
  1675. int err = 0;
  1676. BUG_ON(!th->t_trans_id);
  1677. if (!
  1678. (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
  1679. || S_ISLNK(inode->i_mode)))
  1680. return 0;
  1681. /* deletion of directory - no need to update timestamps */
  1682. if (S_ISDIR(inode->i_mode)) {
  1683. truncate_directory(th, inode);
  1684. return 0;
  1685. }
  1686. /* Get new file size. */
  1687. new_file_size = inode->i_size;
  1688. /* FIXME: note, that key type is unimportant here */
  1689. make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
  1690. TYPE_DIRECT, 3);
  1691. retval =
  1692. search_for_position_by_key(inode->i_sb, &s_item_key,
  1693. &s_search_path);
  1694. if (retval == IO_ERROR) {
  1695. reiserfs_error(inode->i_sb, "vs-5657",
  1696. "i/o failure occurred trying to truncate %K",
  1697. &s_item_key);
  1698. err = -EIO;
  1699. goto out;
  1700. }
  1701. if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
  1702. reiserfs_error(inode->i_sb, "PAP-5660",
  1703. "wrong result %d of search for %K", retval,
  1704. &s_item_key);
  1705. err = -EIO;
  1706. goto out;
  1707. }
  1708. s_search_path.pos_in_item--;
  1709. /* Get real file size (total length of all file items) */
  1710. p_le_ih = tp_item_head(&s_search_path);
  1711. if (is_statdata_le_ih(p_le_ih))
  1712. file_size = 0;
  1713. else {
  1714. loff_t offset = le_ih_k_offset(p_le_ih);
  1715. int bytes =
  1716. op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
  1717. /*
  1718. * this may mismatch with real file size: if last direct item
  1719. * had no padding zeros and last unformatted node had no free
  1720. * space, this file would have this file size
  1721. */
  1722. file_size = offset + bytes - 1;
  1723. }
  1724. /*
  1725. * are we doing a full truncate or delete, if so
  1726. * kick in the reada code
  1727. */
  1728. if (new_file_size == 0)
  1729. s_search_path.reada = PATH_READA | PATH_READA_BACK;
  1730. if (file_size == 0 || file_size < new_file_size) {
  1731. goto update_and_out;
  1732. }
  1733. /* Update key to search for the last file item. */
  1734. set_cpu_key_k_offset(&s_item_key, file_size);
  1735. do {
  1736. /* Cut or delete file item. */
  1737. deleted =
  1738. reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
  1739. inode, page, new_file_size);
  1740. if (deleted < 0) {
  1741. reiserfs_warning(inode->i_sb, "vs-5665",
  1742. "reiserfs_cut_from_item failed");
  1743. reiserfs_check_path(&s_search_path);
  1744. return 0;
  1745. }
  1746. RFALSE(deleted > file_size,
  1747. "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
  1748. deleted, file_size, &s_item_key);
  1749. /* Change key to search the last file item. */
  1750. file_size -= deleted;
  1751. set_cpu_key_k_offset(&s_item_key, file_size);
  1752. /*
  1753. * While there are bytes to truncate and previous
  1754. * file item is presented in the tree.
  1755. */
  1756. /*
  1757. * This loop could take a really long time, and could log
  1758. * many more blocks than a transaction can hold. So, we do
  1759. * a polite journal end here, and if the transaction needs
  1760. * ending, we make sure the file is consistent before ending
  1761. * the current trans and starting a new one
  1762. */
  1763. if (journal_transaction_should_end(th, 0) ||
  1764. reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
  1765. pathrelse(&s_search_path);
  1766. if (update_timestamps) {
  1767. inode->i_mtime = current_time(inode);
  1768. inode->i_ctime = current_time(inode);
  1769. }
  1770. reiserfs_update_sd(th, inode);
  1771. err = journal_end(th);
  1772. if (err)
  1773. goto out;
  1774. err = journal_begin(th, inode->i_sb,
  1775. JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
  1776. if (err)
  1777. goto out;
  1778. reiserfs_update_inode_transaction(inode);
  1779. }
  1780. } while (file_size > ROUND_UP(new_file_size) &&
  1781. search_for_position_by_key(inode->i_sb, &s_item_key,
  1782. &s_search_path) == POSITION_FOUND);
  1783. RFALSE(file_size > ROUND_UP(new_file_size),
  1784. "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
  1785. new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
  1786. update_and_out:
  1787. if (update_timestamps) {
  1788. /* this is truncate, not file closing */
  1789. inode->i_mtime = current_time(inode);
  1790. inode->i_ctime = current_time(inode);
  1791. }
  1792. reiserfs_update_sd(th, inode);
  1793. out:
  1794. pathrelse(&s_search_path);
  1795. return err;
  1796. }
  1797. #ifdef CONFIG_REISERFS_CHECK
  1798. /* this makes sure, that we __append__, not overwrite or add holes */
  1799. static void check_research_for_paste(struct treepath *path,
  1800. const struct cpu_key *key)
  1801. {
  1802. struct item_head *found_ih = tp_item_head(path);
  1803. if (is_direct_le_ih(found_ih)) {
  1804. if (le_ih_k_offset(found_ih) +
  1805. op_bytes_number(found_ih,
  1806. get_last_bh(path)->b_size) !=
  1807. cpu_key_k_offset(key)
  1808. || op_bytes_number(found_ih,
  1809. get_last_bh(path)->b_size) !=
  1810. pos_in_item(path))
  1811. reiserfs_panic(NULL, "PAP-5720", "found direct item "
  1812. "%h or position (%d) does not match "
  1813. "to key %K", found_ih,
  1814. pos_in_item(path), key);
  1815. }
  1816. if (is_indirect_le_ih(found_ih)) {
  1817. if (le_ih_k_offset(found_ih) +
  1818. op_bytes_number(found_ih,
  1819. get_last_bh(path)->b_size) !=
  1820. cpu_key_k_offset(key)
  1821. || I_UNFM_NUM(found_ih) != pos_in_item(path)
  1822. || get_ih_free_space(found_ih) != 0)
  1823. reiserfs_panic(NULL, "PAP-5730", "found indirect "
  1824. "item (%h) or position (%d) does not "
  1825. "match to key (%K)",
  1826. found_ih, pos_in_item(path), key);
  1827. }
  1828. }
  1829. #endif /* config reiserfs check */
  1830. /*
  1831. * Paste bytes to the existing item.
  1832. * Returns bytes number pasted into the item.
  1833. */
  1834. int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
  1835. /* Path to the pasted item. */
  1836. struct treepath *search_path,
  1837. /* Key to search for the needed item. */
  1838. const struct cpu_key *key,
  1839. /* Inode item belongs to */
  1840. struct inode *inode,
  1841. /* Pointer to the bytes to paste. */
  1842. const char *body,
  1843. /* Size of pasted bytes. */
  1844. int pasted_size)
  1845. {
  1846. struct super_block *sb = inode->i_sb;
  1847. struct tree_balance s_paste_balance;
  1848. int retval;
  1849. int fs_gen;
  1850. int depth;
  1851. BUG_ON(!th->t_trans_id);
  1852. fs_gen = get_generation(inode->i_sb);
  1853. #ifdef REISERQUOTA_DEBUG
  1854. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1855. "reiserquota paste_into_item(): allocating %u id=%u type=%c",
  1856. pasted_size, inode->i_uid,
  1857. key2type(&key->on_disk_key));
  1858. #endif
  1859. depth = reiserfs_write_unlock_nested(sb);
  1860. retval = dquot_alloc_space_nodirty(inode, pasted_size);
  1861. reiserfs_write_lock_nested(sb, depth);
  1862. if (retval) {
  1863. pathrelse(search_path);
  1864. return retval;
  1865. }
  1866. init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
  1867. pasted_size);
  1868. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1869. s_paste_balance.key = key->on_disk_key;
  1870. #endif
  1871. /* DQUOT_* can schedule, must check before the fix_nodes */
  1872. if (fs_changed(fs_gen, inode->i_sb)) {
  1873. goto search_again;
  1874. }
  1875. while ((retval =
  1876. fix_nodes(M_PASTE, &s_paste_balance, NULL,
  1877. body)) == REPEAT_SEARCH) {
  1878. search_again:
  1879. /* file system changed while we were in the fix_nodes */
  1880. PROC_INFO_INC(th->t_super, paste_into_item_restarted);
  1881. retval =
  1882. search_for_position_by_key(th->t_super, key,
  1883. search_path);
  1884. if (retval == IO_ERROR) {
  1885. retval = -EIO;
  1886. goto error_out;
  1887. }
  1888. if (retval == POSITION_FOUND) {
  1889. reiserfs_warning(inode->i_sb, "PAP-5710",
  1890. "entry or pasted byte (%K) exists",
  1891. key);
  1892. retval = -EEXIST;
  1893. goto error_out;
  1894. }
  1895. #ifdef CONFIG_REISERFS_CHECK
  1896. check_research_for_paste(search_path, key);
  1897. #endif
  1898. }
  1899. /*
  1900. * Perform balancing after all resources are collected by fix_nodes,
  1901. * and accessing them will not risk triggering schedule.
  1902. */
  1903. if (retval == CARRY_ON) {
  1904. do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
  1905. return 0;
  1906. }
  1907. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  1908. error_out:
  1909. /* this also releases the path */
  1910. unfix_nodes(&s_paste_balance);
  1911. #ifdef REISERQUOTA_DEBUG
  1912. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1913. "reiserquota paste_into_item(): freeing %u id=%u type=%c",
  1914. pasted_size, inode->i_uid,
  1915. key2type(&key->on_disk_key));
  1916. #endif
  1917. depth = reiserfs_write_unlock_nested(sb);
  1918. dquot_free_space_nodirty(inode, pasted_size);
  1919. reiserfs_write_lock_nested(sb, depth);
  1920. return retval;
  1921. }
  1922. /*
  1923. * Insert new item into the buffer at the path.
  1924. * th - active transaction handle
  1925. * path - path to the inserted item
  1926. * ih - pointer to the item header to insert
  1927. * body - pointer to the bytes to insert
  1928. */
  1929. int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
  1930. struct treepath *path, const struct cpu_key *key,
  1931. struct item_head *ih, struct inode *inode,
  1932. const char *body)
  1933. {
  1934. struct tree_balance s_ins_balance;
  1935. int retval;
  1936. int fs_gen = 0;
  1937. int quota_bytes = 0;
  1938. BUG_ON(!th->t_trans_id);
  1939. if (inode) { /* Do we count quotas for item? */
  1940. int depth;
  1941. fs_gen = get_generation(inode->i_sb);
  1942. quota_bytes = ih_item_len(ih);
  1943. /*
  1944. * hack so the quota code doesn't have to guess
  1945. * if the file has a tail, links are always tails,
  1946. * so there's no guessing needed
  1947. */
  1948. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
  1949. quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
  1950. #ifdef REISERQUOTA_DEBUG
  1951. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1952. "reiserquota insert_item(): allocating %u id=%u type=%c",
  1953. quota_bytes, inode->i_uid, head2type(ih));
  1954. #endif
  1955. /*
  1956. * We can't dirty inode here. It would be immediately
  1957. * written but appropriate stat item isn't inserted yet...
  1958. */
  1959. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1960. retval = dquot_alloc_space_nodirty(inode, quota_bytes);
  1961. reiserfs_write_lock_nested(inode->i_sb, depth);
  1962. if (retval) {
  1963. pathrelse(path);
  1964. return retval;
  1965. }
  1966. }
  1967. init_tb_struct(th, &s_ins_balance, th->t_super, path,
  1968. IH_SIZE + ih_item_len(ih));
  1969. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1970. s_ins_balance.key = key->on_disk_key;
  1971. #endif
  1972. /*
  1973. * DQUOT_* can schedule, must check to be sure calling
  1974. * fix_nodes is safe
  1975. */
  1976. if (inode && fs_changed(fs_gen, inode->i_sb)) {
  1977. goto search_again;
  1978. }
  1979. while ((retval =
  1980. fix_nodes(M_INSERT, &s_ins_balance, ih,
  1981. body)) == REPEAT_SEARCH) {
  1982. search_again:
  1983. /* file system changed while we were in the fix_nodes */
  1984. PROC_INFO_INC(th->t_super, insert_item_restarted);
  1985. retval = search_item(th->t_super, key, path);
  1986. if (retval == IO_ERROR) {
  1987. retval = -EIO;
  1988. goto error_out;
  1989. }
  1990. if (retval == ITEM_FOUND) {
  1991. reiserfs_warning(th->t_super, "PAP-5760",
  1992. "key %K already exists in the tree",
  1993. key);
  1994. retval = -EEXIST;
  1995. goto error_out;
  1996. }
  1997. }
  1998. /* make balancing after all resources will be collected at a time */
  1999. if (retval == CARRY_ON) {
  2000. do_balance(&s_ins_balance, ih, body, M_INSERT);
  2001. return 0;
  2002. }
  2003. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  2004. error_out:
  2005. /* also releases the path */
  2006. unfix_nodes(&s_ins_balance);
  2007. #ifdef REISERQUOTA_DEBUG
  2008. if (inode)
  2009. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  2010. "reiserquota insert_item(): freeing %u id=%u type=%c",
  2011. quota_bytes, inode->i_uid, head2type(ih));
  2012. #endif
  2013. if (inode) {
  2014. int depth = reiserfs_write_unlock_nested(inode->i_sb);
  2015. dquot_free_space_nodirty(inode, quota_bytes);
  2016. reiserfs_write_lock_nested(inode->i_sb, depth);
  2017. }
  2018. return retval;
  2019. }