// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Squashfs - a compressed read only filesystem for Linux
 *
 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
 * Phillip Lougher <[email protected]>
 *
 * file.c
 */

/*
 * This file contains code for handling regular files.  A regular file
 * consists of a sequence of contiguous compressed blocks, and/or a
 * compressed fragment block (tail-end packed block).  The compressed size
 * of each datablock is stored in a block list contained within the
 * file inode (itself stored in one or more compressed metadata blocks).
 *
 * To speed up access to datablocks when reading 'large' files (256 Mbytes or
 * larger), the code implements an index cache that caches the mapping from
 * block index to datablock location on disk.
 *
 * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while
 * retaining a simple and space-efficient block list on disk.  The cache
 * is split into slots, caching up to eight 224 GiB files (128 KiB blocks).
 * Larger files use multiple slots, with 1.75 TiB files using all 8 slots.
 * The index cache is designed to be memory efficient, and by default uses
 * 16 KiB.
 */
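
/*
 * A sketch of where the figures above come from, assuming the default
 * constants in squashfs_fs.h (SQUASHFS_META_SLOTS == 8,
 * SQUASHFS_META_ENTRIES == 127, SQUASHFS_META_INDEXES ==
 * SQUASHFS_METADATA_SIZE / sizeof(unsigned int) == 2048, and a maximum
 * skip of SQUASHFS_CACHED_BLKS - 1 == 7) with 128 KiB blocks:
 *
 *	one entry maps up to skip * 2048 blocks = 7 * 2048 * 128 KiB = 1.75 GiB
 *	one slot holds ~128 entries             = 128 * 1.75 GiB     = 224 GiB
 *	eight slots cover                         8 * 224 GiB        = 1.75 TiB
 *
 * Memory cost is SQUASHFS_META_SLOTS * sizeof(struct meta_index), roughly
 * 8 * 2 KiB = 16 KiB, allocated lazily on the first 'large' file read.
 */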

#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "page_actor.h"

/*
 * Locate cache slot in range [offset, index] for specified inode.  If
 * there's more than one return the slot closest to index.
 */
static struct meta_index *locate_meta_index(struct inode *inode, int offset,
				int index)
{
	struct meta_index *meta = NULL;
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	int i;

	mutex_lock(&msblk->meta_index_mutex);

	TRACE("locate_meta_index: index %d, offset %d\n", index, offset);

	if (msblk->meta_index == NULL)
		goto not_allocated;

	for (i = 0; i < SQUASHFS_META_SLOTS; i++) {
		if (msblk->meta_index[i].inode_number == inode->i_ino &&
				msblk->meta_index[i].offset >= offset &&
				msblk->meta_index[i].offset <= index &&
				msblk->meta_index[i].locked == 0) {
			TRACE("locate_meta_index: entry %d, offset %d\n", i,
					msblk->meta_index[i].offset);
			meta = &msblk->meta_index[i];
			offset = meta->offset;
		}
	}

	if (meta)
		meta->locked = 1;

not_allocated:
	mutex_unlock(&msblk->meta_index_mutex);

	return meta;
}

/*
 * Find and initialise an empty cache slot for index offset.
 */
static struct meta_index *empty_meta_index(struct inode *inode, int offset,
				int skip)
{
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	struct meta_index *meta = NULL;
	int i;

	mutex_lock(&msblk->meta_index_mutex);

	TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip);

	if (msblk->meta_index == NULL) {
		/*
		 * First time cache index has been used, allocate and
		 * initialise.  The cache index could be allocated at
		 * mount time but doing it here means it is allocated only
		 * if a 'large' file is read.
		 */
		msblk->meta_index = kcalloc(SQUASHFS_META_SLOTS,
			sizeof(*(msblk->meta_index)), GFP_KERNEL);
		if (msblk->meta_index == NULL) {
			ERROR("Failed to allocate meta_index\n");
			goto failed;
		}
		for (i = 0; i < SQUASHFS_META_SLOTS; i++) {
			msblk->meta_index[i].inode_number = 0;
			msblk->meta_index[i].locked = 0;
		}
		msblk->next_meta_index = 0;
	}

	for (i = SQUASHFS_META_SLOTS; i &&
			msblk->meta_index[msblk->next_meta_index].locked; i--)
		msblk->next_meta_index = (msblk->next_meta_index + 1) %
			SQUASHFS_META_SLOTS;

	if (i == 0) {
		TRACE("empty_meta_index: failed!\n");
		goto failed;
	}

	TRACE("empty_meta_index: returned meta entry %d, %p\n",
			msblk->next_meta_index,
			&msblk->meta_index[msblk->next_meta_index]);

	meta = &msblk->meta_index[msblk->next_meta_index];
	msblk->next_meta_index = (msblk->next_meta_index + 1) %
			SQUASHFS_META_SLOTS;

	meta->inode_number = inode->i_ino;
	meta->offset = offset;
	meta->skip = skip;
	meta->entries = 0;
	meta->locked = 1;

failed:
	mutex_unlock(&msblk->meta_index_mutex);
	return meta;
}

static void release_meta_index(struct inode *inode, struct meta_index *meta)
{
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;

	mutex_lock(&msblk->meta_index_mutex);
	meta->locked = 0;
	mutex_unlock(&msblk->meta_index_mutex);
}

/*
 * Read the next n blocks from the block list, starting from
 * metadata block <start_block, offset>.
 */
static long long read_indexes(struct super_block *sb, int n,
				u64 *start_block, int *offset)
{
	int err, i;
	long long block = 0;
	__le32 *blist = kmalloc(PAGE_SIZE, GFP_KERNEL);

	if (blist == NULL) {
		ERROR("read_indexes: Failed to allocate block_list\n");
		return -ENOMEM;
	}

	while (n) {
		int blocks = min_t(int, n, PAGE_SIZE >> 2);

		err = squashfs_read_metadata(sb, blist, start_block,
						offset, blocks << 2);
		if (err < 0) {
			ERROR("read_indexes: reading block [%llx:%x]\n",
				*start_block, *offset);
			goto failure;
		}

		for (i = 0; i < blocks; i++) {
			int size = squashfs_block_size(blist[i]);
			if (size < 0) {
				err = size;
				goto failure;
			}
			block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size);
		}
		n -= blocks;
	}

	kfree(blist);
	return block;

failure:
	kfree(blist);
	return err;
}
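
/*
 * Illustrative sketch of the block-list decoding above (values made up):
 * each entry is a __le32 whose datablock length is recovered by masking
 * off the SQUASHFS_COMPRESSED_BIT_BLOCK flag (which, when set, marks the
 * block as stored uncompressed).  Three entries decoding to lengths
 * 0x1000, 0x2000 and 0x1800 make read_indexes() return 0x4800: the byte
 * offset of the fourth datablock relative to the first.
 */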

/*
 * Each cache index slot has SQUASHFS_META_ENTRIES, each of which
 * can cache one index -> datablock/blocklist-block mapping.  We wish
 * to distribute these over the length of the file, entry[0] maps index x,
 * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on.
 * The larger the file, the greater the skip factor.  The skip factor is
 * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure
 * the number of metadata blocks that need to be read fits into the cache.
 * If the skip factor is limited in this way then the file will use multiple
 * slots.
 */
static inline int calculate_skip(u64 blocks)
{
	u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
		 * SQUASHFS_META_INDEXES);
	return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
}
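
/*
 * Worked example, assuming the defaults, where the divisor is
 * (SQUASHFS_META_ENTRIES + 1) * SQUASHFS_META_INDEXES == 128 * 2048 ==
 * 262144: a 16 GiB file with 128 KiB blocks has 131072 blocks, giving
 * skip = 131072 / 262144 + 1 = 1; a 224 GiB file has 1835008 blocks,
 * giving skip = min(7, 1835008 / 262144 + 1) = 7, the cap imposed by
 * SQUASHFS_CACHED_BLKS.  Larger files also get skip == 7 and simply
 * spill over into further cache slots.
 */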

/*
 * Search and grow the index cache for the specified inode, returning the
 * on-disk locations of the datablock and block list metadata block
 * <index_block, index_offset> for index (scaled to nearest cache index).
 */
static int fill_meta_index(struct inode *inode, int index,
		u64 *index_block, int *index_offset, u64 *data_block)
{
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	int skip = calculate_skip(i_size_read(inode) >> msblk->block_log);
	int offset = 0;
	struct meta_index *meta;
	struct meta_entry *meta_entry;
	u64 cur_index_block = squashfs_i(inode)->block_list_start;
	int cur_offset = squashfs_i(inode)->offset;
	u64 cur_data_block = squashfs_i(inode)->start;
	int err, i;

	/*
	 * Scale index to cache index (cache slot entry)
	 */
	index /= SQUASHFS_META_INDEXES * skip;

	while (offset < index) {
		meta = locate_meta_index(inode, offset + 1, index);

		if (meta == NULL) {
			meta = empty_meta_index(inode, offset + 1, skip);
			if (meta == NULL)
				goto all_done;
		} else {
			offset = index < meta->offset + meta->entries ? index :
				meta->offset + meta->entries - 1;
			meta_entry = &meta->meta_entry[offset - meta->offset];
			cur_index_block = meta_entry->index_block +
				msblk->inode_table;
			cur_offset = meta_entry->offset;
			cur_data_block = meta_entry->data_block;
			TRACE("get_meta_index: offset %d, meta->offset %d, "
				"meta->entries %d\n", offset, meta->offset,
				meta->entries);
			TRACE("get_meta_index: index_block 0x%llx, offset 0x%x"
				" data_block 0x%llx\n", cur_index_block,
				cur_offset, cur_data_block);
		}

		/*
		 * If necessary grow cache slot by reading block list.  Cache
		 * slot is extended up to index or to the end of the slot, in
		 * which case further slots will be used.
		 */
		for (i = meta->offset + meta->entries; i <= index &&
				i < meta->offset + SQUASHFS_META_ENTRIES; i++) {
			int blocks = skip * SQUASHFS_META_INDEXES;
			long long res = read_indexes(inode->i_sb, blocks,
					&cur_index_block, &cur_offset);

			if (res < 0) {
				if (meta->entries == 0)
					/*
					 * Don't leave an empty slot on read
					 * error allocated to this inode...
					 */
					meta->inode_number = 0;
				err = res;
				goto failed;
			}

			cur_data_block += res;
			meta_entry = &meta->meta_entry[i - meta->offset];
			meta_entry->index_block = cur_index_block -
				msblk->inode_table;
			meta_entry->offset = cur_offset;
			meta_entry->data_block = cur_data_block;
			meta->entries++;
			offset++;
		}

		TRACE("get_meta_index: meta->offset %d, meta->entries %d\n",
				meta->offset, meta->entries);

		release_meta_index(inode, meta);
	}

all_done:
	*index_block = cur_index_block;
	*index_offset = cur_offset;
	*data_block = cur_data_block;

	/*
	 * Scale cache index (cache slot entry) to index
	 */
	return offset * SQUASHFS_META_INDEXES * skip;

failed:
	release_meta_index(inode, meta);
	return err;
}

/*
 * Get the on-disk location and compressed size of the datablock
 * specified by index.  Fill_meta_index() does most of the work.
 */
static int read_blocklist(struct inode *inode, int index, u64 *block)
{
	u64 start;
	long long blks;
	int offset;
	__le32 size;
	int res = fill_meta_index(inode, index, &start, &offset, block);

	TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset"
		" 0x%x, block 0x%llx\n", res, index, start, offset,
		*block);

	if (res < 0)
		return res;

	/*
	 * res contains the index of the mapping returned by fill_meta_index(),
	 * this will likely be less than the desired index (because the
	 * meta_index cache works at a higher granularity).  Read any
	 * extra block indexes needed.
	 */
	if (res < index) {
		blks = read_indexes(inode->i_sb, index - res, &start, &offset);
		if (blks < 0)
			return (int) blks;
		*block += blks;
	}

	/*
	 * Read length of block specified by index.
	 */
	res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset,
			sizeof(size));
	if (res < 0)
		return res;
	return squashfs_block_size(size);
}
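
/*
 * End-to-end sketch (illustrative numbers, defaults, skip == 1): a lookup
 * of datablock index 5000 is first scaled to cache entry
 * 5000 / (SQUASHFS_META_INDEXES * skip) == 5000 / 2048 == 2 in
 * fill_meta_index(), which returns the cached mapping for index
 * 2 * 2048 == 4096.  read_blocklist() then walks the remaining
 * 5000 - 4096 == 904 block-list entries via read_indexes() to reach the
 * exact datablock, and finally reads its __le32 length word.
 */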

void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail)
{
	int copied;
	void *pageaddr;

	pageaddr = kmap_atomic(page);
	copied = squashfs_copy_data(pageaddr, buffer, offset, avail);
	memset(pageaddr + copied, 0, PAGE_SIZE - copied);
	kunmap_atomic(pageaddr);

	flush_dcache_page(page);
	if (copied == avail)
		SetPageUptodate(page);
	else
		SetPageError(page);
}

/* Copy data into page cache  */
void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
	int bytes, int offset)
{
	struct inode *inode = page->mapping->host;
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
	int start_index = page->index & ~mask, end_index = start_index | mask;

	/*
	 * Loop copying datablock into pages.  As the datablock likely covers
	 * many PAGE_SIZE pages (default block size is 128 KiB) explicitly
	 * grab the pages from the page cache, except for the page that we've
	 * been called to fill.
	 */
	for (i = start_index; i <= end_index && bytes > 0; i++,
			bytes -= PAGE_SIZE, offset += PAGE_SIZE) {
		struct page *push_page;
		int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0;

		TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);

		push_page = (i == page->index) ? page :
			grab_cache_page_nowait(page->mapping, i);

		if (!push_page)
			continue;

		if (PageUptodate(push_page))
			goto skip_page;

		squashfs_fill_page(push_page, buffer, offset, avail);
skip_page:
		unlock_page(push_page);
		if (i != page->index)
			put_page(push_page);
	}
}
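
/*
 * Sketch of the page arithmetic above, assuming the default 128 KiB block
 * (block_log == 17) and 4 KiB pages (PAGE_SHIFT == 12): mask == 31, so one
 * datablock spans 32 page-cache pages, and a call for, say, page index 70
 * pushes the decompressed block into pages 64..95 in a single pass.
 */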

/* Read datablock stored packed inside a fragment (tail-end packed block) */
static int squashfs_readpage_fragment(struct page *page, int expected)
{
	struct inode *inode = page->mapping->host;
	struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
		squashfs_i(inode)->fragment_block,
		squashfs_i(inode)->fragment_size);
	int res = buffer->error;

	if (res)
		ERROR("Unable to read page, block %llx, size %x\n",
			squashfs_i(inode)->fragment_block,
			squashfs_i(inode)->fragment_size);
	else
		squashfs_copy_cache(page, buffer, expected,
			squashfs_i(inode)->fragment_offset);

	squashfs_cache_put(buffer);
	return res;
}

static int squashfs_readpage_sparse(struct page *page, int expected)
{
	squashfs_copy_cache(page, NULL, expected, 0);
	return 0;
}

static int squashfs_read_folio(struct file *file, struct folio *folio)
{
	struct page *page = &folio->page;
	struct inode *inode = page->mapping->host;
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	int index = page->index >> (msblk->block_log - PAGE_SHIFT);
	int file_end = i_size_read(inode) >> msblk->block_log;
	int expected = index == file_end ?
			(i_size_read(inode) & (msblk->block_size - 1)) :
			 msblk->block_size;
	int res = 0;
	void *pageaddr;

	TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
				page->index, squashfs_i(inode)->start);

	if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >>
					PAGE_SHIFT))
		goto out;

	if (index < file_end || squashfs_i(inode)->fragment_block ==
					SQUASHFS_INVALID_BLK) {
		u64 block = 0;

		res = read_blocklist(inode, index, &block);
		if (res < 0)
			goto error_out;

		if (res == 0)
			res = squashfs_readpage_sparse(page, expected);
		else
			res = squashfs_readpage_block(page, block, res, expected);
	} else
		res = squashfs_readpage_fragment(page, expected);

	if (!res)
		return 0;

error_out:
	SetPageError(page);
out:
	pageaddr = kmap_atomic(page);
	memset(pageaddr, 0, PAGE_SIZE);
	kunmap_atomic(pageaddr);
	flush_dcache_page(page);
	if (res == 0)
		SetPageUptodate(page);
	unlock_page(page);

	return res;
}

static int squashfs_readahead_fragment(struct page **page,
	unsigned int pages, unsigned int expected)
{
	struct inode *inode = page[0]->mapping->host;
	struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
		squashfs_i(inode)->fragment_block,
		squashfs_i(inode)->fragment_size);
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
	int error = buffer->error;

	if (error)
		goto out;

	expected += squashfs_i(inode)->fragment_offset;

	for (n = 0; n < pages; n++) {
		unsigned int base = (page[n]->index & mask) << PAGE_SHIFT;
		unsigned int offset = base + squashfs_i(inode)->fragment_offset;

		if (expected > offset) {
			unsigned int avail = min_t(unsigned int, expected -
				offset, PAGE_SIZE);

			squashfs_fill_page(page[n], buffer, offset, avail);
		}

		unlock_page(page[n]);
		put_page(page[n]);
	}

out:
	squashfs_cache_put(buffer);
	return error;
}
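
/*
 * Offset arithmetic sketch (illustrative numbers): with a 10000-byte tail
 * end stored at fragment_offset 0x3000, expected becomes 22288 once the
 * fragment_offset is folded in; the page with (index & mask) == 1 has
 * base == 0x1000, reads from offset 0x4000 within the fragment buffer,
 * and gets avail == min(22288 - 16384, PAGE_SIZE) == PAGE_SIZE, i.e. a
 * fully-populated page.
 */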

static void squashfs_readahead(struct readahead_control *ractl)
{
	struct inode *inode = ractl->mapping->host;
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	size_t mask = (1UL << msblk->block_log) - 1;
	unsigned short shift = msblk->block_log - PAGE_SHIFT;
	loff_t start = readahead_pos(ractl) & ~mask;
	size_t len = readahead_length(ractl) + readahead_pos(ractl) - start;
	struct squashfs_page_actor *actor;
	unsigned int nr_pages = 0;
	struct page **pages;
	int i, file_end = i_size_read(inode) >> msblk->block_log;
	unsigned int max_pages = 1UL << shift;

	readahead_expand(ractl, start, (len | mask) + 1);

	pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
	if (!pages)
		return;

	for (;;) {
		pgoff_t index;
		int res, bsize;
		u64 block = 0;
		unsigned int expected;
		struct page *last_page;

		expected = start >> msblk->block_log == file_end ?
			   (i_size_read(inode) & (msblk->block_size - 1)) :
			    msblk->block_size;

		max_pages = (expected + PAGE_SIZE - 1) >> PAGE_SHIFT;

		nr_pages = __readahead_batch(ractl, pages, max_pages);
		if (!nr_pages)
			break;

		if (readahead_pos(ractl) >= i_size_read(inode))
			goto skip_pages;

		index = pages[0]->index >> shift;

		if ((pages[nr_pages - 1]->index >> shift) != index)
			goto skip_pages;

		if (index == file_end && squashfs_i(inode)->fragment_block !=
						SQUASHFS_INVALID_BLK) {
			res = squashfs_readahead_fragment(pages, nr_pages,
							  expected);
			if (res)
				goto skip_pages;
			continue;
		}

		bsize = read_blocklist(inode, index, &block);
		if (bsize == 0)
			goto skip_pages;

		actor = squashfs_page_actor_init_special(msblk, pages, nr_pages,
							 expected);
		if (!actor)
			goto skip_pages;

		res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);

		last_page = squashfs_page_actor_free(actor);

		if (res == expected) {
			int bytes;

			/* Last page (if present) may have trailing bytes not filled */
			bytes = res % PAGE_SIZE;
			if (index == file_end && bytes && last_page)
				memzero_page(last_page, bytes,
					     PAGE_SIZE - bytes);

			for (i = 0; i < nr_pages; i++) {
				flush_dcache_page(pages[i]);
				SetPageUptodate(pages[i]);
			}
		}

		for (i = 0; i < nr_pages; i++) {
			unlock_page(pages[i]);
			put_page(pages[i]);
		}
	}

	kfree(pages);
	return;

skip_pages:
	for (i = 0; i < nr_pages; i++) {
		unlock_page(pages[i]);
		put_page(pages[i]);
	}
	kfree(pages);
}
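
/*
 * Alignment sketch for the readahead_expand() call above (illustrative
 * numbers, 128 KiB blocks, so mask == 0x1ffff): a readahead window of
 * [0x21000, 0x52000) is widened by rounding start down to 0x20000 and the
 * length up to ((len | mask) + 1) == 0x40000, i.e. two whole datablocks,
 * so each __readahead_batch() maps 1:1 onto a block decompressed by
 * squashfs_read_data().
 */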

const struct address_space_operations squashfs_aops = {
	.read_folio = squashfs_read_folio,
	.readahead = squashfs_readahead
};