libfs.c
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * fs/libfs.c
  4. * Library for filesystem writers.
  5. */
  6. #include <linux/blkdev.h>
  7. #include <linux/export.h>
  8. #include <linux/pagemap.h>
  9. #include <linux/slab.h>
  10. #include <linux/cred.h>
  11. #include <linux/mount.h>
  12. #include <linux/vfs.h>
  13. #include <linux/quotaops.h>
  14. #include <linux/mutex.h>
  15. #include <linux/namei.h>
  16. #include <linux/exportfs.h>
  17. #include <linux/iversion.h>
  18. #include <linux/writeback.h>
  19. #include <linux/buffer_head.h> /* sync_mapping_buffers */
  20. #include <linux/fs_context.h>
  21. #include <linux/pseudo_fs.h>
  22. #include <linux/fsnotify.h>
  23. #include <linux/unicode.h>
  24. #include <linux/fscrypt.h>
  25. #include <linux/uaccess.h>
  26. #include "internal.h"
  27. int simple_getattr(struct user_namespace *mnt_userns, const struct path *path,
  28. struct kstat *stat, u32 request_mask,
  29. unsigned int query_flags)
  30. {
  31. struct inode *inode = d_inode(path->dentry);
  32. generic_fillattr(&init_user_ns, inode, stat);
  33. stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
  34. return 0;
  35. }
  36. EXPORT_SYMBOL(simple_getattr);
  37. int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
  38. {
  39. buf->f_type = dentry->d_sb->s_magic;
  40. buf->f_bsize = PAGE_SIZE;
  41. buf->f_namelen = NAME_MAX;
  42. return 0;
  43. }
  44. EXPORT_SYMBOL(simple_statfs);
  45. /*
  46. * Retaining negative dentries for an in-memory filesystem just wastes
  47. * memory and lookup time: arrange for them to be deleted immediately.
  48. */
  49. int always_delete_dentry(const struct dentry *dentry)
  50. {
  51. return 1;
  52. }
  53. EXPORT_SYMBOL(always_delete_dentry);
  54. const struct dentry_operations simple_dentry_operations = {
  55. .d_delete = always_delete_dentry,
  56. };
  57. EXPORT_SYMBOL(simple_dentry_operations);
  58. /*
  59. * Lookup the data. This is trivial - if the dentry didn't already
  60. * exist, we know it is negative. Set d_op to delete negative dentries.
  61. */
  62. struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
  63. {
  64. if (dentry->d_name.len > NAME_MAX)
  65. return ERR_PTR(-ENAMETOOLONG);
  66. if (!dentry->d_sb->s_d_op)
  67. d_set_d_op(dentry, &simple_dentry_operations);
  68. d_add(dentry, NULL);
  69. return NULL;
  70. }
  71. EXPORT_SYMBOL(simple_lookup);
  72. int dcache_dir_open(struct inode *inode, struct file *file)
  73. {
  74. file->private_data = d_alloc_cursor(file->f_path.dentry);
  75. return file->private_data ? 0 : -ENOMEM;
  76. }
  77. EXPORT_SYMBOL(dcache_dir_open);
  78. int dcache_dir_close(struct inode *inode, struct file *file)
  79. {
  80. dput(file->private_data);
  81. return 0;
  82. }
  83. EXPORT_SYMBOL(dcache_dir_close);
  84. /* parent is locked at least shared */
  85. /*
  86. * Returns an element of siblings' list.
  87. * We are looking for <count>th positive after <p>; if
  88. * found, dentry is grabbed and returned to caller.
  89. * If no such element exists, NULL is returned.
  90. */
  91. static struct dentry *scan_positives(struct dentry *cursor,
  92. struct list_head *p,
  93. loff_t count,
  94. struct dentry *last)
  95. {
  96. struct dentry *dentry = cursor->d_parent, *found = NULL;
  97. spin_lock(&dentry->d_lock);
  98. while ((p = p->next) != &dentry->d_subdirs) {
  99. struct dentry *d = list_entry(p, struct dentry, d_child);
  100. // we must at least skip cursors, to avoid livelocks
  101. if (d->d_flags & DCACHE_DENTRY_CURSOR)
  102. continue;
  103. if (simple_positive(d) && !--count) {
  104. spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
  105. if (simple_positive(d))
  106. found = dget_dlock(d);
  107. spin_unlock(&d->d_lock);
  108. if (likely(found))
  109. break;
  110. count = 1;
  111. }
  112. if (need_resched()) {
  113. list_move(&cursor->d_child, p);
  114. p = &cursor->d_child;
  115. spin_unlock(&dentry->d_lock);
  116. cond_resched();
  117. spin_lock(&dentry->d_lock);
  118. }
  119. }
  120. spin_unlock(&dentry->d_lock);
  121. dput(last);
  122. return found;
  123. }
  124. loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
  125. {
  126. struct dentry *dentry = file->f_path.dentry;
  127. switch (whence) {
  128. case 1:
  129. offset += file->f_pos;
  130. fallthrough;
  131. case 0:
  132. if (offset >= 0)
  133. break;
  134. fallthrough;
  135. default:
  136. return -EINVAL;
  137. }
  138. if (offset != file->f_pos) {
  139. struct dentry *cursor = file->private_data;
  140. struct dentry *to = NULL;
  141. inode_lock_shared(dentry->d_inode);
  142. if (offset > 2)
  143. to = scan_positives(cursor, &dentry->d_subdirs,
  144. offset - 2, NULL);
  145. spin_lock(&dentry->d_lock);
  146. if (to)
  147. list_move(&cursor->d_child, &to->d_child);
  148. else
  149. list_del_init(&cursor->d_child);
  150. spin_unlock(&dentry->d_lock);
  151. dput(to);
  152. file->f_pos = offset;
  153. inode_unlock_shared(dentry->d_inode);
  154. }
  155. return offset;
  156. }
  157. EXPORT_SYMBOL(dcache_dir_lseek);
  158. /* Relationship between i_mode and the DT_xxx types */
  159. static inline unsigned char dt_type(struct inode *inode)
  160. {
  161. return (inode->i_mode >> 12) & 15;
  162. }
  163. /*
  164. * Directory is locked and all positive dentries in it are safe, since
  165. * for ramfs-type trees they can't go away without unlink() or rmdir(),
  166. * both impossible due to the lock on directory.
  167. */
  168. int dcache_readdir(struct file *file, struct dir_context *ctx)
  169. {
  170. struct dentry *dentry = file->f_path.dentry;
  171. struct dentry *cursor = file->private_data;
  172. struct list_head *anchor = &dentry->d_subdirs;
  173. struct dentry *next = NULL;
  174. struct list_head *p;
  175. if (!dir_emit_dots(file, ctx))
  176. return 0;
  177. if (ctx->pos == 2)
  178. p = anchor;
  179. else if (!list_empty(&cursor->d_child))
  180. p = &cursor->d_child;
  181. else
  182. return 0;
  183. while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
  184. if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
  185. d_inode(next)->i_ino, dt_type(d_inode(next))))
  186. break;
  187. ctx->pos++;
  188. p = &next->d_child;
  189. }
  190. spin_lock(&dentry->d_lock);
  191. if (next)
  192. list_move_tail(&cursor->d_child, &next->d_child);
  193. else
  194. list_del_init(&cursor->d_child);
  195. spin_unlock(&dentry->d_lock);
  196. dput(next);
  197. return 0;
  198. }
  199. EXPORT_SYMBOL(dcache_readdir);
  200. ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
  201. {
  202. return -EISDIR;
  203. }
  204. EXPORT_SYMBOL(generic_read_dir);
  205. const struct file_operations simple_dir_operations = {
  206. .open = dcache_dir_open,
  207. .release = dcache_dir_close,
  208. .llseek = dcache_dir_lseek,
  209. .read = generic_read_dir,
  210. .iterate_shared = dcache_readdir,
  211. .fsync = noop_fsync,
  212. };
  213. EXPORT_SYMBOL(simple_dir_operations);
  214. const struct inode_operations simple_dir_inode_operations = {
  215. .lookup = simple_lookup,
  216. };
  217. EXPORT_SYMBOL(simple_dir_inode_operations);
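/*
 * Example (illustrative sketch, not part of libfs): a ramfs-style
 * filesystem can point a freshly allocated directory inode straight at
 * the helpers above.  examplefs_new_dir() and its callers are
 * hypothetical; mode carries only permission bits here.
 *
 *	static struct inode *examplefs_new_dir(struct super_block *sb, umode_t mode)
 *	{
 *		struct inode *inode = new_inode(sb);
 *
 *		if (!inode)
 *			return NULL;
 *		inode->i_ino = get_next_ino();
 *		inode->i_mode = S_IFDIR | mode;
 *		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 *		inode->i_op = &simple_dir_inode_operations;
 *		inode->i_fop = &simple_dir_operations;
 *		inc_nlink(inode);	// a new directory starts with nlink == 2
 *		return inode;
 *	}
 */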
  218. static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
  219. {
  220. struct dentry *child = NULL;
  221. struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs;
  222. spin_lock(&parent->d_lock);
  223. while ((p = p->next) != &parent->d_subdirs) {
  224. struct dentry *d = container_of(p, struct dentry, d_child);
  225. if (simple_positive(d)) {
  226. spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
  227. if (simple_positive(d))
  228. child = dget_dlock(d);
  229. spin_unlock(&d->d_lock);
  230. if (likely(child))
  231. break;
  232. }
  233. }
  234. spin_unlock(&parent->d_lock);
  235. dput(prev);
  236. return child;
  237. }
  238. void simple_recursive_removal(struct dentry *dentry,
  239. void (*callback)(struct dentry *))
  240. {
  241. struct dentry *this = dget(dentry);
  242. while (true) {
  243. struct dentry *victim = NULL, *child;
  244. struct inode *inode = this->d_inode;
  245. inode_lock(inode);
  246. if (d_is_dir(this))
  247. inode->i_flags |= S_DEAD;
  248. while ((child = find_next_child(this, victim)) == NULL) {
  249. // kill and ascend
  250. // update metadata while it's still locked
  251. inode->i_ctime = current_time(inode);
  252. clear_nlink(inode);
  253. inode_unlock(inode);
  254. victim = this;
  255. this = this->d_parent;
  256. inode = this->d_inode;
  257. inode_lock(inode);
  258. if (simple_positive(victim)) {
  259. d_invalidate(victim); // avoid lost mounts
  260. if (d_is_dir(victim))
  261. fsnotify_rmdir(inode, victim);
  262. else
  263. fsnotify_unlink(inode, victim);
  264. if (callback)
  265. callback(victim);
  266. dput(victim); // unpin it
  267. }
  268. if (victim == dentry) {
  269. inode->i_ctime = inode->i_mtime =
  270. current_time(inode);
  271. if (d_is_dir(dentry))
  272. drop_nlink(inode);
  273. inode_unlock(inode);
  274. dput(dentry);
  275. return;
  276. }
  277. }
  278. inode_unlock(inode);
  279. this = child;
  280. }
  281. }
  282. EXPORT_SYMBOL(simple_recursive_removal);
  283. static const struct super_operations simple_super_operations = {
  284. .statfs = simple_statfs,
  285. };
  286. static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
  287. {
  288. struct pseudo_fs_context *ctx = fc->fs_private;
  289. struct inode *root;
  290. s->s_maxbytes = MAX_LFS_FILESIZE;
  291. s->s_blocksize = PAGE_SIZE;
  292. s->s_blocksize_bits = PAGE_SHIFT;
  293. s->s_magic = ctx->magic;
  294. s->s_op = ctx->ops ?: &simple_super_operations;
  295. s->s_xattr = ctx->xattr;
  296. s->s_time_gran = 1;
  297. root = new_inode(s);
  298. if (!root)
  299. return -ENOMEM;
  300. /*
  301. * since this is the first inode, make it number 1. New inodes created
  302. * after this must take care not to collide with it (by passing
  303. * max_reserved of 1 to iunique).
  304. */
  305. root->i_ino = 1;
  306. root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
  307. root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
  308. s->s_root = d_make_root(root);
  309. if (!s->s_root)
  310. return -ENOMEM;
  311. s->s_d_op = ctx->dops;
  312. return 0;
  313. }
  314. static int pseudo_fs_get_tree(struct fs_context *fc)
  315. {
  316. return get_tree_nodev(fc, pseudo_fs_fill_super);
  317. }
  318. static void pseudo_fs_free(struct fs_context *fc)
  319. {
  320. kfree(fc->fs_private);
  321. }
  322. static const struct fs_context_operations pseudo_fs_context_ops = {
  323. .free = pseudo_fs_free,
  324. .get_tree = pseudo_fs_get_tree,
  325. };
  326. /*
  327. * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
  328. * will never be mountable)
  329. */
  330. struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
  331. unsigned long magic)
  332. {
  333. struct pseudo_fs_context *ctx;
  334. ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL);
  335. if (likely(ctx)) {
  336. ctx->magic = magic;
  337. fc->fs_private = ctx;
  338. fc->ops = &pseudo_fs_context_ops;
  339. fc->sb_flags |= SB_NOUSER;
  340. fc->global = true;
  341. }
  342. return ctx;
  343. }
  344. EXPORT_SYMBOL(init_pseudo);
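/*
 * Example (illustrative sketch, not part of libfs): a never-mountable
 * pseudo filesystem in the style of pipefs/sockfs.  The magic value and
 * the "examplefs" name are hypothetical.
 *
 *	static int examplefs_init_fs_context(struct fs_context *fc)
 *	{
 *		return init_pseudo(fc, 0x45584d50) ? 0 : -ENOMEM;
 *	}
 *
 *	static struct file_system_type examplefs_type = {
 *		.name		= "examplefs",
 *		.init_fs_context = examplefs_init_fs_context,
 *		.kill_sb	= kill_anon_super,
 *	};
 */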
  345. int simple_open(struct inode *inode, struct file *file)
  346. {
  347. if (inode->i_private)
  348. file->private_data = inode->i_private;
  349. return 0;
  350. }
  351. EXPORT_SYMBOL(simple_open);
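/*
 * Example (illustrative sketch): debugfs-style files stash an object in
 * inode->i_private when the file is created; simple_open() then makes it
 * available as file->private_data.  struct example_state and the other
 * example_* names are hypothetical.
 *
 *	struct example_state { char msg[32]; };
 *
 *	static ssize_t example_read(struct file *file, char __user *buf,
 *				    size_t count, loff_t *ppos)
 *	{
 *		struct example_state *st = file->private_data; // from i_private
 *
 *		return simple_read_from_buffer(buf, count, ppos,
 *					       st->msg, strlen(st->msg));
 *	}
 *
 *	static const struct file_operations example_fops = {
 *		.open = simple_open,
 *		.read = example_read,
 *	};
 */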
  352. int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  353. {
  354. struct inode *inode = d_inode(old_dentry);
  355. inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
  356. inc_nlink(inode);
  357. ihold(inode);
  358. dget(dentry);
  359. d_instantiate(dentry, inode);
  360. return 0;
  361. }
  362. EXPORT_SYMBOL(simple_link);
  363. int simple_empty(struct dentry *dentry)
  364. {
  365. struct dentry *child;
  366. int ret = 0;
  367. spin_lock(&dentry->d_lock);
  368. list_for_each_entry(child, &dentry->d_subdirs, d_child) {
  369. spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
  370. if (simple_positive(child)) {
  371. spin_unlock(&child->d_lock);
  372. goto out;
  373. }
  374. spin_unlock(&child->d_lock);
  375. }
  376. ret = 1;
  377. out:
  378. spin_unlock(&dentry->d_lock);
  379. return ret;
  380. }
  381. EXPORT_SYMBOL(simple_empty);
  382. int simple_unlink(struct inode *dir, struct dentry *dentry)
  383. {
  384. struct inode *inode = d_inode(dentry);
  385. inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
  386. drop_nlink(inode);
  387. dput(dentry);
  388. return 0;
  389. }
  390. EXPORT_SYMBOL(simple_unlink);
  391. int simple_rmdir(struct inode *dir, struct dentry *dentry)
  392. {
  393. if (!simple_empty(dentry))
  394. return -ENOTEMPTY;
  395. drop_nlink(d_inode(dentry));
  396. simple_unlink(dir, dentry);
  397. drop_nlink(dir);
  398. return 0;
  399. }
  400. EXPORT_SYMBOL(simple_rmdir);
  401. int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
  402. struct inode *new_dir, struct dentry *new_dentry)
  403. {
  404. bool old_is_dir = d_is_dir(old_dentry);
  405. bool new_is_dir = d_is_dir(new_dentry);
  406. if (old_dir != new_dir && old_is_dir != new_is_dir) {
  407. if (old_is_dir) {
  408. drop_nlink(old_dir);
  409. inc_nlink(new_dir);
  410. } else {
  411. drop_nlink(new_dir);
  412. inc_nlink(old_dir);
  413. }
  414. }
  415. old_dir->i_ctime = old_dir->i_mtime =
  416. new_dir->i_ctime = new_dir->i_mtime =
  417. d_inode(old_dentry)->i_ctime =
  418. d_inode(new_dentry)->i_ctime = current_time(old_dir);
  419. return 0;
  420. }
  421. EXPORT_SYMBOL_GPL(simple_rename_exchange);
  422. int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
  423. struct dentry *old_dentry, struct inode *new_dir,
  424. struct dentry *new_dentry, unsigned int flags)
  425. {
  426. struct inode *inode = d_inode(old_dentry);
  427. int they_are_dirs = d_is_dir(old_dentry);
  428. if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
  429. return -EINVAL;
  430. if (flags & RENAME_EXCHANGE)
  431. return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
  432. if (!simple_empty(new_dentry))
  433. return -ENOTEMPTY;
  434. if (d_really_is_positive(new_dentry)) {
  435. simple_unlink(new_dir, new_dentry);
  436. if (they_are_dirs) {
  437. drop_nlink(d_inode(new_dentry));
  438. drop_nlink(old_dir);
  439. }
  440. } else if (they_are_dirs) {
  441. drop_nlink(old_dir);
  442. inc_nlink(new_dir);
  443. }
  444. old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
  445. new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
  446. return 0;
  447. }
  448. EXPORT_SYMBOL(simple_rename);
  449. /**
  450. * simple_setattr - setattr for simple filesystem
  451. * @mnt_userns: user namespace of the target mount
  452. * @dentry: dentry
  453. * @iattr: iattr structure
  454. *
  455. * Returns 0 on success, -error on failure.
  456. *
  457. * simple_setattr is a simple ->setattr implementation without a proper
  458. * implementation of size changes.
  459. *
  460. * It can either be used for in-memory filesystems or special files
  461. * on simple regular filesystems. Anything that needs to change on-disk
  462. * or wire state on size changes needs its own setattr method.
  463. */
  464. int simple_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
  465. struct iattr *iattr)
  466. {
  467. struct inode *inode = d_inode(dentry);
  468. int error;
  469. error = setattr_prepare(mnt_userns, dentry, iattr);
  470. if (error)
  471. return error;
  472. if (iattr->ia_valid & ATTR_SIZE)
  473. truncate_setsize(inode, iattr->ia_size);
  474. setattr_copy(mnt_userns, inode, iattr);
  475. mark_inode_dirty(inode);
  476. return 0;
  477. }
  478. EXPORT_SYMBOL(simple_setattr);
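/*
 * Example (illustrative sketch): an in-memory filesystem can usually wire
 * its regular-file inode_operations straight to the generic helpers.  The
 * examplefs_* name is hypothetical.
 *
 *	static const struct inode_operations examplefs_file_inode_operations = {
 *		.setattr	= simple_setattr,
 *		.getattr	= simple_getattr,
 *	};
 */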
  479. static int simple_read_folio(struct file *file, struct folio *folio)
  480. {
  481. folio_zero_range(folio, 0, folio_size(folio));
  482. flush_dcache_folio(folio);
  483. folio_mark_uptodate(folio);
  484. folio_unlock(folio);
  485. return 0;
  486. }
  487. int simple_write_begin(struct file *file, struct address_space *mapping,
  488. loff_t pos, unsigned len,
  489. struct page **pagep, void **fsdata)
  490. {
  491. struct page *page;
  492. pgoff_t index;
  493. index = pos >> PAGE_SHIFT;
  494. page = grab_cache_page_write_begin(mapping, index);
  495. if (!page)
  496. return -ENOMEM;
  497. *pagep = page;
  498. if (!PageUptodate(page) && (len != PAGE_SIZE)) {
  499. unsigned from = pos & (PAGE_SIZE - 1);
  500. zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
  501. }
  502. return 0;
  503. }
  504. EXPORT_SYMBOL(simple_write_begin);
  505. /**
  506. * simple_write_end - .write_end helper for non-block-device FSes
  507. * @file: See .write_end of address_space_operations
  508. * @mapping: "
  509. * @pos: "
  510. * @len: "
  511. * @copied: "
  512. * @page: "
  513. * @fsdata: "
  514. *
  515. * simple_write_end does the minimum needed for updating a page after writing is
  516. * done. It has the same API signature as the .write_end of
  517. * address_space_operations vector. So it can just be set onto .write_end for
  518. * FSes that don't need any other processing. i_mutex is assumed to be held.
  519. * Block based filesystems should use generic_write_end().
  520. * NOTE: Even though i_size might get updated by this function, mark_inode_dirty
  521. * is not called, so a filesystem that actually does store data in .write_inode
  522. * should extend on what's done here with a call to mark_inode_dirty() in the
  523. * case that i_size has changed.
  524. *
  525. * Use *ONLY* with simple_read_folio()
  526. */
  527. static int simple_write_end(struct file *file, struct address_space *mapping,
  528. loff_t pos, unsigned len, unsigned copied,
  529. struct page *page, void *fsdata)
  530. {
  531. struct inode *inode = page->mapping->host;
  532. loff_t last_pos = pos + copied;
  533. /* zero the stale part of the page if we did a short copy */
  534. if (!PageUptodate(page)) {
  535. if (copied < len) {
  536. unsigned from = pos & (PAGE_SIZE - 1);
  537. zero_user(page, from + copied, len - copied);
  538. }
  539. SetPageUptodate(page);
  540. }
  541. /*
  542. * No need to use i_size_read() here, the i_size
  543. * cannot change under us because we hold the i_mutex.
  544. */
  545. if (last_pos > inode->i_size)
  546. i_size_write(inode, last_pos);
  547. set_page_dirty(page);
  548. unlock_page(page);
  549. put_page(page);
  550. return copied;
  551. }
  552. /*
  553. * Provides ramfs-style behavior: data in the pagecache, but no writeback.
  554. */
  555. const struct address_space_operations ram_aops = {
  556. .read_folio = simple_read_folio,
  557. .write_begin = simple_write_begin,
  558. .write_end = simple_write_end,
  559. .dirty_folio = noop_dirty_folio,
  560. };
  561. EXPORT_SYMBOL(ram_aops);
  562. /*
  563. * the inodes created here are not hashed. If you use iunique to generate
  564. * unique inode values later for this filesystem, then you must take care
  565. * to pass it an appropriate max_reserved value to avoid collisions.
  566. */
  567. int simple_fill_super(struct super_block *s, unsigned long magic,
  568. const struct tree_descr *files)
  569. {
  570. struct inode *inode;
  571. struct dentry *root;
  572. struct dentry *dentry;
  573. int i;
  574. s->s_blocksize = PAGE_SIZE;
  575. s->s_blocksize_bits = PAGE_SHIFT;
  576. s->s_magic = magic;
  577. s->s_op = &simple_super_operations;
  578. s->s_time_gran = 1;
  579. inode = new_inode(s);
  580. if (!inode)
  581. return -ENOMEM;
  582. /*
  583. * because the root inode is 1, the files array must not contain an
  584. * entry at index 1
  585. */
  586. inode->i_ino = 1;
  587. inode->i_mode = S_IFDIR | 0755;
  588. inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
  589. inode->i_op = &simple_dir_inode_operations;
  590. inode->i_fop = &simple_dir_operations;
  591. set_nlink(inode, 2);
  592. root = d_make_root(inode);
  593. if (!root)
  594. return -ENOMEM;
  595. for (i = 0; !files->name || files->name[0]; i++, files++) {
  596. if (!files->name)
  597. continue;
  598. /* warn if it tries to conflict with the root inode */
  599. if (unlikely(i == 1))
  600. printk(KERN_WARNING "%s: %s passed in a files array "
  601. "with an index of 1!\n", __func__,
  602. s->s_type->name);
  603. dentry = d_alloc_name(root, files->name);
  604. if (!dentry)
  605. goto out;
  606. inode = new_inode(s);
  607. if (!inode) {
  608. dput(dentry);
  609. goto out;
  610. }
  611. inode->i_mode = S_IFREG | files->mode;
  612. inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
  613. inode->i_fop = files->ops;
  614. inode->i_ino = i;
  615. d_add(dentry, inode);
  616. }
  617. s->s_root = root;
  618. return 0;
  619. out:
  620. d_genocide(root);
  621. shrink_dcache_parent(root);
  622. dput(root);
  623. return -ENOMEM;
  624. }
  625. EXPORT_SYMBOL(simple_fill_super);
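/*
 * Example (illustrative sketch): a small control filesystem can describe
 * its fixed files with a tree_descr array, leaving indices 0 and 1 unused
 * and terminating the array with an empty name.  The file names, fops and
 * EXAMPLEFS_MAGIC below are hypothetical.
 *
 *	static const struct tree_descr examplefs_files[] = {
 *		[2] = { "status",  &example_status_fops,  0444 },
 *		[3] = { "control", &example_control_fops, 0644 },
 *		{ "" },
 *	};
 *
 *	static int examplefs_fill_super(struct super_block *sb, void *data, int silent)
 *	{
 *		return simple_fill_super(sb, EXAMPLEFS_MAGIC, examplefs_files);
 *	}
 */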
  626. static DEFINE_SPINLOCK(pin_fs_lock);
  627. int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
  628. {
  629. struct vfsmount *mnt = NULL;
  630. spin_lock(&pin_fs_lock);
  631. if (unlikely(!*mount)) {
  632. spin_unlock(&pin_fs_lock);
  633. mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
  634. if (IS_ERR(mnt))
  635. return PTR_ERR(mnt);
  636. spin_lock(&pin_fs_lock);
  637. if (!*mount)
  638. *mount = mnt;
  639. }
  640. mntget(*mount);
  641. ++*count;
  642. spin_unlock(&pin_fs_lock);
  643. mntput(mnt);
  644. return 0;
  645. }
  646. EXPORT_SYMBOL(simple_pin_fs);
  647. void simple_release_fs(struct vfsmount **mount, int *count)
  648. {
  649. struct vfsmount *mnt;
  650. spin_lock(&pin_fs_lock);
  651. mnt = *mount;
  652. if (!--*count)
  653. *mount = NULL;
  654. spin_unlock(&pin_fs_lock);
  655. mntput(mnt);
  656. }
  657. EXPORT_SYMBOL(simple_release_fs);
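/*
 * Example (illustrative sketch): the usual pin/release pattern keeps one
 * shared internal mount alive while any user-visible object exists.  The
 * example_* identifiers are hypothetical.
 *
 *	static struct vfsmount *example_mnt;
 *	static int example_mount_count;
 *
 *	// when an object is created:
 *	//	err = simple_pin_fs(&example_fs_type, &example_mnt, &example_mount_count);
 *	// when an object goes away:
 *	//	simple_release_fs(&example_mnt, &example_mount_count);
 */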
  658. /**
  659. * simple_read_from_buffer - copy data from the buffer to user space
  660. * @to: the user space buffer to read to
  661. * @count: the maximum number of bytes to read
  662. * @ppos: the current position in the buffer
  663. * @from: the buffer to read from
  664. * @available: the size of the buffer
  665. *
  666. * The simple_read_from_buffer() function reads up to @count bytes from the
  667. * buffer @from at offset @ppos into the user space address starting at @to.
  668. *
  669. * On success, the number of bytes read is returned and the offset @ppos is
  670. * advanced by this number, or a negative value is returned on error.
  671. **/
  672. ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
  673. const void *from, size_t available)
  674. {
  675. loff_t pos = *ppos;
  676. size_t ret;
  677. if (pos < 0)
  678. return -EINVAL;
  679. if (pos >= available || !count)
  680. return 0;
  681. if (count > available - pos)
  682. count = available - pos;
  683. ret = copy_to_user(to, from + pos, count);
  684. if (ret == count)
  685. return -EFAULT;
  686. count -= ret;
  687. *ppos = pos + count;
  688. return count;
  689. }
  690. EXPORT_SYMBOL(simple_read_from_buffer);
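/*
 * Example (illustrative sketch): a read-only attribute file exposing a
 * short, fixed string.  The names are hypothetical.
 *
 *	static ssize_t example_version_read(struct file *file, char __user *buf,
 *					    size_t count, loff_t *ppos)
 *	{
 *		static const char version[] = "example 1.0\n";
 *
 *		return simple_read_from_buffer(buf, count, ppos,
 *					       version, sizeof(version) - 1);
 *	}
 */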
  691. /**
  692. * simple_write_to_buffer - copy data from user space to the buffer
  693. * @to: the buffer to write to
  694. * @available: the size of the buffer
  695. * @ppos: the current position in the buffer
  696. * @from: the user space buffer to read from
  697. * @count: the maximum number of bytes to read
  698. *
  699. * The simple_write_to_buffer() function reads up to @count bytes from the user
  700. * space address starting at @from into the buffer @to at offset @ppos.
  701. *
  702. * On success, the number of bytes written is returned and the offset @ppos is
  703. * advanced by this number, or a negative value is returned on error.
  704. **/
  705. ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
  706. const void __user *from, size_t count)
  707. {
  708. loff_t pos = *ppos;
  709. size_t res;
  710. if (pos < 0)
  711. return -EINVAL;
  712. if (pos >= available || !count)
  713. return 0;
  714. if (count > available - pos)
  715. count = available - pos;
  716. res = copy_from_user(to + pos, from, count);
  717. if (res == count)
  718. return -EFAULT;
  719. count -= res;
  720. *ppos = pos + count;
  721. return count;
  722. }
  723. EXPORT_SYMBOL(simple_write_to_buffer);
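/*
 * Example (illustrative sketch): a ->write handler that copies user input
 * into a fixed kernel buffer at the current file position.  The names are
 * hypothetical.
 *
 *	static char example_cmd[64];
 *
 *	static ssize_t example_cmd_write(struct file *file, const char __user *buf,
 *					 size_t count, loff_t *ppos)
 *	{
 *		return simple_write_to_buffer(example_cmd, sizeof(example_cmd),
 *					      ppos, buf, count);
 *	}
 */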
  724. /**
  725. * memory_read_from_buffer - copy data from the buffer
  726. * @to: the kernel space buffer to read to
  727. * @count: the maximum number of bytes to read
  728. * @ppos: the current position in the buffer
  729. * @from: the buffer to read from
  730. * @available: the size of the buffer
  731. *
  732. * The memory_read_from_buffer() function reads up to @count bytes from the
  733. * buffer @from at offset @ppos into the kernel space address starting at @to.
  734. *
  735. * On success, the number of bytes read is returned and the offset @ppos is
  736. * advanced by this number, or a negative value is returned on error.
  737. **/
  738. ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
  739. const void *from, size_t available)
  740. {
  741. loff_t pos = *ppos;
  742. if (pos < 0)
  743. return -EINVAL;
  744. if (pos >= available)
  745. return 0;
  746. if (count > available - pos)
  747. count = available - pos;
  748. memcpy(to, from + pos, count);
  749. *ppos = pos + count;
  750. return count;
  751. }
  752. EXPORT_SYMBOL(memory_read_from_buffer);
  753. /*
  754. * Transaction based IO.
  755. * The file expects a single write which triggers the transaction, and then
  756. * possibly a read which collects the result - which is stored in a
  757. * file-local buffer.
  758. */
  759. void simple_transaction_set(struct file *file, size_t n)
  760. {
  761. struct simple_transaction_argresp *ar = file->private_data;
  762. BUG_ON(n > SIMPLE_TRANSACTION_LIMIT);
  763. /*
  764. * The barrier ensures that ar->size will really remain zero until
  765. * ar->data is ready for reading.
  766. */
  767. smp_mb();
  768. ar->size = n;
  769. }
  770. EXPORT_SYMBOL(simple_transaction_set);
  771. char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
  772. {
  773. struct simple_transaction_argresp *ar;
  774. static DEFINE_SPINLOCK(simple_transaction_lock);
  775. if (size > SIMPLE_TRANSACTION_LIMIT - 1)
  776. return ERR_PTR(-EFBIG);
  777. ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL);
  778. if (!ar)
  779. return ERR_PTR(-ENOMEM);
  780. spin_lock(&simple_transaction_lock);
  781. /* only one write allowed per open */
  782. if (file->private_data) {
  783. spin_unlock(&simple_transaction_lock);
  784. free_page((unsigned long)ar);
  785. return ERR_PTR(-EBUSY);
  786. }
  787. file->private_data = ar;
  788. spin_unlock(&simple_transaction_lock);
  789. if (copy_from_user(ar->data, buf, size))
  790. return ERR_PTR(-EFAULT);
  791. return ar->data;
  792. }
  793. EXPORT_SYMBOL(simple_transaction_get);
  794. ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
  795. {
  796. struct simple_transaction_argresp *ar = file->private_data;
  797. if (!ar)
  798. return 0;
  799. return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
  800. }
  801. EXPORT_SYMBOL(simple_transaction_read);
  802. int simple_transaction_release(struct inode *inode, struct file *file)
  803. {
  804. free_page((unsigned long)file->private_data);
  805. return 0;
  806. }
  807. EXPORT_SYMBOL(simple_transaction_release);
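/*
 * Example (illustrative sketch): a transaction file in the style of
 * fs/nfsd/nfsctl.c: one write supplies the request, a later read returns
 * the reply from the same page.  example_handle_request() is hypothetical
 * and is expected to return the reply length or a negative errno.
 *
 *	static ssize_t example_txn_write(struct file *file, const char __user *buf,
 *					 size_t size, loff_t *pos)
 *	{
 *		char *data = simple_transaction_get(file, buf, size);
 *		ssize_t rv;
 *
 *		if (IS_ERR(data))
 *			return PTR_ERR(data);
 *		rv = example_handle_request(data, size);
 *		if (rv < 0)
 *			return rv;
 *		simple_transaction_set(file, rv);	// rv bytes of reply in data
 *		return size;
 *	}
 *
 *	static const struct file_operations example_txn_fops = {
 *		.write	 = example_txn_write,
 *		.read	 = simple_transaction_read,
 *		.release = simple_transaction_release,
 *		.llseek	 = default_llseek,
 *	};
 */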
  808. /* Simple attribute files */
  809. struct simple_attr {
  810. int (*get)(void *, u64 *);
  811. int (*set)(void *, u64);
  812. char get_buf[24]; /* enough to store a u64 and "\n\0" */
  813. char set_buf[24];
  814. void *data;
  815. const char *fmt; /* format for read operation */
  816. struct mutex mutex; /* protects access to these buffers */
  817. };
  818. /* simple_attr_open is called by an actual attribute open file operation
  819. * to set the attribute specific access operations. */
  820. int simple_attr_open(struct inode *inode, struct file *file,
  821. int (*get)(void *, u64 *), int (*set)(void *, u64),
  822. const char *fmt)
  823. {
  824. struct simple_attr *attr;
  825. attr = kzalloc(sizeof(*attr), GFP_KERNEL);
  826. if (!attr)
  827. return -ENOMEM;
  828. attr->get = get;
  829. attr->set = set;
  830. attr->data = inode->i_private;
  831. attr->fmt = fmt;
  832. mutex_init(&attr->mutex);
  833. file->private_data = attr;
  834. return nonseekable_open(inode, file);
  835. }
  836. EXPORT_SYMBOL_GPL(simple_attr_open);
  837. int simple_attr_release(struct inode *inode, struct file *file)
  838. {
  839. kfree(file->private_data);
  840. return 0;
  841. }
  842. EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */
  843. /* read from the buffer that is filled with the get function */
  844. ssize_t simple_attr_read(struct file *file, char __user *buf,
  845. size_t len, loff_t *ppos)
  846. {
  847. struct simple_attr *attr;
  848. size_t size;
  849. ssize_t ret;
  850. attr = file->private_data;
  851. if (!attr->get)
  852. return -EACCES;
  853. ret = mutex_lock_interruptible(&attr->mutex);
  854. if (ret)
  855. return ret;
  856. if (*ppos && attr->get_buf[0]) {
  857. /* continued read */
  858. size = strlen(attr->get_buf);
  859. } else {
  860. /* first read */
  861. u64 val;
  862. ret = attr->get(attr->data, &val);
  863. if (ret)
  864. goto out;
  865. size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
  866. attr->fmt, (unsigned long long)val);
  867. }
  868. ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
  869. out:
  870. mutex_unlock(&attr->mutex);
  871. return ret;
  872. }
  873. EXPORT_SYMBOL_GPL(simple_attr_read);
  874. /* interpret the buffer as a number to call the set function with */
  875. static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf,
  876. size_t len, loff_t *ppos, bool is_signed)
  877. {
  878. struct simple_attr *attr;
  879. unsigned long long val;
  880. size_t size;
  881. ssize_t ret;
  882. attr = file->private_data;
  883. if (!attr->set)
  884. return -EACCES;
  885. ret = mutex_lock_interruptible(&attr->mutex);
  886. if (ret)
  887. return ret;
  888. ret = -EFAULT;
  889. size = min(sizeof(attr->set_buf) - 1, len);
  890. if (copy_from_user(attr->set_buf, buf, size))
  891. goto out;
  892. attr->set_buf[size] = '\0';
  893. if (is_signed)
  894. ret = kstrtoll(attr->set_buf, 0, &val);
  895. else
  896. ret = kstrtoull(attr->set_buf, 0, &val);
  897. if (ret)
  898. goto out;
  899. ret = attr->set(attr->data, val);
  900. if (ret == 0)
  901. ret = len; /* on success, claim we got the whole input */
  902. out:
  903. mutex_unlock(&attr->mutex);
  904. return ret;
  905. }
  906. ssize_t simple_attr_write(struct file *file, const char __user *buf,
  907. size_t len, loff_t *ppos)
  908. {
  909. return simple_attr_write_xsigned(file, buf, len, ppos, false);
  910. }
  911. EXPORT_SYMBOL_GPL(simple_attr_write);
  912. ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
  913. size_t len, loff_t *ppos)
  914. {
  915. return simple_attr_write_xsigned(file, buf, len, ppos, true);
  916. }
  917. EXPORT_SYMBOL_GPL(simple_attr_write_signed);
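/*
 * Example (illustrative sketch): these helpers are normally consumed via
 * the DEFINE_SIMPLE_ATTRIBUTE() macro in <linux/fs.h>, which generates
 * the open/read/write wrappers around a pair of get/set callbacks.  The
 * counter and names below are hypothetical.
 *
 *	static u64 example_counter;
 *
 *	static int example_counter_get(void *data, u64 *val)
 *	{
 *		*val = *(u64 *)data;
 *		return 0;
 *	}
 *
 *	static int example_counter_set(void *data, u64 val)
 *	{
 *		*(u64 *)data = val;
 *		return 0;
 *	}
 *
 *	DEFINE_SIMPLE_ATTRIBUTE(example_counter_fops, example_counter_get,
 *				example_counter_set, "%llu\n");
 */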
  918. /**
  919. * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
  920. * @sb: filesystem to do the file handle conversion on
  921. * @fid: file handle to convert
  922. * @fh_len: length of the file handle in bytes
  923. * @fh_type: type of file handle
  924. * @get_inode: filesystem callback to retrieve inode
  925. *
  926. * This function decodes @fid as long as it has one of the well-known
  927. * Linux filehandle types and calls @get_inode on it to retrieve the
  928. * inode for the object specified in the file handle.
  929. */
  930. struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
  931. int fh_len, int fh_type, struct inode *(*get_inode)
  932. (struct super_block *sb, u64 ino, u32 gen))
  933. {
  934. struct inode *inode = NULL;
  935. if (fh_len < 2)
  936. return NULL;
  937. switch (fh_type) {
  938. case FILEID_INO32_GEN:
  939. case FILEID_INO32_GEN_PARENT:
  940. inode = get_inode(sb, fid->i32.ino, fid->i32.gen);
  941. break;
  942. }
  943. return d_obtain_alias(inode);
  944. }
  945. EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
  946. /**
  947. * generic_fh_to_parent - generic helper for the fh_to_parent export operation
  948. * @sb: filesystem to do the file handle conversion on
  949. * @fid: file handle to convert
  950. * @fh_len: length of the file handle in bytes
  951. * @fh_type: type of file handle
  952. * @get_inode: filesystem callback to retrieve inode
  953. *
  954. * This function decodes @fid as long as it has one of the well-known
  955. * Linux filehandle types and calls @get_inode on it to retrieve the
  956. * inode for the _parent_ object specified in the file handle if it
  957. * is specified in the file handle, or NULL otherwise.
  958. */
  959. struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
  960. int fh_len, int fh_type, struct inode *(*get_inode)
  961. (struct super_block *sb, u64 ino, u32 gen))
  962. {
  963. struct inode *inode = NULL;
  964. if (fh_len <= 2)
  965. return NULL;
  966. switch (fh_type) {
  967. case FILEID_INO32_GEN_PARENT:
  968. inode = get_inode(sb, fid->i32.parent_ino,
  969. (fh_len > 3 ? fid->i32.parent_gen : 0));
  970. break;
  971. }
  972. return d_obtain_alias(inode);
  973. }
  974. EXPORT_SYMBOL_GPL(generic_fh_to_parent);
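/*
 * Example (illustrative sketch): a filesystem with 32-bit inode numbers
 * can implement NFS export support with these helpers plus a callback
 * that turns (ino, generation) into an inode.  examplefs_nfs_get_inode()
 * is a hypothetical wrapper around the filesystem's own iget routine.
 *
 *	static struct dentry *examplefs_fh_to_dentry(struct super_block *sb,
 *			struct fid *fid, int fh_len, int fh_type)
 *	{
 *		return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 *					    examplefs_nfs_get_inode);
 *	}
 *
 *	static const struct export_operations examplefs_export_ops = {
 *		.fh_to_dentry	= examplefs_fh_to_dentry,
 *	};
 */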
  975. /**
  976. * __generic_file_fsync - generic fsync implementation for simple filesystems
  977. *
  978. * @file: file to synchronize
  979. * @start: start offset in bytes
  980. * @end: end offset in bytes (inclusive)
  981. * @datasync: only synchronize essential metadata if true
  982. *
  983. * This is a generic implementation of the fsync method for simple
  984. * filesystems which track all non-inode metadata in the buffers list
  985. * hanging off the address_space structure.
  986. */
  987. int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
  988. int datasync)
  989. {
  990. struct inode *inode = file->f_mapping->host;
  991. int err;
  992. int ret;
  993. err = file_write_and_wait_range(file, start, end);
  994. if (err)
  995. return err;
  996. inode_lock(inode);
  997. ret = sync_mapping_buffers(inode->i_mapping);
  998. if (!(inode->i_state & I_DIRTY_ALL))
  999. goto out;
  1000. if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
  1001. goto out;
  1002. err = sync_inode_metadata(inode, 1);
  1003. if (ret == 0)
  1004. ret = err;
  1005. out:
  1006. inode_unlock(inode);
  1007. /* check and advance again to catch errors after syncing out buffers */
  1008. err = file_check_and_advance_wb_err(file);
  1009. if (ret == 0)
  1010. ret = err;
  1011. return ret;
  1012. }
  1013. EXPORT_SYMBOL(__generic_file_fsync);
  1014. /**
  1015. * generic_file_fsync - generic fsync implementation for simple filesystems
  1016. * with flush
  1017. * @file: file to synchronize
  1018. * @start: start offset in bytes
  1019. * @end: end offset in bytes (inclusive)
  1020. * @datasync: only synchronize essential metadata if true
  1021. *
  1022. */
  1023. int generic_file_fsync(struct file *file, loff_t start, loff_t end,
  1024. int datasync)
  1025. {
  1026. struct inode *inode = file->f_mapping->host;
  1027. int err;
  1028. err = __generic_file_fsync(file, start, end, datasync);
  1029. if (err)
  1030. return err;
  1031. return blkdev_issue_flush(inode->i_sb->s_bdev);
  1032. }
  1033. EXPORT_SYMBOL(generic_file_fsync);
  1034. /**
  1035. * generic_check_addressable - Check addressability of file system
  1036. * @blocksize_bits: log of file system block size
  1037. * @num_blocks: number of blocks in file system
  1038. *
  1039. * Determine whether a file system with @num_blocks blocks (and a
  1040. * block size of 2**@blocksize_bits) is addressable by the sector_t
  1041. * and page cache of the system. Return 0 if so and -EFBIG otherwise.
  1042. */
  1043. int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
  1044. {
  1045. u64 last_fs_block = num_blocks - 1;
  1046. u64 last_fs_page =
  1047. last_fs_block >> (PAGE_SHIFT - blocksize_bits);
  1048. if (unlikely(num_blocks == 0))
  1049. return 0;
  1050. if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT))
  1051. return -EINVAL;
  1052. if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
  1053. (last_fs_page > (pgoff_t)(~0ULL))) {
  1054. return -EFBIG;
  1055. }
  1056. return 0;
  1057. }
  1058. EXPORT_SYMBOL(generic_check_addressable);
  1059. /*
  1060. * No-op implementation of ->fsync for in-memory filesystems.
  1061. */
  1062. int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  1063. {
  1064. return 0;
  1065. }
  1066. EXPORT_SYMBOL(noop_fsync);
  1067. ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
  1068. {
  1069. /*
  1070. * iomap based filesystems support direct I/O without need for
  1071. * this callback. However, it still needs to be set in
  1072. * inode->a_ops so that open/fcntl know that direct I/O is
  1073. * generally supported.
  1074. */
  1075. return -EINVAL;
  1076. }
  1077. EXPORT_SYMBOL_GPL(noop_direct_IO);
  1078. /* Because kfree isn't assignment-compatible with void(void*) ;-/ */
  1079. void kfree_link(void *p)
  1080. {
  1081. kfree(p);
  1082. }
  1083. EXPORT_SYMBOL(kfree_link);
  1084. struct inode *alloc_anon_inode(struct super_block *s)
  1085. {
  1086. static const struct address_space_operations anon_aops = {
  1087. .dirty_folio = noop_dirty_folio,
  1088. };
  1089. struct inode *inode = new_inode_pseudo(s);
  1090. if (!inode)
  1091. return ERR_PTR(-ENOMEM);
  1092. inode->i_ino = get_next_ino();
  1093. inode->i_mapping->a_ops = &anon_aops;
  1094. /*
  1095. * Mark the inode dirty from the very beginning,
  1096. * that way it will never be moved to the dirty
  1097. * list because mark_inode_dirty() will think
  1098. * that it already _is_ on the dirty list.
  1099. */
  1100. inode->i_state = I_DIRTY;
  1101. inode->i_mode = S_IRUSR | S_IWUSR;
  1102. inode->i_uid = current_fsuid();
  1103. inode->i_gid = current_fsgid();
  1104. inode->i_flags |= S_PRIVATE;
  1105. inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
  1106. return inode;
  1107. }
  1108. EXPORT_SYMBOL(alloc_anon_inode);
  1109. /**
  1110. * simple_nosetlease - generic helper for prohibiting leases
  1111. * @filp: file pointer
  1112. * @arg: type of lease to obtain
  1113. * @flp: new lease supplied for insertion
  1114. * @priv: private data for lm_setup operation
  1115. *
  1116. * Generic helper for filesystems that do not wish to allow leases to be set.
  1117. * All arguments are ignored and it just returns -EINVAL.
  1118. */
  1119. int
  1120. simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
  1121. void **priv)
  1122. {
  1123. return -EINVAL;
  1124. }
  1125. EXPORT_SYMBOL(simple_nosetlease);
  1126. /**
  1127. * simple_get_link - generic helper to get the target of "fast" symlinks
  1128. * @dentry: not used here
  1129. * @inode: the symlink inode
  1130. * @done: not used here
  1131. *
  1132. * Generic helper for filesystems to use for symlink inodes where a pointer to
  1133. * the symlink target is stored in ->i_link. NOTE: this isn't normally called,
  1134. * since as an optimization the path lookup code uses any non-NULL ->i_link
  1135. * directly, without calling ->get_link(). But ->get_link() still must be set,
  1136. * to mark the inode_operations as being for a symlink.
  1137. *
  1138. * Return: the symlink target
  1139. */
  1140. const char *simple_get_link(struct dentry *dentry, struct inode *inode,
  1141. struct delayed_call *done)
  1142. {
  1143. return inode->i_link;
  1144. }
  1145. EXPORT_SYMBOL(simple_get_link);
  1146. const struct inode_operations simple_symlink_inode_operations = {
  1147. .get_link = simple_get_link,
  1148. };
  1149. EXPORT_SYMBOL(simple_symlink_inode_operations);
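/*
 * Example (illustrative sketch): creating a "fast" symlink whose target
 * string is kept in ->i_link; the filesystem remains responsible for
 * freeing it when the inode is destroyed.  "target" is a hypothetical
 * NUL-terminated path.
 *
 *	inode->i_link = kstrdup(target, GFP_KERNEL);
 *	if (!inode->i_link)
 *		return -ENOMEM;
 *	inode->i_op = &simple_symlink_inode_operations;
 *	inode->i_size = strlen(target);
 */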
  1150. /*
  1151. * Operations for a permanently empty directory.
  1152. */
  1153. static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
  1154. {
  1155. return ERR_PTR(-ENOENT);
  1156. }
  1157. static int empty_dir_getattr(struct user_namespace *mnt_userns,
  1158. const struct path *path, struct kstat *stat,
  1159. u32 request_mask, unsigned int query_flags)
  1160. {
  1161. struct inode *inode = d_inode(path->dentry);
  1162. generic_fillattr(&init_user_ns, inode, stat);
  1163. return 0;
  1164. }
  1165. static int empty_dir_setattr(struct user_namespace *mnt_userns,
  1166. struct dentry *dentry, struct iattr *attr)
  1167. {
  1168. return -EPERM;
  1169. }
  1170. static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size)
  1171. {
  1172. return -EOPNOTSUPP;
  1173. }
  1174. static const struct inode_operations empty_dir_inode_operations = {
  1175. .lookup = empty_dir_lookup,
  1176. .permission = generic_permission,
  1177. .setattr = empty_dir_setattr,
  1178. .getattr = empty_dir_getattr,
  1179. .listxattr = empty_dir_listxattr,
  1180. };
  1181. static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
  1182. {
  1183. /* An empty directory has two entries . and .. at offsets 0 and 1 */
  1184. return generic_file_llseek_size(file, offset, whence, 2, 2);
  1185. }
  1186. static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
  1187. {
  1188. dir_emit_dots(file, ctx);
  1189. return 0;
  1190. }
  1191. static const struct file_operations empty_dir_operations = {
  1192. .llseek = empty_dir_llseek,
  1193. .read = generic_read_dir,
  1194. .iterate_shared = empty_dir_readdir,
  1195. .fsync = noop_fsync,
  1196. };
  1197. void make_empty_dir_inode(struct inode *inode)
  1198. {
  1199. set_nlink(inode, 2);
  1200. inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
  1201. inode->i_uid = GLOBAL_ROOT_UID;
  1202. inode->i_gid = GLOBAL_ROOT_GID;
  1203. inode->i_rdev = 0;
  1204. inode->i_size = 0;
  1205. inode->i_blkbits = PAGE_SHIFT;
  1206. inode->i_blocks = 0;
  1207. inode->i_op = &empty_dir_inode_operations;
  1208. inode->i_opflags &= ~IOP_XATTR;
  1209. inode->i_fop = &empty_dir_operations;
  1210. }
  1211. bool is_empty_dir_inode(struct inode *inode)
  1212. {
  1213. return (inode->i_fop == &empty_dir_operations) &&
  1214. (inode->i_op == &empty_dir_inode_operations);
  1215. }
  1216. #if IS_ENABLED(CONFIG_UNICODE)
  1217. /*
  1218. * Determine if the name of a dentry should be casefolded.
  1219. *
  1220. * Return: if names will need casefolding
  1221. */
  1222. static bool needs_casefold(const struct inode *dir)
  1223. {
  1224. return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
  1225. }
  1226. /**
  1227. * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
  1228. * @dentry: dentry whose name we are checking against
  1229. * @len: len of name of dentry
  1230. * @str: str pointer to name of dentry
  1231. * @name: Name to compare against
  1232. *
  1233. * Return: 0 if names match, 1 if mismatch, or -ERRNO
  1234. */
  1235. static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
  1236. const char *str, const struct qstr *name)
  1237. {
  1238. const struct dentry *parent = READ_ONCE(dentry->d_parent);
  1239. const struct inode *dir = READ_ONCE(parent->d_inode);
  1240. const struct super_block *sb = dentry->d_sb;
  1241. const struct unicode_map *um = sb->s_encoding;
  1242. struct qstr qstr = QSTR_INIT(str, len);
  1243. char strbuf[DNAME_INLINE_LEN];
  1244. int ret;
  1245. if (!dir || !needs_casefold(dir))
  1246. goto fallback;
  1247. /*
  1248. * If the dentry name is stored in-line, then it may be concurrently
  1249. * modified by a rename. If this happens, the VFS will eventually retry
  1250. * the lookup, so it doesn't matter what ->d_compare() returns.
  1251. * However, it's unsafe to call utf8_strncasecmp() with an unstable
  1252. * string. Therefore, we have to copy the name into a temporary buffer.
  1253. */
  1254. if (len <= DNAME_INLINE_LEN - 1) {
  1255. memcpy(strbuf, str, len);
  1256. strbuf[len] = 0;
  1257. qstr.name = strbuf;
  1258. /* prevent compiler from optimizing out the temporary buffer */
  1259. barrier();
  1260. }
  1261. ret = utf8_strncasecmp(um, name, &qstr);
  1262. if (ret >= 0)
  1263. return ret;
  1264. if (sb_has_strict_encoding(sb))
  1265. return -EINVAL;
  1266. fallback:
  1267. if (len != name->len)
  1268. return 1;
  1269. return !!memcmp(str, name->name, len);
  1270. }
  1271. /**
  1272. * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems
  1273. * @dentry: dentry of the parent directory
  1274. * @str: qstr of name whose hash we should fill in
  1275. *
  1276. * Return: 0 if hash was successful or unchanged, and -EINVAL on error
  1277. */
  1278. static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
  1279. {
  1280. const struct inode *dir = READ_ONCE(dentry->d_inode);
  1281. struct super_block *sb = dentry->d_sb;
  1282. const struct unicode_map *um = sb->s_encoding;
  1283. int ret = 0;
  1284. if (!dir || !needs_casefold(dir))
  1285. return 0;
  1286. ret = utf8_casefold_hash(um, dentry, str);
  1287. if (ret < 0 && sb_has_strict_encoding(sb))
  1288. return -EINVAL;
  1289. return 0;
  1290. }
  1291. static const struct dentry_operations generic_ci_dentry_ops = {
  1292. .d_hash = generic_ci_d_hash,
  1293. .d_compare = generic_ci_d_compare,
  1294. };
  1295. #endif
  1296. #ifdef CONFIG_FS_ENCRYPTION
  1297. static const struct dentry_operations generic_encrypted_dentry_ops = {
  1298. .d_revalidate = fscrypt_d_revalidate,
  1299. };
  1300. #endif
  1301. #if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE)
  1302. static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
  1303. .d_hash = generic_ci_d_hash,
  1304. .d_compare = generic_ci_d_compare,
  1305. .d_revalidate = fscrypt_d_revalidate,
  1306. };
  1307. #endif
  1308. /**
  1309. * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
  1310. * @dentry: dentry to set ops on
  1311. *
  1312. * Casefolded directories need d_hash and d_compare set, so that the dentries
  1313. * contained in them are handled case-insensitively. Note that these operations
  1314. * are needed on the parent directory rather than on the dentries in it, and
  1315. * while the casefolding flag can be toggled on and off on an empty directory,
  1316. * dentry_operations can't be changed later. As a result, if the filesystem has
  1317. * casefolding support enabled at all, we have to give all dentries the
  1318. * casefolding operations even if their inode doesn't have the casefolding flag
  1319. * currently (and thus the casefolding ops would be no-ops for now).
  1320. *
  1321. * Encryption works differently in that the only dentry operation it needs is
  1322. * d_revalidate, which it only needs on dentries that have the no-key name flag.
  1323. * The no-key flag can't be set "later", so we don't have to worry about that.
  1324. *
  1325. * Finally, to maximize compatibility with overlayfs (which isn't compatible
  1326. * with certain dentry operations) and to avoid taking an unnecessary
  1327. * performance hit, we use custom dentry_operations for each possible
  1328. * combination rather than always installing all operations.
  1329. */
  1330. void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
  1331. {
  1332. #ifdef CONFIG_FS_ENCRYPTION
  1333. bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
  1334. #endif
  1335. #if IS_ENABLED(CONFIG_UNICODE)
  1336. bool needs_ci_ops = dentry->d_sb->s_encoding;
  1337. #endif
  1338. #if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE)
  1339. if (needs_encrypt_ops && needs_ci_ops) {
  1340. d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
  1341. return;
  1342. }
  1343. #endif
  1344. #ifdef CONFIG_FS_ENCRYPTION
  1345. if (needs_encrypt_ops) {
  1346. d_set_d_op(dentry, &generic_encrypted_dentry_ops);
  1347. return;
  1348. }
  1349. #endif
  1350. #if IS_ENABLED(CONFIG_UNICODE)
  1351. if (needs_ci_ops) {
  1352. d_set_d_op(dentry, &generic_ci_dentry_ops);
  1353. return;
  1354. }
  1355. #endif
  1356. }
  1357. EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
  1358. /**
  1359. * inode_maybe_inc_iversion - increments i_version
  1360. * @inode: inode with the i_version that should be updated
  1361. * @force: increment the counter even if it's not necessary?
  1362. *
  1363. * Every time the inode is modified, the i_version field must be seen to have
  1364. * changed by any observer.
  1365. *
  1366. * If "force" is set or the QUERIED flag is set, then ensure that we increment
  1367. * the value, and clear the queried flag.
  1368. *
  1369. * In the common case where neither is set, then we can return "false" without
  1370. * updating i_version.
  1371. *
  1372. * If this function returns false, and no other metadata has changed, then we
  1373. * can avoid logging the metadata.
  1374. */
  1375. bool inode_maybe_inc_iversion(struct inode *inode, bool force)
  1376. {
  1377. u64 cur, new;
  1378. /*
  1379. * The i_version field is not strictly ordered with any other inode
  1380. * information, but the legacy inode_inc_iversion code used a spinlock
  1381. * to serialize increments.
  1382. *
  1383. * Here, we add full memory barriers to ensure that any de-facto
  1384. * ordering with other info is preserved.
  1385. *
  1386. * This barrier pairs with the barrier in inode_query_iversion()
  1387. */
  1388. smp_mb();
  1389. cur = inode_peek_iversion_raw(inode);
  1390. do {
  1391. /* If flag is clear then we needn't do anything */
  1392. if (!force && !(cur & I_VERSION_QUERIED))
  1393. return false;
  1394. /* Since lowest bit is flag, add 2 to avoid it */
  1395. new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT;
  1396. } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
  1397. return true;
  1398. }
  1399. EXPORT_SYMBOL(inode_maybe_inc_iversion);