nfs42xattr.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved.
 *
 * User extended attribute client side cache functions.
 *
 * Author: Frank van der Linden <[email protected]>
 */
#include <linux/errno.h>
#include <linux/nfs_fs.h>
#include <linux/hashtable.h>
#include <linux/refcount.h>
#include <uapi/linux/xattr.h>

#include "nfs4_fs.h"
#include "internal.h"

/*
 * User extended attributes client side caching is implemented by having
 * a cache structure attached to NFS inodes. This structure is allocated
 * when needed, and freed when the cache is zapped.
 *
 * The cache structure contains a hash table of entries, and a pointer
 * to a special-cased entry for the listxattr cache.
 *
 * Accessing and allocating / freeing the caches is done via reference
 * counting. The cache entries use a similar refcounting scheme.
 *
 * This makes freeing a cache, both from the shrinker and from the
 * zap cache path, easy. It also means that, in current use cases,
 * the large majority of inodes will not waste any memory, as they
 * will never have any user extended attributes assigned to them.
 *
 * Attribute entries are hashed in to a simple hash table. They are
 * also part of an LRU.
 *
 * There are three shrinkers.
 *
 * Two shrinkers deal with the cache entries themselves: one for
 * large entries (> PAGE_SIZE), and one for smaller entries. The
 * shrinker for the larger entries works more aggressively than
 * the one for the smaller entries.
 *
 * The other shrinker frees the cache structures themselves.
 */
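
/*
 * Illustrative call flow (a sketch only; the real callers are the NFSv4.2
 * xattr operations, e.g. in nfs42proc.c):
 *
 *	getxattr:	nfs4_xattr_cache_get(); on a miss, issue the
 *			GETXATTR RPC and fill the cache with
 *			nfs4_xattr_cache_add().
 *	setxattr:	after a successful SETXATTR RPC, call
 *			nfs4_xattr_cache_add().
 *	removexattr:	after a successful REMOVEXATTR RPC, call
 *			nfs4_xattr_cache_remove().
 *	listxattr:	nfs4_xattr_cache_list(); on a miss, issue the
 *			LISTXATTRS RPC and cache the result with
 *			nfs4_xattr_cache_set_list().
 */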

/*
 * 64 buckets is a good default. There is likely no reasonable
 * workload that uses more than even 64 user extended attributes.
 * You can certainly add a lot more - but you get what you ask for
 * in those circumstances.
 */
#define NFS4_XATTR_HASH_SIZE	64

#define NFSDBG_FACILITY	NFSDBG_XATTRCACHE

struct nfs4_xattr_cache;
struct nfs4_xattr_entry;

struct nfs4_xattr_bucket {
	spinlock_t lock;
	struct hlist_head hlist;
	struct nfs4_xattr_cache *cache;
	bool draining;
};

struct nfs4_xattr_cache {
	struct kref ref;
	struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
	struct list_head lru;
	struct list_head dispose;
	atomic_long_t nent;
	spinlock_t listxattr_lock;
	struct inode *inode;
	struct nfs4_xattr_entry *listxattr;
};

struct nfs4_xattr_entry {
	struct kref ref;
	struct hlist_node hnode;
	struct list_head lru;
	struct list_head dispose;
	char *xattr_name;
	void *xattr_value;
	size_t xattr_size;
	struct nfs4_xattr_bucket *bucket;
	uint32_t flags;
};

#define	NFS4_XATTR_ENTRY_EXTVAL	0x0001

/*
 * LRU list of NFS inodes that have xattr caches.
 */
static struct list_lru nfs4_xattr_cache_lru;
static struct list_lru nfs4_xattr_entry_lru;
static struct list_lru nfs4_xattr_large_entry_lru;

static struct kmem_cache *nfs4_xattr_cache_cachep;

/*
 * Hashing helper functions.
 */
static void
nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache)
{
	unsigned int i;

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&cache->buckets[i].hlist);
		spin_lock_init(&cache->buckets[i].lock);
		cache->buckets[i].cache = cache;
		cache->buckets[i].draining = false;
	}
}

/*
 * Locking order:
 * 1. inode i_lock or bucket lock
 * 2. list_lru lock (taken by list_lru_* functions)
 */

/*
 * Wrapper functions to add a cache entry to the right LRU.
 */
static bool
nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
{
	struct list_lru *lru;

	lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	return list_lru_add(lru, &entry->lru);
}

static bool
nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
{
	struct list_lru *lru;

	lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	return list_lru_del(lru, &entry->lru);
}

/*
 * This function allocates cache entries. They are the normal
 * extended attribute name/value pairs, but may also be a listxattr
 * cache. Those allocations use the same entry so that they can be
 * treated as one by the memory shrinker.
 *
 * xattr cache entries are allocated together with names. If the
 * value fits in to one page with the entry structure and the name,
 * it will also be part of the same allocation (kmalloc). This is
 * expected to be the vast majority of cases. Larger allocations
 * have a value pointer that is allocated separately by kvmalloc.
 *
 * Parameters:
 *
 * @name:  Name of the extended attribute. NULL for listxattr cache
 *         entry.
 * @value: Value of attribute, or listxattr cache. NULL if the
 *         value is to be copied from pages instead.
 * @pages: Pages to copy the value from, if not NULL. Passed in to
 *         make it easier to copy the value after an RPC, even if
 *         the value will not be passed up to application (e.g.
 *         for a 'query' getxattr with NULL buffer).
 * @len:   Length of the value. Can be 0 for zero-length attributes.
 *         @value and @pages will be NULL if @len is 0.
 */
static struct nfs4_xattr_entry *
nfs4_xattr_alloc_entry(const char *name, const void *value,
		       struct page **pages, size_t len)
{
	struct nfs4_xattr_entry *entry;
	void *valp;
	char *namep;
	size_t alloclen, slen;
	char *buf;
	uint32_t flags;

	BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) +
	    XATTR_NAME_MAX + 1 > PAGE_SIZE);

	alloclen = sizeof(struct nfs4_xattr_entry);
	if (name != NULL) {
		slen = strlen(name) + 1;
		alloclen += slen;
	} else
		slen = 0;

	if (alloclen + len <= PAGE_SIZE) {
		alloclen += len;
		flags = 0;
	} else {
		flags = NFS4_XATTR_ENTRY_EXTVAL;
	}

	buf = kmalloc(alloclen, GFP_KERNEL);
	if (buf == NULL)
		return NULL;
	entry = (struct nfs4_xattr_entry *)buf;

	if (name != NULL) {
		namep = buf + sizeof(struct nfs4_xattr_entry);
		memcpy(namep, name, slen);
	} else {
		namep = NULL;
	}

	if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
		valp = kvmalloc(len, GFP_KERNEL);
		if (valp == NULL) {
			kfree(buf);
			return NULL;
		}
	} else if (len != 0) {
		valp = buf + sizeof(struct nfs4_xattr_entry) + slen;
	} else
		valp = NULL;

	if (valp != NULL) {
		if (value != NULL)
			memcpy(valp, value, len);
		else
			_copy_from_pages(valp, pages, 0, len);
	}

	entry->flags = flags;
	entry->xattr_value = valp;
	kref_init(&entry->ref);
	entry->xattr_name = namep;
	entry->xattr_size = len;
	entry->bucket = NULL;
	INIT_LIST_HEAD(&entry->lru);
	INIT_LIST_HEAD(&entry->dispose);
	INIT_HLIST_NODE(&entry->hnode);

	return entry;
}

static void
nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry)
{
	if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL)
		kvfree(entry->xattr_value);
	kfree(entry);
}
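
/*
 * kref release callback for cache entries. The entry must already have
 * been taken off its LRU; freeing an entry that is still on an LRU list
 * would indicate a refcounting bug, hence the WARN_ON.
 */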
static void
nfs4_xattr_free_entry_cb(struct kref *kref)
{
	struct nfs4_xattr_entry *entry;

	entry = container_of(kref, struct nfs4_xattr_entry, ref);

	if (WARN_ON(!list_empty(&entry->lru)))
		return;

	nfs4_xattr_free_entry(entry);
}
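
/*
 * kref release callback for the cache structure itself. By the time the
 * last reference is dropped, nfs4_xattr_discard_cache() must have emptied
 * all hash buckets, which is what the WARN_ON checks.
 */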
static void
nfs4_xattr_free_cache_cb(struct kref *kref)
{
	struct nfs4_xattr_cache *cache;
	int i;

	cache = container_of(kref, struct nfs4_xattr_cache, ref);

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist)))
			return;
		cache->buckets[i].draining = false;
	}

	cache->listxattr = NULL;

	kmem_cache_free(nfs4_xattr_cache_cachep, cache);
}

static struct nfs4_xattr_cache *
nfs4_xattr_alloc_cache(void)
{
	struct nfs4_xattr_cache *cache;

	cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, GFP_KERNEL);
	if (cache == NULL)
		return NULL;

	kref_init(&cache->ref);
	atomic_long_set(&cache->nent, 0);

	return cache;
}

/*
 * Set the listxattr cache, which is a special-cased cache entry.
 * The special value ERR_PTR(-ESTALE) is used to indicate that
 * the cache is being drained - this prevents a new listxattr
 * cache from being added to what is now a stale cache.
 */
static int
nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache,
			 struct nfs4_xattr_entry *new)
{
	struct nfs4_xattr_entry *old;
	int ret = 1;

	spin_lock(&cache->listxattr_lock);

	old = cache->listxattr;

	if (old == ERR_PTR(-ESTALE)) {
		ret = 0;
		goto out;
	}

	cache->listxattr = new;
	if (new != NULL && new != ERR_PTR(-ESTALE))
		nfs4_xattr_entry_lru_add(new);

	if (old != NULL) {
		nfs4_xattr_entry_lru_del(old);
		kref_put(&old->ref, nfs4_xattr_free_entry_cb);
	}
out:
	spin_unlock(&cache->listxattr_lock);

	return ret;
}

/*
 * Unlink a cache from its parent inode, clearing out an invalid
 * cache. Must be called with i_lock held.
 */
static struct nfs4_xattr_cache *
nfs4_xattr_cache_unlink(struct inode *inode)
{
	struct nfs_inode *nfsi;
	struct nfs4_xattr_cache *oldcache;

	nfsi = NFS_I(inode);

	oldcache = nfsi->xattr_cache;
	if (oldcache != NULL) {
		list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru);
		oldcache->inode = NULL;
	}
	nfsi->xattr_cache = NULL;
	nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR;

	return oldcache;
}

/*
 * Discard a cache. Called by get_cache() if there was an old,
 * invalid cache. Can also be called from a shrinker callback.
 *
 * The cache is dead, it has already been unlinked from its inode,
 * and no longer appears on the cache LRU list.
 *
 * Mark all buckets as draining, so that no new entries are added. This
 * could still happen in the unlikely, but possible case that another
 * thread had grabbed a reference before it was unlinked from the inode,
 * and is still holding it for an add operation.
 *
 * Remove all entries from the LRU lists, so that there is no longer
 * any way to 'find' this cache. Then, remove the entries from the hash
 * table.
 *
 * At that point, the cache will remain empty and can be freed when the final
 * reference drops, which is very likely the kref_put at the end of
 * this function, or the one called immediately afterwards in the
 * shrinker callback.
 */
static void
nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache)
{
	unsigned int i;
	struct nfs4_xattr_entry *entry;
	struct nfs4_xattr_bucket *bucket;
	struct hlist_node *n;

	nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE));

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		bucket = &cache->buckets[i];

		spin_lock(&bucket->lock);
		bucket->draining = true;
		hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) {
			nfs4_xattr_entry_lru_del(entry);
			hlist_del_init(&entry->hnode);
			kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
		}
		spin_unlock(&bucket->lock);
	}

	atomic_long_set(&cache->nent, 0);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Get a referenced copy of the cache structure. Avoid doing allocs
 * while holding i_lock. Which means that we do some optimistic allocation,
 * and might have to free the result in rare cases.
 *
 * This function only checks the NFS_INO_INVALID_XATTR cache validity bit
 * and acts accordingly, replacing the cache when needed. For the read case
 * (!add), this means that the caller must make sure that the cache
 * is valid before calling this function. getxattr and listxattr call
 * revalidate_inode to do this. The attribute cache timeout (for the
 * non-delegated case) is expected to be dealt with in the revalidate
 * call.
 */
static struct nfs4_xattr_cache *
nfs4_xattr_get_cache(struct inode *inode, int add)
{
	struct nfs_inode *nfsi;
	struct nfs4_xattr_cache *cache, *oldcache, *newcache;

	nfsi = NFS_I(inode);

	cache = oldcache = NULL;

	spin_lock(&inode->i_lock);

	if (nfsi->cache_validity & NFS_INO_INVALID_XATTR)
		oldcache = nfs4_xattr_cache_unlink(inode);
	else
		cache = nfsi->xattr_cache;

	if (cache != NULL)
		kref_get(&cache->ref);

	spin_unlock(&inode->i_lock);

	if (add && cache == NULL) {
		newcache = NULL;

		cache = nfs4_xattr_alloc_cache();
		if (cache == NULL)
			goto out;

		spin_lock(&inode->i_lock);
		if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) {
			/*
			 * The cache was invalidated again. Give up,
			 * since what we want to enter is now likely
			 * outdated anyway.
			 */
			spin_unlock(&inode->i_lock);
			kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
			cache = NULL;
			goto out;
		}

		/*
		 * Check if someone beat us to it.
		 */
		if (nfsi->xattr_cache != NULL) {
			newcache = nfsi->xattr_cache;
			kref_get(&newcache->ref);
		} else {
			kref_get(&cache->ref);
			nfsi->xattr_cache = cache;
			cache->inode = inode;
			list_lru_add(&nfs4_xattr_cache_lru, &cache->lru);
		}

		spin_unlock(&inode->i_lock);

		/*
		 * If there was a race, throw away the cache we just
		 * allocated, and use the new one allocated by someone
		 * else.
		 */
		if (newcache != NULL) {
			kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
			cache = newcache;
		}
	}

out:
	/*
	 * Discard the now orphaned old cache.
	 */
	if (oldcache != NULL)
		nfs4_xattr_discard_cache(oldcache);

	return cache;
}
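
/*
 * Hash an attribute name to a bucket. NFS4_XATTR_HASH_SIZE is a power
 * of two, so the jhash value can simply be masked with the bucket count.
 */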
static inline struct nfs4_xattr_bucket *
nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name)
{
	return &cache->buckets[jhash(name, strlen(name), 0) &
	    (ARRAY_SIZE(cache->buckets) - 1)];
}
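
/*
 * Look up an entry by name in a bucket. The caller must hold the bucket
 * lock; no extra reference is taken on the returned entry.
 */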
static struct nfs4_xattr_entry *
nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name)
{
	struct nfs4_xattr_entry *entry;

	entry = NULL;

	hlist_for_each_entry(entry, &bucket->hlist, hnode) {
		if (!strcmp(entry->xattr_name, name))
			break;
	}

	return entry;
}
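
/*
 * Add an entry to the hash table, replacing any existing entry with the
 * same name. Returns 1 on success, or 0 if the bucket is draining (the
 * cache is being discarded), in which case the caller drops the entry.
 */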
static int
nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache,
		    struct nfs4_xattr_entry *entry)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *oldentry = NULL;
	int ret = 1;

	bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name);
	entry->bucket = bucket;

	spin_lock(&bucket->lock);

	if (bucket->draining) {
		ret = 0;
		goto out;
	}

	oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name);
	if (oldentry != NULL) {
		hlist_del_init(&oldentry->hnode);
		nfs4_xattr_entry_lru_del(oldentry);
	} else {
		atomic_long_inc(&cache->nent);
	}

	hlist_add_head(&entry->hnode, &bucket->hlist);
	nfs4_xattr_entry_lru_add(entry);

out:
	spin_unlock(&bucket->lock);

	if (oldentry != NULL)
		kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb);

	return ret;
}
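
/*
 * Remove the entry with the given name from the hash table and drop its
 * reference, if such an entry is present.
 */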
static void
nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *entry;

	bucket = nfs4_xattr_hash_bucket(cache, name);

	spin_lock(&bucket->lock);

	entry = nfs4_xattr_get_entry(bucket, name);
	if (entry != NULL) {
		hlist_del_init(&entry->hnode);
		nfs4_xattr_entry_lru_del(entry);
		atomic_long_dec(&cache->nent);
	}

	spin_unlock(&bucket->lock);

	if (entry != NULL)
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
}
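
/*
 * Find an entry by name. On success, a reference is taken on the entry
 * for the caller, who must drop it with kref_put() when done.
 */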
static struct nfs4_xattr_entry *
nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *entry;

	bucket = nfs4_xattr_hash_bucket(cache, name);

	spin_lock(&bucket->lock);

	entry = nfs4_xattr_get_entry(bucket, name);
	if (entry != NULL)
		kref_get(&entry->ref);

	spin_unlock(&bucket->lock);

	return entry;
}

/*
 * Entry point to retrieve an entry from the cache.
 */
ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf,
			     ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;
	ssize_t ret;

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return -ENOENT;

	ret = 0;
	entry = nfs4_xattr_hash_find(cache, name);

	if (entry != NULL) {
		dprintk("%s: cache hit '%s', len %lu\n", __func__,
		    entry->xattr_name, (unsigned long)entry->xattr_size);
		if (buflen == 0) {
			/* Length probe only */
			ret = entry->xattr_size;
		} else if (buflen < entry->xattr_size)
			ret = -ERANGE;
		else {
			memcpy(buf, entry->xattr_value, entry->xattr_size);
			ret = entry->xattr_size;
		}
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
	} else {
		dprintk("%s: cache miss '%s'\n", __func__, name);
		ret = -ENOENT;
	}

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);

	return ret;
}

/*
 * Retrieve a cached list of xattrs from the cache.
 */
ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;
	ssize_t ret;

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return -ENOENT;

	spin_lock(&cache->listxattr_lock);

	entry = cache->listxattr;

	if (entry != NULL && entry != ERR_PTR(-ESTALE)) {
		if (buflen == 0) {
			/* Length probe only */
			ret = entry->xattr_size;
		} else if (entry->xattr_size > buflen)
			ret = -ERANGE;
		else {
			memcpy(buf, entry->xattr_value, entry->xattr_size);
			ret = entry->xattr_size;
		}
	} else {
		ret = -ENOENT;
	}

	spin_unlock(&cache->listxattr_lock);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);

	return ret;
}

/*
 * Add an xattr to the cache.
 *
 * This also invalidates the xattr list cache.
 */
void nfs4_xattr_cache_add(struct inode *inode, const char *name,
			  const char *buf, struct page **pages, ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;

	dprintk("%s: add '%s' len %lu\n", __func__,
	    name, (unsigned long)buflen);

	cache = nfs4_xattr_get_cache(inode, 1);
	if (cache == NULL)
		return;

	entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen);
	if (entry == NULL)
		goto out;

	(void)nfs4_xattr_set_listcache(cache, NULL);

	if (!nfs4_xattr_hash_add(cache, entry))
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);

out:
	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Remove an xattr from the cache.
 *
 * This also invalidates the xattr list cache.
 */
void nfs4_xattr_cache_remove(struct inode *inode, const char *name)
{
	struct nfs4_xattr_cache *cache;

	dprintk("%s: remove '%s'\n", __func__, name);

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return;

	(void)nfs4_xattr_set_listcache(cache, NULL);
	nfs4_xattr_hash_remove(cache, name);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Cache listxattr output, replacing any possible old one.
 */
void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
			       ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;

	cache = nfs4_xattr_get_cache(inode, 1);
	if (cache == NULL)
		return;

	entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen);
	if (entry == NULL)
		goto out;

	/*
	 * This is just there to be able to get to bucket->cache,
	 * which is obviously the same for all buckets, so just
	 * use bucket 0.
	 */
	entry->bucket = &cache->buckets[0];

	if (!nfs4_xattr_set_listcache(cache, entry))
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);

out:
	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Zap the entire cache. Called when an inode is evicted.
 */
void nfs4_xattr_cache_zap(struct inode *inode)
{
	struct nfs4_xattr_cache *oldcache;

	spin_lock(&inode->i_lock);
	oldcache = nfs4_xattr_cache_unlink(inode);
	spin_unlock(&inode->i_lock);

	if (oldcache)
		nfs4_xattr_discard_cache(oldcache);
}

/*
 * The entry LRU is shrunk more aggressively than the cache LRU,
 * by setting @seeks to 1.
 *
 * Cache structures are freed only when they've become empty, after
 * pruning all but one entry.
 */
static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink,
					    struct shrink_control *sc);
static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink,
					    struct shrink_control *sc);
static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc);
static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
					   struct shrink_control *sc);

static struct shrinker nfs4_xattr_cache_shrinker = {
	.count_objects	= nfs4_xattr_cache_count,
	.scan_objects	= nfs4_xattr_cache_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_MEMCG_AWARE,
};

static struct shrinker nfs4_xattr_entry_shrinker = {
	.count_objects	= nfs4_xattr_entry_count,
	.scan_objects	= nfs4_xattr_entry_scan,
	.seeks		= DEFAULT_SEEKS,
	.batch		= 512,
	.flags		= SHRINKER_MEMCG_AWARE,
};

static struct shrinker nfs4_xattr_large_entry_shrinker = {
	.count_objects	= nfs4_xattr_entry_count,
	.scan_objects	= nfs4_xattr_entry_scan,
	.seeks		= 1,
	.batch		= 512,
	.flags		= SHRINKER_MEMCG_AWARE,
};
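
/*
 * list_lru walk callback for the cache shrinker: unhook a mostly empty
 * cache (at most one entry) from its inode and move it to the dispose
 * list, so that nfs4_xattr_cache_scan() can discard it.
 */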
static enum lru_status
cache_lru_isolate(struct list_head *item,
	struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct inode *inode;
	struct nfs4_xattr_cache *cache = container_of(item,
	    struct nfs4_xattr_cache, lru);

	if (atomic_long_read(&cache->nent) > 1)
		return LRU_SKIP;

	/*
	 * If a cache structure is on the LRU list, we know that
	 * its inode is valid. Try to lock it to break the link.
	 * Since we're inverting the lock order here, only try.
	 */
	inode = cache->inode;

	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	kref_get(&cache->ref);

	cache->inode = NULL;
	NFS_I(inode)->xattr_cache = NULL;
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR;
	list_lru_isolate(lru, &cache->lru);

	spin_unlock(&inode->i_lock);

	list_add_tail(&cache->dispose, dispose);

	return LRU_REMOVED;
}

static unsigned long
nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;
	struct nfs4_xattr_cache *cache;

	freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc,
	    cache_lru_isolate, &dispose);
	while (!list_empty(&dispose)) {
		cache = list_first_entry(&dispose, struct nfs4_xattr_cache,
		    dispose);
		list_del_init(&cache->dispose);
		nfs4_xattr_discard_cache(cache);
		kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
	}

	return freed;
}

static unsigned long
nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long count;

	count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc);
	return vfs_pressure_ratio(count);
}

static enum lru_status
entry_lru_isolate(struct list_head *item,
	struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry = container_of(item,
	    struct nfs4_xattr_entry, lru);

	bucket = entry->bucket;
	cache = bucket->cache;

	/*
	 * Unhook the entry from its parent (either a cache bucket
	 * or a cache structure if it's a listxattr buf), so that
	 * it's no longer found. Then add it to the isolate list,
	 * to be freed later.
	 *
	 * In both cases, we're inverting the lock order, so use
	 * trylock and skip the entry if we can't get the lock.
	 */
	if (entry->xattr_name != NULL) {
		/* Regular cache entry */
		if (!spin_trylock(&bucket->lock))
			return LRU_SKIP;

		kref_get(&entry->ref);

		hlist_del_init(&entry->hnode);
		atomic_long_dec(&cache->nent);
		list_lru_isolate(lru, &entry->lru);

		spin_unlock(&bucket->lock);
	} else {
		/* Listxattr cache entry */
		if (!spin_trylock(&cache->listxattr_lock))
			return LRU_SKIP;

		kref_get(&entry->ref);

		cache->listxattr = NULL;
		list_lru_isolate(lru, &entry->lru);

		spin_unlock(&cache->listxattr_lock);
	}

	list_add_tail(&entry->dispose, dispose);
	return LRU_REMOVED;
}

static unsigned long
nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;
	struct nfs4_xattr_entry *entry;
	struct list_lru *lru;

	lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);

	while (!list_empty(&dispose)) {
		entry = list_first_entry(&dispose, struct nfs4_xattr_entry,
		    dispose);
		list_del_init(&entry->dispose);

		/*
		 * Drop two references: the one that we just grabbed
		 * in entry_lru_isolate, and the one that was set
		 * when the entry was first allocated.
		 */
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
	}

	return freed;
}

static unsigned long
nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long count;
	struct list_lru *lru;

	lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	count = list_lru_shrink_count(lru, sc);
	return vfs_pressure_ratio(count);
}
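
/*
 * kmem_cache constructor. This runs once when a slab object is first
 * created, not on every allocation, so the fields initialized here must
 * be returned to this state before an object is freed.
 */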
static void nfs4_xattr_cache_init_once(void *p)
{
	struct nfs4_xattr_cache *cache = p;

	spin_lock_init(&cache->listxattr_lock);
	atomic_long_set(&cache->nent, 0);
	nfs4_xattr_hash_init(cache);
	cache->listxattr = NULL;
	INIT_LIST_HEAD(&cache->lru);
	INIT_LIST_HEAD(&cache->dispose);
}
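
/*
 * Register a shrinker and initialize its memcg-aware LRU list. If the
 * LRU setup fails, the shrinker is unregistered again so that the caller
 * only has a single error path to deal with.
 */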
static int nfs4_xattr_shrinker_init(struct shrinker *shrinker,
				    struct list_lru *lru, const char *name)
{
	int ret = 0;

	ret = register_shrinker(shrinker, name);
	if (ret)
		return ret;

	ret = list_lru_init_memcg(lru, shrinker);
	if (ret)
		unregister_shrinker(shrinker);

	return ret;
}

static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker,
					struct list_lru *lru)
{
	unregister_shrinker(shrinker);
	list_lru_destroy(lru);
}

int __init nfs4_xattr_cache_init(void)
{
	int ret = 0;

	nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
	    sizeof(struct nfs4_xattr_cache), 0,
	    (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
	    nfs4_xattr_cache_init_once);
	if (nfs4_xattr_cache_cachep == NULL)
		return -ENOMEM;

	ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker,
				       &nfs4_xattr_cache_lru,
				       "nfs-xattr_cache");
	if (ret)
		goto out1;

	ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker,
				       &nfs4_xattr_entry_lru,
				       "nfs-xattr_entry");
	if (ret)
		goto out2;

	ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker,
				       &nfs4_xattr_large_entry_lru,
				       "nfs-xattr_large_entry");
	if (!ret)
		return 0;

	nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker,
				    &nfs4_xattr_entry_lru);
out2:
	nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker,
				    &nfs4_xattr_cache_lru);
out1:
	kmem_cache_destroy(nfs4_xattr_cache_cachep);

	return ret;
}

void nfs4_xattr_cache_exit(void)
{
	nfs4_xattr_shrinker_destroy(&nfs4_xattr_large_entry_shrinker,
				    &nfs4_xattr_large_entry_lru);
	nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker,
				    &nfs4_xattr_entry_lru);
	nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker,
				    &nfs4_xattr_cache_lru);
	kmem_cache_destroy(nfs4_xattr_cache_cachep);
}