// SPDX-License-Identifier: GPL-2.0-or-later
/* kiocb-using read/write
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells ([email protected])
 */

#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/sched/mm.h>
#include <trace/events/fscache.h>
#include "internal.h"

struct cachefiles_kiocb {
        struct kiocb iocb;
        refcount_t ki_refcnt;
        loff_t start;
        union {
                size_t skipped;
                size_t len;
        };
        struct cachefiles_object *object;
        netfs_io_terminated_t term_func;
        void *term_func_priv;
        bool was_async;
        unsigned int inval_counter; /* Copy of cookie->inval_counter */
        u64 b_writing;
};
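
/* Note on lifetime: the submission paths below initialise ki_refcnt to 2 -
 * one reference is dropped by the completion handler and one by the
 * submitting thread - so the wrapper is freed only once both sides have
 * called cachefiles_put_kiocb().
 */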

static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
{
        if (refcount_dec_and_test(&ki->ki_refcnt)) {
                cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
                fput(ki->iocb.ki_filp);
                kfree(ki);
        }
}

/*
 * Handle completion of a read from the cache.
 */
static void cachefiles_read_complete(struct kiocb *iocb, long ret)
{
        struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
        struct inode *inode = file_inode(ki->iocb.ki_filp);

        _enter("%ld", ret);

        if (ret < 0)
                trace_cachefiles_io_error(ki->object, inode, ret,
                                          cachefiles_trace_read_error);

        if (ki->term_func) {
                if (ret >= 0) {
                        if (ki->object->cookie->inval_counter == ki->inval_counter)
                                ki->skipped += ret;
                        else
                                ret = -ESTALE;
                }

                ki->term_func(ki->term_func_priv, ret, ki->was_async);
        }

        cachefiles_put_kiocb(ki);
}

/*
 * Initiate a read from the cache.
 */
static int cachefiles_read(struct netfs_cache_resources *cres,
                           loff_t start_pos,
                           struct iov_iter *iter,
                           enum netfs_read_from_hole read_hole,
                           netfs_io_terminated_t term_func,
                           void *term_func_priv)
{
        struct cachefiles_object *object;
        struct cachefiles_kiocb *ki;
        struct file *file;
        unsigned int old_nofs;
        ssize_t ret = -ENOBUFS;
        size_t len = iov_iter_count(iter), skipped = 0;

        if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
                goto presubmission_error;

        fscache_count_read();
        object = cachefiles_cres_object(cres);
        file = cachefiles_cres_file(cres);

        _enter("%pD,%li,%llx,%zx/%llx",
               file, file_inode(file)->i_ino, start_pos, len,
               i_size_read(file_inode(file)));

        /* If the caller asked us to seek for data before doing the read, then
         * we should do that now.  If we find a gap, we fill it with zeros.
         */
        if (read_hole != NETFS_READ_HOLE_IGNORE) {
                loff_t off = start_pos, off2;

                off2 = cachefiles_inject_read_error();
                if (off2 == 0)
                        off2 = vfs_llseek(file, off, SEEK_DATA);
                if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
                        skipped = 0;
                        ret = off2;
                        goto presubmission_error;
                }

                if (off2 == -ENXIO || off2 >= start_pos + len) {
                        /* The region is beyond the EOF or there's no more data
                         * in the region, so clear the rest of the buffer and
                         * return success.
                         */
                        ret = -ENODATA;
                        if (read_hole == NETFS_READ_HOLE_FAIL)
                                goto presubmission_error;

                        iov_iter_zero(len, iter);
                        skipped = len;
                        ret = 0;
                        goto presubmission_error;
                }

                skipped = off2 - off;
                iov_iter_zero(skipped, iter);
        }
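
        /* Any hole at the front of the region has now been zero-filled
         * straight into the iterator, so the DIO read below starts at
         * start_pos + skipped and covers only the remaining bytes.
         */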
        ret = -ENOMEM;
        ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
        if (!ki)
                goto presubmission_error;

        refcount_set(&ki->ki_refcnt, 2);
        ki->iocb.ki_filp = file;
        ki->iocb.ki_pos = start_pos + skipped;
        ki->iocb.ki_flags = IOCB_DIRECT;
        ki->iocb.ki_ioprio = get_current_ioprio();
        ki->skipped = skipped;
        ki->object = object;
        ki->inval_counter = cres->inval_counter;
        ki->term_func = term_func;
        ki->term_func_priv = term_func_priv;
        ki->was_async = true;

        if (ki->term_func)
                ki->iocb.ki_complete = cachefiles_read_complete;

        get_file(ki->iocb.ki_filp);
        cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

        trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
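
        /* Issue the IO under memalloc_nofs so that memory reclaim triggered
         * by allocations on this path cannot recurse back into filesystem
         * code.
         */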
        old_nofs = memalloc_nofs_save();
        ret = cachefiles_inject_read_error();
        if (ret == 0)
                ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
        memalloc_nofs_restore(old_nofs);
        switch (ret) {
        case -EIOCBQUEUED:
                goto in_progress;

        case -ERESTARTSYS:
        case -ERESTARTNOINTR:
        case -ERESTARTNOHAND:
        case -ERESTART_RESTARTBLOCK:
                /* There's no easy way to restart the syscall since other AIOs
                 * may already be running.  Just fail this IO with EINTR.
                 */
                ret = -EINTR;
                fallthrough;
        default:
                ki->was_async = false;
                cachefiles_read_complete(&ki->iocb, ret);
                if (ret > 0)
                        ret = 0;
                break;
        }

in_progress:
        cachefiles_put_kiocb(ki);
        _leave(" = %zd", ret);
        return ret;

presubmission_error:
        if (term_func)
                term_func(term_func_priv, ret < 0 ? ret : skipped, false);
        return ret;
}
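
/* A return of -EIOCBQUEUED from the submission above means the read was
 * queued and term_func will run on completion; any other return means the
 * operation completed (or failed) synchronously and term_func, if supplied,
 * has already been called.
 */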

/*
 * Query the occupancy of the cache in a region, returning where the next chunk
 * of data starts and how long it is.
 */
static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
                                      loff_t start, size_t len, size_t granularity,
                                      loff_t *_data_start, size_t *_data_len)
{
        struct cachefiles_object *object;
        struct file *file;
        loff_t off, off2;

        *_data_start = -1;
        *_data_len = 0;

        if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
                return -ENOBUFS;

        object = cachefiles_cres_object(cres);
        file = cachefiles_cres_file(cres);
        granularity = max_t(size_t, object->volume->cache->bsize, granularity);

        _enter("%pD,%li,%llx,%zx/%llx",
               file, file_inode(file)->i_ino, start, len,
               i_size_read(file_inode(file)));

        off = cachefiles_inject_read_error();
        if (off == 0)
                off = vfs_llseek(file, start, SEEK_DATA);
        if (off == -ENXIO)
                return -ENODATA; /* Beyond EOF */
        if (off < 0 && off >= (loff_t)-MAX_ERRNO)
                return -ENOBUFS; /* Error. */
        if (round_up(off, granularity) >= start + len)
                return -ENODATA; /* No data in range */

        off2 = cachefiles_inject_read_error();
        if (off2 == 0)
                off2 = vfs_llseek(file, off, SEEK_HOLE);
        if (off2 == -ENXIO)
                return -ENODATA; /* Beyond EOF */
        if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
                return -ENOBUFS; /* Error. */

        /* Round away partial blocks */
        off = round_up(off, granularity);
        off2 = round_down(off2, granularity);
        if (off2 <= off)
                return -ENODATA;
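
        /* For example, with a 4KiB granularity, data found in 0x1100..0x5100
         * is reported as a chunk at 0x2000 of length 0x3000: the partial
         * blocks at either end are discarded by the rounding above.
         */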
        *_data_start = off;
        if (off2 > start + len)
                *_data_len = len;
        else
                *_data_len = off2 - off;
        return 0;
}

/*
 * Handle completion of a write to the cache.
 */
static void cachefiles_write_complete(struct kiocb *iocb, long ret)
{
        struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
        struct cachefiles_object *object = ki->object;
        struct inode *inode = file_inode(ki->iocb.ki_filp);

        _enter("%ld", ret);

        /* Tell lockdep we inherited freeze protection from submission thread */
        __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
        __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);

        if (ret < 0)
                trace_cachefiles_io_error(object, inode, ret,
                                          cachefiles_trace_write_error);

        atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
        set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
        if (ki->term_func)
                ki->term_func(ki->term_func_priv, ret, ki->was_async);
        cachefiles_put_kiocb(ki);
}
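
/* b_writing counts the cache blocks with writes in flight; the completion
 * handler above subtracts exactly what __cachefiles_write() below adds, so
 * the cache's free-space accounting can allow for IO still in progress.
 */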

/*
 * Initiate a write to the cache.
 */
int __cachefiles_write(struct cachefiles_object *object,
                       struct file *file,
                       loff_t start_pos,
                       struct iov_iter *iter,
                       netfs_io_terminated_t term_func,
                       void *term_func_priv)
{
        struct cachefiles_cache *cache;
        struct cachefiles_kiocb *ki;
        struct inode *inode;
        unsigned int old_nofs;
        ssize_t ret;
        size_t len = iov_iter_count(iter);

        fscache_count_write();
        cache = object->volume->cache;

        _enter("%pD,%li,%llx,%zx/%llx",
               file, file_inode(file)->i_ino, start_pos, len,
               i_size_read(file_inode(file)));

        ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
        if (!ki) {
                if (term_func)
                        term_func(term_func_priv, -ENOMEM, false);
                return -ENOMEM;
        }

        refcount_set(&ki->ki_refcnt, 2);
        ki->iocb.ki_filp = file;
        ki->iocb.ki_pos = start_pos;
        ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
        ki->iocb.ki_ioprio = get_current_ioprio();
        ki->object = object;
        ki->start = start_pos;
        ki->len = len;
        ki->term_func = term_func;
        ki->term_func_priv = term_func_priv;
        ki->was_async = true;
        ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;

        if (ki->term_func)
                ki->iocb.ki_complete = cachefiles_write_complete;
        atomic_long_add(ki->b_writing, &cache->b_writing);

        /* Open-code file_start_write here to grab freeze protection, which
         * will be released by another thread in aio_complete_rw().  Fool
         * lockdep by telling it the lock got released so that it doesn't
         * complain about the held lock when we return to userspace.
         */
        inode = file_inode(file);
        __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
        __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);

        get_file(ki->iocb.ki_filp);
        cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

        trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
        old_nofs = memalloc_nofs_save();
        ret = cachefiles_inject_write_error();
        if (ret == 0)
                ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
        memalloc_nofs_restore(old_nofs);
        switch (ret) {
        case -EIOCBQUEUED:
                goto in_progress;

        case -ERESTARTSYS:
        case -ERESTARTNOINTR:
        case -ERESTARTNOHAND:
        case -ERESTART_RESTARTBLOCK:
                /* There's no easy way to restart the syscall since other AIOs
                 * may already be running.  Just fail this IO with EINTR.
                 */
                ret = -EINTR;
                fallthrough;
        default:
                ki->was_async = false;
                cachefiles_write_complete(&ki->iocb, ret);
                if (ret > 0)
                        ret = 0;
                break;
        }

in_progress:
        cachefiles_put_kiocb(ki);
        _leave(" = %zd", ret);
        return ret;
}
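
/* Note that the write is issued with IOCB_DIRECT, so the caller is expected
 * to supply a file range and buffer suitable for direct IO on the backing
 * filesystem - the PAGE_SIZE rounding in __cachefiles_prepare_write() below
 * arranges the alignment of the range.
 */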

static int cachefiles_write(struct netfs_cache_resources *cres,
                            loff_t start_pos,
                            struct iov_iter *iter,
                            netfs_io_terminated_t term_func,
                            void *term_func_priv)
{
        if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
                if (term_func)
                        term_func(term_func_priv, -ENOBUFS, false);
                return -ENOBUFS;
        }

        return __cachefiles_write(cachefiles_cres_object(cres),
                                  cachefiles_cres_file(cres),
                                  start_pos, iter,
                                  term_func, term_func_priv);
}

/*
 * Prepare a read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
                                                    loff_t i_size)
{
        enum cachefiles_prepare_read_trace why;
        struct netfs_io_request *rreq = subreq->rreq;
        struct netfs_cache_resources *cres = &rreq->cache_resources;
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct fscache_cookie *cookie = fscache_cres_cookie(cres);
        const struct cred *saved_cred;
        struct file *file = cachefiles_cres_file(cres);
        enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
        loff_t off, to;
        ino_t ino = file ? file_inode(file)->i_ino : 0;
        int rc;

        _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);

        if (subreq->start >= i_size) {
                ret = NETFS_FILL_WITH_ZEROES;
                why = cachefiles_trace_read_after_eof;
                goto out_no_object;
        }

        if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
                __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
                why = cachefiles_trace_read_no_data;
                if (!test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags))
                        goto out_no_object;
        }
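
        /* In ondemand mode, carry on and probe the backing file even though
         * the no-data flag is set: cachefiles_ondemand_read() may be able to
         * populate it (see download_and_store below).
         */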
        /* The object and the file may be being created in the background. */
        if (!file) {
                why = cachefiles_trace_read_no_file;
                if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
                        goto out_no_object;
                file = cachefiles_cres_file(cres);
                if (!file)
                        goto out_no_object;
                ino = file_inode(file)->i_ino;
        }

        object = cachefiles_cres_object(cres);
        cache = object->volume->cache;
        cachefiles_begin_secure(cache, &saved_cred);
retry:
        off = cachefiles_inject_read_error();
        if (off == 0)
                off = vfs_llseek(file, subreq->start, SEEK_DATA);
        if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
                if (off == (loff_t)-ENXIO) {
                        why = cachefiles_trace_read_seek_nxio;
                        goto download_and_store;
                }
                trace_cachefiles_io_error(object, file_inode(file), off,
                                          cachefiles_trace_seek_error);
                why = cachefiles_trace_read_seek_error;
                goto out;
        }

        if (off >= subreq->start + subreq->len) {
                why = cachefiles_trace_read_found_hole;
                goto download_and_store;
        }

        if (off > subreq->start) {
                off = round_up(off, cache->bsize);
                subreq->len = off - subreq->start;
                why = cachefiles_trace_read_found_part;
                goto download_and_store;
        }

        to = cachefiles_inject_read_error();
        if (to == 0)
                to = vfs_llseek(file, subreq->start, SEEK_HOLE);
        if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
                trace_cachefiles_io_error(object, file_inode(file), to,
                                          cachefiles_trace_seek_error);
                why = cachefiles_trace_read_seek_error;
                goto out;
        }

        if (to < subreq->start + subreq->len) {
                if (subreq->start + subreq->len >= i_size)
                        to = round_up(to, cache->bsize);
                else
                        to = round_down(to, cache->bsize);
                subreq->len = to - subreq->start;
        }

        why = cachefiles_trace_read_have_data;
        ret = NETFS_READ_FROM_CACHE;
        goto out;

download_and_store:
        __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
        if (test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) {
                rc = cachefiles_ondemand_read(object, subreq->start,
                                              subreq->len);
                if (!rc) {
                        __clear_bit(NETFS_SREQ_ONDEMAND, &subreq->flags);
                        goto retry;
                }
                ret = NETFS_INVALID_READ;
        }
out:
        cachefiles_end_secure(cache, saved_cred);
out_no_object:
        trace_cachefiles_prep_read(subreq, ret, why, ino);
        return ret;
}
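
/* To summarise the decision tree above: the subrequest is served from the
 * cache (NETFS_READ_FROM_CACHE) only if the backing file has data at
 * subreq->start; a hole there means the data must be fetched from the server
 * (NETFS_DOWNLOAD_FROM_SERVER) and copied back into the cache; and a read
 * wholly beyond EOF is simply zero-filled (NETFS_FILL_WITH_ZEROES).
 */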

/*
 * Prepare for a write to occur.
 */
int __cachefiles_prepare_write(struct cachefiles_object *object,
                               struct file *file,
                               loff_t *_start, size_t *_len,
                               bool no_space_allocated_yet)
{
        struct cachefiles_cache *cache = object->volume->cache;
        loff_t start = *_start, pos;
        size_t len = *_len, down;
        int ret;

        /* Round to DIO size */
        down = start - round_down(start, PAGE_SIZE);
        *_start = start - down;
        *_len = round_up(down + len, PAGE_SIZE);
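
        /* For example, with 4KiB pages, a write of 0x100 bytes at file
         * position 0x1234 is widened to *_start = 0x1000, *_len = 0x1000 so
         * that the range is page-aligned for direct IO.
         */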

        /* We need to work out whether there's sufficient disk space to perform
         * the write - but we can skip that check if we have space already
         * allocated.
         */
        if (no_space_allocated_yet)
                goto check_space;

        pos = cachefiles_inject_read_error();
        if (pos == 0)
                pos = vfs_llseek(file, *_start, SEEK_DATA);
        if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
                if (pos == -ENXIO)
                        goto check_space; /* Unallocated tail */
                trace_cachefiles_io_error(object, file_inode(file), pos,
                                          cachefiles_trace_seek_error);
                return pos;
        }
        if ((u64)pos >= (u64)*_start + *_len)
                goto check_space; /* Unallocated region */

        /* We have a block that's at least partially filled - if we're low on
         * space, we need to see if it's fully allocated.  If it's not, we may
         * want to cull it.
         */
        if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
                                 cachefiles_has_space_check) == 0)
                return 0; /* Enough space to simply overwrite the whole block */

        pos = cachefiles_inject_read_error();
        if (pos == 0)
                pos = vfs_llseek(file, *_start, SEEK_HOLE);
        if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
                trace_cachefiles_io_error(object, file_inode(file), pos,
                                          cachefiles_trace_seek_error);
                return pos;
        }
        if ((u64)pos >= (u64)*_start + *_len)
                return 0; /* Fully allocated */

        /* Partially allocated, but insufficient space: cull. */
        fscache_count_no_write_space();
        ret = cachefiles_inject_remove_error();
        if (ret == 0)
                ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                                    *_start, *_len);
        if (ret < 0) {
                trace_cachefiles_io_error(object, file_inode(file), ret,
                                          cachefiles_trace_fallocate_error);
                cachefiles_io_error_obj(object,
                                        "CacheFiles: fallocate failed (%d)\n", ret);
                ret = -EIO;
        }

        return ret;

check_space:
        return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
                                    cachefiles_has_space_for_write);
}

static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
                                    loff_t *_start, size_t *_len, loff_t i_size,
                                    bool no_space_allocated_yet)
{
        struct cachefiles_object *object = cachefiles_cres_object(cres);
        struct cachefiles_cache *cache = object->volume->cache;
        const struct cred *saved_cred;
        int ret;

        if (!cachefiles_cres_file(cres)) {
                if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
                        return -ENOBUFS;
                if (!cachefiles_cres_file(cres))
                        return -ENOBUFS;
        }

        cachefiles_begin_secure(cache, &saved_cred);
        ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
                                         _start, _len,
                                         no_space_allocated_yet);
        cachefiles_end_secure(cache, saved_cred);
        return ret;
}

/*
 * Clean up an operation.
 */
static void cachefiles_end_operation(struct netfs_cache_resources *cres)
{
        struct file *file = cachefiles_cres_file(cres);

        if (file)
                fput(file);
        fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
}

static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
        .end_operation = cachefiles_end_operation,
        .read = cachefiles_read,
        .write = cachefiles_write,
        .prepare_read = cachefiles_prepare_read,
        .prepare_write = cachefiles_prepare_write,
        .query_occupancy = cachefiles_query_occupancy,
};
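
/* This table is installed as cres->ops by cachefiles_begin_operation()
 * below; it is the vtable through which netfslib drives the cache for a
 * given cookie.
 */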

/*
 * Open the cache file when beginning a cache operation.
 */
bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
                                enum fscache_want_state want_state)
{
        struct cachefiles_object *object = cachefiles_cres_object(cres);

        if (!cachefiles_cres_file(cres)) {
                cres->ops = &cachefiles_netfs_cache_ops;
                if (object->file) {
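                        /* Recheck under the lock: object->file may be changed
                         * concurrently, so pin it with get_file() before
                         * caching it in cres->cache_priv2 for the duration of
                         * the operation.
                         */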
                        spin_lock(&object->lock);
                        if (!cres->cache_priv2 && object->file)
                                cres->cache_priv2 = get_file(object->file);
                        spin_unlock(&object->lock);
                }
        }

        if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
                pr_err("failed to get cres->file\n");
                return false;
        }

        return true;
}