ceph: track read contexts in ceph_file_info

Previously ceph_read_iter() uses current->journal to pass context info
to ceph_readpages(), so that ceph_readpages() can distinguish read(2)
from readahead(2)/fadvise(2)/madvise(2). The problem is that page fault
can happen when copying data to userspace memory. Page fault may call
other filesystem's page_mkwrite() if the userspace memory is mapped to a
file. The later filesystem may also want to use current->journal.

The fix is define a on-stack data structure in ceph_read_iter(), add it
to context list in ceph_file_info. ceph_readpages() searches the list,
find if there is a context belongs to current thread.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Yan, Zheng
2017-12-15 11:15:36 +08:00
committed by Ilya Dryomov
parent 5495c2d04f
commit 5d98830828
3 changed files with 66 additions and 9 deletions

View File

@@ -299,7 +299,8 @@ unlock:
* start an async read(ahead) operation. return nr_pages we submitted
* a read for on success, or negative error code.
*/
static int start_read(struct inode *inode, struct list_head *page_list, int max)
static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
struct list_head *page_list, int max)
{
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
@@ -316,7 +317,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
int got = 0;
int ret = 0;
if (!current->journal_info) {
if (!rw_ctx) {
/* caller of readpages does not hold buffer and read caps
* (fadvise, madvise and readahead cases) */
int want = CEPH_CAP_FILE_CACHE;
@@ -437,6 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
{
struct inode *inode = file_inode(file);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_file_info *ci = file->private_data;
struct ceph_rw_context *rw_ctx;
int rc = 0;
int max = 0;
@@ -449,11 +452,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
if (rc == 0)
goto out;
rw_ctx = ceph_find_rw_context(ci);
max = fsc->mount_options->rsize >> PAGE_SHIFT;
dout("readpages %p file %p nr_pages %d max %d\n",
inode, file, nr_pages, max);
dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
inode, file, rw_ctx, nr_pages, max);
while (!list_empty(page_list)) {
rc = start_read(inode, page_list, max);
rc = start_read(inode, rw_ctx, page_list, max);
if (rc < 0)
goto out;
}
@@ -1450,9 +1454,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE) {
current->journal_info = vma->vm_file;
CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
current->journal_info = NULL;
ceph_del_rw_context(fi, &rw_ctx);
} else
ret = -EAGAIN;