Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: prevent NMI timeouts in cmn_err
  xfs: Add log level to assertion printk
  xfs: fix an assignment within an ASSERT()
  xfs: fix error handling for synchronous writes
  xfs: add FITRIM support
  xfs: ensure log covering transactions are synchronous
  xfs: serialise unaligned direct IOs
  xfs: factor common write setup code
  xfs: split buffered IO write path from xfs_file_aio_write
  xfs: split direct IO write path from xfs_file_aio_write
  xfs: introduce xfs_rw_lock() helpers for locking the inode
  xfs: factor post-write newsize updates
  xfs: factor common post-write isize handling code
  xfs: ensure sync write errors are returned
此提交包含在:
Linus Torvalds
2011-01-14 15:24:17 -08:00
當前提交 7cb3920a65
共有 23 個檔案被更改,包括 740 行新增493 行删除

查看文件

@@ -896,7 +896,6 @@ xfs_buf_rele(
trace_xfs_buf_rele(bp, _RET_IP_);
if (!pag) {
ASSERT(!bp->b_relse);
ASSERT(list_empty(&bp->b_lru));
ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
@@ -908,11 +907,7 @@ xfs_buf_rele(
ASSERT(atomic_read(&bp->b_hold) > 0);
if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
spin_unlock(&pag->pag_buf_lock);
bp->b_relse(bp);
} else if (!(bp->b_flags & XBF_STALE) &&
if (!(bp->b_flags & XBF_STALE) &&
atomic_read(&bp->b_lru_ref)) {
xfs_buf_lru_add(bp);
spin_unlock(&pag->pag_buf_lock);

查看文件

@@ -152,8 +152,6 @@ typedef struct xfs_buftarg {
struct xfs_buf;
typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
#define XB_PAGES 2
@@ -183,7 +181,6 @@ typedef struct xfs_buf {
void *b_addr; /* virtual address of buffer */
struct work_struct b_iodone_work;
xfs_buf_iodone_t b_iodone; /* I/O completion function */
xfs_buf_relse_t b_relse; /* releasing function */
struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
void *b_fspriv2;
@@ -323,7 +320,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
#define XFS_BUF_SET_START(bp) do { } while (0)
#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)
@@ -360,8 +356,7 @@ xfs_buf_set_ref(
static inline void xfs_buf_relse(xfs_buf_t *bp)
{
if (!bp->b_relse)
xfs_buf_unlock(bp);
xfs_buf_unlock(bp);
xfs_buf_rele(bp);
}

查看文件

@@ -0,0 +1,191 @@
/*
* Copyright (C) 2010 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_alloc_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_btree.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_discard.h"
#include "xfs_trace.h"
STATIC int
xfs_trim_extents(
struct xfs_mount *mp,
xfs_agnumber_t agno,
xfs_fsblock_t start,
xfs_fsblock_t len,
xfs_fsblock_t minlen,
__uint64_t *blocks_trimmed)
{
struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
struct xfs_perag *pag;
int error;
int i;
pag = xfs_perag_get(mp, agno);
error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
if (error || !agbp)
goto out_put_perag;
cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
/*
* Force out the log. This means any transactions that might have freed
* space before we took the AGF buffer lock are now on disk, and the
* volatile disk cache is flushed.
*/
xfs_log_force(mp, XFS_LOG_SYNC);
/*
* Look up the longest btree in the AGF and start with it.
*/
error = xfs_alloc_lookup_le(cur, 0,
XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
if (error)
goto out_del_cursor;
/*
* Loop until we are done with all extents that are large
* enough to be worth discarding.
*/
while (i) {
xfs_agblock_t fbno;
xfs_extlen_t flen;
error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
if (error)
goto out_del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
/*
* Too small? Give up.
*/
if (flen < minlen) {
trace_xfs_discard_toosmall(mp, agno, fbno, flen);
goto out_del_cursor;
}
/*
* If the extent is entirely outside of the range we are
* supposed to discard skip it. Do not bother to trim
* down partially overlapping ranges for now.
*/
if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
trace_xfs_discard_exclude(mp, agno, fbno, flen);
goto next_extent;
}
/*
* If any blocks in the range are still busy, skip the
* discard and try again the next time.
*/
if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
trace_xfs_discard_busy(mp, agno, fbno, flen);
goto next_extent;
}
trace_xfs_discard_extent(mp, agno, fbno, flen);
error = -blkdev_issue_discard(bdev,
XFS_AGB_TO_DADDR(mp, agno, fbno),
XFS_FSB_TO_BB(mp, flen),
GFP_NOFS, 0);
if (error)
goto out_del_cursor;
*blocks_trimmed += flen;
next_extent:
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto out_del_cursor;
}
out_del_cursor:
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
xfs_buf_relse(agbp);
out_put_perag:
xfs_perag_put(pag);
return error;
}
int
xfs_ioc_trim(
struct xfs_mount *mp,
struct fstrim_range __user *urange)
{
struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
unsigned int granularity = q->limits.discard_granularity;
struct fstrim_range range;
xfs_fsblock_t start, len, minlen;
xfs_agnumber_t start_agno, end_agno, agno;
__uint64_t blocks_trimmed = 0;
int error, last_error = 0;
if (!capable(CAP_SYS_ADMIN))
return -XFS_ERROR(EPERM);
if (copy_from_user(&range, urange, sizeof(range)))
return -XFS_ERROR(EFAULT);
/*
* Truncating down the len isn't actually quite correct, but using
* XFS_B_TO_FSB would mean we trivially get overflows for values
* of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
* used by the fstrim application. In the end it really doesn't
* matter as trimming blocks is an advisory interface.
*/
start = XFS_B_TO_FSBT(mp, range.start);
len = XFS_B_TO_FSBT(mp, range.len);
minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
start_agno = XFS_FSB_TO_AGNO(mp, start);
if (start_agno >= mp->m_sb.sb_agcount)
return -XFS_ERROR(EINVAL);
end_agno = XFS_FSB_TO_AGNO(mp, start + len);
if (end_agno >= mp->m_sb.sb_agcount)
end_agno = mp->m_sb.sb_agcount - 1;
for (agno = start_agno; agno <= end_agno; agno++) {
error = -xfs_trim_extents(mp, agno, start, len, minlen,
&blocks_trimmed);
if (error)
last_error = error;
}
if (last_error)
return last_error;
range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
if (copy_to_user(urange, &range, sizeof(range)))
return -XFS_ERROR(EFAULT);
return 0;
}

查看文件

@@ -0,0 +1,8 @@
#ifndef XFS_DISCARD_H
#define XFS_DISCARD_H 1
struct fstrim_range;
extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
#endif /* XFS_DISCARD_H */

查看文件

@@ -40,6 +40,40 @@
static const struct vm_operations_struct xfs_file_vm_ops;
/*
* Locking primitives for read and write IO paths to ensure we consistently use
* and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
*/
static inline void
xfs_rw_ilock(
struct xfs_inode *ip,
int type)
{
if (type & XFS_IOLOCK_EXCL)
mutex_lock(&VFS_I(ip)->i_mutex);
xfs_ilock(ip, type);
}
static inline void
xfs_rw_iunlock(
struct xfs_inode *ip,
int type)
{
xfs_iunlock(ip, type);
if (type & XFS_IOLOCK_EXCL)
mutex_unlock(&VFS_I(ip)->i_mutex);
}
static inline void
xfs_rw_ilock_demote(
struct xfs_inode *ip,
int type)
{
xfs_ilock_demote(ip, type);
if (type & XFS_IOLOCK_EXCL)
mutex_unlock(&VFS_I(ip)->i_mutex);
}
/*
* xfs_iozero
*
@@ -262,22 +296,21 @@ xfs_file_aio_read(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
if (unlikely(ioflags & IO_ISDIRECT))
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (unlikely(ioflags & IO_ISDIRECT)) {
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
ret = -xfs_flushinval_pages(ip,
(iocb->ki_pos & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
if (ret) {
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
}
mutex_unlock(&inode->i_mutex);
if (ret) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
} else
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
@@ -285,7 +318,7 @@ xfs_file_aio_read(
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
@@ -309,7 +342,7 @@ xfs_file_splice_read(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
@@ -317,10 +350,61 @@ xfs_file_splice_read(
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
STATIC void
xfs_aio_write_isize_update(
struct inode *inode,
loff_t *ppos,
ssize_t bytes_written)
{
struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t isize = i_size_read(inode);
if (bytes_written > 0)
XFS_STATS_ADD(xs_write_bytes, bytes_written);
if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
*ppos > isize))
*ppos = isize;
if (*ppos > ip->i_size) {
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
if (*ppos > ip->i_size)
ip->i_size = *ppos;
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
}
}
/*
* If this was a direct or synchronous I/O that failed (such as ENOSPC) then
* part of the I/O may have been written to disk before the error occured. In
* this case the on-disk file size may have been adjusted beyond the in-memory
* file size and now needs to be truncated back.
*/
STATIC void
xfs_aio_write_newsize_update(
struct xfs_inode *ip)
{
if (ip->i_new_size) {
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
ip->i_new_size = 0;
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
}
}
/*
* xfs_file_splice_write() does not use xfs_rw_ilock() because
* generic_file_splice_write() takes the i_mutex itself. This, in theory,
* couuld cause lock inversions between the aio_write path and the splice path
* if someone is doing concurrent splice(2) based writes and write(2) based
* writes to the same inode. The only real way to fix this is to re-implement
* the generic code here with correct locking orders.
*/
STATIC ssize_t
xfs_file_splice_write(
struct pipe_inode_info *pipe,
@@ -331,7 +415,7 @@ xfs_file_splice_write(
{
struct inode *inode = outfilp->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t isize, new_size;
xfs_fsize_t new_size;
int ioflags = 0;
ssize_t ret;
@@ -355,27 +439,9 @@ xfs_file_splice_write(
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_write_bytes, ret);
isize = i_size_read(inode);
if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
*ppos = isize;
if (*ppos > ip->i_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (*ppos > ip->i_size)
ip->i_size = *ppos;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
if (ip->i_new_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
ip->i_new_size = 0;
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
xfs_aio_write_isize_update(inode, ppos, ret);
xfs_aio_write_newsize_update(ip);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
@@ -562,6 +628,194 @@ out_lock:
return error;
}
/*
* Common pre-write limit and setup checks.
*
* Returns with iolock held according to @iolock.
*/
STATIC ssize_t
xfs_file_aio_write_checks(
struct file *file,
loff_t *pos,
size_t *count,
int *iolock)
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t new_size;
int error = 0;
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
if (error) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
*iolock = 0;
return error;
}
new_size = *pos + *count;
if (new_size > ip->i_size)
ip->i_new_size = new_size;
if (likely(!(file->f_mode & FMODE_NOCMTIME)))
file_update_time(file);
/*
* If the offset is beyond the size of the file, we need to zero any
* blocks that fall between the existing EOF and the start of this
* write.
*/
if (*pos > ip->i_size)
error = -xfs_zero_eof(ip, *pos, ip->i_size);
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
/*
* If we're writing the file then make sure to clear the setuid and
* setgid bits if the process is not being run by root. This keeps
* people from modifying setuid and setgid binaries.
*/
return file_remove_suid(file);
}
/*
* xfs_file_dio_aio_write - handle direct IO writes
*
* Lock the inode appropriately to prepare for and issue a direct IO write.
* By separating it from the buffered write path we remove all the tricky to
* follow locking changes and looping.
*
* If there are cached pages or we're extending the file, we need IOLOCK_EXCL
* until we're sure the bytes at the new EOF have been zeroed and/or the cached
* pages are flushed out.
*
* In most cases the direct IO writes will be done holding IOLOCK_SHARED
* allowing them to be done in parallel with reads and other direct IO writes.
* However, if the IO is not aligned to filesystem blocks, the direct IO layer
* needs to do sub-block zeroing and that requires serialisation against other
* direct IOs to the same block. In this case we need to serialise the
* submission of the unaligned IOs so that we don't get racing block zeroing in
* the dio layer. To avoid the problem with aio, we also need to wait for
* outstanding IOs to complete so that unwritten extent conversion is completed
* before we try to map the overlapping block. This is currently implemented by
* hitting it with a big hammer (i.e. xfs_ioend_wait()).
*
* Returns with locks held indicated by @iolock and errors indicated by
* negative return values.
*/
STATIC ssize_t
xfs_file_dio_aio_write(
struct kiocb *iocb,
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos,
size_t ocount,
int *iolock)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
ssize_t ret = 0;
size_t count = ocount;
int unaligned_io = 0;
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
*iolock = 0;
if ((pos & target->bt_smask) || (count & target->bt_smask))
return -XFS_ERROR(EINVAL);
if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
unaligned_io = 1;
if (unaligned_io || mapping->nrpages || pos > ip->i_size)
*iolock = XFS_IOLOCK_EXCL;
else
*iolock = XFS_IOLOCK_SHARED;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
if (ret)
return ret;
if (mapping->nrpages) {
WARN_ON(*iolock != XFS_IOLOCK_EXCL);
ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
FI_REMAPF_LOCKED);
if (ret)
return ret;
}
/*
* If we are doing unaligned IO, wait for all other IO to drain,
* otherwise demote the lock if we had to flush cached pages
*/
if (unaligned_io)
xfs_ioend_wait(ip);
else if (*iolock == XFS_IOLOCK_EXCL) {
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
*iolock = XFS_IOLOCK_SHARED;
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
ret = generic_file_direct_write(iocb, iovp,
&nr_segs, pos, &iocb->ki_pos, count, ocount);
/* No fallback to buffered IO on errors for XFS. */
ASSERT(ret < 0 || ret == count);
return ret;
}
STATIC ssize_t
xfs_file_buffered_aio_write(
struct kiocb *iocb,
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos,
size_t ocount,
int *iolock)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
int enospc = 0;
size_t count = ocount;
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
if (ret)
return ret;
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
write_retry:
trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
ret = generic_file_buffered_write(iocb, iovp, nr_segs,
pos, &iocb->ki_pos, count, ret);
/*
* if we just got an ENOSPC, flush the inode now we aren't holding any
* page locks and retry *once*
*/
if (ret == -ENOSPC && !enospc) {
ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
if (ret)
return ret;
enospc = 1;
goto write_retry;
}
current->backing_dev_info = NULL;
return ret;
}
STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
@@ -573,234 +827,59 @@ xfs_file_aio_write(
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
ssize_t ret = 0, error = 0;
int ioflags = 0;
xfs_fsize_t isize, new_size;
ssize_t ret;
int iolock;
size_t ocount = 0, count;
int need_i_mutex;
size_t ocount = 0;
XFS_STATS_INC(xs_write_calls);
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
if (ret)
return ret;
error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
if (error)
return error;
count = ocount;
if (count == 0)
if (ocount == 0)
return 0;
xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
if (XFS_FORCED_SHUTDOWN(mp))
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
relock:
if (ioflags & IO_ISDIRECT) {
iolock = XFS_IOLOCK_SHARED;
need_i_mutex = 0;
} else {
iolock = XFS_IOLOCK_EXCL;
need_i_mutex = 1;
mutex_lock(&inode->i_mutex);
}
if (unlikely(file->f_flags & O_DIRECT))
ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
ocount, &iolock);
else
ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
ocount, &iolock);
xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
start:
error = -generic_write_checks(file, &pos, &count,
S_ISBLK(inode->i_mode));
if (error) {
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
goto out_unlock_mutex;
}
if (ioflags & IO_ISDIRECT) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
if ((pos & target->bt_smask) || (count & target->bt_smask)) {
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
return XFS_ERROR(-EINVAL);
}
if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
iolock = XFS_IOLOCK_EXCL;
need_i_mutex = 1;
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
goto start;
}
}
new_size = pos + count;
if (new_size > ip->i_size)
ip->i_new_size = new_size;
if (likely(!(ioflags & IO_INVIS)))
file_update_time(file);
/*
* If the offset is beyond the size of the file, we have a couple
* of things to do. First, if there is already space allocated
* we need to either create holes or zero the disk or ...
*
* If there is a page where the previous size lands, we need
* to zero it out up to the new size.
*/
if (pos > ip->i_size) {
error = xfs_zero_eof(ip, pos, ip->i_size);
if (error) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
goto out_unlock_internal;
}
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
* If we're writing the file then make sure to clear the
* setuid and setgid bits if the process is not being run
* by root. This keeps people from modifying setuid and
* setgid binaries.
*/
error = -file_remove_suid(file);
if (unlikely(error))
goto out_unlock_internal;
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
if ((ioflags & IO_ISDIRECT)) {
if (mapping->nrpages) {
WARN_ON(need_i_mutex == 0);
error = xfs_flushinval_pages(ip,
(pos & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
if (error)
goto out_unlock_internal;
}
if (need_i_mutex) {
/* demote the lock now the cached pages are gone */
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
mutex_unlock(&inode->i_mutex);
iolock = XFS_IOLOCK_SHARED;
need_i_mutex = 0;
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
ret = generic_file_direct_write(iocb, iovp,
&nr_segs, pos, &iocb->ki_pos, count, ocount);
/*
* direct-io write to a hole: fall through to buffered I/O
* for completing the rest of the request.
*/
if (ret >= 0 && ret != count) {
XFS_STATS_ADD(xs_write_bytes, ret);
pos += ret;
count -= ret;
ioflags &= ~IO_ISDIRECT;
xfs_iunlock(ip, iolock);
goto relock;
}
} else {
int enospc = 0;
ssize_t ret2 = 0;
write_retry:
trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
pos, &iocb->ki_pos, count, ret);
/*
* if we just got an ENOSPC, flush the inode now we
* aren't holding any page locks and retry *once*
*/
if (ret2 == -ENOSPC && !enospc) {
error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
if (error)
goto out_unlock_internal;
enospc = 1;
goto write_retry;
}
ret = ret2;
}
current->backing_dev_info = NULL;
isize = i_size_read(inode);
if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
iocb->ki_pos = isize;
if (iocb->ki_pos > ip->i_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (iocb->ki_pos > ip->i_size)
ip->i_size = iocb->ki_pos;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
error = -ret;
if (ret <= 0)
goto out_unlock_internal;
XFS_STATS_ADD(xs_write_bytes, ret);
goto out_unlock;
/* Handle various SYNC-type writes */
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
int error2;
int error, error2;
xfs_iunlock(ip, iolock);
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
error2 = filemap_write_and_wait_range(mapping, pos, end);
if (!error)
error = error2;
if (need_i_mutex)
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, iolock);
xfs_rw_iunlock(ip, iolock);
error = filemap_write_and_wait_range(mapping, pos, end);
xfs_rw_ilock(ip, iolock);
error2 = -xfs_file_fsync(file,
(file->f_flags & __O_SYNC) ? 0 : 1);
if (!error)
error = error2;
if (error)
ret = error;
else if (error2)
ret = error2;
}
out_unlock_internal:
if (ip->i_new_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
ip->i_new_size = 0;
/*
* If this was a direct or synchronous I/O that failed (such
* as ENOSPC) then part of the I/O may have been written to
* disk before the error occured. In this case the on-disk
* file size may have been adjusted beyond the in-memory file
* size and now needs to be truncated back.
*/
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
xfs_iunlock(ip, iolock);
out_unlock_mutex:
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
return -error;
out_unlock:
xfs_aio_write_newsize_update(ip);
xfs_rw_iunlock(ip, iolock);
return ret;
}
STATIC int

查看文件

@@ -39,6 +39,7 @@
#include "xfs_dfrag.h"
#include "xfs_fsops.h"
#include "xfs_vnodeops.h"
#include "xfs_discard.h"
#include "xfs_quota.h"
#include "xfs_inode_item.h"
#include "xfs_export.h"
@@ -1294,6 +1295,8 @@ xfs_file_ioctl(
trace_xfs_file_ioctl(ip);
switch (cmd) {
case FITRIM:
return xfs_ioc_trim(mp, arg);
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
case XFS_IOC_RESVSP:

查看文件

@@ -1414,7 +1414,7 @@ xfs_fs_freeze(
xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
return -xfs_fs_log_dummy(mp, SYNC_WAIT);
return -xfs_fs_log_dummy(mp);
}
STATIC int

查看文件

@@ -362,7 +362,7 @@ xfs_quiesce_data(
/* mark the log as covered if needed */
if (xfs_log_need_covered(mp))
error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
error2 = xfs_fs_log_dummy(mp);
/* flush data-only devices */
if (mp->m_rtdev_targp)
@@ -503,13 +503,14 @@ xfs_sync_worker(
int error;
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
xfs_log_force(mp, 0);
xfs_reclaim_inodes(mp, 0);
/* dgc: errors ignored here */
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
if (mp->m_super->s_frozen == SB_UNFROZEN &&
xfs_log_need_covered(mp))
error = xfs_fs_log_dummy(mp, 0);
error = xfs_fs_log_dummy(mp);
else
xfs_log_force(mp, 0);
xfs_reclaim_inodes(mp, 0);
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
}
mp->m_sync_seq++;
wake_up(&mp->m_wait_single_sync_task);

查看文件

@@ -18,6 +18,7 @@
#include "xfs.h"
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include "xfs_error.h"
static struct ctl_table_header *xfs_table_header;
@@ -51,6 +52,26 @@ xfs_stats_clear_proc_handler(
return ret;
}
STATIC int
xfs_panic_mask_proc_handler(
ctl_table *ctl,
int write,
void __user *buffer,
size_t *lenp,
loff_t *ppos)
{
int ret, *valp = ctl->data;
ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
if (!ret && write) {
xfs_panic_mask = *valp;
#ifdef DEBUG
xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
#endif
}
return ret;
}
#endif /* CONFIG_PROC_FS */
static ctl_table xfs_table[] = {
@@ -77,7 +98,7 @@ static ctl_table xfs_table[] = {
.data = &xfs_params.panic_mask.val,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.proc_handler = xfs_panic_mask_proc_handler,
.extra1 = &xfs_params.panic_mask.min,
.extra2 = &xfs_params.panic_mask.max
},

查看文件

@@ -1759,6 +1759,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
DECLARE_EVENT_CLASS(xfs_discard_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len),
TP_ARGS(mp, agno, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
),
TP_printk("dev %d:%d agno %u agbno %u len %u\n",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len)
)
#define DEFINE_DISCARD_EVENT(name) \
DEFINE_EVENT(xfs_discard_class, name, \
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
xfs_agblock_t agbno, xfs_extlen_t len), \
TP_ARGS(mp, agno, agbno, len))
DEFINE_DISCARD_EVENT(xfs_discard_extent);
DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
DEFINE_DISCARD_EVENT(xfs_discard_exclude);
DEFINE_DISCARD_EVENT(xfs_discard_busy);
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH