Merge branch 'xfs-libxfs-restructure' into for-next

This commit is contained in:
Dave Chinner
2014-07-15 07:37:18 +10:00
102 changed files with 1180 additions and 1212 deletions

281
fs/xfs/libxfs/xfs_ag.h Normal file
View File

@@ -0,0 +1,281 @@
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_AG_H__
#define __XFS_AG_H__
/*
* Allocation group header
* This is divided into three structures, placed in sequential 512-byte
* buffers after a copy of the superblock (also in a 512-byte buffer).
*/
struct xfs_buf;
struct xfs_mount;
struct xfs_trans;
#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */
#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */
#define XFS_AGFL_MAGIC 0x5841464c /* 'XAFL' */
#define XFS_AGF_VERSION 1
#define XFS_AGI_VERSION 1
#define XFS_AGF_GOOD_VERSION(v) ((v) == XFS_AGF_VERSION)
#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION)
/*
* Btree number 0 is bno, 1 is cnt. This value gives the size of the
* arrays below.
*/
#define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1)
/*
* The second word of agf_levels in the first a.g. overlaps the EFS
* superblock's magic number. Since the magic numbers valid for EFS
* are > 64k, our value cannot be confused for an EFS superblock's.
*/
typedef struct xfs_agf {
/*
* Common allocation group header information
*/
__be32 agf_magicnum; /* magic number == XFS_AGF_MAGIC */
__be32 agf_versionnum; /* header version == XFS_AGF_VERSION */
__be32 agf_seqno; /* sequence # starting from 0 */
__be32 agf_length; /* size in blocks of a.g. */
/*
* Freespace information
*/
__be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */
__be32 agf_spare0; /* spare field */
__be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
__be32 agf_spare1; /* spare field */
__be32 agf_flfirst; /* first freelist block's index */
__be32 agf_fllast; /* last freelist block's index */
__be32 agf_flcount; /* count of blocks in freelist */
__be32 agf_freeblks; /* total free blocks */
__be32 agf_longest; /* longest free space */
__be32 agf_btreeblks; /* # of blocks held in AGF btrees */
uuid_t agf_uuid; /* uuid of filesystem */
/*
* reserve some contiguous space for future logged fields before we add
* the unlogged fields. This makes the range logging via flags and
* structure offsets much simpler.
*/
__be64 agf_spare64[16];
/* unlogged fields, written during buffer writeback. */
__be64 agf_lsn; /* last write sequence */
__be32 agf_crc; /* crc of agf sector */
__be32 agf_spare2;
/* structure must be padded to 64 bit alignment */
} xfs_agf_t;
#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc)
#define XFS_AGF_MAGICNUM 0x00000001
#define XFS_AGF_VERSIONNUM 0x00000002
#define XFS_AGF_SEQNO 0x00000004
#define XFS_AGF_LENGTH 0x00000008
#define XFS_AGF_ROOTS 0x00000010
#define XFS_AGF_LEVELS 0x00000020
#define XFS_AGF_FLFIRST 0x00000040
#define XFS_AGF_FLLAST 0x00000080
#define XFS_AGF_FLCOUNT 0x00000100
#define XFS_AGF_FREEBLKS 0x00000200
#define XFS_AGF_LONGEST 0x00000400
#define XFS_AGF_BTREEBLKS 0x00000800
#define XFS_AGF_UUID 0x00001000
#define XFS_AGF_NUM_BITS 13
#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
#define XFS_AGF_FLAGS \
{ XFS_AGF_MAGICNUM, "MAGICNUM" }, \
{ XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \
{ XFS_AGF_SEQNO, "SEQNO" }, \
{ XFS_AGF_LENGTH, "LENGTH" }, \
{ XFS_AGF_ROOTS, "ROOTS" }, \
{ XFS_AGF_LEVELS, "LEVELS" }, \
{ XFS_AGF_FLFIRST, "FLFIRST" }, \
{ XFS_AGF_FLLAST, "FLLAST" }, \
{ XFS_AGF_FLCOUNT, "FLCOUNT" }, \
{ XFS_AGF_FREEBLKS, "FREEBLKS" }, \
{ XFS_AGF_LONGEST, "LONGEST" }, \
{ XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \
{ XFS_AGF_UUID, "UUID" }
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr))
extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
/*
* Size of the unlinked inode hash table in the agi.
*/
#define XFS_AGI_UNLINKED_BUCKETS 64
typedef struct xfs_agi {
/*
* Common allocation group header information
*/
__be32 agi_magicnum; /* magic number == XFS_AGI_MAGIC */
__be32 agi_versionnum; /* header version == XFS_AGI_VERSION */
__be32 agi_seqno; /* sequence # starting from 0 */
__be32 agi_length; /* size in blocks of a.g. */
/*
* Inode information
* Inodes are mapped by interpreting the inode number, so no
* mapping data is needed here.
*/
__be32 agi_count; /* count of allocated inodes */
__be32 agi_root; /* root of inode btree */
__be32 agi_level; /* levels in inode btree */
__be32 agi_freecount; /* number of free inodes */
__be32 agi_newino; /* new inode just allocated */
__be32 agi_dirino; /* last directory inode chunk */
/*
* Hash table of inodes which have been unlinked but are
* still being referenced.
*/
__be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
/*
* This marks the end of logging region 1 and start of logging region 2.
*/
uuid_t agi_uuid; /* uuid of filesystem */
__be32 agi_crc; /* crc of agi sector */
__be32 agi_pad32;
__be64 agi_lsn; /* last write sequence */
__be32 agi_free_root; /* root of the free inode btree */
__be32 agi_free_level;/* levels in free inode btree */
/* structure must be padded to 64 bit alignment */
} xfs_agi_t;
#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
#define XFS_AGI_MAGICNUM (1 << 0)
#define XFS_AGI_VERSIONNUM (1 << 1)
#define XFS_AGI_SEQNO (1 << 2)
#define XFS_AGI_LENGTH (1 << 3)
#define XFS_AGI_COUNT (1 << 4)
#define XFS_AGI_ROOT (1 << 5)
#define XFS_AGI_LEVEL (1 << 6)
#define XFS_AGI_FREECOUNT (1 << 7)
#define XFS_AGI_NEWINO (1 << 8)
#define XFS_AGI_DIRINO (1 << 9)
#define XFS_AGI_UNLINKED (1 << 10)
#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */
#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1)
#define XFS_AGI_FREE_ROOT (1 << 11)
#define XFS_AGI_FREE_LEVEL (1 << 12)
#define XFS_AGI_NUM_BITS_R2 13
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
#define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr))
extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, struct xfs_buf **bpp);
/*
* The third a.g. block contains the a.g. freelist, an array
* of block pointers to blocks owned by the allocation btree code.
*/
#define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr))
#define XFS_BUF_TO_AGFL_BNO(mp, bp) \
(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
&(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
(__be32 *)(bp)->b_addr)
/*
* Size of the AGFL. For CRC-enabled filesystes we steal a couple of
* slots in the beginning of the block for a proper header with the
* location information and CRC.
*/
#define XFS_AGFL_SIZE(mp) \
(((mp)->m_sb.sb_sectsize - \
(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
sizeof(struct xfs_agfl) : 0)) / \
sizeof(xfs_agblock_t))
typedef struct xfs_agfl {
__be32 agfl_magicnum;
__be32 agfl_seqno;
uuid_t agfl_uuid;
__be64 agfl_lsn;
__be32 agfl_crc;
__be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
} xfs_agfl_t;
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
/*
* tags for inode radix tree
*/
#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
in xfs_inode_ag_iterator */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
(MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
#define XFS_MIN_FREELIST(a,mp) \
(XFS_MIN_FREELIST_RAW( \
be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \
be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
#define XFS_MIN_FREELIST_PAG(pag,mp) \
(XFS_MIN_FREELIST_RAW( \
(unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
(unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
#define XFS_AGB_TO_FSB(mp,agno,agbno) \
(((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
#define XFS_FSB_TO_AGNO(mp,fsbno) \
((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog))
#define XFS_FSB_TO_AGBNO(mp,fsbno) \
((xfs_agblock_t)((fsbno) & xfs_mask32lo((mp)->m_sb.sb_agblklog)))
#define XFS_AGB_TO_DADDR(mp,agno,agbno) \
((xfs_daddr_t)XFS_FSB_TO_BB(mp, \
(xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno)))
#define XFS_AG_DADDR(mp,agno,d) (XFS_AGB_TO_DADDR(mp, agno, 0) + (d))
/*
* For checking for bad ranges of xfs_daddr_t's, covering multiple
* allocation groups or a single xfs_daddr_t that's a superblock copy.
*/
#define XFS_AG_CHECK_DADDR(mp,d,len) \
((len) == 1 ? \
ASSERT((d) == XFS_SB_DADDR || \
xfs_daddr_to_agbno(mp, d) != XFS_SB_DADDR) : \
ASSERT(xfs_daddr_to_agno(mp, d) == \
xfs_daddr_to_agno(mp, (d) + (len) - 1)))
#endif /* __XFS_AG_H__ */

2630
fs/xfs/libxfs/xfs_alloc.c Normal file

File diff suppressed because it is too large Load Diff

234
fs/xfs/libxfs/xfs_alloc.h Normal file
View File

@@ -0,0 +1,234 @@
/*
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_ALLOC_H__
#define __XFS_ALLOC_H__
struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
struct xfs_perag;
struct xfs_trans;
extern struct workqueue_struct *xfs_alloc_wq;
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
*/
#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */
#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */
#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */
#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */
#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */
#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */
#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */
/* this should become an enum again when the tracing code is fixed */
typedef unsigned int xfs_alloctype_t;
#define XFS_ALLOC_TYPES \
{ XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
{ XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \
{ XFS_ALLOCTYPE_START_AG, "START_AG" }, \
{ XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \
{ XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \
{ XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \
{ XFS_ALLOCTYPE_THIS_BNO, "THIS_BNO" }
/*
* Flags for xfs_alloc_fix_freelist.
*/
#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
/*
* In order to avoid ENOSPC-related deadlock caused by
* out-of-order locking of AGF buffer (PV 947395), we place
* constraints on the relationship among actual allocations for
* data blocks, freelist blocks, and potential file data bmap
* btree blocks. However, these restrictions may result in no
* actual space allocated for a delayed extent, for example, a data
* block in a certain AG is allocated but there is no additional
* block for the additional bmap btree block due to a split of the
* bmap btree of the file. The result of this may lead to an
* infinite loop in xfssyncd when the file gets flushed to disk and
* all delayed extents need to be actually allocated. To get around
* this, we explicitly set aside a few blocks which will not be
* reserved in delayed allocation. Considering the minimum number of
* needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
* btree requires 1 fsb, so we set the number of set-aside blocks
* to 4 + 4*agcount.
*/
#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
/*
* When deciding how much space to allocate out of an AG, we limit the
* allocation maximum size to the size the AG. However, we cannot use all the
* blocks in the AG - some are permanently used by metadata. These
* blocks are generally:
* - the AG superblock, AGF, AGI and AGFL
* - the AGF (bno and cnt) and AGI btree root blocks
* - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
*
* The AG headers are sector sized, so the amount of space they take up is
* dependent on filesystem geometry. The others are all single blocks.
*/
#define XFS_ALLOC_AG_MAX_USABLE(mp) \
((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
/*
* Argument structure for xfs_alloc routines.
* This is turned into a structure to avoid having 20 arguments passed
* down several levels of the stack.
*/
typedef struct xfs_alloc_arg {
struct xfs_trans *tp; /* transaction pointer */
struct xfs_mount *mp; /* file system mount point */
struct xfs_buf *agbp; /* buffer for a.g. freelist header */
struct xfs_perag *pag; /* per-ag struct for this agno */
xfs_fsblock_t fsbno; /* file system block number */
xfs_agnumber_t agno; /* allocation group number */
xfs_agblock_t agbno; /* allocation group-relative block # */
xfs_extlen_t minlen; /* minimum size of extent */
xfs_extlen_t maxlen; /* maximum size of extent */
xfs_extlen_t mod; /* mod value for extent size */
xfs_extlen_t prod; /* prod value for extent size */
xfs_extlen_t minleft; /* min blocks must be left after us */
xfs_extlen_t total; /* total blocks needed in xaction */
xfs_extlen_t alignment; /* align answer to multiple of this */
xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */
xfs_extlen_t len; /* output: actual size of extent */
xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
xfs_alloctype_t otype; /* original allocation type */
char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */
char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
xfs_fsblock_t firstblock; /* io first block allocated */
} xfs_alloc_arg_t;
/*
* Defines for userdata
*/
#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
/*
* Find the length of the longest extent in an AG.
*/
xfs_extlen_t
xfs_alloc_longest_free_extent(struct xfs_mount *mp,
struct xfs_perag *pag);
/*
* Compute and fill in value of m_ag_maxlevels.
*/
void
xfs_alloc_compute_maxlevels(
struct xfs_mount *mp); /* file system mount structure */
/*
* Get a block from the freelist.
* Returns with the buffer for the block gotten.
*/
int /* error */
xfs_alloc_get_freelist(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *agbp, /* buffer containing the agf structure */
xfs_agblock_t *bnop, /* block address retrieved from freelist */
int btreeblk); /* destination is a AGF btree */
/*
* Log the given fields from the agf structure.
*/
void
xfs_alloc_log_agf(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *bp, /* buffer for a.g. freelist header */
int fields);/* mask of fields to be logged (XFS_AGF_...) */
/*
* Interface for inode allocation to force the pag data to be initialized.
*/
int /* error */
xfs_alloc_pagf_init(
struct xfs_mount *mp, /* file system mount structure */
struct xfs_trans *tp, /* transaction pointer */
xfs_agnumber_t agno, /* allocation group number */
int flags); /* XFS_ALLOC_FLAGS_... */
/*
* Put the block on the freelist for the allocation group.
*/
int /* error */
xfs_alloc_put_freelist(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *agbp, /* buffer for a.g. freelist header */
struct xfs_buf *agflbp,/* buffer for a.g. free block array */
xfs_agblock_t bno, /* block being freed */
int btreeblk); /* owner was a AGF btree */
/*
* Read in the allocation group header (free/alloc section).
*/
int /* error */
xfs_alloc_read_agf(
struct xfs_mount *mp, /* mount point structure */
struct xfs_trans *tp, /* transaction pointer */
xfs_agnumber_t agno, /* allocation group number */
int flags, /* XFS_ALLOC_FLAG_... */
struct xfs_buf **bpp); /* buffer for the ag freelist header */
/*
* Allocate an extent (variable-size).
*/
int /* error */
xfs_alloc_vextent(
xfs_alloc_arg_t *args); /* allocation argument structure */
/*
* Free an extent.
*/
int /* error */
xfs_free_extent(
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len); /* length of extent */
int /* error */
xfs_alloc_lookup_le(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
xfs_extlen_t len, /* length of extent */
int *stat); /* success/failure */
int /* error */
xfs_alloc_lookup_ge(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
xfs_extlen_t len, /* length of extent */
int *stat); /* success/failure */
int /* error */
xfs_alloc_get_rec(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t *bno, /* output: starting block of extent */
xfs_extlen_t *len, /* output: length of extent */
int *stat); /* output: success/failure */
#endif /* __XFS_ALLOC_H__ */

View File

@@ -0,0 +1,504 @@
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
STATIC struct xfs_btree_cur *
xfs_allocbt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
cur->bc_private.a.agbp, cur->bc_private.a.agno,
cur->bc_btnum);
}
STATIC void
xfs_allocbt_set_root(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr,
int inc)
{
struct xfs_buf *agbp = cur->bc_private.a.agbp;
struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
int btnum = cur->bc_btnum;
struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
ASSERT(ptr->s != 0);
agf->agf_roots[btnum] = ptr->s;
be32_add_cpu(&agf->agf_levels[btnum], inc);
pag->pagf_levels[btnum] += inc;
xfs_perag_put(pag);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
STATIC int
xfs_allocbt_alloc_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
int *stat)
{
int error;
xfs_agblock_t bno;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
/* Allocate the new block from the freelist. If we can't, give up. */
error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
&bno, 1);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
if (bno == NULLAGBLOCK) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
xfs_trans_agbtree_delta(cur->bc_tp, 1);
new->s = cpu_to_be32(bno);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}
STATIC int
xfs_allocbt_free_block(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
{
struct xfs_buf *agbp = cur->bc_private.a.agbp;
struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
xfs_agblock_t bno;
int error;
bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
if (error)
return error;
xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
xfs_trans_binval(cur->bc_tp, bp);
return 0;
}
/*
* Update the longest extent in the AGF
*/
STATIC void
xfs_allocbt_update_lastrec(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
union xfs_btree_rec *rec,
int ptr,
int reason)
{
struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
struct xfs_perag *pag;
__be32 len;
int numrecs;
ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
switch (reason) {
case LASTREC_UPDATE:
/*
* If this is the last leaf block and it's the last record,
* then update the size of the longest extent in the AG.
*/
if (ptr != xfs_btree_get_numrecs(block))
return;
len = rec->alloc.ar_blockcount;
break;
case LASTREC_INSREC:
if (be32_to_cpu(rec->alloc.ar_blockcount) <=
be32_to_cpu(agf->agf_longest))
return;
len = rec->alloc.ar_blockcount;
break;
case LASTREC_DELREC:
numrecs = xfs_btree_get_numrecs(block);
if (ptr <= numrecs)
return;
ASSERT(ptr == numrecs + 1);
if (numrecs) {
xfs_alloc_rec_t *rrp;
rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
len = rrp->ar_blockcount;
} else {
len = 0;
}
break;
default:
ASSERT(0);
return;
}
agf->agf_longest = len;
pag = xfs_perag_get(cur->bc_mp, seqno);
pag->pagf_longest = be32_to_cpu(len);
xfs_perag_put(pag);
xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
}
STATIC int
xfs_allocbt_get_minrecs(
struct xfs_btree_cur *cur,
int level)
{
return cur->bc_mp->m_alloc_mnr[level != 0];
}
STATIC int
xfs_allocbt_get_maxrecs(
struct xfs_btree_cur *cur,
int level)
{
return cur->bc_mp->m_alloc_mxr[level != 0];
}
STATIC void
xfs_allocbt_init_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
ASSERT(rec->alloc.ar_startblock != 0);
key->alloc.ar_startblock = rec->alloc.ar_startblock;
key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}
STATIC void
xfs_allocbt_init_rec_from_key(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
ASSERT(key->alloc.ar_startblock != 0);
rec->alloc.ar_startblock = key->alloc.ar_startblock;
rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
}
STATIC void
xfs_allocbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
ASSERT(cur->bc_rec.a.ar_startblock != 0);
rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}
STATIC void
xfs_allocbt_init_ptr_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
{
struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
ptr->s = agf->agf_roots[cur->bc_btnum];
}
STATIC __int64_t
xfs_allocbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
xfs_alloc_key_t *kp = &key->alloc;
__int64_t diff;
if (cur->bc_btnum == XFS_BTNUM_BNO) {
return (__int64_t)be32_to_cpu(kp->ar_startblock) -
rec->ar_startblock;
}
diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
if (diff)
return diff;
return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
static bool
xfs_allocbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
unsigned int level;
/*
* magic number and level verification
*
* During growfs operations, we can't verify the exact level or owner as
* the perag is not fully initialised and hence not attached to the
* buffer. In this case, check against the maximum tree depth.
*
* Similarly, during log recovery we will have a perag structure
* attached, but the agf information will not yet have been initialised
* from the on disk AGF. Again, we can only check against maximum limits
* in this case.
*/
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag &&
be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
return false;
/* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
if (pag && pag->pagf_init) {
if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
return false;
} else if (level >= mp->m_ag_maxlevels)
return false;
break;
case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag &&
be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
return false;
/* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
if (pag && pag->pagf_init) {
if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
return false;
} else if (level >= mp->m_ag_maxlevels)
return false;
break;
default:
return false;
}
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
return false;
/* sibling pointer verification */
if (!block->bb_u.s.bb_leftsib ||
(be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
return false;
if (!block->bb_u.s.bb_rightsib ||
(be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
return false;
return true;
}
static void
xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
if (!xfs_btree_sblock_verify_crc(bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_allocbt_verify(bp))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_verifier_error(bp);
}
}
static void
xfs_allocbt_write_verify(
struct xfs_buf *bp)
{
if (!xfs_allocbt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
xfs_btree_sblock_calc_crc(bp);
}
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
};
#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_allocbt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
if (cur->bc_btnum == XFS_BTNUM_BNO) {
return be32_to_cpu(k1->alloc.ar_startblock) <
be32_to_cpu(k2->alloc.ar_startblock);
} else {
return be32_to_cpu(k1->alloc.ar_blockcount) <
be32_to_cpu(k2->alloc.ar_blockcount) ||
(k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
be32_to_cpu(k1->alloc.ar_startblock) <
be32_to_cpu(k2->alloc.ar_startblock));
}
}
STATIC int
xfs_allocbt_recs_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
if (cur->bc_btnum == XFS_BTNUM_BNO) {
return be32_to_cpu(r1->alloc.ar_startblock) +
be32_to_cpu(r1->alloc.ar_blockcount) <=
be32_to_cpu(r2->alloc.ar_startblock);
} else {
return be32_to_cpu(r1->alloc.ar_blockcount) <
be32_to_cpu(r2->alloc.ar_blockcount) ||
(r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
be32_to_cpu(r1->alloc.ar_startblock) <
be32_to_cpu(r2->alloc.ar_startblock));
}
}
#endif /* DEBUG */
static const struct xfs_btree_ops xfs_allocbt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
.dup_cursor = xfs_allocbt_dup_cursor,
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
.update_lastrec = xfs_allocbt_update_lastrec,
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
.init_rec_from_key = xfs_allocbt_init_rec_from_key,
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
.key_diff = xfs_allocbt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_allocbt_keys_inorder,
.recs_inorder = xfs_allocbt_recs_inorder,
#endif
};
/*
* Allocate a new allocation btree cursor.
*/
struct xfs_btree_cur * /* new alloc btree cursor */
xfs_allocbt_init_cursor(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *agbp, /* buffer for agf structure */
xfs_agnumber_t agno, /* allocation group number */
xfs_btnum_t btnum) /* btree identifier */
{
struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
struct xfs_btree_cur *cur;
ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
cur->bc_tp = tp;
cur->bc_mp = mp;
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_allocbt_ops;
if (btnum == XFS_BTNUM_CNT) {
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
} else {
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
}
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
if (xfs_sb_version_hascrc(&mp->m_sb))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
return cur;
}
/*
* Calculate number of records in an alloc btree block.
*/
int
xfs_allocbt_maxrecs(
struct xfs_mount *mp,
int blocklen,
int leaf)
{
blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
if (leaf)
return blocklen / sizeof(xfs_alloc_rec_t);
return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
}

View File

@@ -0,0 +1,65 @@
/*
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_ALLOC_BTREE_H__
#define __XFS_ALLOC_BTREE_H__
/*
* Freespace on-disk structures
*/
struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
/*
* Btree block header size depends on a superblock flag.
*/
#define XFS_ALLOC_BLOCK_LEN(mp) \
(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
/*
* Record, key, and pointer address macros for btree blocks.
*
* (note that some of these may appear unused, but they are used in userspace)
*/
#define XFS_ALLOC_REC_ADDR(mp, block, index) \
((xfs_alloc_rec_t *) \
((char *)(block) + \
XFS_ALLOC_BLOCK_LEN(mp) + \
(((index) - 1) * sizeof(xfs_alloc_rec_t))))
#define XFS_ALLOC_KEY_ADDR(mp, block, index) \
((xfs_alloc_key_t *) \
((char *)(block) + \
XFS_ALLOC_BLOCK_LEN(mp) + \
((index) - 1) * sizeof(xfs_alloc_key_t)))
#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \
((xfs_alloc_ptr_t *) \
((char *)(block) + \
XFS_ALLOC_BLOCK_LEN(mp) + \
(maxrecs) * sizeof(xfs_alloc_key_t) + \
((index) - 1) * sizeof(xfs_alloc_ptr_t)))
extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_buf *,
xfs_agnumber_t, xfs_btnum_t);
extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
#endif /* __XFS_ALLOC_BTREE_H__ */

1459
fs/xfs/libxfs/xfs_attr.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
/*
* Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_ATTR_LEAF_H__
#define __XFS_ATTR_LEAF_H__
struct attrlist;
struct attrlist_cursor_kern;
struct xfs_attr_list_context;
struct xfs_da_args;
struct xfs_da_state;
struct xfs_da_state_blk;
struct xfs_inode;
struct xfs_trans;
/*
* Used to keep a list of "remote value" extents when unlinking an inode.
*/
typedef struct xfs_attr_inactive_list {
xfs_dablk_t valueblk; /* block number of value bytes */
int valuelen; /* number of bytes in value */
} xfs_attr_inactive_list_t;
/*========================================================================
* Function prototypes for the kernel.
*========================================================================*/
/*
* Internal routines when attribute fork size < XFS_LITINO(mp).
*/
void xfs_attr_shortform_create(struct xfs_da_args *args);
void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
int xfs_attr_shortform_lookup(struct xfs_da_args *args);
int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
int xfs_attr_shortform_remove(struct xfs_da_args *args);
int xfs_attr_shortform_list(struct xfs_attr_list_context *context);
int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
/*
* Internal routines when attribute fork size == XFS_LBSIZE(mp).
*/
int xfs_attr3_leaf_to_node(struct xfs_da_args *args);
int xfs_attr3_leaf_to_shortform(struct xfs_buf *bp,
struct xfs_da_args *args, int forkoff);
int xfs_attr3_leaf_clearflag(struct xfs_da_args *args);
int xfs_attr3_leaf_setflag(struct xfs_da_args *args);
int xfs_attr3_leaf_flipflags(struct xfs_da_args *args);
/*
* Routines used for growing the Btree.
*/
int xfs_attr3_leaf_split(struct xfs_da_state *state,
struct xfs_da_state_blk *oldblk,
struct xfs_da_state_blk *newblk);
int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
struct xfs_da_args *args);
int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
struct xfs_attr_list_context *context);
/*
* Routines used for shrinking the Btree.
*/
int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval);
void xfs_attr3_leaf_unbalance(struct xfs_da_state *state,
struct xfs_da_state_blk *drop_blk,
struct xfs_da_state_blk *save_blk);
int xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
/*
* Utility routines.
*/
xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count);
int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
struct xfs_buf *leaf2_bp);
int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp);
void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
struct xfs_attr_leafblock *from);
void xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
struct xfs_attr3_icleaf_hdr *from);
#endif /* __XFS_ATTR_LEAF_H__ */

View File

@@ -0,0 +1,628 @@
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr_remote.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"
#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
/*
* Each contiguous block has a header, so it is not just a simple attribute
* length to FSB conversion.
*/
int
xfs_attr3_rmt_blocks(
struct xfs_mount *mp,
int attrlen)
{
if (xfs_sb_version_hascrc(&mp->m_sb)) {
int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
return (attrlen + buflen - 1) / buflen;
}
return XFS_B_TO_FSB(mp, attrlen);
}
/*
* Checking of the remote attribute header is split into two parts. The verifier
* does CRC, location and bounds checking, the unpacking function checks the
* attribute parameters and owner.
*/
static bool
xfs_attr3_rmt_hdr_ok(
void *ptr,
xfs_ino_t ino,
uint32_t offset,
uint32_t size,
xfs_daddr_t bno)
{
struct xfs_attr3_rmt_hdr *rmt = ptr;
if (bno != be64_to_cpu(rmt->rm_blkno))
return false;
if (offset != be32_to_cpu(rmt->rm_offset))
return false;
if (size != be32_to_cpu(rmt->rm_bytes))
return false;
if (ino != be64_to_cpu(rmt->rm_owner))
return false;
/* ok */
return true;
}
static bool
xfs_attr3_rmt_verify(
struct xfs_mount *mp,
void *ptr,
int fsbsize,
xfs_daddr_t bno)
{
struct xfs_attr3_rmt_hdr *rmt = ptr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
return false;
if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
return false;
if (be64_to_cpu(rmt->rm_blkno) != bno)
return false;
if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
return false;
if (be32_to_cpu(rmt->rm_offset) +
be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
return false;
if (rmt->rm_owner == 0)
return false;
return true;
}
static void
xfs_attr3_rmt_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
char *ptr;
int len;
xfs_daddr_t bno;
int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
ptr = bp->b_addr;
bno = bp->b_bn;
len = BBTOB(bp->b_length);
ASSERT(len >= blksize);
while (len > 0) {
if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
xfs_buf_ioerror(bp, -EFSBADCRC);
break;
}
if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
break;
}
len -= blksize;
ptr += blksize;
bno += BTOBB(blksize);
}
if (bp->b_error)
xfs_verifier_error(bp);
else
ASSERT(len == 0);
}
static void
xfs_attr3_rmt_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_buf_log_item *bip = bp->b_fspriv;
char *ptr;
int len;
xfs_daddr_t bno;
int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
ptr = bp->b_addr;
bno = bp->b_bn;
len = BBTOB(bp->b_length);
ASSERT(len >= blksize);
while (len > 0) {
if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
if (bip) {
struct xfs_attr3_rmt_hdr *rmt;
rmt = (struct xfs_attr3_rmt_hdr *)ptr;
rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
}
xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
len -= blksize;
ptr += blksize;
bno += BTOBB(blksize);
}
ASSERT(len == 0);
}
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
.verify_read = xfs_attr3_rmt_read_verify,
.verify_write = xfs_attr3_rmt_write_verify,
};
STATIC int
xfs_attr3_rmt_hdr_set(
struct xfs_mount *mp,
void *ptr,
xfs_ino_t ino,
uint32_t offset,
uint32_t size,
xfs_daddr_t bno)
{
struct xfs_attr3_rmt_hdr *rmt = ptr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return 0;
rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
rmt->rm_offset = cpu_to_be32(offset);
rmt->rm_bytes = cpu_to_be32(size);
uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
rmt->rm_owner = cpu_to_be64(ino);
rmt->rm_blkno = cpu_to_be64(bno);
return sizeof(struct xfs_attr3_rmt_hdr);
}
/*
* Helper functions to copy attribute data in and out of the one disk extents
*/
STATIC int
xfs_attr_rmtval_copyout(
struct xfs_mount *mp,
struct xfs_buf *bp,
xfs_ino_t ino,
int *offset,
int *valuelen,
__uint8_t **dst)
{
char *src = bp->b_addr;
xfs_daddr_t bno = bp->b_bn;
int len = BBTOB(bp->b_length);
int blksize = mp->m_attr_geo->blksize;
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
int hdr_size = 0;
int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
byte_cnt = min(*valuelen, byte_cnt);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
byte_cnt, bno)) {
xfs_alert(mp,
"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
bno, *offset, byte_cnt, ino);
return -EFSCORRUPTED;
}
hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
}
memcpy(*dst, src + hdr_size, byte_cnt);
/* roll buffer forwards */
len -= blksize;
src += blksize;
bno += BTOBB(blksize);
/* roll attribute data forwards */
*valuelen -= byte_cnt;
*dst += byte_cnt;
*offset += byte_cnt;
}
return 0;
}
STATIC void
xfs_attr_rmtval_copyin(
struct xfs_mount *mp,
struct xfs_buf *bp,
xfs_ino_t ino,
int *offset,
int *valuelen,
__uint8_t **src)
{
char *dst = bp->b_addr;
xfs_daddr_t bno = bp->b_bn;
int len = BBTOB(bp->b_length);
int blksize = mp->m_attr_geo->blksize;
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
int hdr_size;
int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
byte_cnt = min(*valuelen, byte_cnt);
hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
byte_cnt, bno);
memcpy(dst + hdr_size, *src, byte_cnt);
/*
* If this is the last block, zero the remainder of it.
* Check that we are actually the last block, too.
*/
if (byte_cnt + hdr_size < blksize) {
ASSERT(*valuelen - byte_cnt == 0);
ASSERT(len == blksize);
memset(dst + hdr_size + byte_cnt, 0,
blksize - hdr_size - byte_cnt);
}
/* roll buffer forwards */
len -= blksize;
dst += blksize;
bno += BTOBB(blksize);
/* roll attribute data forwards */
*valuelen -= byte_cnt;
*src += byte_cnt;
*offset += byte_cnt;
}
}
/*
* Read the value associated with an attribute from the out-of-line buffer
* that we stored it in.
*/
int
xfs_attr_rmtval_get(
struct xfs_da_args *args)
{
struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
struct xfs_mount *mp = args->dp->i_mount;
struct xfs_buf *bp;
xfs_dablk_t lblkno = args->rmtblkno;
__uint8_t *dst = args->value;
int valuelen;
int nmap;
int error;
int blkcnt = args->rmtblkcnt;
int i;
int offset = 0;
trace_xfs_attr_rmtval_get(args);
ASSERT(!(args->flags & ATTR_KERNOVAL));
ASSERT(args->rmtvaluelen == args->valuelen);
valuelen = args->rmtvaluelen;
while (valuelen > 0) {
nmap = ATTR_RMTVALUE_MAPSIZE;
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
blkcnt, map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return error;
ASSERT(nmap >= 1);
for (i = 0; (i < nmap) && (valuelen > 0); i++) {
xfs_daddr_t dblkno;
int dblkcnt;
ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
(map[i].br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
dblkno, dblkcnt, 0, &bp,
&xfs_attr3_rmt_buf_ops);
if (error)
return error;
error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
&offset, &valuelen,
&dst);
xfs_buf_relse(bp);
if (error)
return error;
/* roll attribute extent map forwards */
lblkno += map[i].br_blockcount;
blkcnt -= map[i].br_blockcount;
}
}
ASSERT(valuelen == 0);
return 0;
}
/*
* Write the value associated with an attribute into the out-of-line buffer
* that we have defined for it.
*/
int
xfs_attr_rmtval_set(
struct xfs_da_args *args)
{
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
struct xfs_bmbt_irec map;
xfs_dablk_t lblkno;
xfs_fileoff_t lfileoff = 0;
__uint8_t *src = args->value;
int blkcnt;
int valuelen;
int nmap;
int error;
int offset = 0;
trace_xfs_attr_rmtval_set(args);
/*
* Find a "hole" in the attribute address space large enough for
* us to drop the new attribute's value into. Because CRC enable
* attributes have headers, we can't just do a straight byte to FSB
* conversion and have to take the header space into account.
*/
blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
XFS_ATTR_FORK);
if (error)
return error;
args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
args->rmtblkcnt = blkcnt;
/*
* Roll through the "value", allocating blocks on disk as required.
*/
while (blkcnt > 0) {
int committed;
/*
* Allocate a single extent, up to the size of the value.
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
blkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, args->total, &map, &nmap,
args->flist);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
}
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
lblkno += map.br_blockcount;
blkcnt -= map.br_blockcount;
/*
* Start the next trans in the chain.
*/
error = xfs_trans_roll(&args->trans, dp);
if (error)
return error;
}
/*
* Roll through the "value", copying the attribute value to the
* already-allocated blocks. Blocks are written synchronously
* so that we can know they are all on disk before we turn off
* the INCOMPLETE flag.
*/
lblkno = args->rmtblkno;
blkcnt = args->rmtblkcnt;
valuelen = args->rmtvaluelen;
while (valuelen > 0) {
struct xfs_buf *bp;
xfs_daddr_t dblkno;
int dblkcnt;
ASSERT(blkcnt > 0);
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
blkcnt, &map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return error;
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
if (!bp)
return -ENOMEM;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
&valuelen, &src);
error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
xfs_buf_relse(bp);
if (error)
return error;
/* roll attribute extent map forwards */
lblkno += map.br_blockcount;
blkcnt -= map.br_blockcount;
}
ASSERT(valuelen == 0);
return 0;
}
/*
* Remove the value associated with an attribute by deleting the
* out-of-line buffer that it is stored on.
*/
int
xfs_attr_rmtval_remove(
struct xfs_da_args *args)
{
struct xfs_mount *mp = args->dp->i_mount;
xfs_dablk_t lblkno;
int blkcnt;
int error;
int done;
trace_xfs_attr_rmtval_remove(args);
/*
* Roll through the "value", invalidating the attribute value's blocks.
*/
lblkno = args->rmtblkno;
blkcnt = args->rmtblkcnt;
while (blkcnt > 0) {
struct xfs_bmbt_irec map;
struct xfs_buf *bp;
xfs_daddr_t dblkno;
int dblkcnt;
int nmap;
/*
* Try to remember where we decided to put the value.
*/
nmap = 1;
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
if (error)
return error;
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
/*
* If the "remote" value is in the cache, remove it.
*/
bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
if (bp) {
xfs_buf_stale(bp);
xfs_buf_relse(bp);
bp = NULL;
}
lblkno += map.br_blockcount;
blkcnt -= map.br_blockcount;
}
/*
* Keep de-allocating extents until the remote-value region is gone.
*/
lblkno = args->rmtblkno;
blkcnt = args->rmtblkcnt;
done = 0;
while (!done) {
int committed;
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
1, args->firstblock, args->flist,
&done);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
}
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, args->dp, 0);
/*
* Close out trans and start the next one in the chain.
*/
error = xfs_trans_roll(&args->trans, args->dp);
if (error)
return error;
}
return 0;
}

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_ATTR_REMOTE_H__
#define __XFS_ATTR_REMOTE_H__
int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_set(struct xfs_da_args *args);
int xfs_attr_rmtval_remove(struct xfs_da_args *args);
#endif /* __XFS_ATTR_REMOTE_H__ */

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_ATTR_SF_H__
#define __XFS_ATTR_SF_H__
/*
* Attribute storage when stored inside the inode.
*
* Small attribute lists are packed as tightly as possible so as
* to fit into the literal area of the inode.
*/
/*
* Entries are packed toward the top as tight as possible.
*/
typedef struct xfs_attr_shortform {
struct xfs_attr_sf_hdr { /* constant-structure header block */
__be16 totsize; /* total bytes in shortform list */
__u8 count; /* count of active entries */
} hdr;
struct xfs_attr_sf_entry {
__uint8_t namelen; /* actual length of name (no NULL) */
__uint8_t valuelen; /* actual length of value (no NULL) */
__uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
__uint8_t nameval[1]; /* name & value bytes concatenated */
} list[1]; /* variable sized array */
} xfs_attr_shortform_t;
typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
/*
* We generate this then sort it, attr_list() must return things in hash-order.
*/
typedef struct xfs_attr_sf_sort {
__uint8_t entno; /* entry number in original list */
__uint8_t namelen; /* length of name value (no null) */
__uint8_t valuelen; /* length of value */
__uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
xfs_dahash_t hash; /* this entry's hash value */
unsigned char *name; /* name value, pointer into buffer */
} xfs_attr_sf_sort_t;
#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \
(((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)))
#define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \
((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
#define XFS_ATTR_SF_ENTSIZE(sfep) /* space an entry uses */ \
((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
#define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \
((xfs_attr_sf_entry_t *)((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep)))
#define XFS_ATTR_SF_TOTSIZE(dp) /* total space in use */ \
(be16_to_cpu(((xfs_attr_shortform_t *) \
((dp)->i_afp->if_u1.if_data))->hdr.totsize))
#endif /* __XFS_ATTR_SF_H__ */

87
fs/xfs/libxfs/xfs_bit.h Normal file
View File

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_BIT_H__
#define __XFS_BIT_H__
/*
* XFS bit manipulation routines.
*/
/*
* masks with n high/low bits set, 64-bit values
*/
static inline __uint64_t xfs_mask64hi(int n)
{
return (__uint64_t)-1 << (64 - (n));
}
static inline __uint32_t xfs_mask32lo(int n)
{
return ((__uint32_t)1 << (n)) - 1;
}
static inline __uint64_t xfs_mask64lo(int n)
{
return ((__uint64_t)1 << (n)) - 1;
}
/* Get high bit set out of 32-bit argument, -1 if none set */
static inline int xfs_highbit32(__uint32_t v)
{
return fls(v) - 1;
}
/* Get high bit set out of 64-bit argument, -1 if none set */
static inline int xfs_highbit64(__uint64_t v)
{
return fls64(v) - 1;
}
/* Get low bit set out of 32-bit argument, -1 if none set */
static inline int xfs_lowbit32(__uint32_t v)
{
return ffs(v) - 1;
}
/* Get low bit set out of 64-bit argument, -1 if none set */
static inline int xfs_lowbit64(__uint64_t v)
{
__uint32_t w = (__uint32_t)v;
int n = 0;
if (w) { /* lower bits */
n = ffs(w);
} else { /* upper bits */
w = (__uint32_t)(v >> 32);
if (w) {
n = ffs(w);
if (n)
n += 32;
}
}
return n - 1;
}
/* Return whether bitmap is empty (1 == empty) */
extern int xfs_bitmap_empty(uint *map, uint size);
/* Count continuous one bits in map starting with start_bit */
extern int xfs_contig_bits(uint *map, uint size, uint start_bit);
/* Find next set bit in map */
extern int xfs_next_bit(uint *map, uint size, uint start_bit);
#endif /* __XFS_BIT_H__ */

5606
fs/xfs/libxfs/xfs_bmap.c Normal file

File diff suppressed because it is too large Load Diff

186
fs/xfs/libxfs/xfs_bmap.h Normal file
View File

@@ -0,0 +1,186 @@
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_BMAP_H__
#define __XFS_BMAP_H__
struct getbmap;
struct xfs_bmbt_irec;
struct xfs_ifork;
struct xfs_inode;
struct xfs_mount;
struct xfs_trans;
extern kmem_zone_t *xfs_bmap_free_item_zone;
/*
* List of extents to be free "later".
* The list is kept sorted on xbf_startblock.
*/
typedef struct xfs_bmap_free_item
{
xfs_fsblock_t xbfi_startblock;/* starting fs block number */
xfs_extlen_t xbfi_blockcount;/* number of blocks in extent */
struct xfs_bmap_free_item *xbfi_next; /* link to next entry */
} xfs_bmap_free_item_t;
/*
* Header for free extent list.
*
* xbf_low is used by the allocator to activate the lowspace algorithm -
* when free space is running low the extent allocator may choose to
* allocate an extent from an AG without leaving sufficient space for
* a btree split when inserting the new extent. In this case the allocator
* will enable the lowspace algorithm which is supposed to allow further
* allocations (such as btree splits and newroots) to allocate from
* sequential AGs. In order to avoid locking AGs out of order the lowspace
* algorithm will start searching for free space from AG 0. If the correct
* transaction reservations have been made then this algorithm will eventually
* find all the space it needs.
*/
typedef struct xfs_bmap_free
{
xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */
int xbf_count; /* count of items on list */
int xbf_low; /* alloc in low mode */
} xfs_bmap_free_t;
#define XFS_BMAP_MAX_NMAP 4
/*
* Flags for xfs_bmapi_*
*/
#define XFS_BMAPI_ENTIRE 0x001 /* return entire extent, not trimmed */
#define XFS_BMAPI_METADATA 0x002 /* mapping metadata not user data */
#define XFS_BMAPI_ATTRFORK 0x004 /* use attribute fork not data */
#define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */
#define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */
/* combine contig. space */
#define XFS_BMAPI_CONTIG 0x020 /* must allocate only one extent */
/*
* unwritten extent conversion - this needs write cache flushing and no additional
* allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x040
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
{ XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
{ XFS_BMAPI_CONVERT, "CONVERT" }
static inline int xfs_bmapi_aflag(int w)
{
return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0);
}
/*
* Special values for xfs_bmbt_irec_t br_startblock field.
*/
#define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL)
#define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL)
static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
{
((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \
(flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK);
}
/*
* Flags for xfs_bmap_add_extent*.
*/
#define BMAP_LEFT_CONTIG (1 << 0)
#define BMAP_RIGHT_CONTIG (1 << 1)
#define BMAP_LEFT_FILLING (1 << 2)
#define BMAP_RIGHT_FILLING (1 << 3)
#define BMAP_LEFT_DELAY (1 << 4)
#define BMAP_RIGHT_DELAY (1 << 5)
#define BMAP_LEFT_VALID (1 << 6)
#define BMAP_RIGHT_VALID (1 << 7)
#define BMAP_ATTRFORK (1 << 8)
#define XFS_BMAP_EXT_FLAGS \
{ BMAP_LEFT_CONTIG, "LC" }, \
{ BMAP_RIGHT_CONTIG, "RC" }, \
{ BMAP_LEFT_FILLING, "LF" }, \
{ BMAP_RIGHT_FILLING, "RF" }, \
{ BMAP_ATTRFORK, "ATTR" }
/*
* This macro is used to determine how many extents will be shifted
* in one write transaction. We could require two splits,
* an extent move on the first and an extent merge on the second,
* So it is proper that one extent is shifted inside write transaction
* at a time.
*/
#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
#ifdef DEBUG
void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int whichfork, unsigned long caller_ip);
#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
#else
#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
#endif
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
struct xfs_bmap_free *flist, struct xfs_mount *mp);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *last_block, int whichfork);
int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused,
int whichfork);
int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork);
int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_fsblock_t *firstblock, xfs_extlen_t total,
struct xfs_bmbt_irec *mval, int *nmap,
struct xfs_bmap_free *flist);
int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
struct xfs_bmap_free *flist, int *done);
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
int *done, xfs_fileoff_t start_fsb,
xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
int num_exts);
#endif /* __XFS_BMAP_H__ */

View File

@@ -0,0 +1,967 @@
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_dinode.h"
/*
* Determine the extent state.
*/
/* ARGSUSED */
STATIC xfs_exntst_t
xfs_extent_state(
xfs_filblks_t blks,
int extent_flag)
{
if (extent_flag) {
ASSERT(blks != 0); /* saved for DMIG */
return XFS_EXT_UNWRITTEN;
}
return XFS_EXT_NORM;
}
/*
* Convert on-disk form of btree root to in-memory form.
*/
void
xfs_bmdr_to_bmbt(
struct xfs_inode *ip,
xfs_bmdr_block_t *dblock,
int dblocklen,
struct xfs_btree_block *rblock,
int rblocklen)
{
struct xfs_mount *mp = ip->i_mount;
int dmxr;
xfs_bmbt_key_t *fkp;
__be64 *fpp;
xfs_bmbt_key_t *tkp;
__be64 *tpp;
if (xfs_sb_version_hascrc(&mp->m_sb))
xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
else
xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS);
rblock->bb_level = dblock->bb_level;
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
/*
* Convert a compressed bmap extent record to an uncompressed form.
* This code must be in sync with the routines xfs_bmbt_get_startoff,
* xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
*/
STATIC void
__xfs_bmbt_get_all(
__uint64_t l0,
__uint64_t l1,
xfs_bmbt_irec_t *s)
{
int ext_flag;
xfs_exntst_t st;
ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
s->br_startoff = ((xfs_fileoff_t)l0 &
xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
#if XFS_BIG_BLKNOS
s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
(((xfs_fsblock_t)l1) >> 21);
#else
#ifdef DEBUG
{
xfs_dfsbno_t b;
b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) |
(((xfs_dfsbno_t)l1) >> 21);
ASSERT((b >> 32) == 0 || isnulldstartblock(b));
s->br_startblock = (xfs_fsblock_t)b;
}
#else /* !DEBUG */
s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21);
#endif /* DEBUG */
#endif /* XFS_BIG_BLKNOS */
s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
/* This is xfs_extent_state() in-line */
if (ext_flag) {
ASSERT(s->br_blockcount != 0); /* saved for DMIG */
st = XFS_EXT_UNWRITTEN;
} else
st = XFS_EXT_NORM;
s->br_state = st;
}
void
xfs_bmbt_get_all(
xfs_bmbt_rec_host_t *r,
xfs_bmbt_irec_t *s)
{
__xfs_bmbt_get_all(r->l0, r->l1, s);
}
/*
* Extract the blockcount field from an in memory bmap extent record.
*/
xfs_filblks_t
xfs_bmbt_get_blockcount(
xfs_bmbt_rec_host_t *r)
{
return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21));
}
/*
* Extract the startblock field from an in memory bmap extent record.
*/
xfs_fsblock_t
xfs_bmbt_get_startblock(
xfs_bmbt_rec_host_t *r)
{
#if XFS_BIG_BLKNOS
return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
(((xfs_fsblock_t)r->l1) >> 21);
#else
#ifdef DEBUG
xfs_dfsbno_t b;
b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) |
(((xfs_dfsbno_t)r->l1) >> 21);
ASSERT((b >> 32) == 0 || isnulldstartblock(b));
return (xfs_fsblock_t)b;
#else /* !DEBUG */
return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21);
#endif /* DEBUG */
#endif /* XFS_BIG_BLKNOS */
}
/*
* Extract the startoff field from an in memory bmap extent record.
*/
xfs_fileoff_t
xfs_bmbt_get_startoff(
xfs_bmbt_rec_host_t *r)
{
return ((xfs_fileoff_t)r->l0 &
xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
}
xfs_exntst_t
xfs_bmbt_get_state(
xfs_bmbt_rec_host_t *r)
{
int ext_flag;
ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
return xfs_extent_state(xfs_bmbt_get_blockcount(r),
ext_flag);
}
/*
* Extract the blockcount field from an on disk bmap extent record.
*/
xfs_filblks_t
xfs_bmbt_disk_get_blockcount(
xfs_bmbt_rec_t *r)
{
return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21));
}
/*
* Extract the startoff field from a disk format bmap extent record.
*/
xfs_fileoff_t
xfs_bmbt_disk_get_startoff(
xfs_bmbt_rec_t *r)
{
return ((xfs_fileoff_t)be64_to_cpu(r->l0) &
xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
}
/*
* Set all the fields in a bmap extent record from the arguments.
*/
void
xfs_bmbt_set_allf(
xfs_bmbt_rec_host_t *r,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
xfs_filblks_t blockcount,
xfs_exntst_t state)
{
int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
#if XFS_BIG_BLKNOS
ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
((xfs_bmbt_rec_base_t)startoff << 9) |
((xfs_bmbt_rec_base_t)startblock >> 43);
r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21));
#else /* !XFS_BIG_BLKNOS */
if (isnullstartblock(startblock)) {
r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
((xfs_bmbt_rec_base_t)startoff << 9) |
(xfs_bmbt_rec_base_t)xfs_mask64lo(9);
r->l1 = xfs_mask64hi(11) |
((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21));
} else {
r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
((xfs_bmbt_rec_base_t)startoff << 9);
r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21));
}
#endif /* XFS_BIG_BLKNOS */
}
/*
* Set all the fields in a bmap extent record from the uncompressed form.
*/
void
xfs_bmbt_set_all(
xfs_bmbt_rec_host_t *r,
xfs_bmbt_irec_t *s)
{
xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock,
s->br_blockcount, s->br_state);
}
/*
* Set all the fields in a disk format bmap extent record from the arguments.
*/
void
xfs_bmbt_disk_set_allf(
xfs_bmbt_rec_t *r,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
xfs_filblks_t blockcount,
xfs_exntst_t state)
{
int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
#if XFS_BIG_BLKNOS
ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
r->l0 = cpu_to_be64(
((xfs_bmbt_rec_base_t)extent_flag << 63) |
((xfs_bmbt_rec_base_t)startoff << 9) |
((xfs_bmbt_rec_base_t)startblock >> 43));
r->l1 = cpu_to_be64(
((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
#else /* !XFS_BIG_BLKNOS */
if (isnullstartblock(startblock)) {
r->l0 = cpu_to_be64(
((xfs_bmbt_rec_base_t)extent_flag << 63) |
((xfs_bmbt_rec_base_t)startoff << 9) |
(xfs_bmbt_rec_base_t)xfs_mask64lo(9));
r->l1 = cpu_to_be64(xfs_mask64hi(11) |
((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
} else {
r->l0 = cpu_to_be64(
((xfs_bmbt_rec_base_t)extent_flag << 63) |
((xfs_bmbt_rec_base_t)startoff << 9));
r->l1 = cpu_to_be64(
((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
}
#endif /* XFS_BIG_BLKNOS */
}
/*
* Set all the fields in a bmap extent record from the uncompressed form.
*/
STATIC void
xfs_bmbt_disk_set_all(
xfs_bmbt_rec_t *r,
xfs_bmbt_irec_t *s)
{
xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock,
s->br_blockcount, s->br_state);
}
/*
* Set the blockcount field in a bmap extent record.
*/
void
xfs_bmbt_set_blockcount(
xfs_bmbt_rec_host_t *r,
xfs_filblks_t v)
{
ASSERT((v & xfs_mask64hi(43)) == 0);
r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) |
(xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21));
}
/*
* Set the startblock field in a bmap extent record.
*/
void
xfs_bmbt_set_startblock(
xfs_bmbt_rec_host_t *r,
xfs_fsblock_t v)
{
#if XFS_BIG_BLKNOS
ASSERT((v & xfs_mask64hi(12)) == 0);
r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
(xfs_bmbt_rec_base_t)(v >> 43);
r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
(xfs_bmbt_rec_base_t)(v << 21);
#else /* !XFS_BIG_BLKNOS */
if (isnullstartblock(v)) {
r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) |
((xfs_bmbt_rec_base_t)v << 21) |
(r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
} else {
r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9);
r->l1 = ((xfs_bmbt_rec_base_t)v << 21) |
(r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
}
#endif /* XFS_BIG_BLKNOS */
}
/*
* Set the startoff field in a bmap extent record.
*/
void
xfs_bmbt_set_startoff(
xfs_bmbt_rec_host_t *r,
xfs_fileoff_t v)
{
ASSERT((v & xfs_mask64hi(9)) == 0);
r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) |
((xfs_bmbt_rec_base_t)v << 9) |
(r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
}
/*
* Set the extent state field in a bmap extent record.
*/
void
xfs_bmbt_set_state(
xfs_bmbt_rec_host_t *r,
xfs_exntst_t v)
{
ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
if (v == XFS_EXT_NORM)
r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN);
else
r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN);
}
/*
* Convert in-memory form of btree root to on-disk form.
*/
void
xfs_bmbt_to_bmdr(
struct xfs_mount *mp,
struct xfs_btree_block *rblock,
int rblocklen,
xfs_bmdr_block_t *dblock,
int dblocklen)
{
int dmxr;
xfs_bmbt_key_t *fkp;
__be64 *fpp;
xfs_bmbt_key_t *tkp;
__be64 *tpp;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
ASSERT(rblock->bb_u.l.bb_blkno ==
cpu_to_be64(XFS_BUF_DADDR_NULL));
} else
ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
ASSERT(rblock->bb_level != 0);
dblock->bb_level = rblock->bb_level;
dblock->bb_numrecs = rblock->bb_numrecs;
dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
/*
* Check extent records, which have just been read, for
* any bit in the extent flag field. ASSERT on debug
* kernels, as this condition should not occur.
* Return an error condition (1) if any flags found,
* otherwise return 0.
*/
int
xfs_check_nostate_extents(
xfs_ifork_t *ifp,
xfs_extnum_t idx,
xfs_extnum_t num)
{
for (; num > 0; num--, idx++) {
xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
if ((ep->l0 >>
(64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
ASSERT(0);
return 1;
}
}
return 0;
}
STATIC struct xfs_btree_cur *
xfs_bmbt_dup_cursor(
struct xfs_btree_cur *cur)
{
struct xfs_btree_cur *new;
new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
cur->bc_private.b.ip, cur->bc_private.b.whichfork);
/*
* Copy the firstblock, flist, and flags values,
* since init cursor doesn't get them.
*/
new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
new->bc_private.b.flist = cur->bc_private.b.flist;
new->bc_private.b.flags = cur->bc_private.b.flags;
return new;
}
STATIC void
xfs_bmbt_update_cursor(
struct xfs_btree_cur *src,
struct xfs_btree_cur *dst)
{
ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
(dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
dst->bc_private.b.allocated += src->bc_private.b.allocated;
dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
src->bc_private.b.allocated = 0;
}
STATIC int
xfs_bmbt_alloc_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
int *stat)
{
xfs_alloc_arg_t args; /* block allocation args */
int error; /* error return value */
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
args.fsbno = cur->bc_private.b.firstblock;
args.firstblock = args.fsbno;
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
args.type = XFS_ALLOCTYPE_START_BNO;
/*
* Make sure there is sufficient room left in the AG to
* complete a full tree split for an extent insert. If
* we are converting the middle part of an extent then
* we may need space for two tree splits.
*
* We are relying on the caller to make the correct block
* reservation for this operation to succeed. If the
* reservation amount is insufficient then we may fail a
* block allocation here and corrupt the filesystem.
*/
args.minleft = xfs_trans_get_block_res(args.tp);
} else if (cur->bc_private.b.flist->xbf_low) {
args.type = XFS_ALLOCTYPE_START_BNO;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
}
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
error = -ENOSPC;
goto error0;
}
error = xfs_alloc_vextent(&args);
if (error)
goto error0;
if (args.fsbno == NULLFSBLOCK && args.minleft) {
/*
* Could not find an AG with enough free space to satisfy
* a full btree split. Try again without minleft and if
* successful activate the lowspace algorithm.
*/
args.fsbno = 0;
args.type = XFS_ALLOCTYPE_FIRST_AG;
args.minleft = 0;
error = xfs_alloc_vextent(&args);
if (error)
goto error0;
cur->bc_private.b.flist->xbf_low = 1;
}
if (args.fsbno == NULLFSBLOCK) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
ASSERT(args.len == 1);
cur->bc_private.b.firstblock = args.fsbno;
cur->bc_private.b.allocated++;
cur->bc_private.b.ip->i_d.di_nblocks++;
xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
XFS_TRANS_DQ_BCOUNT, 1L);
new->l = cpu_to_be64(args.fsbno);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
STATIC int
xfs_bmbt_free_block(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
{
struct xfs_mount *mp = cur->bc_mp;
struct xfs_inode *ip = cur->bc_private.b.ip;
struct xfs_trans *tp = cur->bc_tp;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
ip->i_d.di_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, bp);
return 0;
}
STATIC int
xfs_bmbt_get_minrecs(
struct xfs_btree_cur *cur,
int level)
{
if (level == cur->bc_nlevels - 1) {
struct xfs_ifork *ifp;
ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
cur->bc_private.b.whichfork);
return xfs_bmbt_maxrecs(cur->bc_mp,
ifp->if_broot_bytes, level == 0) / 2;
}
return cur->bc_mp->m_bmap_dmnr[level != 0];
}
int
xfs_bmbt_get_maxrecs(
struct xfs_btree_cur *cur,
int level)
{
if (level == cur->bc_nlevels - 1) {
struct xfs_ifork *ifp;
ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
cur->bc_private.b.whichfork);
return xfs_bmbt_maxrecs(cur->bc_mp,
ifp->if_broot_bytes, level == 0);
}
return cur->bc_mp->m_bmap_dmxr[level != 0];
}
/*
* Get the maximum records we could store in the on-disk format.
*
* For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
* for the root node this checks the available space in the dinode fork
* so that we can resize the in-memory buffer to match it. After a
* resize to the maximum size this function returns the same value
* as xfs_bmbt_get_maxrecs for the root node, too.
*/
STATIC int
xfs_bmbt_get_dmaxrecs(
struct xfs_btree_cur *cur,
int level)
{
if (level != cur->bc_nlevels - 1)
return cur->bc_mp->m_bmap_dmxr[level != 0];
return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
}
STATIC void
xfs_bmbt_init_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
key->bmbt.br_startoff =
cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
}
STATIC void
xfs_bmbt_init_rec_from_key(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
ASSERT(key->bmbt.br_startoff != 0);
xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
0, 0, XFS_EXT_NORM);
}
STATIC void
xfs_bmbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
}
STATIC void
xfs_bmbt_init_ptr_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
{
ptr->l = 0;
}
STATIC __int64_t
xfs_bmbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
cur->bc_rec.b.br_startoff;
}
static bool
xfs_bmbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
unsigned int level;
switch (block->bb_magic) {
case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
return false;
if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
return false;
/*
* XXX: need a better way of verifying the owner here. Right now
* just make sure there has been one set.
*/
if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
return false;
/* fall through */
case cpu_to_be32(XFS_BMAP_MAGIC):
break;
default:
return false;
}
/*
* numrecs and level verification.
*
* We don't know what fork we belong to, so just verify that the level
* is less than the maximum of the two. Later checks will be more
* precise.
*/
level = be16_to_cpu(block->bb_level);
if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
return false;
if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
return false;
/* sibling pointer verification */
if (!block->bb_u.l.bb_leftsib ||
(block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
!XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
return false;
if (!block->bb_u.l.bb_rightsib ||
(block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
!XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
return false;
return true;
}
static void
xfs_bmbt_read_verify(
struct xfs_buf *bp)
{
if (!xfs_btree_lblock_verify_crc(bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_bmbt_verify(bp))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_verifier_error(bp);
}
}
static void
xfs_bmbt_write_verify(
struct xfs_buf *bp)
{
if (!xfs_bmbt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
xfs_btree_lblock_calc_crc(bp);
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
.verify_read = xfs_bmbt_read_verify,
.verify_write = xfs_bmbt_write_verify,
};
#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_bmbt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
return be64_to_cpu(k1->bmbt.br_startoff) <
be64_to_cpu(k2->bmbt.br_startoff);
}
STATIC int
xfs_bmbt_recs_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
xfs_bmbt_disk_get_startoff(&r2->bmbt);
}
#endif /* DEBUG */
static const struct xfs_btree_ops xfs_bmbt_ops = {
.rec_len = sizeof(xfs_bmbt_rec_t),
.key_len = sizeof(xfs_bmbt_key_t),
.dup_cursor = xfs_bmbt_dup_cursor,
.update_cursor = xfs_bmbt_update_cursor,
.alloc_block = xfs_bmbt_alloc_block,
.free_block = xfs_bmbt_free_block,
.get_maxrecs = xfs_bmbt_get_maxrecs,
.get_minrecs = xfs_bmbt_get_minrecs,
.get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
.init_key_from_rec = xfs_bmbt_init_key_from_rec,
.init_rec_from_key = xfs_bmbt_init_rec_from_key,
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
.key_diff = xfs_bmbt_key_diff,
.buf_ops = &xfs_bmbt_buf_ops,
#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
#endif
};
/*
* Allocate a new bmap btree cursor.
*/
struct xfs_btree_cur * /* new bmap btree cursor */
xfs_bmbt_init_cursor(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* inode owning the btree */
int whichfork) /* data or attr fork */
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_btree_cur *cur;
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
cur->bc_tp = tp;
cur->bc_mp = mp;
cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
cur->bc_btnum = XFS_BTNUM_BMAP;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_bmbt_ops;
cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
if (xfs_sb_version_hascrc(&mp->m_sb))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
cur->bc_private.b.ip = ip;
cur->bc_private.b.firstblock = NULLFSBLOCK;
cur->bc_private.b.flist = NULL;
cur->bc_private.b.allocated = 0;
cur->bc_private.b.flags = 0;
cur->bc_private.b.whichfork = whichfork;
return cur;
}
/*
* Calculate number of records in a bmap btree block.
*/
int
xfs_bmbt_maxrecs(
struct xfs_mount *mp,
int blocklen,
int leaf)
{
blocklen -= XFS_BMBT_BLOCK_LEN(mp);
if (leaf)
return blocklen / sizeof(xfs_bmbt_rec_t);
return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
}
/*
* Calculate number of records in a bmap btree inode root.
*/
int
xfs_bmdr_maxrecs(
int blocklen,
int leaf)
{
blocklen -= sizeof(xfs_bmdr_block_t);
if (leaf)
return blocklen / sizeof(xfs_bmdr_rec_t);
return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
}
/*
* Change the owner of a btree format fork fo the inode passed in. Change it to
* the owner of that is passed in so that we can change owners before or after
* we switch forks between inodes. The operation that the caller is doing will
* determine whether is needs to change owner before or after the switch.
*
* For demand paged transactional modification, the fork switch should be done
* after reading in all the blocks, modifying them and pinning them in the
* transaction. For modification when the buffers are already pinned in memory,
* the fork switch can be done before changing the owner as we won't need to
* validate the owner until the btree buffers are unpinned and writes can occur
* again.
*
* For recovery based ownership change, there is no transactional context and
* so a buffer list must be supplied so that we can record the buffers that we
* modified for the caller to issue IO on.
*/
int
xfs_bmbt_change_owner(
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork,
xfs_ino_t new_owner,
struct list_head *buffer_list)
{
struct xfs_btree_cur *cur;
int error;
ASSERT(tp || buffer_list);
ASSERT(!(tp && buffer_list));
if (whichfork == XFS_DATA_FORK)
ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
else
ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
if (!cur)
return -ENOMEM;
error = xfs_btree_change_owner(cur, new_owner, buffer_list);
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
return error;
}

View File

@@ -0,0 +1,143 @@
/*
* Copyright (c) 2000,2002-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_BMAP_BTREE_H__
#define __XFS_BMAP_BTREE_H__
struct xfs_btree_cur;
struct xfs_btree_block;
struct xfs_mount;
struct xfs_inode;
struct xfs_trans;
/*
* Extent state and extent format macros.
*/
#define XFS_EXTFMT_INODE(x) \
(xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \
XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
#define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN)
/*
* Btree block header size depends on a superblock flag.
*/
#define XFS_BMBT_BLOCK_LEN(mp) \
(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
#define XFS_BMBT_REC_ADDR(mp, block, index) \
((xfs_bmbt_rec_t *) \
((char *)(block) + \
XFS_BMBT_BLOCK_LEN(mp) + \
((index) - 1) * sizeof(xfs_bmbt_rec_t)))
#define XFS_BMBT_KEY_ADDR(mp, block, index) \
((xfs_bmbt_key_t *) \
((char *)(block) + \
XFS_BMBT_BLOCK_LEN(mp) + \
((index) - 1) * sizeof(xfs_bmbt_key_t)))
#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \
((xfs_bmbt_ptr_t *) \
((char *)(block) + \
XFS_BMBT_BLOCK_LEN(mp) + \
(maxrecs) * sizeof(xfs_bmbt_key_t) + \
((index) - 1) * sizeof(xfs_bmbt_ptr_t)))
#define XFS_BMDR_REC_ADDR(block, index) \
((xfs_bmdr_rec_t *) \
((char *)(block) + \
sizeof(struct xfs_bmdr_block) + \
((index) - 1) * sizeof(xfs_bmdr_rec_t)))
#define XFS_BMDR_KEY_ADDR(block, index) \
((xfs_bmdr_key_t *) \
((char *)(block) + \
sizeof(struct xfs_bmdr_block) + \
((index) - 1) * sizeof(xfs_bmdr_key_t)))
#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \
((xfs_bmdr_ptr_t *) \
((char *)(block) + \
sizeof(struct xfs_bmdr_block) + \
(maxrecs) * sizeof(xfs_bmdr_key_t) + \
((index) - 1) * sizeof(xfs_bmdr_ptr_t)))
/*
* These are to be used when we know the size of the block and
* we don't have a cursor.
*/
#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \
(int)(XFS_BMBT_BLOCK_LEN(mp) + \
((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
#define XFS_BMAP_BROOT_SPACE(mp, bb) \
(XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs)))
#define XFS_BMDR_SPACE_CALC(nrecs) \
(int)(sizeof(xfs_bmdr_block_t) + \
((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
#define XFS_BMAP_BMDR_SPACE(bb) \
(XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs)))
/*
* Maximum number of bmap btree levels.
*/
#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)])
/*
* Prototypes for xfs_bmap.c to call.
*/
extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
struct xfs_btree_block *, int);
extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r);
extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
extern void xfs_bmbt_set_blockcount(xfs_bmbt_rec_host_t *r, xfs_filblks_t v);
extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v);
extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v);
extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v);
extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
xfs_bmdr_block_t *, int);
extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
extern int xfs_bmdr_maxrecs(int blocklen, int leaf);
extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, xfs_ino_t new_owner,
struct list_head *buffer_list);
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
#endif /* __XFS_BMAP_BTREE_H__ */

4069
fs/xfs/libxfs/xfs_btree.c Normal file

File diff suppressed because it is too large Load Diff

468
fs/xfs/libxfs/xfs_btree.h Normal file
View File

@@ -0,0 +1,468 @@
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_BTREE_H__
#define __XFS_BTREE_H__
struct xfs_buf;
struct xfs_bmap_free;
struct xfs_inode;
struct xfs_mount;
struct xfs_trans;
extern kmem_zone_t *xfs_btree_cur_zone;
/*
* Generic key, ptr and record wrapper structures.
*
* These are disk format structures, and are converted where necessary
* by the btree specific code that needs to interpret them.
*/
union xfs_btree_ptr {
__be32 s; /* short form ptr */
__be64 l; /* long form ptr */
};
union xfs_btree_key {
xfs_bmbt_key_t bmbt;
xfs_bmdr_key_t bmbr; /* bmbt root block */
xfs_alloc_key_t alloc;
xfs_inobt_key_t inobt;
};
union xfs_btree_rec {
xfs_bmbt_rec_t bmbt;
xfs_bmdr_rec_t bmbr; /* bmbt root block */
xfs_alloc_rec_t alloc;
xfs_inobt_rec_t inobt;
};
/*
* This nonsense is to make -wlint happy.
*/
#define XFS_LOOKUP_EQ ((xfs_lookup_t)XFS_LOOKUP_EQi)
#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi)
#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi)
#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi)
#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
/*
* For logging record fields.
*/
#define XFS_BB_MAGIC (1 << 0)
#define XFS_BB_LEVEL (1 << 1)
#define XFS_BB_NUMRECS (1 << 2)
#define XFS_BB_LEFTSIB (1 << 3)
#define XFS_BB_RIGHTSIB (1 << 4)
#define XFS_BB_BLKNO (1 << 5)
#define XFS_BB_LSN (1 << 6)
#define XFS_BB_UUID (1 << 7)
#define XFS_BB_OWNER (1 << 8)
#define XFS_BB_NUM_BITS 5
#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
#define XFS_BB_NUM_BITS_CRC 9
#define XFS_BB_ALL_BITS_CRC ((1 << XFS_BB_NUM_BITS_CRC) - 1)
/*
* Generic stats interface
*/
#define __XFS_BTREE_STATS_INC(type, stat) \
XFS_STATS_INC(xs_ ## type ## _2_ ## stat)
#define XFS_BTREE_STATS_INC(cur, stat) \
do { \
switch (cur->bc_btnum) { \
case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
#define __XFS_BTREE_STATS_ADD(type, stat, val) \
XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val)
#define XFS_BTREE_STATS_ADD(cur, stat, val) \
do { \
switch (cur->bc_btnum) { \
case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */
struct xfs_btree_ops {
/* size of the key and record structures */
size_t key_len;
size_t rec_len;
/* cursor operations */
struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *);
void (*update_cursor)(struct xfs_btree_cur *src,
struct xfs_btree_cur *dst);
/* update btree root pointer */
void (*set_root)(struct xfs_btree_cur *cur,
union xfs_btree_ptr *nptr, int level_change);
/* block allocation / freeing */
int (*alloc_block)(struct xfs_btree_cur *cur,
union xfs_btree_ptr *start_bno,
union xfs_btree_ptr *new_bno,
int *stat);
int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
/* update last record information */
void (*update_lastrec)(struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
union xfs_btree_rec *rec,
int ptr, int reason);
/* records in block/level */
int (*get_minrecs)(struct xfs_btree_cur *cur, int level);
int (*get_maxrecs)(struct xfs_btree_cur *cur, int level);
/* records on disk. Matter for the root in inode case. */
int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level);
/* init values of btree structures */
void (*init_key_from_rec)(union xfs_btree_key *key,
union xfs_btree_rec *rec);
void (*init_rec_from_key)(union xfs_btree_key *key,
union xfs_btree_rec *rec);
void (*init_rec_from_cur)(struct xfs_btree_cur *cur,
union xfs_btree_rec *rec);
void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr);
/* difference between key value and cursor value */
__int64_t (*key_diff)(struct xfs_btree_cur *cur,
union xfs_btree_key *key);
const struct xfs_buf_ops *buf_ops;
#if defined(DEBUG) || defined(XFS_WARN)
/* check that k1 is lower than k2 */
int (*keys_inorder)(struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2);
/* check that r1 is lower than r2 */
int (*recs_inorder)(struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2);
#endif
};
/*
* Reasons for the update_lastrec method to be called.
*/
#define LASTREC_UPDATE 0
#define LASTREC_INSREC 1
#define LASTREC_DELREC 2
/*
* Btree cursor structure.
* This collects all information needed by the btree code in one place.
*/
typedef struct xfs_btree_cur
{
struct xfs_trans *bc_tp; /* transaction we're in, if any */
struct xfs_mount *bc_mp; /* file system mount struct */
const struct xfs_btree_ops *bc_ops;
uint bc_flags; /* btree features - below */
union {
xfs_alloc_rec_incore_t a;
xfs_bmbt_irec_t b;
xfs_inobt_rec_incore_t i;
} bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
__uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */
#define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */
#define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */
__uint8_t bc_nlevels; /* number of levels in the tree */
__uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
xfs_btnum_t bc_btnum; /* identifies which btree type */
union {
struct { /* needed for BNO, CNT, INO */
struct xfs_buf *agbp; /* agf/agi buffer pointer */
xfs_agnumber_t agno; /* ag number */
} a;
struct { /* needed for BMAP */
struct xfs_inode *ip; /* pointer to our inode */
struct xfs_bmap_free *flist; /* list to free after */
xfs_fsblock_t firstblock; /* 1st blk allocated */
int allocated; /* count of alloced */
short forksize; /* fork's inode space */
char whichfork; /* data or attr fork */
char flags; /* flags */
#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */
} b;
} bc_private; /* per-btree type data */
} xfs_btree_cur_t;
/* cursor flags */
#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */
#define XFS_BTREE_NOERROR 0
#define XFS_BTREE_ERROR 1
/*
* Convert from buffer to btree block header.
*/
#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr))
/*
* Check that block header is ok.
*/
int
xfs_btree_check_block(
struct xfs_btree_cur *cur, /* btree cursor */
struct xfs_btree_block *block, /* generic btree block pointer */
int level, /* level of the btree block */
struct xfs_buf *bp); /* buffer containing block, if any */
/*
* Check that (long) pointer is ok.
*/
int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_dfsbno_t ptr, /* btree block disk address */
int level); /* btree block level */
/*
* Delete the btree cursor.
*/
void
xfs_btree_del_cursor(
xfs_btree_cur_t *cur, /* btree cursor */
int error); /* del because of error */
/*
* Duplicate the btree cursor.
* Allocate a new one, copy the record, re-get the buffers.
*/
int /* error */
xfs_btree_dup_cursor(
xfs_btree_cur_t *cur, /* input cursor */
xfs_btree_cur_t **ncur);/* output cursor */
/*
* Get a buffer for the block, return it with no data read.
* Long-form addressing.
*/
struct xfs_buf * /* buffer for fsbno */
xfs_btree_get_bufl(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t fsbno, /* file system block number */
uint lock); /* lock flags for get_buf */
/*
* Get a buffer for the block, return it with no data read.
* Short-form addressing.
*/
struct xfs_buf * /* buffer for agno/agbno */
xfs_btree_get_bufs(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_agnumber_t agno, /* allocation group number */
xfs_agblock_t agbno, /* allocation group block number */
uint lock); /* lock flags for get_buf */
/*
* Check for the cursor referring to the last block at the given level.
*/
int /* 1=is last block, 0=not last block */
xfs_btree_islastblock(
xfs_btree_cur_t *cur, /* btree cursor */
int level); /* level to check */
/*
* Compute first and last byte offsets for the fields given.
* Interprets the offsets table, which contains struct field offsets.
*/
void
xfs_btree_offsets(
__int64_t fields, /* bitmask of fields */
const short *offsets,/* table of field offsets */
int nbits, /* number of bits to inspect */
int *first, /* output: first byte offset */
int *last); /* output: last byte offset */
/*
* Get a buffer for the block, return it read in.
* Long-form addressing.
*/
int /* error */
xfs_btree_read_bufl(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t fsbno, /* file system block number */
uint lock, /* lock flags for read_buf */
struct xfs_buf **bpp, /* buffer for fsbno */
int refval, /* ref count value for buffer */
const struct xfs_buf_ops *ops);
/*
* Read-ahead the block, don't wait for it, don't return a buffer.
* Long-form addressing.
*/
void /* error */
xfs_btree_reada_bufl(
struct xfs_mount *mp, /* file system mount point */
xfs_fsblock_t fsbno, /* file system block number */
xfs_extlen_t count, /* count of filesystem blocks */
const struct xfs_buf_ops *ops);
/*
* Read-ahead the block, don't wait for it, don't return a buffer.
* Short-form addressing.
*/
void /* error */
xfs_btree_reada_bufs(
struct xfs_mount *mp, /* file system mount point */
xfs_agnumber_t agno, /* allocation group number */
xfs_agblock_t agbno, /* allocation group block number */
xfs_extlen_t count, /* count of filesystem blocks */
const struct xfs_buf_ops *ops);
/*
* Initialise a new btree block header
*/
void
xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_buf *bp,
__u32 magic,
__u16 level,
__u16 numrecs,
__u64 owner,
unsigned int flags);
void
xfs_btree_init_block_int(
struct xfs_mount *mp,
struct xfs_btree_block *buf,
xfs_daddr_t blkno,
__u32 magic,
__u16 level,
__u16 numrecs,
__u64 owner,
unsigned int flags);
/*
* Common btree core entry points.
*/
int xfs_btree_increment(struct xfs_btree_cur *, int, int *);
int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
int xfs_btree_insert(struct xfs_btree_cur *, int *);
int xfs_btree_delete(struct xfs_btree_cur *, int *);
int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner,
struct list_head *buffer_list);
/*
* btree block CRC helpers
*/
void xfs_btree_lblock_calc_crc(struct xfs_buf *);
bool xfs_btree_lblock_verify_crc(struct xfs_buf *);
void xfs_btree_sblock_calc_crc(struct xfs_buf *);
bool xfs_btree_sblock_verify_crc(struct xfs_buf *);
/*
* Internal btree helpers also used by xfs_bmap.c.
*/
void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int);
/*
* Helpers.
*/
static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
{
return be16_to_cpu(block->bb_numrecs);
}
static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
__uint16_t numrecs)
{
block->bb_numrecs = cpu_to_be16(numrecs);
}
static inline int xfs_btree_get_level(struct xfs_btree_block *block)
{
return be16_to_cpu(block->bb_level);
}
/*
* Min and max functions for extlen, agblock, fileoff, and filblks types.
*/
#define XFS_EXTLEN_MIN(a,b) min_t(xfs_extlen_t, (a), (b))
#define XFS_EXTLEN_MAX(a,b) max_t(xfs_extlen_t, (a), (b))
#define XFS_AGBLOCK_MIN(a,b) min_t(xfs_agblock_t, (a), (b))
#define XFS_AGBLOCK_MAX(a,b) max_t(xfs_agblock_t, (a), (b))
#define XFS_FILEOFF_MIN(a,b) min_t(xfs_fileoff_t, (a), (b))
#define XFS_FILEOFF_MAX(a,b) max_t(xfs_fileoff_t, (a), (b))
#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b))
#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
#define XFS_FSB_SANITY_CHECK(mp,fsb) \
(XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
/*
* Trace hooks. Currently not implemented as they need to be ported
* over to the generic tracing functionality, which is some effort.
*
* i,j = integer (32 bit)
* b = btree block buffer (xfs_buf_t)
* p = btree ptr
* r = btree record
* k = btree key
*/
#define XFS_BTREE_TRACE_ARGBI(c, b, i)
#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
#define XFS_BTREE_TRACE_ARGI(c, i)
#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
#define XFS_BTREE_TRACE_ARGIK(c, i, k)
#define XFS_BTREE_TRACE_ARGR(c, r)
#define XFS_BTREE_TRACE_CURSOR(c, t)
#endif /* __XFS_BTREE_H__ */

63
fs/xfs/libxfs/xfs_cksum.h Normal file
View File

@@ -0,0 +1,63 @@
#ifndef _XFS_CKSUM_H
#define _XFS_CKSUM_H 1
#define XFS_CRC_SEED (~(__uint32_t)0)
/*
* Calculate the intermediate checksum for a buffer that has the CRC field
* inside it. The offset of the 32bit crc fields is passed as the
* cksum_offset parameter.
*/
static inline __uint32_t
xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t zero = 0;
__uint32_t crc;
/* Calculate CRC up to the checksum. */
crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset);
/* Skip checksum field */
crc = crc32c(crc, &zero, sizeof(__u32));
/* Calculate the rest of the CRC. */
return crc32c(crc, &buffer[cksum_offset + sizeof(__be32)],
length - (cksum_offset + sizeof(__be32)));
}
/*
* Convert the intermediate checksum to the final ondisk format.
*
* The CRC32c calculation uses LE format even on BE machines, but returns the
* result in host endian format. Hence we need to byte swap it back to LE format
* so that it is consistent on disk.
*/
static inline __le32
xfs_end_cksum(__uint32_t crc)
{
return ~cpu_to_le32(crc);
}
/*
* Helper to generate the checksum for a buffer.
*/
static inline void
xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset);
*(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc);
}
/*
* Helper to verify the checksum for a buffer.
*/
static inline int
xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset);
return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc);
}
#endif /* _XFS_CKSUM_H */

2665
fs/xfs/libxfs/xfs_da_btree.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,221 @@
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_DA_BTREE_H__
#define __XFS_DA_BTREE_H__
struct xfs_bmap_free;
struct xfs_inode;
struct xfs_trans;
struct zone;
struct xfs_dir_ops;
/*
* Directory/attribute geometry information. There will be one of these for each
* data fork type, and it will be passed around via the xfs_da_args. Global
* structures will be attached to the xfs_mount.
*/
struct xfs_da_geometry {
int blksize; /* da block size in bytes */
int fsbcount; /* da block size in filesystem blocks */
uint8_t fsblog; /* log2 of _filesystem_ block size */
uint8_t blklog; /* log2 of da block size */
uint node_ents; /* # of entries in a danode */
int magicpct; /* 37% of block size in bytes */
xfs_dablk_t datablk; /* blockno of dir data v2 */
xfs_dablk_t leafblk; /* blockno of leaf data v2 */
xfs_dablk_t freeblk; /* blockno of free data v2 */
};
/*========================================================================
* Btree searching and modification structure definitions.
*========================================================================*/
/*
* Search comparison results
*/
enum xfs_dacmp {
XFS_CMP_DIFFERENT, /* names are completely different */
XFS_CMP_EXACT, /* names are exactly the same */
XFS_CMP_CASE /* names are same but differ in case */
};
/*
* Structure to ease passing around component names.
*/
typedef struct xfs_da_args {
struct xfs_da_geometry *geo; /* da block geometry */
const __uint8_t *name; /* string (maybe not NULL terminated) */
int namelen; /* length of string (maybe no NULL) */
__uint8_t filetype; /* filetype of inode for directories */
__uint8_t *value; /* set of bytes (maybe contain NULLs) */
int valuelen; /* length of value */
int flags; /* argument flags (eg: ATTR_NOCREATE) */
xfs_dahash_t hashval; /* hash value of name */
xfs_ino_t inumber; /* input/output inode number */
struct xfs_inode *dp; /* directory inode to manipulate */
xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */
struct xfs_bmap_free *flist; /* ptr to freelist for bmap_finish */
struct xfs_trans *trans; /* current trans (changes over time) */
xfs_extlen_t total; /* total blocks needed, for 1st bmap */
int whichfork; /* data or attribute fork */
xfs_dablk_t blkno; /* blkno of attr leaf of interest */
int index; /* index of attr of interest in blk */
xfs_dablk_t rmtblkno; /* remote attr value starting blkno */
int rmtblkcnt; /* remote attr value block count */
int rmtvaluelen; /* remote attr value length in bytes */
xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */
int index2; /* index of 2nd attr in blk */
xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
int rmtblkcnt2; /* remote attr value block count */
int rmtvaluelen2; /* remote attr value length in bytes */
int op_flags; /* operation flags */
enum xfs_dacmp cmpresult; /* name compare result for lookups */
} xfs_da_args_t;
/*
* Operation flags:
*/
#define XFS_DA_OP_JUSTCHECK 0x0001 /* check for ok with no space */
#define XFS_DA_OP_RENAME 0x0002 /* this is an atomic rename op */
#define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */
#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
#define XFS_DA_OP_FLAGS \
{ XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
{ XFS_DA_OP_RENAME, "RENAME" }, \
{ XFS_DA_OP_ADDNAME, "ADDNAME" }, \
{ XFS_DA_OP_OKNOENT, "OKNOENT" }, \
{ XFS_DA_OP_CILOOKUP, "CILOOKUP" }
/*
* Storage for holding state during Btree searches and split/join ops.
*
* Only need space for 5 intermediate nodes. With a minimum of 62-way
* fanout to the Btree, we can support over 900 million directory blocks,
* which is slightly more than enough.
*/
typedef struct xfs_da_state_blk {
struct xfs_buf *bp; /* buffer containing block */
xfs_dablk_t blkno; /* filesystem blkno of buffer */
xfs_daddr_t disk_blkno; /* on-disk blkno (in BBs) of buffer */
int index; /* relevant index into block */
xfs_dahash_t hashval; /* last hash value in block */
int magic; /* blk's magic number, ie: blk type */
} xfs_da_state_blk_t;
typedef struct xfs_da_state_path {
int active; /* number of active levels */
xfs_da_state_blk_t blk[XFS_DA_NODE_MAXDEPTH];
} xfs_da_state_path_t;
typedef struct xfs_da_state {
xfs_da_args_t *args; /* filename arguments */
struct xfs_mount *mp; /* filesystem mount point */
xfs_da_state_path_t path; /* search/split paths */
xfs_da_state_path_t altpath; /* alternate path for join */
unsigned char inleaf; /* insert into 1->lf, 0->splf */
unsigned char extravalid; /* T/F: extrablk is in use */
unsigned char extraafter; /* T/F: extrablk is after new */
xfs_da_state_blk_t extrablk; /* for double-splits on leaves */
/* for dirv2 extrablk is data */
} xfs_da_state_t;
/*
* Utility macros to aid in logging changed structure fields.
*/
#define XFS_DA_LOGOFF(BASE, ADDR) ((char *)(ADDR) - (char *)(BASE))
#define XFS_DA_LOGRANGE(BASE, ADDR, SIZE) \
(uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
(uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
/*
* Name ops for directory and/or attr name operations
*/
struct xfs_nameops {
xfs_dahash_t (*hashname)(struct xfs_name *);
enum xfs_dacmp (*compname)(struct xfs_da_args *,
const unsigned char *, int);
};
/*========================================================================
* Function prototypes.
*========================================================================*/
/*
* Routines used for growing the Btree.
*/
int xfs_da3_node_create(struct xfs_da_args *args, xfs_dablk_t blkno,
int level, struct xfs_buf **bpp, int whichfork);
int xfs_da3_split(xfs_da_state_t *state);
/*
* Routines used for shrinking the Btree.
*/
int xfs_da3_join(xfs_da_state_t *state);
void xfs_da3_fixhashpath(struct xfs_da_state *state,
struct xfs_da_state_path *path_to_to_fix);
/*
* Routines used for finding things in the Btree.
*/
int xfs_da3_node_lookup_int(xfs_da_state_t *state, int *result);
int xfs_da3_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
int forward, int release, int *result);
/*
* Utility routines.
*/
int xfs_da3_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
xfs_da_state_blk_t *new_blk);
int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int which_fork);
/*
* Utility routines.
*/
int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
int count);
int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bp, int whichfork);
int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int whichfork,
const struct xfs_buf_ops *ops);
xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
xfs_daddr_t mapped_bno, int whichfork,
const struct xfs_buf_ops *ops);
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
struct xfs_buf *dead_buf);
uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
const unsigned char *name, int len);
xfs_da_state_t *xfs_da_state_alloc(void);
void xfs_da_state_free(xfs_da_state_t *state);
extern struct kmem_zone *xfs_da_state_zone;
extern const struct xfs_nameops xfs_default_nameops;
#endif /* __XFS_DA_BTREE_H__ */

View File

@@ -0,0 +1,911 @@
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
/*
* Shortform directory ops
*/
static int
xfs_dir2_sf_entsize(
struct xfs_dir2_sf_hdr *hdr,
int len)
{
int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */
count += len; /* name */
count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) :
sizeof(xfs_dir2_ino4_t); /* ino # */
return count;
}
static int
xfs_dir3_sf_entsize(
struct xfs_dir2_sf_hdr *hdr,
int len)
{
return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t);
}
static struct xfs_dir2_sf_entry *
xfs_dir2_sf_nextentry(
struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep)
{
return (struct xfs_dir2_sf_entry *)
((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
}
static struct xfs_dir2_sf_entry *
xfs_dir3_sf_nextentry(
struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep)
{
return (struct xfs_dir2_sf_entry *)
((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen));
}
/*
* For filetype enabled shortform directories, the file type field is stored at
* the end of the name. Because it's only a single byte, endian conversion is
* not necessary. For non-filetype enable directories, the type is always
* unknown and we never store the value.
*/
static __uint8_t
xfs_dir2_sfe_get_ftype(
struct xfs_dir2_sf_entry *sfep)
{
return XFS_DIR3_FT_UNKNOWN;
}
static void
xfs_dir2_sfe_put_ftype(
struct xfs_dir2_sf_entry *sfep,
__uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
}
static __uint8_t
xfs_dir3_sfe_get_ftype(
struct xfs_dir2_sf_entry *sfep)
{
__uint8_t ftype;
ftype = sfep->name[sfep->namelen];
if (ftype >= XFS_DIR3_FT_MAX)
return XFS_DIR3_FT_UNKNOWN;
return ftype;
}
static void
xfs_dir3_sfe_put_ftype(
struct xfs_dir2_sf_entry *sfep,
__uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
sfep->name[sfep->namelen] = ftype;
}
/*
* Inode numbers in short-form directories can come in two versions,
* either 4 bytes or 8 bytes wide. These helpers deal with the
* two forms transparently by looking at the headers i8count field.
*
* For 64-bit inode number the most significant byte must be zero.
*/
static xfs_ino_t
xfs_dir2_sf_get_ino(
struct xfs_dir2_sf_hdr *hdr,
xfs_dir2_inou_t *from)
{
if (hdr->i8count)
return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
else
return get_unaligned_be32(&from->i4.i);
}
static void
xfs_dir2_sf_put_ino(
struct xfs_dir2_sf_hdr *hdr,
xfs_dir2_inou_t *to,
xfs_ino_t ino)
{
ASSERT((ino & 0xff00000000000000ULL) == 0);
if (hdr->i8count)
put_unaligned_be64(ino, &to->i8.i);
else
put_unaligned_be32(ino, &to->i4.i);
}
static xfs_ino_t
xfs_dir2_sf_get_parent_ino(
struct xfs_dir2_sf_hdr *hdr)
{
return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
}
static void
xfs_dir2_sf_put_parent_ino(
struct xfs_dir2_sf_hdr *hdr,
xfs_ino_t ino)
{
xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
}
/*
* In short-form directory entries the inode numbers are stored at variable
* offset behind the entry name. If the entry stores a filetype value, then it
* sits between the name and the inode number. Hence the inode numbers may only
* be accessed through the helpers below.
*/
static xfs_ino_t
xfs_dir2_sfe_get_ino(
struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep)
{
return xfs_dir2_sf_get_ino(hdr,
(xfs_dir2_inou_t *)&sfep->name[sfep->namelen]);
}
static void
xfs_dir2_sfe_put_ino(
struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep,
xfs_ino_t ino)
{
xfs_dir2_sf_put_ino(hdr,
(xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino);
}
static xfs_ino_t
xfs_dir3_sfe_get_ino(
struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep)
{
return xfs_dir2_sf_get_ino(hdr,
(xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]);
}
static void
xfs_dir3_sfe_put_ino(
struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep,
xfs_ino_t ino)
{
xfs_dir2_sf_put_ino(hdr,
(xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino);
}
/*
* Directory data block operations
*/
/*
* For special situations, the dirent size ends up fixed because we always know
* what the size of the entry is. That's true for the "." and "..", and
* therefore we know that they are a fixed size and hence their offsets are
* constant, as is the first entry.
*
* Hence, this calculation is written as a macro to be able to be calculated at
* compile time and so certain offsets can be calculated directly in the
* structure initaliser via the macro. There are two macros - one for dirents
* with ftype and without so there are no unresolvable conditionals in the
* calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power
* of 2 and the compiler doesn't reject it (unlike roundup()).
*/
#define XFS_DIR2_DATA_ENTSIZE(n) \
round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN)
#define XFS_DIR3_DATA_ENTSIZE(n) \
round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \
XFS_DIR2_DATA_ALIGN)
static int
xfs_dir2_data_entsize(
int n)
{
return XFS_DIR2_DATA_ENTSIZE(n);
}
static int
xfs_dir3_data_entsize(
int n)
{
return XFS_DIR3_DATA_ENTSIZE(n);
}
static __uint8_t
xfs_dir2_data_get_ftype(
struct xfs_dir2_data_entry *dep)
{
return XFS_DIR3_FT_UNKNOWN;
}
static void
xfs_dir2_data_put_ftype(
struct xfs_dir2_data_entry *dep,
__uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
}
static __uint8_t
xfs_dir3_data_get_ftype(
struct xfs_dir2_data_entry *dep)
{
__uint8_t ftype = dep->name[dep->namelen];
ASSERT(ftype < XFS_DIR3_FT_MAX);
if (ftype >= XFS_DIR3_FT_MAX)
return XFS_DIR3_FT_UNKNOWN;
return ftype;
}
static void
xfs_dir3_data_put_ftype(
struct xfs_dir2_data_entry *dep,
__uint8_t type)
{
ASSERT(type < XFS_DIR3_FT_MAX);
ASSERT(dep->namelen != 0);
dep->name[dep->namelen] = type;
}
/*
* Pointer to an entry's tag word.
*/
static __be16 *
xfs_dir2_data_entry_tag_p(
struct xfs_dir2_data_entry *dep)
{
return (__be16 *)((char *)dep +
xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
}
static __be16 *
xfs_dir3_data_entry_tag_p(
struct xfs_dir2_data_entry *dep)
{
return (__be16 *)((char *)dep +
xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16));
}
/*
* location of . and .. in data space (always block 0)
*/
static struct xfs_dir2_data_entry *
xfs_dir2_data_dot_entry_p(
struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
}
static struct xfs_dir2_data_entry *
xfs_dir2_data_dotdot_entry_p(
struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR2_DATA_ENTSIZE(1));
}
static struct xfs_dir2_data_entry *
xfs_dir2_data_first_entry_p(
struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR2_DATA_ENTSIZE(1) +
XFS_DIR2_DATA_ENTSIZE(2));
}
static struct xfs_dir2_data_entry *
xfs_dir2_ftype_data_dotdot_entry_p(
struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1));
}
static struct xfs_dir2_data_entry *
xfs_dir2_ftype_data_first_entry_p(
struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1) +
XFS_DIR3_DATA_ENTSIZE(2));
}
static struct xfs_dir2_data_entry *
xfs_dir3_data_dot_entry_p(
struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
}
static struct xfs_dir2_data_entry *
xfs_dir3_data_dotdot_entry_p(
struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1));
}
static struct xfs_dir2_data_entry *
xfs_dir3_data_first_entry_p(
struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1) +
XFS_DIR3_DATA_ENTSIZE(2));
}
static struct xfs_dir2_data_free *
xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
{
return hdr->bestfree;
}
static struct xfs_dir2_data_free *
xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
{
return ((struct xfs_dir3_data_hdr *)hdr)->best_free;
}
static struct xfs_dir2_data_entry *
xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
}
static struct xfs_dir2_data_unused *
xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_unused *)
((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
}
static struct xfs_dir2_data_entry *
xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_entry *)
((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
}
static struct xfs_dir2_data_unused *
xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
{
return (struct xfs_dir2_data_unused *)
((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
}
/*
* Directory Leaf block operations
*/
static int
xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo)
{
return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) /
(uint)sizeof(struct xfs_dir2_leaf_entry);
}
static struct xfs_dir2_leaf_entry *
xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp)
{
return lp->__ents;
}
static int
xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo)
{
return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) /
(uint)sizeof(struct xfs_dir2_leaf_entry);
}
static struct xfs_dir2_leaf_entry *
xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
{
return ((struct xfs_dir3_leaf *)lp)->__ents;
}
static void
xfs_dir2_leaf_hdr_from_disk(
struct xfs_dir3_icleaf_hdr *to,
struct xfs_dir2_leaf *from)
{
to->forw = be32_to_cpu(from->hdr.info.forw);
to->back = be32_to_cpu(from->hdr.info.back);
to->magic = be16_to_cpu(from->hdr.info.magic);
to->count = be16_to_cpu(from->hdr.count);
to->stale = be16_to_cpu(from->hdr.stale);
ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
to->magic == XFS_DIR2_LEAFN_MAGIC);
}
static void
xfs_dir2_leaf_hdr_to_disk(
struct xfs_dir2_leaf *to,
struct xfs_dir3_icleaf_hdr *from)
{
ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
from->magic == XFS_DIR2_LEAFN_MAGIC);
to->hdr.info.forw = cpu_to_be32(from->forw);
to->hdr.info.back = cpu_to_be32(from->back);
to->hdr.info.magic = cpu_to_be16(from->magic);
to->hdr.count = cpu_to_be16(from->count);
to->hdr.stale = cpu_to_be16(from->stale);
}
static void
xfs_dir3_leaf_hdr_from_disk(
struct xfs_dir3_icleaf_hdr *to,
struct xfs_dir2_leaf *from)
{
struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from;
to->forw = be32_to_cpu(hdr3->info.hdr.forw);
to->back = be32_to_cpu(hdr3->info.hdr.back);
to->magic = be16_to_cpu(hdr3->info.hdr.magic);
to->count = be16_to_cpu(hdr3->count);
to->stale = be16_to_cpu(hdr3->stale);
ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC ||
to->magic == XFS_DIR3_LEAFN_MAGIC);
}
static void
xfs_dir3_leaf_hdr_to_disk(
struct xfs_dir2_leaf *to,
struct xfs_dir3_icleaf_hdr *from)
{
struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to;
ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
from->magic == XFS_DIR3_LEAFN_MAGIC);
hdr3->info.hdr.forw = cpu_to_be32(from->forw);
hdr3->info.hdr.back = cpu_to_be32(from->back);
hdr3->info.hdr.magic = cpu_to_be16(from->magic);
hdr3->count = cpu_to_be16(from->count);
hdr3->stale = cpu_to_be16(from->stale);
}
/*
* Directory/Attribute Node block operations
*/
static struct xfs_da_node_entry *
xfs_da2_node_tree_p(struct xfs_da_intnode *dap)
{
return dap->__btree;
}
static struct xfs_da_node_entry *
xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
{
return ((struct xfs_da3_intnode *)dap)->__btree;
}
static void
xfs_da2_node_hdr_from_disk(
struct xfs_da3_icnode_hdr *to,
struct xfs_da_intnode *from)
{
ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
to->forw = be32_to_cpu(from->hdr.info.forw);
to->back = be32_to_cpu(from->hdr.info.back);
to->magic = be16_to_cpu(from->hdr.info.magic);
to->count = be16_to_cpu(from->hdr.__count);
to->level = be16_to_cpu(from->hdr.__level);
}
static void
xfs_da2_node_hdr_to_disk(
struct xfs_da_intnode *to,
struct xfs_da3_icnode_hdr *from)
{
ASSERT(from->magic == XFS_DA_NODE_MAGIC);
to->hdr.info.forw = cpu_to_be32(from->forw);
to->hdr.info.back = cpu_to_be32(from->back);
to->hdr.info.magic = cpu_to_be16(from->magic);
to->hdr.__count = cpu_to_be16(from->count);
to->hdr.__level = cpu_to_be16(from->level);
}
static void
xfs_da3_node_hdr_from_disk(
struct xfs_da3_icnode_hdr *to,
struct xfs_da_intnode *from)
{
struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from;
ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
to->forw = be32_to_cpu(hdr3->info.hdr.forw);
to->back = be32_to_cpu(hdr3->info.hdr.back);
to->magic = be16_to_cpu(hdr3->info.hdr.magic);
to->count = be16_to_cpu(hdr3->__count);
to->level = be16_to_cpu(hdr3->__level);
}
static void
xfs_da3_node_hdr_to_disk(
struct xfs_da_intnode *to,
struct xfs_da3_icnode_hdr *from)
{
struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to;
ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
hdr3->info.hdr.forw = cpu_to_be32(from->forw);
hdr3->info.hdr.back = cpu_to_be32(from->back);
hdr3->info.hdr.magic = cpu_to_be16(from->magic);
hdr3->__count = cpu_to_be16(from->count);
hdr3->__level = cpu_to_be16(from->level);
}
/*
* Directory free space block operations
*/
static int
xfs_dir2_free_max_bests(struct xfs_da_geometry *geo)
{
return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) /
sizeof(xfs_dir2_data_off_t);
}
static __be16 *
xfs_dir2_free_bests_p(struct xfs_dir2_free *free)
{
return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr));
}
/*
* Convert data space db to the corresponding free db.
*/
static xfs_dir2_db_t
xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
(db / xfs_dir2_free_max_bests(geo));
}
/*
* Convert data space db to the corresponding index in a free db.
*/
static int
xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
return db % xfs_dir2_free_max_bests(geo);
}
static int
xfs_dir3_free_max_bests(struct xfs_da_geometry *geo)
{
return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) /
sizeof(xfs_dir2_data_off_t);
}
static __be16 *
xfs_dir3_free_bests_p(struct xfs_dir2_free *free)
{
return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr));
}
/*
* Convert data space db to the corresponding free db.
*/
static xfs_dir2_db_t
xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
(db / xfs_dir3_free_max_bests(geo));
}
/*
* Convert data space db to the corresponding index in a free db.
*/
static int
xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
return db % xfs_dir3_free_max_bests(geo);
}
static void
xfs_dir2_free_hdr_from_disk(
struct xfs_dir3_icfree_hdr *to,
struct xfs_dir2_free *from)
{
to->magic = be32_to_cpu(from->hdr.magic);
to->firstdb = be32_to_cpu(from->hdr.firstdb);
to->nvalid = be32_to_cpu(from->hdr.nvalid);
to->nused = be32_to_cpu(from->hdr.nused);
ASSERT(to->magic == XFS_DIR2_FREE_MAGIC);
}
static void
xfs_dir2_free_hdr_to_disk(
struct xfs_dir2_free *to,
struct xfs_dir3_icfree_hdr *from)
{
ASSERT(from->magic == XFS_DIR2_FREE_MAGIC);
to->hdr.magic = cpu_to_be32(from->magic);
to->hdr.firstdb = cpu_to_be32(from->firstdb);
to->hdr.nvalid = cpu_to_be32(from->nvalid);
to->hdr.nused = cpu_to_be32(from->nused);
}
static void
xfs_dir3_free_hdr_from_disk(
struct xfs_dir3_icfree_hdr *to,
struct xfs_dir2_free *from)
{
struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from;
to->magic = be32_to_cpu(hdr3->hdr.magic);
to->firstdb = be32_to_cpu(hdr3->firstdb);
to->nvalid = be32_to_cpu(hdr3->nvalid);
to->nused = be32_to_cpu(hdr3->nused);
ASSERT(to->magic == XFS_DIR3_FREE_MAGIC);
}
static void
xfs_dir3_free_hdr_to_disk(
struct xfs_dir2_free *to,
struct xfs_dir3_icfree_hdr *from)
{
struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to;
ASSERT(from->magic == XFS_DIR3_FREE_MAGIC);
hdr3->hdr.magic = cpu_to_be32(from->magic);
hdr3->firstdb = cpu_to_be32(from->firstdb);
hdr3->nvalid = cpu_to_be32(from->nvalid);
hdr3->nused = cpu_to_be32(from->nused);
}
static const struct xfs_dir_ops xfs_dir2_ops = {
.sf_entsize = xfs_dir2_sf_entsize,
.sf_nextentry = xfs_dir2_sf_nextentry,
.sf_get_ftype = xfs_dir2_sfe_get_ftype,
.sf_put_ftype = xfs_dir2_sfe_put_ftype,
.sf_get_ino = xfs_dir2_sfe_get_ino,
.sf_put_ino = xfs_dir2_sfe_put_ino,
.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
.data_entsize = xfs_dir2_data_entsize,
.data_get_ftype = xfs_dir2_data_get_ftype,
.data_put_ftype = xfs_dir2_data_put_ftype,
.data_entry_tag_p = xfs_dir2_data_entry_tag_p,
.data_bestfree_p = xfs_dir2_data_bestfree_p,
.data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
.data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR2_DATA_ENTSIZE(1),
.data_first_offset = sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR2_DATA_ENTSIZE(1) +
XFS_DIR2_DATA_ENTSIZE(2),
.data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
.data_dot_entry_p = xfs_dir2_data_dot_entry_p,
.data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p,
.data_first_entry_p = xfs_dir2_data_first_entry_p,
.data_entry_p = xfs_dir2_data_entry_p,
.data_unused_p = xfs_dir2_data_unused_p,
.leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
.leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
.leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
.leaf_max_ents = xfs_dir2_max_leaf_ents,
.leaf_ents_p = xfs_dir2_leaf_ents_p,
.node_hdr_size = sizeof(struct xfs_da_node_hdr),
.node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
.node_tree_p = xfs_da2_node_tree_p,
.free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
.free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
.free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
.free_max_bests = xfs_dir2_free_max_bests,
.free_bests_p = xfs_dir2_free_bests_p,
.db_to_fdb = xfs_dir2_db_to_fdb,
.db_to_fdindex = xfs_dir2_db_to_fdindex,
};
static const struct xfs_dir_ops xfs_dir2_ftype_ops = {
.sf_entsize = xfs_dir3_sf_entsize,
.sf_nextentry = xfs_dir3_sf_nextentry,
.sf_get_ftype = xfs_dir3_sfe_get_ftype,
.sf_put_ftype = xfs_dir3_sfe_put_ftype,
.sf_get_ino = xfs_dir3_sfe_get_ino,
.sf_put_ino = xfs_dir3_sfe_put_ino,
.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
.data_entsize = xfs_dir3_data_entsize,
.data_get_ftype = xfs_dir3_data_get_ftype,
.data_put_ftype = xfs_dir3_data_put_ftype,
.data_entry_tag_p = xfs_dir3_data_entry_tag_p,
.data_bestfree_p = xfs_dir2_data_bestfree_p,
.data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
.data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1),
.data_first_offset = sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1) +
XFS_DIR3_DATA_ENTSIZE(2),
.data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
.data_dot_entry_p = xfs_dir2_data_dot_entry_p,
.data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p,
.data_first_entry_p = xfs_dir2_ftype_data_first_entry_p,
.data_entry_p = xfs_dir2_data_entry_p,
.data_unused_p = xfs_dir2_data_unused_p,
.leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
.leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
.leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
.leaf_max_ents = xfs_dir2_max_leaf_ents,
.leaf_ents_p = xfs_dir2_leaf_ents_p,
.node_hdr_size = sizeof(struct xfs_da_node_hdr),
.node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
.node_tree_p = xfs_da2_node_tree_p,
.free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
.free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
.free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
.free_max_bests = xfs_dir2_free_max_bests,
.free_bests_p = xfs_dir2_free_bests_p,
.db_to_fdb = xfs_dir2_db_to_fdb,
.db_to_fdindex = xfs_dir2_db_to_fdindex,
};
static const struct xfs_dir_ops xfs_dir3_ops = {
.sf_entsize = xfs_dir3_sf_entsize,
.sf_nextentry = xfs_dir3_sf_nextentry,
.sf_get_ftype = xfs_dir3_sfe_get_ftype,
.sf_put_ftype = xfs_dir3_sfe_put_ftype,
.sf_get_ino = xfs_dir3_sfe_get_ino,
.sf_put_ino = xfs_dir3_sfe_put_ino,
.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
.data_entsize = xfs_dir3_data_entsize,
.data_get_ftype = xfs_dir3_data_get_ftype,
.data_put_ftype = xfs_dir3_data_put_ftype,
.data_entry_tag_p = xfs_dir3_data_entry_tag_p,
.data_bestfree_p = xfs_dir3_data_bestfree_p,
.data_dot_offset = sizeof(struct xfs_dir3_data_hdr),
.data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1),
.data_first_offset = sizeof(struct xfs_dir3_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1) +
XFS_DIR3_DATA_ENTSIZE(2),
.data_entry_offset = sizeof(struct xfs_dir3_data_hdr),
.data_dot_entry_p = xfs_dir3_data_dot_entry_p,
.data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p,
.data_first_entry_p = xfs_dir3_data_first_entry_p,
.data_entry_p = xfs_dir3_data_entry_p,
.data_unused_p = xfs_dir3_data_unused_p,
.leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr),
.leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk,
.leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk,
.leaf_max_ents = xfs_dir3_max_leaf_ents,
.leaf_ents_p = xfs_dir3_leaf_ents_p,
.node_hdr_size = sizeof(struct xfs_da3_node_hdr),
.node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
.node_tree_p = xfs_da3_node_tree_p,
.free_hdr_size = sizeof(struct xfs_dir3_free_hdr),
.free_hdr_to_disk = xfs_dir3_free_hdr_to_disk,
.free_hdr_from_disk = xfs_dir3_free_hdr_from_disk,
.free_max_bests = xfs_dir3_free_max_bests,
.free_bests_p = xfs_dir3_free_bests_p,
.db_to_fdb = xfs_dir3_db_to_fdb,
.db_to_fdindex = xfs_dir3_db_to_fdindex,
};
static const struct xfs_dir_ops xfs_dir2_nondir_ops = {
.node_hdr_size = sizeof(struct xfs_da_node_hdr),
.node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
.node_tree_p = xfs_da2_node_tree_p,
};
static const struct xfs_dir_ops xfs_dir3_nondir_ops = {
.node_hdr_size = sizeof(struct xfs_da3_node_hdr),
.node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
.node_tree_p = xfs_da3_node_tree_p,
};
/*
* Return the ops structure according to the current config. If we are passed
* an inode, then that overrides the default config we use which is based on
* feature bits.
*/
const struct xfs_dir_ops *
xfs_dir_get_ops(
struct xfs_mount *mp,
struct xfs_inode *dp)
{
if (dp)
return dp->d_ops;
if (mp->m_dir_inode_ops)
return mp->m_dir_inode_ops;
if (xfs_sb_version_hascrc(&mp->m_sb))
return &xfs_dir3_ops;
if (xfs_sb_version_hasftype(&mp->m_sb))
return &xfs_dir2_ftype_ops;
return &xfs_dir2_ops;
}
const struct xfs_dir_ops *
xfs_nondir_get_ops(
struct xfs_mount *mp,
struct xfs_inode *dp)
{
if (dp)
return dp->d_ops;
if (mp->m_nondir_inode_ops)
return mp->m_nondir_inode_ops;
if (xfs_sb_version_hascrc(&mp->m_sb))
return &xfs_dir3_nondir_ops;
return &xfs_dir2_nondir_ops;
}

View File

@@ -0,0 +1,861 @@
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_DA_FORMAT_H__
#define __XFS_DA_FORMAT_H__
/*
* This structure is common to both leaf nodes and non-leaf nodes in the Btree.
*
* It is used to manage a doubly linked list of all blocks at the same
* level in the Btree, and to identify which type of block this is.
*/
#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */
#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */
#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */
#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */
typedef struct xfs_da_blkinfo {
__be32 forw; /* previous block in list */
__be32 back; /* following block in list */
__be16 magic; /* validity check on block */
__be16 pad; /* unused */
} xfs_da_blkinfo_t;
/*
* CRC enabled directory structure types
*
* The headers change size for the additional verification information, but
* otherwise the tree layouts and contents are unchanged. Hence the da btree
* code can use the struct xfs_da_blkinfo for manipulating the tree links and
* magic numbers without modification for both v2 and v3 nodes.
*/
#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */
#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */
#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */
#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */
struct xfs_da3_blkinfo {
/*
* the node link manipulation code relies on the fact that the first
* element of this structure is the struct xfs_da_blkinfo so it can
* ignore the differences in the rest of the structures.
*/
struct xfs_da_blkinfo hdr;
__be32 crc; /* CRC of block */
__be64 blkno; /* first block of the buffer */
__be64 lsn; /* sequence number of last write */
uuid_t uuid; /* filesystem we belong to */
__be64 owner; /* inode that owns the block */
};
/*
* This is the structure of the root and intermediate nodes in the Btree.
* The leaf nodes are defined above.
*
* Entries are not packed.
*
* Since we have duplicate keys, use a binary search but always follow
* all match in the block, not just the first match found.
*/
#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
typedef struct xfs_da_node_hdr {
struct xfs_da_blkinfo info; /* block type, links, etc. */
__be16 __count; /* count of active entries */
__be16 __level; /* level above leaves (leaf == 0) */
} xfs_da_node_hdr_t;
struct xfs_da3_node_hdr {
struct xfs_da3_blkinfo info; /* block type, links, etc. */
__be16 __count; /* count of active entries */
__be16 __level; /* level above leaves (leaf == 0) */
__be32 __pad32;
};
#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc))
typedef struct xfs_da_node_entry {
__be32 hashval; /* hash value for this descendant */
__be32 before; /* Btree block before this key */
} xfs_da_node_entry_t;
typedef struct xfs_da_intnode {
struct xfs_da_node_hdr hdr;
struct xfs_da_node_entry __btree[];
} xfs_da_intnode_t;
struct xfs_da3_intnode {
struct xfs_da3_node_hdr hdr;
struct xfs_da_node_entry __btree[];
};
/*
* In-core version of the node header to abstract the differences in the v2 and
* v3 disk format of the headers. Callers need to convert to/from disk format as
* appropriate.
*/
struct xfs_da3_icnode_hdr {
__uint32_t forw;
__uint32_t back;
__uint16_t magic;
__uint16_t count;
__uint16_t level;
};
/*
* Directory version 2.
*
* There are 4 possible formats:
* - shortform - embedded into the inode
* - single block - data with embedded leaf at the end
* - multiple data blocks, single leaf+freeindex block
* - data blocks, node and leaf blocks (btree), freeindex blocks
*
* Note: many node blocks structures and constants are shared with the attr
* code and defined in xfs_da_btree.h.
*/
#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */
#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */
#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */
/*
* Directory Version 3 With CRCs.
*
* The tree formats are the same as for version 2 directories. The difference
* is in the block header and dirent formats. In many cases the v3 structures
* use v2 definitions as they are no different and this makes code sharing much
* easier.
*
* Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the
* format is v2 then they switch to the existing v2 code, or the format is v3
* they implement the v3 functionality. This means the existing dir2 is a mix of
* xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are called
* where there is a difference in the formats, otherwise the code is unchanged.
*
* Where it is possible, the code decides what to do based on the magic numbers
* in the blocks rather than feature bits in the superblock. This means the code
* is as independent of the external XFS code as possible as doesn't require
* passing struct xfs_mount pointers into places where it isn't really
* necessary.
*
* Version 3 includes:
*
* - a larger block header for CRC and identification purposes and so the
* offsets of all the structures inside the blocks are different.
*
* - new magic numbers to be able to detect the v2/v3 types on the fly.
*/
#define XFS_DIR3_BLOCK_MAGIC 0x58444233 /* XDB3: single block dirs */
#define XFS_DIR3_DATA_MAGIC 0x58444433 /* XDD3: multiblock dirs */
#define XFS_DIR3_FREE_MAGIC 0x58444633 /* XDF3: free index blocks */
/*
* Dirents in version 3 directories have a file type field. Additions to this
* list are an on-disk format change, requiring feature bits. Valid values
* are as follows:
*/
#define XFS_DIR3_FT_UNKNOWN 0
#define XFS_DIR3_FT_REG_FILE 1
#define XFS_DIR3_FT_DIR 2
#define XFS_DIR3_FT_CHRDEV 3
#define XFS_DIR3_FT_BLKDEV 4
#define XFS_DIR3_FT_FIFO 5
#define XFS_DIR3_FT_SOCK 6
#define XFS_DIR3_FT_SYMLINK 7
#define XFS_DIR3_FT_WHT 8
#define XFS_DIR3_FT_MAX 9
/*
* Byte offset in data block and shortform entry.
*/
typedef __uint16_t xfs_dir2_data_off_t;
#define NULLDATAOFF 0xffffU
typedef uint xfs_dir2_data_aoff_t; /* argument form */
/*
* Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
* Only need 16 bits, this is the byte offset into the single block form.
*/
typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
/*
* Offset in data space of a data entry.
*/
typedef __uint32_t xfs_dir2_dataptr_t;
#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
/*
* Byte offset in a directory.
*/
typedef xfs_off_t xfs_dir2_off_t;
/*
* Directory block number (logical dirblk in file)
*/
typedef __uint32_t xfs_dir2_db_t;
/*
* Inode number stored as 8 8-bit values.
*/
typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
/*
* Inode number stored as 4 8-bit values.
* Works a lot of the time, when all the inode numbers in a directory
* fit in 32 bits.
*/
typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
typedef union {
xfs_dir2_ino8_t i8;
xfs_dir2_ino4_t i4;
} xfs_dir2_inou_t;
#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
/*
* Directory layout when stored internal to an inode.
*
* Small directories are packed as tightly as possible so as to fit into the
* literal area of the inode. These "shortform" directories consist of a
* single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
* structures. Due the different inode number storage size and the variable
* length name field in the xfs_dir2_sf_entry all these structure are
* variable length, and the accessors in this file should be used to iterate
* over them.
*/
typedef struct xfs_dir2_sf_hdr {
__uint8_t count; /* count of entries */
__uint8_t i8count; /* count of 8-byte inode #s */
xfs_dir2_inou_t parent; /* parent dir inode number */
} __arch_pack xfs_dir2_sf_hdr_t;
typedef struct xfs_dir2_sf_entry {
__u8 namelen; /* actual name length */
xfs_dir2_sf_off_t offset; /* saved offset */
__u8 name[]; /* name, variable size */
/*
* A single byte containing the file type field follows the inode
* number for version 3 directory entries.
*
* A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
* variable offset after the name.
*/
} __arch_pack xfs_dir2_sf_entry_t;
static inline int xfs_dir2_sf_hdr_size(int i8count)
{
return sizeof(struct xfs_dir2_sf_hdr) -
(i8count == 0) *
(sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t));
}
static inline xfs_dir2_data_aoff_t
xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
{
return get_unaligned_be16(&sfep->offset.i);
}
static inline void
xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
{
put_unaligned_be16(off, &sfep->offset.i);
}
static inline struct xfs_dir2_sf_entry *
xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
{
return (struct xfs_dir2_sf_entry *)
((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
}
/*
* Data block structures.
*
* A pure data block looks like the following drawing on disk:
*
* +-------------------------------------------------+
* | xfs_dir2_data_hdr_t |
* +-------------------------------------------------+
* | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
* | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
* | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
* | ... |
* +-------------------------------------------------+
* | unused space |
* +-------------------------------------------------+
*
* As all the entries are variable size structures the accessors below should
* be used to iterate over them.
*
* In addition to the pure data blocks for the data and node formats,
* most structures are also used for the combined data/freespace "block"
* format below.
*/
#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */
#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG)
#define XFS_DIR2_DATA_FREE_TAG 0xffff
#define XFS_DIR2_DATA_FD_COUNT 3
/*
* Directory address space divided into sections,
* spaces separated by 32GB.
*/
#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
#define XFS_DIR2_DATA_SPACE 0
#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
/*
* Describe a free area in the data block.
*
* The freespace will be formatted as a xfs_dir2_data_unused_t.
*/
typedef struct xfs_dir2_data_free {
__be16 offset; /* start of freespace */
__be16 length; /* length of freespace */
} xfs_dir2_data_free_t;
/*
* Header for the data blocks.
*
* The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
*/
typedef struct xfs_dir2_data_hdr {
__be32 magic; /* XFS_DIR2_DATA_MAGIC or */
/* XFS_DIR2_BLOCK_MAGIC */
xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
} xfs_dir2_data_hdr_t;
/*
* define a structure for all the verification fields we are adding to the
* directory block structures. This will be used in several structures.
* The magic number must be the first entry to align with all the dir2
* structures so we determine how to decode them just by the magic number.
*/
struct xfs_dir3_blk_hdr {
__be32 magic; /* magic number */
__be32 crc; /* CRC of block */
__be64 blkno; /* first block of the buffer */
__be64 lsn; /* sequence number of last write */
uuid_t uuid; /* filesystem we belong to */
__be64 owner; /* inode that owns the block */
};
struct xfs_dir3_data_hdr {
struct xfs_dir3_blk_hdr hdr;
xfs_dir2_data_free_t best_free[XFS_DIR2_DATA_FD_COUNT];
__be32 pad; /* 64 bit alignment */
};
#define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc)
/*
* Active entry in a data block.
*
* Aligned to 8 bytes. After the variable length name field there is a
* 2 byte tag field, which can be accessed using xfs_dir3_data_entry_tag_p.
*
* For dir3 structures, there is file type field between the name and the tag.
* This can only be manipulated by helper functions. It is packed hard against
* the end of the name so any padding for rounding is between the file type and
* the tag.
*/
typedef struct xfs_dir2_data_entry {
__be64 inumber; /* inode number */
__u8 namelen; /* name length */
__u8 name[]; /* name bytes, no null */
/* __u8 filetype; */ /* type of inode we point to */
/* __be16 tag; */ /* starting offset of us */
} xfs_dir2_data_entry_t;
/*
* Unused entry in a data block.
*
* Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed
* using xfs_dir2_data_unused_tag_p.
*/
typedef struct xfs_dir2_data_unused {
__be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
__be16 length; /* total free length */
/* variable offset */
__be16 tag; /* starting offset of us */
} xfs_dir2_data_unused_t;
/*
* Pointer to a freespace's tag word.
*/
static inline __be16 *
xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
{
return (__be16 *)((char *)dup +
be16_to_cpu(dup->length) - sizeof(__be16));
}
/*
* Leaf block structures.
*
* A pure leaf block looks like the following drawing on disk:
*
* +---------------------------+
* | xfs_dir2_leaf_hdr_t |
* +---------------------------+
* | xfs_dir2_leaf_entry_t |
* | xfs_dir2_leaf_entry_t |
* | xfs_dir2_leaf_entry_t |
* | xfs_dir2_leaf_entry_t |
* | ... |
* +---------------------------+
* | xfs_dir2_data_off_t |
* | xfs_dir2_data_off_t |
* | xfs_dir2_data_off_t |
* | ... |
* +---------------------------+
* | xfs_dir2_leaf_tail_t |
* +---------------------------+
*
* The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
* for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
* for directories with separate leaf nodes and free space blocks
* (magic = XFS_DIR2_LEAFN_MAGIC).
*
* As all the entries are variable size structures the accessors below should
* be used to iterate over them.
*/
/*
* Offset of the leaf/node space. First block in this space
* is the btree root.
*/
#define XFS_DIR2_LEAF_SPACE 1
#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
/*
* Leaf block header.
*/
typedef struct xfs_dir2_leaf_hdr {
xfs_da_blkinfo_t info; /* header for da routines */
__be16 count; /* count of entries */
__be16 stale; /* count of stale entries */
} xfs_dir2_leaf_hdr_t;
struct xfs_dir3_leaf_hdr {
struct xfs_da3_blkinfo info; /* header for da routines */
__be16 count; /* count of entries */
__be16 stale; /* count of stale entries */
__be32 pad; /* 64 bit alignment */
};
struct xfs_dir3_icleaf_hdr {
__uint32_t forw;
__uint32_t back;
__uint16_t magic;
__uint16_t count;
__uint16_t stale;
};
/*
* Leaf block entry.
*/
typedef struct xfs_dir2_leaf_entry {
__be32 hashval; /* hash value of name */
__be32 address; /* address of data entry */
} xfs_dir2_leaf_entry_t;
/*
* Leaf block tail.
*/
typedef struct xfs_dir2_leaf_tail {
__be32 bestcount;
} xfs_dir2_leaf_tail_t;
/*
* Leaf block.
*/
typedef struct xfs_dir2_leaf {
xfs_dir2_leaf_hdr_t hdr; /* leaf header */
xfs_dir2_leaf_entry_t __ents[]; /* entries */
} xfs_dir2_leaf_t;
struct xfs_dir3_leaf {
struct xfs_dir3_leaf_hdr hdr; /* leaf header */
struct xfs_dir2_leaf_entry __ents[]; /* entries */
};
#define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc)
/*
* Get address of the bests array in the single-leaf block.
*/
static inline __be16 *
xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
{
return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
}
/*
* Free space block defintions for the node format.
*/
/*
* Offset of the freespace index.
*/
#define XFS_DIR2_FREE_SPACE 2
#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
typedef struct xfs_dir2_free_hdr {
__be32 magic; /* XFS_DIR2_FREE_MAGIC */
__be32 firstdb; /* db of first entry */
__be32 nvalid; /* count of valid entries */
__be32 nused; /* count of used entries */
} xfs_dir2_free_hdr_t;
typedef struct xfs_dir2_free {
xfs_dir2_free_hdr_t hdr; /* block header */
__be16 bests[]; /* best free counts */
/* unused entries are -1 */
} xfs_dir2_free_t;
struct xfs_dir3_free_hdr {
struct xfs_dir3_blk_hdr hdr;
__be32 firstdb; /* db of first entry */
__be32 nvalid; /* count of valid entries */
__be32 nused; /* count of used entries */
__be32 pad; /* 64 bit alignment */
};
struct xfs_dir3_free {
struct xfs_dir3_free_hdr hdr;
__be16 bests[]; /* best free counts */
/* unused entries are -1 */
};
#define XFS_DIR3_FREE_CRC_OFF offsetof(struct xfs_dir3_free, hdr.hdr.crc)
/*
* In core version of the free block header, abstracted away from on-disk format
* differences. Use this in the code, and convert to/from the disk version using
* xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
*/
struct xfs_dir3_icfree_hdr {
__uint32_t magic;
__uint32_t firstdb;
__uint32_t nvalid;
__uint32_t nused;
};
/*
* Single block format.
*
* The single block format looks like the following drawing on disk:
*
* +-------------------------------------------------+
* | xfs_dir2_data_hdr_t |
* +-------------------------------------------------+
* | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
* | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
* | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t :
* | ... |
* +-------------------------------------------------+
* | unused space |
* +-------------------------------------------------+
* | ... |
* | xfs_dir2_leaf_entry_t |
* | xfs_dir2_leaf_entry_t |
* +-------------------------------------------------+
* | xfs_dir2_block_tail_t |
* +-------------------------------------------------+
*
* As all the entries are variable size structures the accessors below should
* be used to iterate over them.
*/
typedef struct xfs_dir2_block_tail {
__be32 count; /* count of leaf entries */
__be32 stale; /* count of stale lf entries */
} xfs_dir2_block_tail_t;
/*
* Pointer to the leaf entries embedded in a data block (1-block format)
*/
static inline struct xfs_dir2_leaf_entry *
xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
{
return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
}
/*
* Attribute storage layout
*
* Attribute lists are structured around Btrees where all the data
* elements are in the leaf nodes. Attribute names are hashed into an int,
* then that int is used as the index into the Btree. Since the hashval
* of an attribute name may not be unique, we may have duplicate keys. The
* internal links in the Btree are logical block offsets into the file.
*
* Struct leaf_entry's are packed from the top. Name/values grow from the
* bottom but are not packed. The freemap contains run-length-encoded entries
* for the free bytes after the leaf_entry's, but only the N largest such,
* smaller runs are dropped. When the freemap doesn't show enough space
* for an allocation, we compact the name/value area and try again. If we
* still don't have enough space, then we have to split the block. The
* name/value structs (both local and remote versions) must be 32bit aligned.
*
* Since we have duplicate hash keys, for each key that matches, compare
* the actual name string. The root and intermediate node search always
* takes the first-in-the-block key match found, so we should only have
* to work "forw"ard. If none matches, continue with the "forw"ard leaf
* nodes until the hash key changes or the attribute name is found.
*
* We store the fact that an attribute is a ROOT/USER/SECURE attribute in
* the leaf_entry. The namespaces are independent only because we also look
* at the namespace bit when we are looking for a matching attribute name.
*
* We also store an "incomplete" bit in the leaf_entry. It shows that an
* attribute is in the middle of being created and should not be shown to
* the user if we crash during the time that the bit is set. We clear the
* bit when we have finished setting up the attribute. We do this because
* we cannot create some large attributes inside a single transaction, and we
* need some indication that we weren't finished if we crash in the middle.
*/
#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */
typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
__be16 base; /* base of free region */
__be16 size; /* length of free region */
} xfs_attr_leaf_map_t;
typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
xfs_da_blkinfo_t info; /* block type, links, etc. */
__be16 count; /* count of active leaf_entry's */
__be16 usedbytes; /* num bytes of names/values stored */
__be16 firstused; /* first used byte in name area */
__u8 holes; /* != 0 if blk needs compaction */
__u8 pad1;
xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
/* N largest free regions */
} xfs_attr_leaf_hdr_t;
typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
__be32 hashval; /* hash value of name */
__be16 nameidx; /* index into buffer of name/value */
__u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
__u8 pad2; /* unused pad byte */
} xfs_attr_leaf_entry_t;
typedef struct xfs_attr_leaf_name_local {
__be16 valuelen; /* number of bytes in value */
__u8 namelen; /* length of name bytes */
__u8 nameval[1]; /* name/value bytes */
} xfs_attr_leaf_name_local_t;
typedef struct xfs_attr_leaf_name_remote {
__be32 valueblk; /* block number of value bytes */
__be32 valuelen; /* number of bytes in value */
__u8 namelen; /* length of name bytes */
__u8 name[1]; /* name bytes */
} xfs_attr_leaf_name_remote_t;
typedef struct xfs_attr_leafblock {
xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */
xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */
} xfs_attr_leafblock_t;
/*
* CRC enabled leaf structures. Called "version 3" structures to match the
* version number of the directory and dablk structures for this feature, and
* attr2 is already taken by the variable inode attribute fork size feature.
*/
struct xfs_attr3_leaf_hdr {
struct xfs_da3_blkinfo info;
__be16 count;
__be16 usedbytes;
__be16 firstused;
__u8 holes;
__u8 pad1;
struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
__be32 pad2; /* 64 bit alignment */
};
#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc))
struct xfs_attr3_leafblock {
struct xfs_attr3_leaf_hdr hdr;
struct xfs_attr_leaf_entry entries[1];
/*
* The rest of the block contains the following structures after the
* leaf entries, growing from the bottom up. The variables are never
* referenced, the locations accessed purely from helper functions.
*
* struct xfs_attr_leaf_name_local
* struct xfs_attr_leaf_name_remote
*/
};
/*
* incore, neutral version of the attribute leaf header
*/
struct xfs_attr3_icleaf_hdr {
__uint32_t forw;
__uint32_t back;
__uint16_t magic;
__uint16_t count;
__uint16_t usedbytes;
__uint16_t firstused;
__u8 holes;
struct {
__uint16_t base;
__uint16_t size;
} freemap[XFS_ATTR_LEAF_MAPSIZE];
};
/*
* Flags used in the leaf_entry[i].flags field.
* NOTE: the INCOMPLETE bit must not collide with the flags bits specified
* on the system call, they are "or"ed together for various operations.
*/
#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */
#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */
#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */
#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */
#define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT)
#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT)
/*
* Conversion macros for converting namespace bits from argument flags
* to ondisk flags.
*/
#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE)
#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK)
#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK)
#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\
((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0))
#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\
((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0))
/*
* Alignment for namelist and valuelist entries (since they are mixed
* there can be only one alignment value)
*/
#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t))
static inline int
xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
{
if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
return sizeof(struct xfs_attr3_leaf_hdr);
return sizeof(struct xfs_attr_leaf_hdr);
}
static inline struct xfs_attr_leaf_entry *
xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
{
if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
return &leafp->entries[0];
}
/*
* Cast typed pointers for "local" and "remote" name/value structs.
*/
static inline char *
xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
{
struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
}
static inline xfs_attr_leaf_name_remote_t *
xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
{
return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
}
static inline xfs_attr_leaf_name_local_t *
xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
{
return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
}
/*
* Calculate total bytes used (including trailing pad for alignment) for
* a "local" name/value structure, a "remote" name/value structure, and
* a pointer which might be either.
*/
static inline int xfs_attr_leaf_entsize_remote(int nlen)
{
return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
}
static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
{
return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
}
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
{
return (((bsize) >> 1) + ((bsize) >> 2));
}
/*
* Remote attribute block format definition
*
* There is one of these headers per filesystem block in a remote attribute.
* This is done to ensure there is a 1:1 mapping between the attribute value
* length and the number of blocks needed to store the attribute. This makes the
* verification of a buffer a little more complex, but greatly simplifies the
* allocation, reading and writing of these attributes as we don't have to guess
* the number of blocks needed to store the attribute data.
*/
#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */
struct xfs_attr3_rmt_hdr {
__be32 rm_magic;
__be32 rm_offset;
__be32 rm_bytes;
__be32 rm_crc;
uuid_t rm_uuid;
__be64 rm_owner;
__be64 rm_blkno;
__be64 rm_lsn;
};
#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \
((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
sizeof(struct xfs_attr3_rmt_hdr) : 0))
#endif /* __XFS_DA_FORMAT_H__ */

243
fs/xfs/libxfs/xfs_dinode.h Normal file
View File

@@ -0,0 +1,243 @@
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_DINODE_H__
#define __XFS_DINODE_H__
#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3)
typedef struct xfs_timestamp {
__be32 t_sec; /* timestamp seconds */
__be32 t_nsec; /* timestamp nanoseconds */
} xfs_timestamp_t;
/*
* On-disk inode structure.
*
* This is just the header or "dinode core", the inode is expanded to fill a
* variable size the leftover area split into a data and an attribute fork.
* The format of the data and attribute fork depends on the format of the
* inode as indicated by di_format and di_aformat. To access the data and
* attribute use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros
* below.
*
* There is a very similar struct icdinode in xfs_inode which matches the
* layout of the first 96 bytes of this structure, but is kept in native
* format instead of big endian.
*
* Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
* padding field for v3 inodes.
*/
typedef struct xfs_dinode {
__be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
__be16 di_mode; /* mode and type of file */
__u8 di_version; /* inode version */
__u8 di_format; /* format of di_c data */
__be16 di_onlink; /* old number of links to file */
__be32 di_uid; /* owner's user id */
__be32 di_gid; /* owner's group id */
__be32 di_nlink; /* number of links to file */
__be16 di_projid_lo; /* lower part of owner's project id */
__be16 di_projid_hi; /* higher part owner's project id */
__u8 di_pad[6]; /* unused, zeroed space */
__be16 di_flushiter; /* incremented on flush */
xfs_timestamp_t di_atime; /* time last accessed */
xfs_timestamp_t di_mtime; /* time last modified */
xfs_timestamp_t di_ctime; /* time created/inode modified */
__be64 di_size; /* number of bytes in file */
__be64 di_nblocks; /* # of direct & btree blocks used */
__be32 di_extsize; /* basic/minimum extent size for file */
__be32 di_nextents; /* number of extents in data fork */
__be16 di_anextents; /* number of extents in attribute fork*/
__u8 di_forkoff; /* attr fork offs, <<3 for 64b align */
__s8 di_aformat; /* format of attr fork's data */
__be32 di_dmevmask; /* DMIG event mask */
__be16 di_dmstate; /* DMIG state info */
__be16 di_flags; /* random flags, XFS_DIFLAG_... */
__be32 di_gen; /* generation number */
/* di_next_unlinked is the only non-core field in the old dinode */
__be32 di_next_unlinked;/* agi unlinked list ptr */
/* start of the extended dinode, writable fields */
__le32 di_crc; /* CRC of the inode */
__be64 di_changecount; /* number of attribute changes */
__be64 di_lsn; /* flush sequence */
__be64 di_flags2; /* more random flags */
__u8 di_pad2[16]; /* more padding for future expansion */
/* fields only written to during inode creation */
xfs_timestamp_t di_crtime; /* time created */
__be64 di_ino; /* inode number */
uuid_t di_uuid; /* UUID of the filesystem */
/* structure must be padded to 64 bit alignment */
} xfs_dinode_t;
#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc)
#define DI_MAX_FLUSH 0xffff
/*
* Size of the core inode on disk. Version 1 and 2 inodes have
* the same size, but version 3 has grown a few additional fields.
*/
static inline uint xfs_dinode_size(int version)
{
if (version == 3)
return sizeof(struct xfs_dinode);
return offsetof(struct xfs_dinode, di_crc);
}
/*
* The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
* Since the pathconf interface is signed, we use 2^31 - 1 instead.
* The old inode format had a 16 bit link count, so its maximum is USHRT_MAX.
*/
#define XFS_MAXLINK ((1U << 31) - 1U)
#define XFS_MAXLINK_1 65535U
/*
* Values for di_format
*/
typedef enum xfs_dinode_fmt {
XFS_DINODE_FMT_DEV, /* xfs_dev_t */
XFS_DINODE_FMT_LOCAL, /* bulk data */
XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */
XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */
XFS_DINODE_FMT_UUID /* uuid_t */
} xfs_dinode_fmt_t;
/*
* Inode minimum and maximum sizes.
*/
#define XFS_DINODE_MIN_LOG 8
#define XFS_DINODE_MAX_LOG 11
#define XFS_DINODE_MIN_SIZE (1 << XFS_DINODE_MIN_LOG)
#define XFS_DINODE_MAX_SIZE (1 << XFS_DINODE_MAX_LOG)
/*
* Inode size for given fs.
*/
#define XFS_LITINO(mp, version) \
((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version)))
/*
* Inode data & attribute fork sizes, per inode.
*/
#define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0)
#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3))
#define XFS_DFORK_DSIZE(dip,mp) \
(XFS_DFORK_Q(dip) ? \
XFS_DFORK_BOFF(dip) : \
XFS_LITINO(mp, (dip)->di_version))
#define XFS_DFORK_ASIZE(dip,mp) \
(XFS_DFORK_Q(dip) ? \
XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \
0)
#define XFS_DFORK_SIZE(dip,mp,w) \
((w) == XFS_DATA_FORK ? \
XFS_DFORK_DSIZE(dip, mp) : \
XFS_DFORK_ASIZE(dip, mp))
/*
* Return pointers to the data or attribute forks.
*/
#define XFS_DFORK_DPTR(dip) \
((char *)dip + xfs_dinode_size(dip->di_version))
#define XFS_DFORK_APTR(dip) \
(XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
#define XFS_DFORK_PTR(dip,w) \
((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip))
#define XFS_DFORK_FORMAT(dip,w) \
((w) == XFS_DATA_FORK ? \
(dip)->di_format : \
(dip)->di_aformat)
#define XFS_DFORK_NEXTENTS(dip,w) \
((w) == XFS_DATA_FORK ? \
be32_to_cpu((dip)->di_nextents) : \
be16_to_cpu((dip)->di_anextents))
#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)((bp)->b_addr))
/*
* For block and character special files the 32bit dev_t is stored at the
* beginning of the data fork.
*/
static inline xfs_dev_t xfs_dinode_get_rdev(struct xfs_dinode *dip)
{
return be32_to_cpu(*(__be32 *)XFS_DFORK_DPTR(dip));
}
static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
{
*(__be32 *)XFS_DFORK_DPTR(dip) = cpu_to_be32(rdev);
}
/*
* Values for di_flags
* There should be a one-to-one correspondence between these flags and the
* XFS_XFLAG_s.
*/
#define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */
#define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */
#define XFS_DIFLAG_NEWRTBM_BIT 2 /* for rtbitmap inode, new format */
#define XFS_DIFLAG_IMMUTABLE_BIT 3 /* inode is immutable */
#define XFS_DIFLAG_APPEND_BIT 4 /* inode is append-only */
#define XFS_DIFLAG_SYNC_BIT 5 /* inode is written synchronously */
#define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */
#define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */
#define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */
#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */
#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */
#define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */
#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
#define XFS_DIFLAG_IMMUTABLE (1 << XFS_DIFLAG_IMMUTABLE_BIT)
#define XFS_DIFLAG_APPEND (1 << XFS_DIFLAG_APPEND_BIT)
#define XFS_DIFLAG_SYNC (1 << XFS_DIFLAG_SYNC_BIT)
#define XFS_DIFLAG_NOATIME (1 << XFS_DIFLAG_NOATIME_BIT)
#define XFS_DIFLAG_NODUMP (1 << XFS_DIFLAG_NODUMP_BIT)
#define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT)
#define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT)
#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT)
#define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT)
#ifdef CONFIG_XFS_RT
#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
#else
#define XFS_IS_REALTIME_INODE(ip) (0)
#endif
#define XFS_DIFLAG_ANY \
(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
#endif /* __XFS_DINODE_H__ */

762
fs/xfs/libxfs/xfs_dir2.c Normal file
View File

@@ -0,0 +1,762 @@
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_dinode.h"
struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
/*
* ASCII case-insensitive (ie. A-Z) support for directories that was
* used in IRIX.
*/
STATIC xfs_dahash_t
xfs_ascii_ci_hashname(
struct xfs_name *name)
{
xfs_dahash_t hash;
int i;
for (i = 0, hash = 0; i < name->len; i++)
hash = tolower(name->name[i]) ^ rol32(hash, 7);
return hash;
}
STATIC enum xfs_dacmp
xfs_ascii_ci_compname(
struct xfs_da_args *args,
const unsigned char *name,
int len)
{
enum xfs_dacmp result;
int i;
if (args->namelen != len)
return XFS_CMP_DIFFERENT;
result = XFS_CMP_EXACT;
for (i = 0; i < len; i++) {
if (args->name[i] == name[i])
continue;
if (tolower(args->name[i]) != tolower(name[i]))
return XFS_CMP_DIFFERENT;
result = XFS_CMP_CASE;
}
return result;
}
static struct xfs_nameops xfs_ascii_ci_nameops = {
.hashname = xfs_ascii_ci_hashname,
.compname = xfs_ascii_ci_compname,
};
int
xfs_da_mount(
struct xfs_mount *mp)
{
struct xfs_da_geometry *dageo;
int nodehdr_size;
ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
XFS_MAX_BLOCKSIZE);
mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
nodehdr_size = mp->m_dir_inode_ops->node_hdr_size;
mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
KM_SLEEP | KM_MAYFAIL);
mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
KM_SLEEP | KM_MAYFAIL);
if (!mp->m_dir_geo || !mp->m_attr_geo) {
kmem_free(mp->m_dir_geo);
kmem_free(mp->m_attr_geo);
return -ENOMEM;
}
/* set up directory geometry */
dageo = mp->m_dir_geo;
dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
dageo->fsblog = mp->m_sb.sb_blocklog;
dageo->blksize = 1 << dageo->blklog;
dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
/*
* Now we've set up the block conversion variables, we can calculate the
* segment block constants using the geometry structure.
*/
dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET);
dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET);
dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
dageo->node_ents = (dageo->blksize - nodehdr_size) /
(uint)sizeof(xfs_da_node_entry_t);
dageo->magicpct = (dageo->blksize * 37) / 100;
/* set up attribute geometry - single fsb only */
dageo = mp->m_attr_geo;
dageo->blklog = mp->m_sb.sb_blocklog;
dageo->fsblog = mp->m_sb.sb_blocklog;
dageo->blksize = 1 << dageo->blklog;
dageo->fsbcount = 1;
dageo->node_ents = (dageo->blksize - nodehdr_size) /
(uint)sizeof(xfs_da_node_entry_t);
dageo->magicpct = (dageo->blksize * 37) / 100;
if (xfs_sb_version_hasasciici(&mp->m_sb))
mp->m_dirnameops = &xfs_ascii_ci_nameops;
else
mp->m_dirnameops = &xfs_default_nameops;
return 0;
}
void
xfs_da_unmount(
struct xfs_mount *mp)
{
kmem_free(mp->m_dir_geo);
kmem_free(mp->m_attr_geo);
}
/*
* Return 1 if directory contains only "." and "..".
*/
int
xfs_dir_isempty(
xfs_inode_t *dp)
{
xfs_dir2_sf_hdr_t *sfp;
ASSERT(S_ISDIR(dp->i_d.di_mode));
if (dp->i_d.di_size == 0) /* might happen during shutdown. */
return 1;
if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
return 0;
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
return !sfp->count;
}
/*
* Validate a given inode number.
*/
int
xfs_dir_ino_validate(
xfs_mount_t *mp,
xfs_ino_t ino)
{
xfs_agblock_t agblkno;
xfs_agino_t agino;
xfs_agnumber_t agno;
int ino_ok;
int ioff;
agno = XFS_INO_TO_AGNO(mp, ino);
agblkno = XFS_INO_TO_AGBNO(mp, ino);
ioff = XFS_INO_TO_OFFSET(mp, ino);
agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
ino_ok =
agno < mp->m_sb.sb_agcount &&
agblkno < mp->m_sb.sb_agblocks &&
agblkno != 0 &&
ioff < (1 << mp->m_sb.sb_inopblog) &&
XFS_AGINO_TO_INO(mp, agno, agino) == ino;
if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
XFS_RANDOM_DIR_INO_VALIDATE))) {
xfs_warn(mp, "Invalid inode number 0x%Lx",
(unsigned long long) ino);
XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
return 0;
}
/*
* Initialize a directory with its "." and ".." entries.
*/
int
xfs_dir_init(
xfs_trans_t *tp,
xfs_inode_t *dp,
xfs_inode_t *pdp)
{
struct xfs_da_args *args;
int error;
ASSERT(S_ISDIR(dp->i_d.di_mode));
error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
if (error)
return error;
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->dp = dp;
args->trans = tp;
error = xfs_dir2_sf_create(args, pdp->i_ino);
kmem_free(args);
return error;
}
/*
Enter a name in a directory.
*/
int
xfs_dir_createname(
xfs_trans_t *tp,
xfs_inode_t *dp,
struct xfs_name *name,
xfs_ino_t inum, /* new entry inode number */
xfs_fsblock_t *first, /* bmap's firstblock */
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
rval = xfs_dir_ino_validate(tp->t_mountp, inum);
if (rval)
return rval;
XFS_STATS_INC(xs_dir_create);
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
args->hashval = dp->i_mount->m_dirnameops->hashname(name);
args->inumber = inum;
args->dp = dp;
args->firstblock = first;
args->flist = flist;
args->total = total;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_addname(args);
goto out_free;
}
rval = xfs_dir2_isblock(args, &v);
if (rval)
goto out_free;
if (v) {
rval = xfs_dir2_block_addname(args);
goto out_free;
}
rval = xfs_dir2_isleaf(args, &v);
if (rval)
goto out_free;
if (v)
rval = xfs_dir2_leaf_addname(args);
else
rval = xfs_dir2_node_addname(args);
out_free:
kmem_free(args);
return rval;
}
/*
* If doing a CI lookup and case-insensitive match, dup actual name into
* args.value. Return EEXIST for success (ie. name found) or an error.
*/
int
xfs_dir_cilookup_result(
struct xfs_da_args *args,
const unsigned char *name,
int len)
{
if (args->cmpresult == XFS_CMP_DIFFERENT)
return -ENOENT;
if (args->cmpresult != XFS_CMP_CASE ||
!(args->op_flags & XFS_DA_OP_CILOOKUP))
return -EEXIST;
args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
if (!args->value)
return -ENOMEM;
memcpy(args->value, name, len);
args->valuelen = len;
return -EEXIST;
}
/*
* Lookup a name in a directory, give back the inode number.
* If ci_name is not NULL, returns the actual name in ci_name if it differs
* to name, or ci_name->name is set to NULL for an exact match.
*/
int
xfs_dir_lookup(
xfs_trans_t *tp,
xfs_inode_t *dp,
struct xfs_name *name,
xfs_ino_t *inum, /* out: inode number */
struct xfs_name *ci_name) /* out: actual name if CI match */
{
struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_lookup);
/*
* We need to use KM_NOFS here so that lockdep will not throw false
* positive deadlock warnings on a non-transactional lookup path. It is
* safe to recurse into inode recalim in that case, but lockdep can't
* easily be taught about it. Hence KM_NOFS avoids having to add more
* lockdep Doing this avoids having to add a bunch of lockdep class
* annotations into the reclaim path for the ilock.
*/
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
args->hashval = dp->i_mount->m_dirnameops->hashname(name);
args->dp = dp;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
args->op_flags = XFS_DA_OP_OKNOENT;
if (ci_name)
args->op_flags |= XFS_DA_OP_CILOOKUP;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_lookup(args);
goto out_check_rval;
}
rval = xfs_dir2_isblock(args, &v);
if (rval)
goto out_free;
if (v) {
rval = xfs_dir2_block_lookup(args);
goto out_check_rval;
}
rval = xfs_dir2_isleaf(args, &v);
if (rval)
goto out_free;
if (v)
rval = xfs_dir2_leaf_lookup(args);
else
rval = xfs_dir2_node_lookup(args);
out_check_rval:
if (rval == -EEXIST)
rval = 0;
if (!rval) {
*inum = args->inumber;
if (ci_name) {
ci_name->name = args->value;
ci_name->len = args->valuelen;
}
}
out_free:
kmem_free(args);
return rval;
}
/*
* Remove an entry from a directory.
*/
int
xfs_dir_removename(
xfs_trans_t *tp,
xfs_inode_t *dp,
struct xfs_name *name,
xfs_ino_t ino,
xfs_fsblock_t *first, /* bmap's firstblock */
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_remove);
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
args->hashval = dp->i_mount->m_dirnameops->hashname(name);
args->inumber = ino;
args->dp = dp;
args->firstblock = first;
args->flist = flist;
args->total = total;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_removename(args);
goto out_free;
}
rval = xfs_dir2_isblock(args, &v);
if (rval)
goto out_free;
if (v) {
rval = xfs_dir2_block_removename(args);
goto out_free;
}
rval = xfs_dir2_isleaf(args, &v);
if (rval)
goto out_free;
if (v)
rval = xfs_dir2_leaf_removename(args);
else
rval = xfs_dir2_node_removename(args);
out_free:
kmem_free(args);
return rval;
}
/*
* Replace the inode number of a directory entry.
*/
int
xfs_dir_replace(
xfs_trans_t *tp,
xfs_inode_t *dp,
struct xfs_name *name, /* name of entry to replace */
xfs_ino_t inum, /* new inode number */
xfs_fsblock_t *first, /* bmap's firstblock */
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
rval = xfs_dir_ino_validate(tp->t_mountp, inum);
if (rval)
return rval;
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
args->hashval = dp->i_mount->m_dirnameops->hashname(name);
args->inumber = inum;
args->dp = dp;
args->firstblock = first;
args->flist = flist;
args->total = total;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_replace(args);
goto out_free;
}
rval = xfs_dir2_isblock(args, &v);
if (rval)
goto out_free;
if (v) {
rval = xfs_dir2_block_replace(args);
goto out_free;
}
rval = xfs_dir2_isleaf(args, &v);
if (rval)
goto out_free;
if (v)
rval = xfs_dir2_leaf_replace(args);
else
rval = xfs_dir2_node_replace(args);
out_free:
kmem_free(args);
return rval;
}
/*
* See if this entry can be added to the directory without allocating space.
* First checks that the caller couldn't reserve enough space (resblks = 0).
*/
int
xfs_dir_canenter(
xfs_trans_t *tp,
xfs_inode_t *dp,
struct xfs_name *name, /* name of entry to add */
uint resblks)
{
struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
if (resblks)
return 0;
ASSERT(S_ISDIR(dp->i_d.di_mode));
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
args->hashval = dp->i_mount->m_dirnameops->hashname(name);
args->dp = dp;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
XFS_DA_OP_OKNOENT;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_addname(args);
goto out_free;
}
rval = xfs_dir2_isblock(args, &v);
if (rval)
goto out_free;
if (v) {
rval = xfs_dir2_block_addname(args);
goto out_free;
}
rval = xfs_dir2_isleaf(args, &v);
if (rval)
goto out_free;
if (v)
rval = xfs_dir2_leaf_addname(args);
else
rval = xfs_dir2_node_addname(args);
out_free:
kmem_free(args);
return rval;
}
/*
* Utility routines.
*/
/*
* Add a block to the directory.
*
* This routine is for data and free blocks, not leaf/node blocks which are
* handled by xfs_da_grow_inode.
*/
int
xfs_dir2_grow_inode(
struct xfs_da_args *args,
int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */
xfs_dir2_db_t *dbp) /* out: block number added */
{
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
xfs_fileoff_t bno; /* directory offset of new block */
int count; /* count of filesystem blocks */
int error;
trace_xfs_dir2_grow_inode(args, space);
/*
* Set lowest possible block in the space requested.
*/
bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
count = args->geo->fsbcount;
error = xfs_da_grow_inode_int(args, &bno, count);
if (error)
return error;
*dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno);
/*
* Update file's size if this is the data space and it grew.
*/
if (space == XFS_DIR2_DATA_SPACE) {
xfs_fsize_t size; /* directory file (data) size */
size = XFS_FSB_TO_B(mp, bno + count);
if (size > dp->i_d.di_size) {
dp->i_d.di_size = size;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
}
}
return 0;
}
/*
* See if the directory is a single-block form directory.
*/
int
xfs_dir2_isblock(
struct xfs_da_args *args,
int *vp) /* out: 1 is block, 0 is not block */
{
xfs_fileoff_t last; /* last file offset */
int rval;
if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
return rval;
rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize);
*vp = rval;
return 0;
}
/*
* See if the directory is a single-leaf form directory.
*/
int
xfs_dir2_isleaf(
struct xfs_da_args *args,
int *vp) /* out: 1 is block, 0 is not block */
{
xfs_fileoff_t last; /* last file offset */
int rval;
if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
return rval;
*vp = last == args->geo->leafblk + args->geo->fsbcount;
return 0;
}
/*
* Remove the given block from the directory.
* This routine is used for data and free blocks, leaf/node are done
* by xfs_da_shrink_inode.
*/
int
xfs_dir2_shrink_inode(
xfs_da_args_t *args,
xfs_dir2_db_t db,
struct xfs_buf *bp)
{
xfs_fileoff_t bno; /* directory file offset */
xfs_dablk_t da; /* directory file offset */
int done; /* bunmap is finished */
xfs_inode_t *dp;
int error;
xfs_mount_t *mp;
xfs_trans_t *tp;
trace_xfs_dir2_shrink_inode(args, db);
dp = args->dp;
mp = dp->i_mount;
tp = args->trans;
da = xfs_dir2_db_to_da(args->geo, db);
/*
* Unmap the fsblock(s).
*/
if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount,
XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
&done))) {
/*
* ENOSPC actually can happen if we're in a removename with
* no space reservation, and the resulting block removal
* would cause a bmap btree split or conversion from extents
* to btree. This can only happen for un-fragmented
* directory blocks, since you need to be punching out
* the middle of an extent.
* In this case we need to leave the block in the file,
* and not binval it.
* So the block has to be in a consistent empty state
* and appropriately logged.
* We don't free up the buffer, the caller can tell it
* hasn't happened since it got an error back.
*/
return error;
}
ASSERT(done);
/*
* Invalidate the buffer from the transaction.
*/
xfs_trans_binval(tp, bp);
/*
* If it's not a data block, we're done.
*/
if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET))
return 0;
/*
* If the block isn't the last one in the directory, we're done.
*/
if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0))
return 0;
bno = da;
if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
/*
* This can't really happen unless there's kernel corruption.
*/
return error;
}
if (db == args->geo->datablk)
ASSERT(bno == 0);
else
ASSERT(bno > 0);
/*
* Set the size to the new last block.
*/
dp->i_d.di_size = XFS_FSB_TO_B(mp, bno);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
return 0;
}

180
fs/xfs/libxfs/xfs_dir2.h Normal file
View File

@@ -0,0 +1,180 @@
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_DIR2_H__
#define __XFS_DIR2_H__
struct xfs_bmap_free;
struct xfs_da_args;
struct xfs_inode;
struct xfs_mount;
struct xfs_trans;
struct xfs_dir2_sf_hdr;
struct xfs_dir2_sf_entry;
struct xfs_dir2_data_hdr;
struct xfs_dir2_data_entry;
struct xfs_dir2_data_unused;
extern struct xfs_name xfs_name_dotdot;
/*
* directory operations vector for encode/decode routines
*/
struct xfs_dir_ops {
int (*sf_entsize)(struct xfs_dir2_sf_hdr *hdr, int len);
struct xfs_dir2_sf_entry *
(*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep);
__uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep,
__uint8_t ftype);
xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep);
void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep,
xfs_ino_t ino);
xfs_ino_t (*sf_get_parent_ino)(struct xfs_dir2_sf_hdr *hdr);
void (*sf_put_parent_ino)(struct xfs_dir2_sf_hdr *hdr,
xfs_ino_t ino);
int (*data_entsize)(int len);
__uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
void (*data_put_ftype)(struct xfs_dir2_data_entry *dep,
__uint8_t ftype);
__be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep);
struct xfs_dir2_data_free *
(*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr);
xfs_dir2_data_aoff_t data_dot_offset;
xfs_dir2_data_aoff_t data_dotdot_offset;
xfs_dir2_data_aoff_t data_first_offset;
size_t data_entry_offset;
struct xfs_dir2_data_entry *
(*data_dot_entry_p)(struct xfs_dir2_data_hdr *hdr);
struct xfs_dir2_data_entry *
(*data_dotdot_entry_p)(struct xfs_dir2_data_hdr *hdr);
struct xfs_dir2_data_entry *
(*data_first_entry_p)(struct xfs_dir2_data_hdr *hdr);
struct xfs_dir2_data_entry *
(*data_entry_p)(struct xfs_dir2_data_hdr *hdr);
struct xfs_dir2_data_unused *
(*data_unused_p)(struct xfs_dir2_data_hdr *hdr);
int leaf_hdr_size;
void (*leaf_hdr_to_disk)(struct xfs_dir2_leaf *to,
struct xfs_dir3_icleaf_hdr *from);
void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to,
struct xfs_dir2_leaf *from);
int (*leaf_max_ents)(struct xfs_da_geometry *geo);
struct xfs_dir2_leaf_entry *
(*leaf_ents_p)(struct xfs_dir2_leaf *lp);
int node_hdr_size;
void (*node_hdr_to_disk)(struct xfs_da_intnode *to,
struct xfs_da3_icnode_hdr *from);
void (*node_hdr_from_disk)(struct xfs_da3_icnode_hdr *to,
struct xfs_da_intnode *from);
struct xfs_da_node_entry *
(*node_tree_p)(struct xfs_da_intnode *dap);
int free_hdr_size;
void (*free_hdr_to_disk)(struct xfs_dir2_free *to,
struct xfs_dir3_icfree_hdr *from);
void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to,
struct xfs_dir2_free *from);
int (*free_max_bests)(struct xfs_da_geometry *geo);
__be16 * (*free_bests_p)(struct xfs_dir2_free *free);
xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo,
xfs_dir2_db_t db);
int (*db_to_fdindex)(struct xfs_da_geometry *geo,
xfs_dir2_db_t db);
};
extern const struct xfs_dir_ops *
xfs_dir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp);
extern const struct xfs_dir_ops *
xfs_nondir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp);
/*
* Generic directory interface routines
*/
extern void xfs_dir_startup(void);
extern int xfs_da_mount(struct xfs_mount *mp);
extern void xfs_da_unmount(struct xfs_mount *mp);
extern int xfs_dir_isempty(struct xfs_inode *dp);
extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_inode *pdp);
extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t inum,
xfs_fsblock_t *first,
struct xfs_bmap_free *flist, xfs_extlen_t tot);
extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t *inum,
struct xfs_name *ci_name);
extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t ino,
xfs_fsblock_t *first,
struct xfs_bmap_free *flist, xfs_extlen_t tot);
extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t inum,
xfs_fsblock_t *first,
struct xfs_bmap_free *flist, xfs_extlen_t tot);
extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, uint resblks);
/*
* Direct call from the bmap code, bypassing the generic directory layer.
*/
extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
/*
* Interface routines used by userspace utilities
*/
extern int xfs_dir2_isblock(struct xfs_da_args *args, int *r);
extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
extern void xfs_dir2_data_freescan(struct xfs_inode *dp,
struct xfs_dir2_data_hdr *hdr, int *loghead);
extern void xfs_dir2_data_log_entry(struct xfs_da_args *args,
struct xfs_buf *bp, struct xfs_dir2_data_entry *dep);
extern void xfs_dir2_data_log_header(struct xfs_da_args *args,
struct xfs_buf *bp);
extern void xfs_dir2_data_log_unused(struct xfs_da_args *args,
struct xfs_buf *bp, struct xfs_dir2_data_unused *dup);
extern void xfs_dir2_data_make_free(struct xfs_da_args *args,
struct xfs_buf *bp, xfs_dir2_data_aoff_t offset,
xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
extern void xfs_dir2_data_use_free(struct xfs_da_args *args,
struct xfs_buf *bp, struct xfs_dir2_data_unused *dup,
xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
int *needlogp, int *needscanp);
extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
struct xfs_dir2_data_unused *dup);
extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_free_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_data_buf_ops;
#endif /* __XFS_DIR2_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,274 @@
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_DIR2_PRIV_H__
#define __XFS_DIR2_PRIV_H__
struct dir_context;
/*
* Directory offset/block conversion functions.
*
* DB blocks here are logical directory block numbers, not filesystem blocks.
*/
/*
* Convert dataptr to byte in file space
*/
static inline xfs_dir2_off_t
xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp)
{
return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
}
/*
* Convert byte in file space to dataptr. It had better be aligned.
*/
static inline xfs_dir2_dataptr_t
xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by)
{
return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
}
/*
* Convert byte in space to (DB) block
*/
static inline xfs_dir2_db_t
xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
{
return (xfs_dir2_db_t)(by >> geo->blklog);
}
/*
* Convert dataptr to a block number
*/
static inline xfs_dir2_db_t
xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
{
return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp));
}
/*
* Convert byte in space to offset in a block
*/
static inline xfs_dir2_data_aoff_t
xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
{
return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1));
}
/*
* Convert dataptr to a byte offset in a block
*/
static inline xfs_dir2_data_aoff_t
xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
{
return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp));
}
/*
* Convert block and offset to byte in space
*/
static inline xfs_dir2_off_t
xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
xfs_dir2_data_aoff_t o)
{
return ((xfs_dir2_off_t)db << geo->blklog) + o;
}
/*
* Convert block (DB) to block (dablk)
*/
static inline xfs_dablk_t
xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog));
}
/*
* Convert byte in space to (DA) block
*/
static inline xfs_dablk_t
xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
{
return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by));
}
/*
* Convert block and offset to dataptr
*/
static inline xfs_dir2_dataptr_t
xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
xfs_dir2_data_aoff_t o)
{
return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o));
}
/*
* Convert block (dablk) to block (DB)
*/
static inline xfs_dir2_db_t
xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da)
{
return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog));
}
/*
* Convert block (dablk) to byte offset in space
*/
static inline xfs_dir2_off_t
xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da)
{
return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0);
}
/*
* Directory tail pointer accessor functions. Based on block geometry.
*/
static inline struct xfs_dir2_block_tail *
xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr)
{
return ((struct xfs_dir2_block_tail *)
((char *)hdr + geo->blksize)) - 1;
}
static inline struct xfs_dir2_leaf_tail *
xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
{
return (struct xfs_dir2_leaf_tail *)
((char *)lp + geo->blksize -
sizeof(struct xfs_dir2_leaf_tail));
}
/* xfs_dir2.c */
extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
xfs_dir2_db_t *dbp);
extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
const unsigned char *name, int len);
#define S_SHIFT 12
extern const unsigned char xfs_mode_to_ftype[];
extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp,
__uint8_t filetype);
/* xfs_dir2_block.c */
extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_buf **bpp);
extern int xfs_dir2_block_addname(struct xfs_da_args *args);
extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
extern int xfs_dir2_block_removename(struct xfs_da_args *args);
extern int xfs_dir2_block_replace(struct xfs_da_args *args);
extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
struct xfs_buf *lbp, struct xfs_buf *dbp);
/* xfs_dir2_data.c */
#ifdef DEBUG
#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp);
#else
#define xfs_dir3_data_check(dp,bp)
#endif
extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
xfs_daddr_t mapped_bno);
extern struct xfs_dir2_data_free *
xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup,
int *loghead);
extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
struct xfs_buf **bpp);
/* xfs_dir2_leaf.c */
extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
struct xfs_buf *dbp);
extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
extern void xfs_dir3_leaf_compact(struct xfs_da_args *args,
struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp);
extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
struct xfs_dir2_leaf_entry *ents, int *indexp,
int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
struct xfs_buf **bpp, __uint16_t magic);
extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
struct xfs_buf *bp, int first, int last);
extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
struct xfs_buf *bp);
extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
struct xfs_buf *lbp);
extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
struct xfs_buf *lbp, xfs_dir2_db_t db);
extern struct xfs_dir2_leaf_entry *
xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
struct xfs_dir2_leaf_entry *ents, int index, int compact,
int lowstale, int highstale, int *lfloglow, int *lfloghigh);
extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
/* xfs_dir2_node.c */
extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
struct xfs_buf *lbp);
extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
struct xfs_buf *bp, int *count);
extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
struct xfs_da_args *args, int *indexp,
struct xfs_da_state *state);
extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp,
struct xfs_buf *leaf2_bp);
extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
struct xfs_da_state_blk *drop_blk,
struct xfs_da_state_blk *save_blk);
extern int xfs_dir2_node_addname(struct xfs_da_args *args);
extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
extern int xfs_dir2_node_removename(struct xfs_da_args *args);
extern int xfs_dir2_node_replace(struct xfs_da_args *args);
extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
int *rvalp);
extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t fbno, struct xfs_buf **bpp);
/* xfs_dir2_sf.c */
extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
int size, xfs_dir2_sf_hdr_t *sfhp);
extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
size_t bufsize);
#endif /* __XFS_DIR2_PRIV_H__ */

1184
fs/xfs/libxfs/xfs_dir2_sf.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,290 @@
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_error.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
int
xfs_calc_dquots_per_chunk(
unsigned int nbblks) /* basic block units */
{
unsigned int ndquots;
ASSERT(nbblks > 0);
ndquots = BBTOB(nbblks);
do_div(ndquots, sizeof(xfs_dqblk_t));
return ndquots;
}
/*
* Do some primitive error checking on ondisk dquot data structures.
*/
int
xfs_dqcheck(
struct xfs_mount *mp,
xfs_disk_dquot_t *ddq,
xfs_dqid_t id,
uint type, /* used only when IO_dorepair is true */
uint flags,
char *str)
{
xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
int errs = 0;
/*
* We can encounter an uninitialized dquot buffer for 2 reasons:
* 1. If we crash while deleting the quotainode(s), and those blks got
* used for user data. This is because we take the path of regular
* file deletion; however, the size field of quotainodes is never
* updated, so all the tricks that we play in itruncate_finish
* don't quite matter.
*
* 2. We don't play the quota buffers when there's a quotaoff logitem.
* But the allocation will be replayed so we'll end up with an
* uninitialized quota block.
*
* This is all fine; things are still consistent, and we haven't lost
* any quota information. Just don't complain about bad dquot blks.
*/
if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
errs++;
}
if (ddq->d_version != XFS_DQUOT_VERSION) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
str, id, ddq->d_version, XFS_DQUOT_VERSION);
errs++;
}
if (ddq->d_flags != XFS_DQ_USER &&
ddq->d_flags != XFS_DQ_PROJ &&
ddq->d_flags != XFS_DQ_GROUP) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : XFS dquot ID 0x%x, unknown flags 0x%x",
str, id, ddq->d_flags);
errs++;
}
if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : ondisk-dquot 0x%p, ID mismatch: "
"0x%x expected, found id 0x%x",
str, ddq, id, be32_to_cpu(ddq->d_id));
errs++;
}
if (!errs && ddq->d_id) {
if (ddq->d_blk_softlimit &&
be64_to_cpu(ddq->d_bcount) >
be64_to_cpu(ddq->d_blk_softlimit)) {
if (!ddq->d_btimer) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
str, (int)be32_to_cpu(ddq->d_id), ddq);
errs++;
}
}
if (ddq->d_ino_softlimit &&
be64_to_cpu(ddq->d_icount) >
be64_to_cpu(ddq->d_ino_softlimit)) {
if (!ddq->d_itimer) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
str, (int)be32_to_cpu(ddq->d_id), ddq);
errs++;
}
}
if (ddq->d_rtb_softlimit &&
be64_to_cpu(ddq->d_rtbcount) >
be64_to_cpu(ddq->d_rtb_softlimit)) {
if (!ddq->d_rtbtimer) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
str, (int)be32_to_cpu(ddq->d_id), ddq);
errs++;
}
}
}
if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
return errs;
if (flags & XFS_QMOPT_DOWARN)
xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
/*
* Typically, a repair is only requested by quotacheck.
*/
ASSERT(id != -1);
ASSERT(flags & XFS_QMOPT_DQREPAIR);
memset(d, 0, sizeof(xfs_dqblk_t));
d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
d->dd_diskdq.d_flags = type;
d->dd_diskdq.d_id = cpu_to_be32(id);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
XFS_DQUOT_CRC_OFF);
}
return errs;
}
STATIC bool
xfs_dquot_buf_verify_crc(
struct xfs_mount *mp,
struct xfs_buf *bp)
{
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
int ndquots;
int i;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return true;
/*
* if we are in log recovery, the quota subsystem has not been
* initialised so we have no quotainfo structure. In that case, we need
* to manually calculate the number of dquots in the buffer.
*/
if (mp->m_quotainfo)
ndquots = mp->m_quotainfo->qi_dqperchunk;
else
ndquots = xfs_calc_dquots_per_chunk(
XFS_BB_TO_FSB(mp, bp->b_length));
for (i = 0; i < ndquots; i++, d++) {
if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
XFS_DQUOT_CRC_OFF))
return false;
if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
return false;
}
return true;
}
STATIC bool
xfs_dquot_buf_verify(
struct xfs_mount *mp,
struct xfs_buf *bp)
{
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
xfs_dqid_t id = 0;
int ndquots;
int i;
/*
* if we are in log recovery, the quota subsystem has not been
* initialised so we have no quotainfo structure. In that case, we need
* to manually calculate the number of dquots in the buffer.
*/
if (mp->m_quotainfo)
ndquots = mp->m_quotainfo->qi_dqperchunk;
else
ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
/*
* On the first read of the buffer, verify that each dquot is valid.
* We don't know what the id of the dquot is supposed to be, just that
* they should be increasing monotonically within the buffer. If the
* first id is corrupt, then it will fail on the second dquot in the
* buffer so corruptions could point to the wrong dquot in this case.
*/
for (i = 0; i < ndquots; i++) {
struct xfs_disk_dquot *ddq;
int error;
ddq = &d[i].dd_diskdq;
if (i == 0)
id = be32_to_cpu(ddq->d_id);
error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
"xfs_dquot_buf_verify");
if (error)
return false;
}
return true;
}
static void
xfs_dquot_buf_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify_crc(mp, bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_dquot_buf_verify(mp, bp))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
}
/*
* we don't calculate the CRC here as that is done when the dquot is flushed to
* the buffer after the update is done. This ensures that the dquot in the
* buffer always has an up-to-date CRC value.
*/
static void
xfs_dquot_buf_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify(mp, bp)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
.verify_read = xfs_dquot_buf_read_verify,
.verify_write = xfs_dquot_buf_write_verify,
};

428
fs/xfs/libxfs/xfs_format.h Normal file
View File

@@ -0,0 +1,428 @@
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_FORMAT_H__
#define __XFS_FORMAT_H__
/*
* XFS On Disk Format Definitions
*
* This header file defines all the on-disk format definitions for
* general XFS objects. Directory and attribute related objects are defined in
* xfs_da_format.h, which log and log item formats are defined in
* xfs_log_format.h. Everything else goes here.
*/
struct xfs_mount;
struct xfs_trans;
struct xfs_inode;
struct xfs_buf;
struct xfs_ifork;
/*
* RealTime Device format definitions
*/
/* Min and max rt extent sizes, specified in bytes */
#define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */
#define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64kB */
#define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */
#define XFS_BLOCKSIZE(mp) ((mp)->m_sb.sb_blocksize)
#define XFS_BLOCKMASK(mp) ((mp)->m_blockmask)
#define XFS_BLOCKWSIZE(mp) ((mp)->m_blockwsize)
#define XFS_BLOCKWMASK(mp) ((mp)->m_blockwmask)
/*
* RT Summary and bit manipulation macros.
*/
#define XFS_SUMOFFS(mp,ls,bb) ((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb)))
#define XFS_SUMOFFSTOBLOCK(mp,s) \
(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
#define XFS_SUMPTR(mp,bp,so) \
((xfs_suminfo_t *)((bp)->b_addr + \
(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
#define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log)
#define XFS_BLOCKTOBIT(mp,bb) ((bb) << (mp)->m_blkbit_log)
#define XFS_BITTOWORD(mp,bi) \
((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp)))
#define XFS_RTMIN(a,b) ((a) < (b) ? (a) : (b))
#define XFS_RTMAX(a,b) ((a) > (b) ? (a) : (b))
#define XFS_RTLOBIT(w) xfs_lowbit32(w)
#define XFS_RTHIBIT(w) xfs_highbit32(w)
#if XFS_BIG_BLKNOS
#define XFS_RTBLOCKLOG(b) xfs_highbit64(b)
#else
#define XFS_RTBLOCKLOG(b) xfs_highbit32(b)
#endif
/*
* Dquot and dquot block format definitions
*/
#define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */
#define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */
/*
* This is the main portion of the on-disk representation of quota
* information for a user. This is the q_core of the xfs_dquot_t that
* is kept in kernel memory. We pad this with some more expansion room
* to construct the on disk structure.
*/
typedef struct xfs_disk_dquot {
__be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */
__u8 d_version; /* dquot version */
__u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */
__be32 d_id; /* user,project,group id */
__be64 d_blk_hardlimit;/* absolute limit on disk blks */
__be64 d_blk_softlimit;/* preferred limit on disk blks */
__be64 d_ino_hardlimit;/* maximum # allocated inodes */
__be64 d_ino_softlimit;/* preferred inode limit */
__be64 d_bcount; /* disk blocks owned by the user */
__be64 d_icount; /* inodes owned by the user */
__be32 d_itimer; /* zero if within inode limits if not,
this is when we refuse service */
__be32 d_btimer; /* similar to above; for disk blocks */
__be16 d_iwarns; /* warnings issued wrt num inodes */
__be16 d_bwarns; /* warnings issued wrt disk blocks */
__be32 d_pad0; /* 64 bit align */
__be64 d_rtb_hardlimit;/* absolute limit on realtime blks */
__be64 d_rtb_softlimit;/* preferred limit on RT disk blks */
__be64 d_rtbcount; /* realtime blocks owned */
__be32 d_rtbtimer; /* similar to above; for RT disk blocks */
__be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */
__be16 d_pad;
} xfs_disk_dquot_t;
/*
* This is what goes on disk. This is separated from the xfs_disk_dquot because
* carrying the unnecessary padding would be a waste of memory.
*/
typedef struct xfs_dqblk {
xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */
char dd_fill[4]; /* filling for posterity */
/*
* These two are only present on filesystems with the CRC bits set.
*/
__be32 dd_crc; /* checksum */
__be64 dd_lsn; /* last modification in log */
uuid_t dd_uuid; /* location information */
} xfs_dqblk_t;
#define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc)
/*
* Remote symlink format and access functions.
*/
#define XFS_SYMLINK_MAGIC 0x58534c4d /* XSLM */
struct xfs_dsymlink_hdr {
__be32 sl_magic;
__be32 sl_offset;
__be32 sl_bytes;
__be32 sl_crc;
uuid_t sl_uuid;
__be64 sl_owner;
__be64 sl_blkno;
__be64 sl_lsn;
};
#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc)
/*
* The maximum pathlen is 1024 bytes. Since the minimum file system
* blocksize is 512 bytes, we can get a max of 3 extents back from
* bmapi when crc headers are taken into account.
*/
#define XFS_SYMLINK_MAPS 3
#define XFS_SYMLINK_BUF_SPACE(mp, bufsize) \
((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
sizeof(struct xfs_dsymlink_hdr) : 0))
/*
* Allocation Btree format definitions
*
* There are two on-disk btrees, one sorted by blockno and one sorted
* by blockcount and blockno. All blocks look the same to make the code
* simpler; if we have time later, we'll make the optimizations.
*/
#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */
#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */
/*
* Data record/key structure
*/
typedef struct xfs_alloc_rec {
__be32 ar_startblock; /* starting block number */
__be32 ar_blockcount; /* count of free blocks */
} xfs_alloc_rec_t, xfs_alloc_key_t;
typedef struct xfs_alloc_rec_incore {
xfs_agblock_t ar_startblock; /* starting block number */
xfs_extlen_t ar_blockcount; /* count of free blocks */
} xfs_alloc_rec_incore_t;
/* btree pointer type */
typedef __be32 xfs_alloc_ptr_t;
/*
* Block numbers in the AG:
* SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3.
*/
#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1))
#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
/*
* Inode Allocation Btree format definitions
*
* There is a btree for the inode map per allocation group.
*/
#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */
#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */
typedef __uint64_t xfs_inofree_t;
#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3)
#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
{
return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
}
/*
* Data record structure
*/
typedef struct xfs_inobt_rec {
__be32 ir_startino; /* starting inode number */
__be32 ir_freecount; /* count of free inodes (set bits) */
__be64 ir_free; /* free inode mask */
} xfs_inobt_rec_t;
typedef struct xfs_inobt_rec_incore {
xfs_agino_t ir_startino; /* starting inode number */
__int32_t ir_freecount; /* count of free inodes (set bits) */
xfs_inofree_t ir_free; /* free inode mask */
} xfs_inobt_rec_incore_t;
/*
* Key structure
*/
typedef struct xfs_inobt_key {
__be32 ir_startino; /* starting inode number */
} xfs_inobt_key_t;
/* btree pointer type */
typedef __be32 xfs_inobt_ptr_t;
/*
* block numbers in the AG.
*/
#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
/*
* The first data block of an AG depends on whether the filesystem was formatted
* with the finobt feature. If so, account for the finobt reserved root btree
* block.
*/
#define XFS_PREALLOC_BLOCKS(mp) \
(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
XFS_FIBT_BLOCK(mp) + 1 : \
XFS_IBT_BLOCK(mp) + 1)
/*
* BMAP Btree format definitions
*
* This includes both the root block definition that sits inside an inode fork
* and the record/pointer formats for the leaf/node in the blocks.
*/
#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */
/*
* Bmap root header, on-disk form only.
*/
typedef struct xfs_bmdr_block {
__be16 bb_level; /* 0 is a leaf */
__be16 bb_numrecs; /* current # of data records */
} xfs_bmdr_block_t;
/*
* Bmap btree record and extent descriptor.
* l0:63 is an extent flag (value 1 indicates non-normal).
* l0:9-62 are startoff.
* l0:0-8 and l1:21-63 are startblock.
* l1:0-20 are blockcount.
*/
#define BMBT_EXNTFLAG_BITLEN 1
#define BMBT_STARTOFF_BITLEN 54
#define BMBT_STARTBLOCK_BITLEN 52
#define BMBT_BLOCKCOUNT_BITLEN 21
typedef struct xfs_bmbt_rec {
__be64 l0, l1;
} xfs_bmbt_rec_t;
typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
typedef struct xfs_bmbt_rec_host {
__uint64_t l0, l1;
} xfs_bmbt_rec_host_t;
/*
* Values and macros for delayed-allocation startblock fields.
*/
#define STARTBLOCKVALBITS 17
#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20)
#define DSTARTBLOCKMASKBITS (15 + 20)
#define STARTBLOCKMASK \
(((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
#define DSTARTBLOCKMASK \
(((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
static inline int isnullstartblock(xfs_fsblock_t x)
{
return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK;
}
static inline int isnulldstartblock(xfs_dfsbno_t x)
{
return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK;
}
static inline xfs_fsblock_t nullstartblock(int k)
{
ASSERT(k < (1 << STARTBLOCKVALBITS));
return STARTBLOCKMASK | (k);
}
static inline xfs_filblks_t startblockval(xfs_fsblock_t x)
{
return (xfs_filblks_t)((x) & ~STARTBLOCKMASK);
}
/*
* Possible extent formats.
*/
typedef enum {
XFS_EXTFMT_NOSTATE = 0,
XFS_EXTFMT_HASSTATE
} xfs_exntfmt_t;
/*
* Possible extent states.
*/
typedef enum {
XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID
} xfs_exntst_t;
/*
* Incore version of above.
*/
typedef struct xfs_bmbt_irec
{
xfs_fileoff_t br_startoff; /* starting file offset */
xfs_fsblock_t br_startblock; /* starting block number */
xfs_filblks_t br_blockcount; /* number of blocks */
xfs_exntst_t br_state; /* extent state */
} xfs_bmbt_irec_t;
/*
* Key structure for non-leaf levels of the tree.
*/
typedef struct xfs_bmbt_key {
__be64 br_startoff; /* starting file offset */
} xfs_bmbt_key_t, xfs_bmdr_key_t;
/* btree pointer type */
typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
/*
* Generic Btree block format definitions
*
* This is a combination of the actual format used on disk for short and long
* format btrees. The first three fields are shared by both format, but the
* pointers are different and should be used with care.
*
* To get the size of the actual short or long form headers please use the size
* macros below. Never use sizeof(xfs_btree_block).
*
* The blkno, crc, lsn, owner and uuid fields are only available in filesystems
* with the crc feature bit, and all accesses to them must be conditional on
* that flag.
*/
struct xfs_btree_block {
__be32 bb_magic; /* magic number for block type */
__be16 bb_level; /* 0 is a leaf */
__be16 bb_numrecs; /* current # of data records */
union {
struct {
__be32 bb_leftsib;
__be32 bb_rightsib;
__be64 bb_blkno;
__be64 bb_lsn;
uuid_t bb_uuid;
__be32 bb_owner;
__le32 bb_crc;
} s; /* short form pointers */
struct {
__be64 bb_leftsib;
__be64 bb_rightsib;
__be64 bb_blkno;
__be64 bb_lsn;
uuid_t bb_uuid;
__be64 bb_owner;
__le32 bb_crc;
__be32 bb_pad; /* padding for alignment */
} l; /* long form pointers */
} bb_u; /* rest */
};
#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
/* sizes of CRC enabled btree blocks */
#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40)
#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48)
#define XFS_BTREE_SBLOCK_CRC_OFF \
offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
#define XFS_BTREE_LBLOCK_CRC_OFF \
offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
#endif /* __XFS_FORMAT_H__ */

2189
fs/xfs/libxfs/xfs_ialloc.c Normal file

File diff suppressed because it is too large Load Diff

163
fs/xfs/libxfs/xfs_ialloc.h Normal file
View File

@@ -0,0 +1,163 @@
/*
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_IALLOC_H__
#define __XFS_IALLOC_H__
struct xfs_buf;
struct xfs_dinode;
struct xfs_imap;
struct xfs_mount;
struct xfs_trans;
struct xfs_btree_cur;
/* Move inodes in clusters of this size */
#define XFS_INODE_BIG_CLUSTER_SIZE 8192
/* Calculate and return the number of filesystem blocks per inode cluster */
static inline int
xfs_icluster_size_fsb(
struct xfs_mount *mp)
{
if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size)
return 1;
return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
}
/*
* Make an inode pointer out of the buffer/offset.
*/
static inline struct xfs_dinode *
xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
{
return (struct xfs_dinode *)
(xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
}
/*
* Allocate an inode on disk.
* Mode is used to tell whether the new inode will need space, and whether
* it is a directory.
*
* To work within the constraint of one allocation per transaction,
* xfs_dialloc() is designed to be called twice if it has to do an
* allocation to make more free inodes. If an inode is
* available without an allocation, agbp would be set to the current
* agbp and alloc_done set to false.
* If an allocation needed to be done, agbp would be set to the
* inode header of the allocation group and alloc_done set to true.
* The caller should then commit the current transaction and allocate a new
* transaction. xfs_dialloc() should then be called again with
* the agbp value returned from the previous call.
*
* Once we successfully pick an inode its number is returned and the
* on-disk data structures are updated. The inode itself is not read
* in, since doing so would break ordering constraints with xfs_reclaim.
*
* *agbp should be set to NULL on the first call, *alloc_done set to FALSE.
*/
int /* error */
xfs_dialloc(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t parent, /* parent inode (directory) */
umode_t mode, /* mode bits for new inode */
int okalloc, /* ok to allocate more space */
struct xfs_buf **agbp, /* buf for a.g. inode header */
xfs_ino_t *inop); /* inode number allocated */
/*
* Free disk inode. Carefully avoids touching the incore inode, all
* manipulations incore are the caller's responsibility.
* The on-disk inode is not changed by this operation, only the
* btree (free inode mask) is changed.
*/
int /* error */
xfs_difree(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
int *deleted, /* set if inode cluster was deleted */
xfs_ino_t *first_ino); /* first inode in deleted cluster */
/*
* Return the location of the inode in imap, for mapping it into a buffer.
*/
int
xfs_imap(
struct xfs_mount *mp, /* file system mount structure */
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t ino, /* inode to locate */
struct xfs_imap *imap, /* location map structure */
uint flags); /* flags for inode btree lookup */
/*
* Compute and fill in value of m_in_maxlevels.
*/
void
xfs_ialloc_compute_maxlevels(
struct xfs_mount *mp); /* file system mount structure */
/*
* Log specified fields for the ag hdr (inode section)
*/
void
xfs_ialloc_log_agi(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *bp, /* allocation group header buffer */
int fields); /* bitmask of fields to log */
/*
* Read in the allocation group header (inode allocation section)
*/
int /* error */
xfs_ialloc_read_agi(
struct xfs_mount *mp, /* file system mount structure */
struct xfs_trans *tp, /* transaction pointer */
xfs_agnumber_t agno, /* allocation group number */
struct xfs_buf **bpp); /* allocation group hdr buf */
/*
* Read in the allocation group header to initialise the per-ag data
* in the mount structure
*/
int
xfs_ialloc_pagi_init(
struct xfs_mount *mp, /* file system mount structure */
struct xfs_trans *tp, /* transaction pointer */
xfs_agnumber_t agno); /* allocation group number */
/*
* Lookup a record by ino in the btree given by cur.
*/
int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
xfs_lookup_t dir, int *stat);
/*
* Get the data from the pointed-to record.
*/
int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
xfs_inobt_rec_incore_t *rec, int *stat);
/*
* Inode chunk initialisation routine
*/
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
struct list_head *buffer_list,
xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_agblock_t length, unsigned int gen);
#endif /* __XFS_IALLOC_H__ */

View File

@@ -0,0 +1,422 @@
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
STATIC int
xfs_inobt_get_minrecs(
struct xfs_btree_cur *cur,
int level)
{
return cur->bc_mp->m_inobt_mnr[level != 0];
}
STATIC struct xfs_btree_cur *
xfs_inobt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
cur->bc_private.a.agbp, cur->bc_private.a.agno,
cur->bc_btnum);
}
STATIC void
xfs_inobt_set_root(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *nptr,
int inc) /* level change */
{
struct xfs_buf *agbp = cur->bc_private.a.agbp;
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
agi->agi_root = nptr->s;
be32_add_cpu(&agi->agi_level, inc);
xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
}
STATIC void
xfs_finobt_set_root(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *nptr,
int inc) /* level change */
{
struct xfs_buf *agbp = cur->bc_private.a.agbp;
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
agi->agi_free_root = nptr->s;
be32_add_cpu(&agi->agi_free_level, inc);
xfs_ialloc_log_agi(cur->bc_tp, agbp,
XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
}
STATIC int
xfs_inobt_alloc_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
int *stat)
{
xfs_alloc_arg_t args; /* block allocation args */
int error; /* error return value */
xfs_agblock_t sbno = be32_to_cpu(start->s);
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
args.minlen = 1;
args.maxlen = 1;
args.prod = 1;
args.type = XFS_ALLOCTYPE_NEAR_BNO;
error = xfs_alloc_vextent(&args);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
if (args.fsbno == NULLFSBLOCK) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
ASSERT(args.len == 1);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
*stat = 1;
return 0;
}
STATIC int
xfs_inobt_free_block(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
{
xfs_fsblock_t fsbno;
int error;
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
error = xfs_free_extent(cur->bc_tp, fsbno, 1);
if (error)
return error;
xfs_trans_binval(cur->bc_tp, bp);
return error;
}
STATIC int
xfs_inobt_get_maxrecs(
struct xfs_btree_cur *cur,
int level)
{
return cur->bc_mp->m_inobt_mxr[level != 0];
}
STATIC void
xfs_inobt_init_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
key->inobt.ir_startino = rec->inobt.ir_startino;
}
STATIC void
xfs_inobt_init_rec_from_key(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
rec->inobt.ir_startino = key->inobt.ir_startino;
}
STATIC void
xfs_inobt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
}
/*
* initial value of ptr for lookup
*/
STATIC void
xfs_inobt_init_ptr_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
{
struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
ptr->s = agi->agi_root;
}
STATIC void
xfs_finobt_init_ptr_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
{
struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
ptr->s = agi->agi_free_root;
}
STATIC __int64_t
xfs_inobt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
cur->bc_rec.i.ir_startino;
}
static int
xfs_inobt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
unsigned int level;
/*
* During growfs operations, we can't verify the exact owner as the
* perag is not fully initialised and hence not attached to the buffer.
*
* Similarly, during log recovery we will have a perag structure
* attached, but the agi information will not yet have been initialised
* from the on disk AGI. We don't currently use any of this information,
* but beware of the landmine (i.e. need to check pag->pagi_init) if we
* ever do.
*/
switch (block->bb_magic) {
case cpu_to_be32(XFS_IBT_CRC_MAGIC):
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag &&
be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
return false;
/* fall through */
case cpu_to_be32(XFS_IBT_MAGIC):
case cpu_to_be32(XFS_FIBT_MAGIC):
break;
default:
return 0;
}
/* numrecs and level verification */
level = be16_to_cpu(block->bb_level);
if (level >= mp->m_in_maxlevels)
return false;
if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
return false;
/* sibling pointer verification */
if (!block->bb_u.s.bb_leftsib ||
(be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
return false;
if (!block->bb_u.s.bb_rightsib ||
(be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
return false;
return true;
}
static void
xfs_inobt_read_verify(
struct xfs_buf *bp)
{
if (!xfs_btree_sblock_verify_crc(bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_inobt_verify(bp))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_verifier_error(bp);
}
}
static void
xfs_inobt_write_verify(
struct xfs_buf *bp)
{
if (!xfs_inobt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
xfs_btree_sblock_calc_crc(bp);
}
const struct xfs_buf_ops xfs_inobt_buf_ops = {
.verify_read = xfs_inobt_read_verify,
.verify_write = xfs_inobt_write_verify,
};
#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_inobt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
return be32_to_cpu(k1->inobt.ir_startino) <
be32_to_cpu(k2->inobt.ir_startino);
}
STATIC int
xfs_inobt_recs_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
be32_to_cpu(r2->inobt.ir_startino);
}
#endif /* DEBUG */
static const struct xfs_btree_ops xfs_inobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
.key_len = sizeof(xfs_inobt_key_t),
.dup_cursor = xfs_inobt_dup_cursor,
.set_root = xfs_inobt_set_root,
.alloc_block = xfs_inobt_alloc_block,
.free_block = xfs_inobt_free_block,
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
.init_rec_from_key = xfs_inobt_init_rec_from_key,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
#endif
};
static const struct xfs_btree_ops xfs_finobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
.key_len = sizeof(xfs_inobt_key_t),
.dup_cursor = xfs_inobt_dup_cursor,
.set_root = xfs_finobt_set_root,
.alloc_block = xfs_inobt_alloc_block,
.free_block = xfs_inobt_free_block,
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
.init_rec_from_key = xfs_inobt_init_rec_from_key,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
#endif
};
/*
* Allocate a new inode btree cursor.
*/
struct xfs_btree_cur * /* new inode btree cursor */
xfs_inobt_init_cursor(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *agbp, /* buffer for agi structure */
xfs_agnumber_t agno, /* allocation group number */
xfs_btnum_t btnum) /* ialloc or free ino btree */
{
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
struct xfs_btree_cur *cur;
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
cur->bc_tp = tp;
cur->bc_mp = mp;
cur->bc_btnum = btnum;
if (btnum == XFS_BTNUM_INO) {
cur->bc_nlevels = be32_to_cpu(agi->agi_level);
cur->bc_ops = &xfs_inobt_ops;
} else {
cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
cur->bc_ops = &xfs_finobt_ops;
}
cur->bc_blocklog = mp->m_sb.sb_blocklog;
if (xfs_sb_version_hascrc(&mp->m_sb))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
return cur;
}
/*
* Calculate number of records in an inobt btree block.
*/
int
xfs_inobt_maxrecs(
struct xfs_mount *mp,
int blocklen,
int leaf)
{
blocklen -= XFS_INOBT_BLOCK_LEN(mp);
if (leaf)
return blocklen / sizeof(xfs_inobt_rec_t);
return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
}

View File

@@ -0,0 +1,65 @@
/*
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_IALLOC_BTREE_H__
#define __XFS_IALLOC_BTREE_H__
/*
* Inode map on-disk structures
*/
struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
/*
* Btree block header size depends on a superblock flag.
*/
#define XFS_INOBT_BLOCK_LEN(mp) \
(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
/*
* Record, key, and pointer address macros for btree blocks.
*
* (note that some of these may appear unused, but they are used in userspace)
*/
#define XFS_INOBT_REC_ADDR(mp, block, index) \
((xfs_inobt_rec_t *) \
((char *)(block) + \
XFS_INOBT_BLOCK_LEN(mp) + \
(((index) - 1) * sizeof(xfs_inobt_rec_t))))
#define XFS_INOBT_KEY_ADDR(mp, block, index) \
((xfs_inobt_key_t *) \
((char *)(block) + \
XFS_INOBT_BLOCK_LEN(mp) + \
((index) - 1) * sizeof(xfs_inobt_key_t)))
#define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \
((xfs_inobt_ptr_t *) \
((char *)(block) + \
XFS_INOBT_BLOCK_LEN(mp) + \
(maxrecs) * sizeof(xfs_inobt_key_t) + \
((index) - 1) * sizeof(xfs_inobt_ptr_t)))
extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t,
xfs_btnum_t);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
#endif /* __XFS_IALLOC_BTREE_H__ */

View File

@@ -0,0 +1,479 @@
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_cksum.h"
#include "xfs_icache.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
#include "xfs_dinode.h"
/*
* Check that none of the inode's in the buffer have a next
* unlinked field of 0.
*/
#if defined(DEBUG)
void
xfs_inobp_check(
xfs_mount_t *mp,
xfs_buf_t *bp)
{
int i;
int j;
xfs_dinode_t *dip;
j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
for (i = 0; i < j; i++) {
dip = (xfs_dinode_t *)xfs_buf_offset(bp,
i * mp->m_sb.sb_inodesize);
if (!dip->di_next_unlinked) {
xfs_alert(mp,
"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
i, (long long)bp->b_bn);
}
}
}
#endif
/*
* If we are doing readahead on an inode buffer, we might be in log recovery
* reading an inode allocation buffer that hasn't yet been replayed, and hence
* has not had the inode cores stamped into it. Hence for readahead, the buffer
* may be potentially invalid.
*
* If the readahead buffer is invalid, we don't want to mark it with an error,
* but we do want to clear the DONE status of the buffer so that a followup read
* will re-read it from disk. This will ensure that we don't get an unnecessary
* warnings during log recovery and we don't get unnecssary panics on debug
* kernels.
*/
static void
xfs_inode_buf_verify(
struct xfs_buf *bp,
bool readahead)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
int i;
int ni;
/*
* Validate the magic number and version of every inode in the buffer
*/
ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
for (i = 0; i < ni; i++) {
int di_ok;
xfs_dinode_t *dip;
dip = (struct xfs_dinode *)xfs_buf_offset(bp,
(i << mp->m_sb.sb_inodelog));
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
XFS_DINODE_GOOD_VERSION(dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
XFS_RANDOM_ITOBP_INOTOBP))) {
if (readahead) {
bp->b_flags &= ~XBF_DONE;
return;
}
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
#ifdef DEBUG
xfs_alert(mp,
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
(unsigned long long)bp->b_bn, i,
be16_to_cpu(dip->di_magic));
#endif
}
}
xfs_inobp_check(mp, bp);
}
static void
xfs_inode_buf_read_verify(
struct xfs_buf *bp)
{
xfs_inode_buf_verify(bp, false);
}
static void
xfs_inode_buf_readahead_verify(
struct xfs_buf *bp)
{
xfs_inode_buf_verify(bp, true);
}
static void
xfs_inode_buf_write_verify(
struct xfs_buf *bp)
{
xfs_inode_buf_verify(bp, false);
}
const struct xfs_buf_ops xfs_inode_buf_ops = {
.verify_read = xfs_inode_buf_read_verify,
.verify_write = xfs_inode_buf_write_verify,
};
const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
.verify_read = xfs_inode_buf_readahead_verify,
.verify_write = xfs_inode_buf_write_verify,
};
/*
* This routine is called to map an inode to the buffer containing the on-disk
* version of the inode. It returns a pointer to the buffer containing the
* on-disk inode in the bpp parameter, and in the dipp parameter it returns a
* pointer to the on-disk inode within that buffer.
*
* If a non-zero error is returned, then the contents of bpp and dipp are
* undefined.
*/
int
xfs_imap_to_bp(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_imap *imap,
struct xfs_dinode **dipp,
struct xfs_buf **bpp,
uint buf_flags,
uint iget_flags)
{
struct xfs_buf *bp;
int error;
buf_flags |= XBF_UNMAPPED;
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
(int)imap->im_len, buf_flags, &bp,
&xfs_inode_buf_ops);
if (error) {
if (error == -EAGAIN) {
ASSERT(buf_flags & XBF_TRYLOCK);
return error;
}
if (error == -EFSCORRUPTED &&
(iget_flags & XFS_IGET_UNTRUSTED))
return -EINVAL;
xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
__func__, error);
return error;
}
*bpp = bp;
*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
return 0;
}
void
xfs_dinode_from_disk(
xfs_icdinode_t *to,
xfs_dinode_t *from)
{
to->di_magic = be16_to_cpu(from->di_magic);
to->di_mode = be16_to_cpu(from->di_mode);
to->di_version = from ->di_version;
to->di_format = from->di_format;
to->di_onlink = be16_to_cpu(from->di_onlink);
to->di_uid = be32_to_cpu(from->di_uid);
to->di_gid = be32_to_cpu(from->di_gid);
to->di_nlink = be32_to_cpu(from->di_nlink);
to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
to->di_flushiter = be16_to_cpu(from->di_flushiter);
to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
to->di_size = be64_to_cpu(from->di_size);
to->di_nblocks = be64_to_cpu(from->di_nblocks);
to->di_extsize = be32_to_cpu(from->di_extsize);
to->di_nextents = be32_to_cpu(from->di_nextents);
to->di_anextents = be16_to_cpu(from->di_anextents);
to->di_forkoff = from->di_forkoff;
to->di_aformat = from->di_aformat;
to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
to->di_dmstate = be16_to_cpu(from->di_dmstate);
to->di_flags = be16_to_cpu(from->di_flags);
to->di_gen = be32_to_cpu(from->di_gen);
if (to->di_version == 3) {
to->di_changecount = be64_to_cpu(from->di_changecount);
to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
to->di_flags2 = be64_to_cpu(from->di_flags2);
to->di_ino = be64_to_cpu(from->di_ino);
to->di_lsn = be64_to_cpu(from->di_lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
}
}
void
xfs_dinode_to_disk(
xfs_dinode_t *to,
xfs_icdinode_t *from)
{
to->di_magic = cpu_to_be16(from->di_magic);
to->di_mode = cpu_to_be16(from->di_mode);
to->di_version = from ->di_version;
to->di_format = from->di_format;
to->di_onlink = cpu_to_be16(from->di_onlink);
to->di_uid = cpu_to_be32(from->di_uid);
to->di_gid = cpu_to_be32(from->di_gid);
to->di_nlink = cpu_to_be32(from->di_nlink);
to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
to->di_size = cpu_to_be64(from->di_size);
to->di_nblocks = cpu_to_be64(from->di_nblocks);
to->di_extsize = cpu_to_be32(from->di_extsize);
to->di_nextents = cpu_to_be32(from->di_nextents);
to->di_anextents = cpu_to_be16(from->di_anextents);
to->di_forkoff = from->di_forkoff;
to->di_aformat = from->di_aformat;
to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
to->di_dmstate = cpu_to_be16(from->di_dmstate);
to->di_flags = cpu_to_be16(from->di_flags);
to->di_gen = cpu_to_be32(from->di_gen);
if (from->di_version == 3) {
to->di_changecount = cpu_to_be64(from->di_changecount);
to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
to->di_flags2 = cpu_to_be64(from->di_flags2);
to->di_ino = cpu_to_be64(from->di_ino);
to->di_lsn = cpu_to_be64(from->di_lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
to->di_flushiter = 0;
} else {
to->di_flushiter = cpu_to_be16(from->di_flushiter);
}
}
static bool
xfs_dinode_verify(
struct xfs_mount *mp,
struct xfs_inode *ip,
struct xfs_dinode *dip)
{
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
return false;
/* only version 3 or greater inodes are extensively verified here */
if (dip->di_version < 3)
return true;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
XFS_DINODE_CRC_OFF))
return false;
if (be64_to_cpu(dip->di_ino) != ip->i_ino)
return false;
if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
return false;
return true;
}
void
xfs_dinode_calc_crc(
struct xfs_mount *mp,
struct xfs_dinode *dip)
{
__uint32_t crc;
if (dip->di_version < 3)
return;
ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
XFS_DINODE_CRC_OFF);
dip->di_crc = xfs_end_cksum(crc);
}
/*
* Read the disk inode attributes into the in-core inode structure.
*
* For version 5 superblocks, if we are initialising a new inode and we are not
* utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
* inode core with a random generation number. If we are keeping inodes around,
* we need to read the inode cluster to get the existing generation number off
* disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
* format) then log recovery is dependent on the di_flushiter field being
* initialised from the current on-disk value and hence we must also read the
* inode off disk.
*/
int
xfs_iread(
xfs_mount_t *mp,
xfs_trans_t *tp,
xfs_inode_t *ip,
uint iget_flags)
{
xfs_buf_t *bp;
xfs_dinode_t *dip;
int error;
/*
* Fill in the location information in the in-core inode.
*/
error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
if (error)
return error;
/* shortcut IO on inode allocation if possible */
if ((iget_flags & XFS_IGET_CREATE) &&
xfs_sb_version_hascrc(&mp->m_sb) &&
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
/* initialise the on-disk inode core */
memset(&ip->i_d, 0, sizeof(ip->i_d));
ip->i_d.di_magic = XFS_DINODE_MAGIC;
ip->i_d.di_gen = prandom_u32();
if (xfs_sb_version_hascrc(&mp->m_sb)) {
ip->i_d.di_version = 3;
ip->i_d.di_ino = ip->i_ino;
uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
} else
ip->i_d.di_version = 2;
return 0;
}
/*
* Get pointers to the on-disk inode and the buffer containing it.
*/
error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
if (error)
return error;
/* even unallocated inodes are verified */
if (!xfs_dinode_verify(mp, ip, dip)) {
xfs_alert(mp, "%s: validation failed for inode %lld failed",
__func__, ip->i_ino);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
error = -EFSCORRUPTED;
goto out_brelse;
}
/*
* If the on-disk inode is already linked to a directory
* entry, copy all of the inode into the in-core inode.
* xfs_iformat_fork() handles copying in the inode format
* specific information.
* Otherwise, just get the truly permanent information.
*/
if (dip->di_mode) {
xfs_dinode_from_disk(&ip->i_d, dip);
error = xfs_iformat_fork(ip, dip);
if (error) {
#ifdef DEBUG
xfs_alert(mp, "%s: xfs_iformat() returned error %d",
__func__, error);
#endif /* DEBUG */
goto out_brelse;
}
} else {
/*
* Partial initialisation of the in-core inode. Just the bits
* that xfs_ialloc won't overwrite or relies on being correct.
*/
ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
ip->i_d.di_version = dip->di_version;
ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
if (dip->di_version == 3) {
ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
}
/*
* Make sure to pull in the mode here as well in
* case the inode is released without being used.
* This ensures that xfs_inactive() will see that
* the inode is already free and not try to mess
* with the uninitialized part of it.
*/
ip->i_d.di_mode = 0;
}
/*
* Automatically convert version 1 inode formats in memory to version 2
* inode format. If the inode is modified, it will get logged and
* rewritten as a version 2 inode. We can do this because we set the
* superblock feature bit for v2 inodes unconditionally during mount
* and it means the reast of the code can assume the inode version is 2
* or higher.
*/
if (ip->i_d.di_version == 1) {
ip->i_d.di_version = 2;
memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
ip->i_d.di_nlink = ip->i_d.di_onlink;
ip->i_d.di_onlink = 0;
xfs_set_projid(ip, 0);
}
ip->i_delayed_blks = 0;
/*
* Mark the buffer containing the inode as something to keep
* around for a while. This helps to keep recently accessed
* meta-data in-core longer.
*/
xfs_buf_set_ref(bp, XFS_INO_REF);
/*
* Use xfs_trans_brelse() to release the buffer containing the on-disk
* inode, because it was acquired with xfs_trans_read_buf() in
* xfs_imap_to_bp() above. If tp is NULL, this is just a normal
* brelse(). If we're within a transaction, then xfs_trans_brelse()
* will only release the buffer if it is not dirty within the
* transaction. It will be OK to release the buffer in this case,
* because inodes on disk are never destroyed and we will be locking the
* new in-core inode before putting it in the cache where other
* processes can find it. Thus we don't have to worry about the inode
* being changed just because we released the buffer.
*/
out_brelse:
xfs_trans_brelse(tp, bp);
return error;
}

View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_INODE_BUF_H__
#define __XFS_INODE_BUF_H__
struct xfs_inode;
struct xfs_dinode;
struct xfs_icdinode;
/*
* Inode location information. Stored in the inode and passed to
* xfs_imap_to_bp() to get a buffer and dinode for a given inode.
*/
struct xfs_imap {
xfs_daddr_t im_blkno; /* starting BB of inode chunk */
ushort im_len; /* length in BBs of inode chunk */
ushort im_boffset; /* inode offset in block in bytes */
};
int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
struct xfs_imap *, struct xfs_dinode **,
struct xfs_buf **, uint, uint);
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
struct xfs_inode *, uint);
void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
void xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from);
void xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from);
#if defined(DEBUG)
void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#else
#define xfs_inobp_check(mp, bp)
#endif /* DEBUG */
#endif /* __XFS_INODE_BUF_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,171 @@
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_INODE_FORK_H__
#define __XFS_INODE_FORK_H__
struct xfs_inode_log_item;
struct xfs_dinode;
/*
* The following xfs_ext_irec_t struct introduces a second (top) level
* to the in-core extent allocation scheme. These structs are allocated
* in a contiguous block, creating an indirection array where each entry
* (irec) contains a pointer to a buffer of in-core extent records which
* it manages. Each extent buffer is 4k in size, since 4k is the system
* page size on Linux i386 and systems with larger page sizes don't seem
* to gain much, if anything, by using their native page size as the
* extent buffer size. Also, using 4k extent buffers everywhere provides
* a consistent interface for CXFS across different platforms.
*
* There is currently no limit on the number of irec's (extent lists)
* allowed, so heavily fragmented files may require an indirection array
* which spans multiple system pages of memory. The number of extents
* which would require this amount of contiguous memory is very large
* and should not cause problems in the foreseeable future. However,
* if the memory needed for the contiguous array ever becomes a problem,
* it is possible that a third level of indirection may be required.
*/
typedef struct xfs_ext_irec {
xfs_bmbt_rec_host_t *er_extbuf; /* block of extent records */
xfs_extnum_t er_extoff; /* extent offset in file */
xfs_extnum_t er_extcount; /* number of extents in page/block */
} xfs_ext_irec_t;
/*
* File incore extent information, present for each of data & attr forks.
*/
#define XFS_IEXT_BUFSZ 4096
#define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
#define XFS_INLINE_EXTS 2
#define XFS_INLINE_DATA 32
typedef struct xfs_ifork {
int if_bytes; /* bytes in if_u1 */
int if_real_bytes; /* bytes allocated in if_u1 */
struct xfs_btree_block *if_broot; /* file's incore btree root */
short if_broot_bytes; /* bytes allocated for root */
unsigned char if_flags; /* per-fork flags */
union {
xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
xfs_ext_irec_t *if_ext_irec; /* irec map file exts */
char *if_data; /* inline file data */
} if_u1;
union {
xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
/* very small file extents */
char if_inline_data[XFS_INLINE_DATA];
/* very small file data */
xfs_dev_t if_rdev; /* dev number if special */
uuid_t if_uuid; /* mount point value */
} if_u2;
} xfs_ifork_t;
/*
* Per-fork incore inode flags.
*/
#define XFS_IFINLINE 0x01 /* Inline data is read in */
#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
/*
* Fork handling.
*/
#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0)
#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3))
#define XFS_IFORK_PTR(ip,w) \
((w) == XFS_DATA_FORK ? \
&(ip)->i_df : \
(ip)->i_afp)
#define XFS_IFORK_DSIZE(ip) \
(XFS_IFORK_Q(ip) ? \
XFS_IFORK_BOFF(ip) : \
XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version))
#define XFS_IFORK_ASIZE(ip) \
(XFS_IFORK_Q(ip) ? \
XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \
XFS_IFORK_BOFF(ip) : \
0)
#define XFS_IFORK_SIZE(ip,w) \
((w) == XFS_DATA_FORK ? \
XFS_IFORK_DSIZE(ip) : \
XFS_IFORK_ASIZE(ip))
#define XFS_IFORK_FORMAT(ip,w) \
((w) == XFS_DATA_FORK ? \
(ip)->i_d.di_format : \
(ip)->i_d.di_aformat)
#define XFS_IFORK_FMT_SET(ip,w,n) \
((w) == XFS_DATA_FORK ? \
((ip)->i_d.di_format = (n)) : \
((ip)->i_d.di_aformat = (n)))
#define XFS_IFORK_NEXTENTS(ip,w) \
((w) == XFS_DATA_FORK ? \
(ip)->i_d.di_nextents : \
(ip)->i_d.di_anextents)
#define XFS_IFORK_NEXT_SET(ip,w,n) \
((w) == XFS_DATA_FORK ? \
((ip)->i_d.di_nextents = (n)) : \
((ip)->i_d.di_anextents = (n)))
#define XFS_IFORK_MAXEXT(ip, w) \
(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_inode *, int);
void xfs_idata_realloc(struct xfs_inode *, int, int);
void xfs_iroot_realloc(struct xfs_inode *, int, int);
int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
int);
struct xfs_bmbt_rec_host *
xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
struct xfs_bmbt_irec *, int);
void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
void xfs_iext_add_indirect_multi(struct xfs_ifork *, int,
xfs_extnum_t, int);
void xfs_iext_remove(struct xfs_inode *, xfs_extnum_t, int, int);
void xfs_iext_remove_inline(struct xfs_ifork *, xfs_extnum_t, int);
void xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int);
void xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int);
void xfs_iext_realloc_direct(struct xfs_ifork *, int);
void xfs_iext_direct_to_inline(struct xfs_ifork *, xfs_extnum_t);
void xfs_iext_inline_to_direct(struct xfs_ifork *, int);
void xfs_iext_destroy(struct xfs_ifork *);
struct xfs_bmbt_rec_host *
xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *);
struct xfs_ext_irec *
xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *);
struct xfs_ext_irec *
xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *,
int);
void xfs_iext_irec_init(struct xfs_ifork *);
struct xfs_ext_irec *
xfs_iext_irec_new(struct xfs_ifork *, int);
void xfs_iext_irec_remove(struct xfs_ifork *, int);
void xfs_iext_irec_compact(struct xfs_ifork *);
void xfs_iext_irec_compact_pages(struct xfs_ifork *);
void xfs_iext_irec_compact_full(struct xfs_ifork *);
void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
extern struct kmem_zone *xfs_ifork_zone;
#endif /* __XFS_INODE_FORK_H__ */

64
fs/xfs/libxfs/xfs_inum.h Normal file
View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_INUM_H__
#define __XFS_INUM_H__
/*
* Inode number format:
* low inopblog bits - offset in block
* next agblklog bits - block number in ag
* next agno_log bits - ag number
* high agno_log-agblklog-inopblog bits - 0
*/
struct xfs_mount;
#define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1)
#define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog
#define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog
#define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log
#define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log
#define XFS_INO_BITS(mp) \
XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
#define XFS_INO_TO_AGNO(mp,i) \
((xfs_agnumber_t)((i) >> XFS_INO_AGINO_BITS(mp)))
#define XFS_INO_TO_AGINO(mp,i) \
((xfs_agino_t)(i) & XFS_INO_MASK(XFS_INO_AGINO_BITS(mp)))
#define XFS_INO_TO_AGBNO(mp,i) \
(((xfs_agblock_t)(i) >> XFS_INO_OFFSET_BITS(mp)) & \
XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp)))
#define XFS_INO_TO_OFFSET(mp,i) \
((int)(i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
#define XFS_INO_TO_FSB(mp,i) \
XFS_AGB_TO_FSB(mp, XFS_INO_TO_AGNO(mp,i), XFS_INO_TO_AGBNO(mp,i))
#define XFS_AGINO_TO_INO(mp,a,i) \
(((xfs_ino_t)(a) << XFS_INO_AGINO_BITS(mp)) | (i))
#define XFS_AGINO_TO_AGBNO(mp,i) ((i) >> XFS_INO_OFFSET_BITS(mp))
#define XFS_AGINO_TO_OFFSET(mp,i) \
((i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
#define XFS_OFFBNO_TO_AGINO(mp,b,o) \
((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
#if XFS_BIG_INUMS
#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL))
#else
#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL))
#endif
#define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL))
#endif /* __XFS_INUM_H__ */

View File

@@ -0,0 +1,679 @@
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_LOG_FORMAT_H__
#define __XFS_LOG_FORMAT_H__
struct xfs_mount;
struct xfs_trans_res;
/*
* On-disk Log Format definitions.
*
* This file contains all the on-disk format definitions used within the log. It
* includes the physical log structure itself, as well as all the log item
* format structures that are written into the log and intepreted by log
* recovery. We start with the physical log format definitions, and then work
* through all the log items definitions and everything they encode into the
* log.
*/
typedef __uint32_t xlog_tid_t;
#define XLOG_MIN_ICLOGS 2
#define XLOG_MAX_ICLOGS 8
#define XLOG_HEADER_MAGIC_NUM 0xFEEDbabe /* Invalid cycle number */
#define XLOG_VERSION_1 1
#define XLOG_VERSION_2 2 /* Large IClogs, Log sunit */
#define XLOG_VERSION_OKBITS (XLOG_VERSION_1 | XLOG_VERSION_2)
#define XLOG_MIN_RECORD_BSIZE (16*1024) /* eventually 32k */
#define XLOG_BIG_RECORD_BSIZE (32*1024) /* 32k buffers */
#define XLOG_MAX_RECORD_BSIZE (256*1024)
#define XLOG_HEADER_CYCLE_SIZE (32*1024) /* cycle data in header */
#define XLOG_MIN_RECORD_BSHIFT 14 /* 16384 == 1 << 14 */
#define XLOG_BIG_RECORD_BSHIFT 15 /* 32k == 1 << 15 */
#define XLOG_MAX_RECORD_BSHIFT 18 /* 256k == 1 << 18 */
#define XLOG_BTOLSUNIT(log, b) (((b)+(log)->l_mp->m_sb.sb_logsunit-1) / \
(log)->l_mp->m_sb.sb_logsunit)
#define XLOG_LSUNITTOB(log, su) ((su) * (log)->l_mp->m_sb.sb_logsunit)
#define XLOG_HEADER_SIZE 512
/* Minimum number of transactions that must fit in the log (defined by mkfs) */
#define XFS_MIN_LOG_FACTOR 3
#define XLOG_REC_SHIFT(log) \
BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
#define XLOG_TOTAL_REC_SHIFT(log) \
BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
/* get lsn fields */
#define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
#define BLOCK_LSN(lsn) ((uint)(lsn))
/* this is used in a spot where we might otherwise double-endian-flip */
#define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0])
static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
{
return ((xfs_lsn_t)cycle << 32) | block;
}
static inline uint xlog_get_cycle(char *ptr)
{
if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
return be32_to_cpu(*((__be32 *)ptr + 1));
else
return be32_to_cpu(*(__be32 *)ptr);
}
/* Log Clients */
#define XFS_TRANSACTION 0x69
#define XFS_VOLUME 0x2
#define XFS_LOG 0xaa
#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */
/* Region types for iovec's i_type */
#define XLOG_REG_TYPE_BFORMAT 1
#define XLOG_REG_TYPE_BCHUNK 2
#define XLOG_REG_TYPE_EFI_FORMAT 3
#define XLOG_REG_TYPE_EFD_FORMAT 4
#define XLOG_REG_TYPE_IFORMAT 5
#define XLOG_REG_TYPE_ICORE 6
#define XLOG_REG_TYPE_IEXT 7
#define XLOG_REG_TYPE_IBROOT 8
#define XLOG_REG_TYPE_ILOCAL 9
#define XLOG_REG_TYPE_IATTR_EXT 10
#define XLOG_REG_TYPE_IATTR_BROOT 11
#define XLOG_REG_TYPE_IATTR_LOCAL 12
#define XLOG_REG_TYPE_QFORMAT 13
#define XLOG_REG_TYPE_DQUOT 14
#define XLOG_REG_TYPE_QUOTAOFF 15
#define XLOG_REG_TYPE_LRHEADER 16
#define XLOG_REG_TYPE_UNMOUNT 17
#define XLOG_REG_TYPE_COMMIT 18
#define XLOG_REG_TYPE_TRANSHDR 19
#define XLOG_REG_TYPE_ICREATE 20
#define XLOG_REG_TYPE_MAX 20
/*
* Flags to log operation header
*
* The first write of a new transaction will be preceded with a start
* record, XLOG_START_TRANS. Once a transaction is committed, a commit
* record is written, XLOG_COMMIT_TRANS. If a single region can not fit into
* the remainder of the current active in-core log, it is split up into
* multiple regions. Each partial region will be marked with a
* XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS.
*
*/
#define XLOG_START_TRANS 0x01 /* Start a new transaction */
#define XLOG_COMMIT_TRANS 0x02 /* Commit this transaction */
#define XLOG_CONTINUE_TRANS 0x04 /* Cont this trans into new region */
#define XLOG_WAS_CONT_TRANS 0x08 /* Cont this trans into new region */
#define XLOG_END_TRANS 0x10 /* End a continued transaction */
#define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */
typedef struct xlog_op_header {
__be32 oh_tid; /* transaction id of operation : 4 b */
__be32 oh_len; /* bytes in data region : 4 b */
__u8 oh_clientid; /* who sent me this : 1 b */
__u8 oh_flags; /* : 1 b */
__u16 oh_res2; /* 32 bit align : 2 b */
} xlog_op_header_t;
/* valid values for h_fmt */
#define XLOG_FMT_UNKNOWN 0
#define XLOG_FMT_LINUX_LE 1
#define XLOG_FMT_LINUX_BE 2
#define XLOG_FMT_IRIX_BE 3
/* our fmt */
#ifdef XFS_NATIVE_HOST
#define XLOG_FMT XLOG_FMT_LINUX_BE
#else
#define XLOG_FMT XLOG_FMT_LINUX_LE
#endif
typedef struct xlog_rec_header {
__be32 h_magicno; /* log record (LR) identifier : 4 */
__be32 h_cycle; /* write cycle of log : 4 */
__be32 h_version; /* LR version : 4 */
__be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */
__be64 h_lsn; /* lsn of this LR : 8 */
__be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */
__le32 h_crc; /* crc of log record : 4 */
__be32 h_prev_block; /* block number to previous LR : 4 */
__be32 h_num_logops; /* number of log operations in this LR : 4 */
__be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
/* new fields */
__be32 h_fmt; /* format of log record : 4 */
uuid_t h_fs_uuid; /* uuid of FS : 16 */
__be32 h_size; /* iclog size : 4 */
} xlog_rec_header_t;
typedef struct xlog_rec_ext_header {
__be32 xh_cycle; /* write cycle of log : 4 */
__be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */
} xlog_rec_ext_header_t;
/*
* Quite misnamed, because this union lays out the actual on-disk log buffer.
*/
typedef union xlog_in_core2 {
xlog_rec_header_t hic_header;
xlog_rec_ext_header_t hic_xheader;
char hic_sector[XLOG_HEADER_SIZE];
} xlog_in_core_2_t;
/* not an on-disk structure, but needed by log recovery in userspace */
typedef struct xfs_log_iovec {
void *i_addr; /* beginning address of region */
int i_len; /* length in bytes of region */
uint i_type; /* type of region */
} xfs_log_iovec_t;
/*
* Transaction Header definitions.
*
* This is the structure written in the log at the head of every transaction. It
* identifies the type and id of the transaction, and contains the number of
* items logged by the transaction so we know how many to expect during
* recovery.
*
* Do not change the below structure without redoing the code in
* xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
*/
typedef struct xfs_trans_header {
uint th_magic; /* magic number */
uint th_type; /* transaction type */
__int32_t th_tid; /* transaction id (unused) */
uint th_num_items; /* num items logged by trans */
} xfs_trans_header_t;
#define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */
/*
* Log item types.
*/
#define XFS_LI_EFI 0x1236
#define XFS_LI_EFD 0x1237
#define XFS_LI_IUNLINK 0x1238
#define XFS_LI_INODE 0x123b /* aligned ino chunks, var-size ibufs */
#define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */
#define XFS_LI_DQUOT 0x123d
#define XFS_LI_QUOTAOFF 0x123e
#define XFS_LI_ICREATE 0x123f
#define XFS_LI_TYPE_DESC \
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
{ XFS_LI_EFD, "XFS_LI_EFD" }, \
{ XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \
{ XFS_LI_INODE, "XFS_LI_INODE" }, \
{ XFS_LI_BUF, "XFS_LI_BUF" }, \
{ XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \
{ XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \
{ XFS_LI_ICREATE, "XFS_LI_ICREATE" }
/*
* Inode Log Item Format definitions.
*
* This is the structure used to lay out an inode log item in the
* log. The size of the inline data/extents/b-tree root to be logged
* (if any) is indicated in the ilf_dsize field. Changes to this structure
* must be added on to the end.
*/
typedef struct xfs_inode_log_format {
__uint16_t ilf_type; /* inode log item type */
__uint16_t ilf_size; /* size of this item */
__uint32_t ilf_fields; /* flags for fields logged */
__uint16_t ilf_asize; /* size of attr d/ext/root */
__uint16_t ilf_dsize; /* size of data/ext/root */
__uint64_t ilf_ino; /* inode number */
union {
__uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
__int64_t ilf_blkno; /* blkno of inode buffer */
__int32_t ilf_len; /* len of inode buffer */
__int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_t;
typedef struct xfs_inode_log_format_32 {
__uint16_t ilf_type; /* inode log item type */
__uint16_t ilf_size; /* size of this item */
__uint32_t ilf_fields; /* flags for fields logged */
__uint16_t ilf_asize; /* size of attr d/ext/root */
__uint16_t ilf_dsize; /* size of data/ext/root */
__uint64_t ilf_ino; /* inode number */
union {
__uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
__int64_t ilf_blkno; /* blkno of inode buffer */
__int32_t ilf_len; /* len of inode buffer */
__int32_t ilf_boffset; /* off of inode in buffer */
} __attribute__((packed)) xfs_inode_log_format_32_t;
typedef struct xfs_inode_log_format_64 {
__uint16_t ilf_type; /* inode log item type */
__uint16_t ilf_size; /* size of this item */
__uint32_t ilf_fields; /* flags for fields logged */
__uint16_t ilf_asize; /* size of attr d/ext/root */
__uint16_t ilf_dsize; /* size of data/ext/root */
__uint32_t ilf_pad; /* pad for 64 bit boundary */
__uint64_t ilf_ino; /* inode number */
union {
__uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
__int64_t ilf_blkno; /* blkno of inode buffer */
__int32_t ilf_len; /* len of inode buffer */
__int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_64_t;
/*
* Flags for xfs_trans_log_inode flags field.
*/
#define XFS_ILOG_CORE 0x001 /* log standard inode fields */
#define XFS_ILOG_DDATA 0x002 /* log i_df.if_data */
#define XFS_ILOG_DEXT 0x004 /* log i_df.if_extents */
#define XFS_ILOG_DBROOT 0x008 /* log i_df.i_broot */
#define XFS_ILOG_DEV 0x010 /* log the dev field */
#define XFS_ILOG_UUID 0x020 /* log the uuid field */
#define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */
#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */
#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */
#define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */
#define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */
/*
* The timestamps are dirty, but not necessarily anything else in the inode
* core. Unlike the other fields above this one must never make it to disk
* in the ilf_fields of the inode_log_format, but is purely store in-memory in
* ili_fields in the inode_log_item.
*/
#define XFS_ILOG_TIMESTAMP 0x4000
#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
XFS_ILOG_UUID | XFS_ILOG_ADATA | \
XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
#define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
XFS_ILOG_DBROOT)
#define XFS_ILOG_AFORK (XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
XFS_ILOG_ABROOT)
#define XFS_ILOG_ALL (XFS_ILOG_CORE | XFS_ILOG_DDATA | \
XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
XFS_ILOG_DEV | XFS_ILOG_UUID | \
XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \
XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
static inline int xfs_ilog_fbroot(int w)
{
return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
}
static inline int xfs_ilog_fext(int w)
{
return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
}
static inline int xfs_ilog_fdata(int w)
{
return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
}
/*
* Incore version of the on-disk inode core structures. We log this directly
* into the journal in host CPU format (for better or worse) and as such
* directly mirrors the xfs_dinode structure as it must contain all the same
* information.
*/
typedef struct xfs_ictimestamp {
__int32_t t_sec; /* timestamp seconds */
__int32_t t_nsec; /* timestamp nanoseconds */
} xfs_ictimestamp_t;
/*
* NOTE: This structure must be kept identical to struct xfs_dinode
* in xfs_dinode.h except for the endianness annotations.
*/
typedef struct xfs_icdinode {
__uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
__uint16_t di_mode; /* mode and type of file */
__int8_t di_version; /* inode version */
__int8_t di_format; /* format of di_c data */
__uint16_t di_onlink; /* old number of links to file */
__uint32_t di_uid; /* owner's user id */
__uint32_t di_gid; /* owner's group id */
__uint32_t di_nlink; /* number of links to file */
__uint16_t di_projid_lo; /* lower part of owner's project id */
__uint16_t di_projid_hi; /* higher part of owner's project id */
__uint8_t di_pad[6]; /* unused, zeroed space */
__uint16_t di_flushiter; /* incremented on flush */
xfs_ictimestamp_t di_atime; /* time last accessed */
xfs_ictimestamp_t di_mtime; /* time last modified */
xfs_ictimestamp_t di_ctime; /* time created/inode modified */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_drfsbno_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
xfs_extnum_t di_nextents; /* number of extents in data fork */
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
__uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
__int8_t di_aformat; /* format of attr fork's data */
__uint32_t di_dmevmask; /* DMIG event mask */
__uint16_t di_dmstate; /* DMIG state info */
__uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
__uint32_t di_gen; /* generation number */
/* di_next_unlinked is the only non-core field in the old dinode */
xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */
/* start of the extended dinode, writable fields */
__uint32_t di_crc; /* CRC of the inode */
__uint64_t di_changecount; /* number of attribute changes */
xfs_lsn_t di_lsn; /* flush sequence */
__uint64_t di_flags2; /* more random flags */
__uint8_t di_pad2[16]; /* more padding for future expansion */
/* fields only written to during inode creation */
xfs_ictimestamp_t di_crtime; /* time created */
xfs_ino_t di_ino; /* inode number */
uuid_t di_uuid; /* UUID of the filesystem */
/* structure must be padded to 64 bit alignment */
} xfs_icdinode_t;
static inline uint xfs_icdinode_size(int version)
{
if (version == 3)
return sizeof(struct xfs_icdinode);
return offsetof(struct xfs_icdinode, di_next_unlinked);
}
/*
* Buffer Log Format defintions
*
* These are the physical dirty bitmap defintions for the log format structure.
*/
#define XFS_BLF_CHUNK 128
#define XFS_BLF_SHIFT 7
#define BIT_TO_WORD_SHIFT 5
#define NBWORD (NBBY * sizeof(unsigned int))
/*
* This flag indicates that the buffer contains on disk inodes
* and requires special recovery handling.
*/
#define XFS_BLF_INODE_BUF (1<<0)
/*
* This flag indicates that the buffer should not be replayed
* during recovery because its blocks are being freed.
*/
#define XFS_BLF_CANCEL (1<<1)
/*
* This flag indicates that the buffer contains on disk
* user or group dquots and may require special recovery handling.
*/
#define XFS_BLF_UDQUOT_BUF (1<<2)
#define XFS_BLF_PDQUOT_BUF (1<<3)
#define XFS_BLF_GDQUOT_BUF (1<<4)
/*
* This is the structure used to lay out a buf log item in the
* log. The data map describes which 128 byte chunks of the buffer
* have been logged.
*/
#define XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
typedef struct xfs_buf_log_format {
unsigned short blf_type; /* buf log item type indicator */
unsigned short blf_size; /* size of this item */
ushort blf_flags; /* misc state */
ushort blf_len; /* number of blocks in this buf */
__int64_t blf_blkno; /* starting blkno of this buf */
unsigned int blf_map_size; /* used size of data bitmap in words */
unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
} xfs_buf_log_format_t;
/*
* All buffers now need to tell recovery where the magic number
* is so that it can verify and calculate the CRCs on the buffer correctly
* once the changes have been replayed into the buffer.
*
* The type value is held in the upper 5 bits of the blf_flags field, which is
* an unsigned 16 bit field. Hence we need to shift it 11 bits up and down.
*/
#define XFS_BLFT_BITS 5
#define XFS_BLFT_SHIFT 11
#define XFS_BLFT_MASK (((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT)
enum xfs_blft {
XFS_BLFT_UNKNOWN_BUF = 0,
XFS_BLFT_UDQUOT_BUF,
XFS_BLFT_PDQUOT_BUF,
XFS_BLFT_GDQUOT_BUF,
XFS_BLFT_BTREE_BUF,
XFS_BLFT_AGF_BUF,
XFS_BLFT_AGFL_BUF,
XFS_BLFT_AGI_BUF,
XFS_BLFT_DINO_BUF,
XFS_BLFT_SYMLINK_BUF,
XFS_BLFT_DIR_BLOCK_BUF,
XFS_BLFT_DIR_DATA_BUF,
XFS_BLFT_DIR_FREE_BUF,
XFS_BLFT_DIR_LEAF1_BUF,
XFS_BLFT_DIR_LEAFN_BUF,
XFS_BLFT_DA_NODE_BUF,
XFS_BLFT_ATTR_LEAF_BUF,
XFS_BLFT_ATTR_RMT_BUF,
XFS_BLFT_SB_BUF,
XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
};
static inline void
xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
{
ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF);
blf->blf_flags &= ~XFS_BLFT_MASK;
blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
}
static inline __uint16_t
xfs_blft_from_flags(struct xfs_buf_log_format *blf)
{
return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
}
/*
* EFI/EFD log format definitions
*/
typedef struct xfs_extent {
xfs_dfsbno_t ext_start;
xfs_extlen_t ext_len;
} xfs_extent_t;
/*
* Since an xfs_extent_t has types (start:64, len: 32)
* there are different alignments on 32 bit and 64 bit kernels.
* So we provide the different variants for use by a
* conversion routine.
*/
typedef struct xfs_extent_32 {
__uint64_t ext_start;
__uint32_t ext_len;
} __attribute__((packed)) xfs_extent_32_t;
typedef struct xfs_extent_64 {
__uint64_t ext_start;
__uint32_t ext_len;
__uint32_t ext_pad;
} xfs_extent_64_t;
/*
* This is the structure used to lay out an efi log item in the
* log. The efi_extents field is a variable size array whose
* size is given by efi_nextents.
*/
typedef struct xfs_efi_log_format {
__uint16_t efi_type; /* efi log item type */
__uint16_t efi_size; /* size of this item */
__uint32_t efi_nextents; /* # extents to free */
__uint64_t efi_id; /* efi identifier */
xfs_extent_t efi_extents[1]; /* array of extents to free */
} xfs_efi_log_format_t;
typedef struct xfs_efi_log_format_32 {
__uint16_t efi_type; /* efi log item type */
__uint16_t efi_size; /* size of this item */
__uint32_t efi_nextents; /* # extents to free */
__uint64_t efi_id; /* efi identifier */
xfs_extent_32_t efi_extents[1]; /* array of extents to free */
} __attribute__((packed)) xfs_efi_log_format_32_t;
typedef struct xfs_efi_log_format_64 {
__uint16_t efi_type; /* efi log item type */
__uint16_t efi_size; /* size of this item */
__uint32_t efi_nextents; /* # extents to free */
__uint64_t efi_id; /* efi identifier */
xfs_extent_64_t efi_extents[1]; /* array of extents to free */
} xfs_efi_log_format_64_t;
/*
* This is the structure used to lay out an efd log item in the
* log. The efd_extents array is a variable size array whose
* size is given by efd_nextents;
*/
typedef struct xfs_efd_log_format {
__uint16_t efd_type; /* efd log item type */
__uint16_t efd_size; /* size of this item */
__uint32_t efd_nextents; /* # of extents freed */
__uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_t efd_extents[1]; /* array of extents freed */
} xfs_efd_log_format_t;
typedef struct xfs_efd_log_format_32 {
__uint16_t efd_type; /* efd log item type */
__uint16_t efd_size; /* size of this item */
__uint32_t efd_nextents; /* # of extents freed */
__uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_32_t efd_extents[1]; /* array of extents freed */
} __attribute__((packed)) xfs_efd_log_format_32_t;
typedef struct xfs_efd_log_format_64 {
__uint16_t efd_type; /* efd log item type */
__uint16_t efd_size; /* size of this item */
__uint32_t efd_nextents; /* # of extents freed */
__uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_64_t efd_extents[1]; /* array of extents freed */
} xfs_efd_log_format_64_t;
/*
* Dquot Log format definitions.
*
* The first two fields must be the type and size fitting into
* 32 bits : log_recovery code assumes that.
*/
typedef struct xfs_dq_logformat {
__uint16_t qlf_type; /* dquot log item type */
__uint16_t qlf_size; /* size of this item */
xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */
__int64_t qlf_blkno; /* blkno of dquot buffer */
__int32_t qlf_len; /* len of dquot buffer */
__uint32_t qlf_boffset; /* off of dquot in buffer */
} xfs_dq_logformat_t;
/*
* log format struct for QUOTAOFF records.
* The first two fields must be the type and size fitting into
* 32 bits : log_recovery code assumes that.
* We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
* to the first and ensures that the first logitem is taken out of the AIL
* only when the last one is securely committed.
*/
typedef struct xfs_qoff_logformat {
unsigned short qf_type; /* quotaoff log item type */
unsigned short qf_size; /* size of this item */
unsigned int qf_flags; /* USR and/or GRP */
char qf_pad[12]; /* padding for future */
} xfs_qoff_logformat_t;
/*
* Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
*/
#define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */
#define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */
#define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */
#define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */
#define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */
#define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */
#define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */
/*
* Conversion to and from the combined OQUOTA flag (if necessary)
* is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk()
*/
#define XFS_GQUOTA_ENFD 0x0080 /* group quota limits enforced */
#define XFS_GQUOTA_CHKD 0x0100 /* quotacheck run on group quotas */
#define XFS_PQUOTA_ENFD 0x0200 /* project quota limits enforced */
#define XFS_PQUOTA_CHKD 0x0400 /* quotacheck run on project quotas */
#define XFS_ALL_QUOTA_ACCT \
(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
#define XFS_ALL_QUOTA_ENFD \
(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD)
#define XFS_ALL_QUOTA_CHKD \
(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD)
#define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\
XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\
XFS_PQUOTA_CHKD)
/*
* Inode create log item structure
*
* Log recovery assumes the first two entries are the type and size and they fit
* in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so
* decoding can be done correctly.
*/
struct xfs_icreate_log {
__uint16_t icl_type; /* type of log format structure */
__uint16_t icl_size; /* size of log format structure */
__be32 icl_ag; /* ag being allocated in */
__be32 icl_agbno; /* start block of inode range */
__be32 icl_count; /* number of inodes to initialise */
__be32 icl_isize; /* size of inodes */
__be32 icl_length; /* length of extent to initialise */
__be32 icl_gen; /* inode generation number to use */
};
#endif /* __XFS_LOG_FORMAT_H__ */

View File

@@ -0,0 +1,66 @@
/*
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_LOG_RECOVER_H__
#define __XFS_LOG_RECOVER_H__
/*
* Macros, structures, prototypes for internal log manager use.
*/
#define XLOG_RHASH_BITS 4
#define XLOG_RHASH_SIZE 16
#define XLOG_RHASH_SHIFT 2
#define XLOG_RHASH(tid) \
((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
#define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1)
/*
* item headers are in ri_buf[0]. Additional buffers follow.
*/
typedef struct xlog_recover_item {
struct list_head ri_list;
int ri_type;
int ri_cnt; /* count of regions found */
int ri_total; /* total regions */
xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
} xlog_recover_item_t;
struct xlog_tid;
typedef struct xlog_recover {
struct hlist_node r_list;
xlog_tid_t r_log_tid; /* log's transaction id */
xfs_trans_header_t r_theader; /* trans header for partial */
int r_state; /* not needed */
xfs_lsn_t r_lsn; /* xact lsn */
struct list_head r_itemq; /* q for items */
} xlog_recover_t;
#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr)
/*
* This is the number of entries in the l_buf_cancel_table used during
* recovery.
*/
#define XLOG_BC_TABLE_SIZE 64
#define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2
#endif /* __XFS_LOG_RECOVER_H__ */

View File

@@ -0,0 +1,150 @@
/*
* Copyright (c) 2013 Jie Liu.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_ag.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_trans_space.h"
#include "xfs_inode.h"
#include "xfs_da_btree.h"
#include "xfs_attr_leaf.h"
#include "xfs_bmap_btree.h"
/*
* Calculate the maximum length in bytes that would be required for a local
* attribute value as large attributes out of line are not logged.
*/
STATIC int
xfs_log_calc_max_attrsetm_res(
struct xfs_mount *mp)
{
int size;
int nblks;
size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) -
MAXNAMELEN - 1;
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
nblks += XFS_B_TO_FSB(mp, size);
nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
return M_RES(mp)->tr_attrsetm.tr_logres +
M_RES(mp)->tr_attrsetrt.tr_logres * nblks;
}
/*
* Iterate over the log space reservation table to figure out and return
* the maximum one in terms of the pre-calculated values which were done
* at mount time.
*/
STATIC void
xfs_log_get_max_trans_res(
struct xfs_mount *mp,
struct xfs_trans_res *max_resp)
{
struct xfs_trans_res *resp;
struct xfs_trans_res *end_resp;
int log_space = 0;
int attr_space;
attr_space = xfs_log_calc_max_attrsetm_res(mp);
resp = (struct xfs_trans_res *)M_RES(mp);
end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1);
for (; resp < end_resp; resp++) {
int tmp = resp->tr_logcount > 1 ?
resp->tr_logres * resp->tr_logcount :
resp->tr_logres;
if (log_space < tmp) {
log_space = tmp;
*max_resp = *resp; /* struct copy */
}
}
if (attr_space > log_space) {
*max_resp = M_RES(mp)->tr_attrsetm; /* struct copy */
max_resp->tr_logres = attr_space;
}
}
/*
* Calculate the minimum valid log size for the given superblock configuration.
* Used to calculate the minimum log size at mkfs time, and to determine if
* the log is large enough or not at mount time. Returns the minimum size in
* filesystem block size units.
*/
int
xfs_log_calc_minimum_size(
struct xfs_mount *mp)
{
struct xfs_trans_res tres = {0};
int max_logres;
int min_logblks = 0;
int lsunit = 0;
xfs_log_get_max_trans_res(mp, &tres);
max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres);
if (tres.tr_logcount > 1)
max_logres *= tres.tr_logcount;
if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1)
lsunit = BTOBB(mp->m_sb.sb_logsunit);
/*
* Two factors should be taken into account for calculating the minimum
* log space.
* 1) The fundamental limitation is that no single transaction can be
* larger than half size of the log.
*
* From mkfs.xfs, this is considered by the XFS_MIN_LOG_FACTOR
* define, which is set to 3. That means we can definitely fit
* maximally sized 2 transactions in the log. We'll use this same
* value here.
*
* 2) If the lsunit option is specified, a transaction requires 2 LSU
* for the reservation because there are two log writes that can
* require padding - the transaction data and the commit record which
* are written separately and both can require padding to the LSU.
* Consider that we can have an active CIL reservation holding 2*LSU,
* but the CIL is not over a push threshold, in this case, if we
* don't have enough log space for at one new transaction, which
* includes another 2*LSU in the reservation, we will run into dead
* loop situation in log space grant procedure. i.e.
* xlog_grant_head_wait().
*
* Hence the log size needs to be able to contain two maximally sized
* and padded transactions, which is (2 * (2 * LSU + maxlres)).
*
* Also, the log size should be a multiple of the log stripe unit, round
* it up to lsunit boundary if lsunit is specified.
*/
if (lsunit) {
min_logblks = roundup_64(BTOBB(max_logres), lsunit) +
2 * lsunit;
} else
min_logblks = BTOBB(max_logres) + 2 * BBSIZE;
min_logblks *= XFS_MIN_LOG_FACTOR;
return XFS_BB_TO_FSB(mp, min_logblks);
}

View File

@@ -0,0 +1,161 @@
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_QUOTA_DEFS_H__
#define __XFS_QUOTA_DEFS_H__
/*
* Quota definitions shared between user and kernel source trees.
*/
/*
* Even though users may not have quota limits occupying all 64-bits,
* they may need 64-bit accounting. Hence, 64-bit quota-counters,
* and quota-limits. This is a waste in the common case, but hey ...
*/
typedef __uint64_t xfs_qcnt_t;
typedef __uint16_t xfs_qwarncnt_t;
/*
* flags for q_flags field in the dquot.
*/
#define XFS_DQ_USER 0x0001 /* a user quota */
#define XFS_DQ_PROJ 0x0002 /* project quota */
#define XFS_DQ_GROUP 0x0004 /* a group quota */
#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
#define XFS_DQ_FREEING 0x0010 /* dquot is beeing torn down */
#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
#define XFS_DQ_FLAGS \
{ XFS_DQ_USER, "USER" }, \
{ XFS_DQ_PROJ, "PROJ" }, \
{ XFS_DQ_GROUP, "GROUP" }, \
{ XFS_DQ_DIRTY, "DIRTY" }, \
{ XFS_DQ_FREEING, "FREEING" }
/*
* We have the possibility of all three quota types being active at once, and
* hence free space modification requires modification of all three current
* dquots in a single transaction. For this case we need to have a reservation
* of at least 3 dquots.
*
* However, a chmod operation can change both UID and GID in a single
* transaction, resulting in requiring {old, new} x {uid, gid} dquots to be
* modified. Hence for this case we need to reserve space for at least 4 dquots.
*
* And in the worst case, there's a rename operation that can be modifying up to
* 4 inodes with dquots attached to them. In reality, the only inodes that can
* have their dquots modified are the source and destination directory inodes
* due to directory name creation and removal. That can require space allocation
* and/or freeing on both directory inodes, and hence all three dquots on each
* inode can be modified. And if the directories are world writeable, all the
* dquots can be unique and so 6 dquots can be modified....
*
* And, of course, we also need to take into account the dquot log format item
* used to describe each dquot.
*/
#define XFS_DQUOT_LOGRES(mp) \
((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT)
#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
#define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD)
#define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD)
#define XFS_IS_PQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_PQUOTA_ENFD)
/*
* Incore only flags for quotaoff - these bits get cleared when quota(s)
* are in the process of getting turned off. These flags are in m_qflags but
* never in sb_qflags.
*/
#define XFS_UQUOTA_ACTIVE 0x1000 /* uquotas are being turned off */
#define XFS_GQUOTA_ACTIVE 0x2000 /* gquotas are being turned off */
#define XFS_PQUOTA_ACTIVE 0x4000 /* pquotas are being turned off */
#define XFS_ALL_QUOTA_ACTIVE \
(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
/*
* Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
* quota will be not be switched off as long as that inode lock is held.
*/
#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
XFS_GQUOTA_ACTIVE | \
XFS_PQUOTA_ACTIVE))
#define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
XFS_PQUOTA_ACTIVE))
#define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
#define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
/*
* Flags to tell various functions what to do. Not all of these are meaningful
* to a single function. None of these XFS_QMOPT_* flags are meant to have
* persistent values (ie. their values can and will change between versions)
*/
#define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */
#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */
#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
/*
* flags to xfs_trans_mod_dquot to indicate which field needs to be
* modified.
*/
#define XFS_QMOPT_RES_REGBLKS 0x0010000
#define XFS_QMOPT_RES_RTBLKS 0x0020000
#define XFS_QMOPT_BCOUNT 0x0040000
#define XFS_QMOPT_ICOUNT 0x0080000
#define XFS_QMOPT_RTBCOUNT 0x0100000
#define XFS_QMOPT_DELBCOUNT 0x0200000
#define XFS_QMOPT_DELRTBCOUNT 0x0400000
#define XFS_QMOPT_RES_INOS 0x0800000
/*
* flags for dqalloc.
*/
#define XFS_QMOPT_INHERIT 0x1000000
/*
* flags to xfs_trans_mod_dquot.
*/
#define XFS_TRANS_DQ_RES_BLKS XFS_QMOPT_RES_REGBLKS
#define XFS_TRANS_DQ_RES_RTBLKS XFS_QMOPT_RES_RTBLKS
#define XFS_TRANS_DQ_RES_INOS XFS_QMOPT_RES_INOS
#define XFS_TRANS_DQ_BCOUNT XFS_QMOPT_BCOUNT
#define XFS_TRANS_DQ_DELBCOUNT XFS_QMOPT_DELBCOUNT
#define XFS_TRANS_DQ_ICOUNT XFS_QMOPT_ICOUNT
#define XFS_TRANS_DQ_RTBCOUNT XFS_QMOPT_RTBCOUNT
#define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
#define XFS_QMOPT_QUOTALL \
(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
xfs_dqid_t id, uint type, uint flags, char *str);
extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
#endif /* __XFS_QUOTA_H__ */

View File

@@ -0,0 +1,973 @@
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_buf.h"
#include "xfs_icache.h"
#include "xfs_dinode.h"
#include "xfs_rtalloc.h"
/*
* Realtime allocator bitmap functions shared with userspace.
*/
/*
* Get a buffer for the bitmap or summary file block specified.
* The buffer is returned read and locked.
*/
int
xfs_rtbuf_get(
xfs_mount_t *mp, /* file system mount structure */
xfs_trans_t *tp, /* transaction pointer */
xfs_rtblock_t block, /* block number in bitmap or summary */
int issum, /* is summary not bitmap */
xfs_buf_t **bpp) /* output: buffer for the block */
{
xfs_buf_t *bp; /* block buffer, result */
xfs_inode_t *ip; /* bitmap or summary inode */
xfs_bmbt_irec_t map;
int nmap = 1;
int error; /* error value */
ip = issum ? mp->m_rsumip : mp->m_rbmip;
error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
if (error)
return error;
ASSERT(map.br_startblock != NULLFSBLOCK);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map.br_startblock),
mp->m_bsize, 0, &bp, NULL);
if (error)
return error;
*bpp = bp;
return 0;
}
/*
* Searching backward from start to limit, find the first block whose
* allocated/free state is different from start's.
*/
int
xfs_rtfind_back(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
xfs_rtblock_t start, /* starting block to look at */
xfs_rtblock_t limit, /* last block to look at */
xfs_rtblock_t *rtblock) /* out: start block found */
{
xfs_rtword_t *b; /* current word in buffer */
int bit; /* bit number in the word */
xfs_rtblock_t block; /* bitmap block number */
xfs_buf_t *bp; /* buf for the block */
xfs_rtword_t *bufp; /* starting word in buffer */
int error; /* error value */
xfs_rtblock_t firstbit; /* first useful bit in the word */
xfs_rtblock_t i; /* current bit number rel. to start */
xfs_rtblock_t len; /* length of inspected area */
xfs_rtword_t mask; /* mask of relevant bits for value */
xfs_rtword_t want; /* mask for "good" values */
xfs_rtword_t wdiff; /* difference from wanted value */
int word; /* word number in the buffer */
/*
* Compute and read in starting bitmap block for starting block.
*/
block = XFS_BITTOBLOCK(mp, start);
error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
if (error) {
return error;
}
bufp = bp->b_addr;
/*
* Get the first word's index & point to it.
*/
word = XFS_BITTOWORD(mp, start);
b = &bufp[word];
bit = (int)(start & (XFS_NBWORD - 1));
len = start - limit + 1;
/*
* Compute match value, based on the bit at start: if 1 (free)
* then all-ones, else all-zeroes.
*/
want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
/*
* If the starting position is not word-aligned, deal with the
* partial word.
*/
if (bit < XFS_NBWORD - 1) {
/*
* Calculate first (leftmost) bit number to look at,
* and mask for all the relevant bits in this word.
*/
firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0);
mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) <<
firstbit;
/*
* Calculate the difference between the value there
* and what we're looking for.
*/
if ((wdiff = (*b ^ want) & mask)) {
/*
* Different. Mark where we are and return.
*/
xfs_trans_brelse(tp, bp);
i = bit - XFS_RTHIBIT(wdiff);
*rtblock = start - i + 1;
return 0;
}
i = bit - firstbit + 1;
/*
* Go on to previous block if that's where the previous word is
* and we need the previous word.
*/
if (--word == -1 && i < len) {
/*
* If done with this block, get the previous one.
*/
xfs_trans_brelse(tp, bp);
error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
if (error) {
return error;
}
bufp = bp->b_addr;
word = XFS_BLOCKWMASK(mp);
b = &bufp[word];
} else {
/*
* Go on to the previous word in the buffer.
*/
b--;
}
} else {
/*
* Starting on a word boundary, no partial word.
*/
i = 0;
}
/*
* Loop over whole words in buffers. When we use up one buffer
* we move on to the previous one.
*/
while (len - i >= XFS_NBWORD) {
/*
* Compute difference between actual and desired value.
*/
if ((wdiff = *b ^ want)) {
/*
* Different, mark where we are and return.
*/
xfs_trans_brelse(tp, bp);
i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
*rtblock = start - i + 1;
return 0;
}
i += XFS_NBWORD;
/*
* Go on to previous block if that's where the previous word is
* and we need the previous word.
*/
if (--word == -1 && i < len) {
/*
* If done with this block, get the previous one.
*/
xfs_trans_brelse(tp, bp);
error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
if (error) {
return error;
}
bufp = bp->b_addr;
word = XFS_BLOCKWMASK(mp);
b = &bufp[word];
} else {
/*
* Go on to the previous word in the buffer.
*/
b--;
}
}
/*
* If not ending on a word boundary, deal with the last
* (partial) word.
*/
if (len - i) {
/*
* Calculate first (leftmost) bit number to look at,
* and mask for all the relevant bits in this word.
*/
firstbit = XFS_NBWORD - (len - i);
mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit;
/*
* Compute difference between actual and desired value.
*/
if ((wdiff = (*b ^ want) & mask)) {
/*
* Different, mark where we are and return.
*/
xfs_trans_brelse(tp, bp);
i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
*rtblock = start - i + 1;
return 0;
} else
i = len;
}
/*
* No match, return that we scanned the whole area.
*/
xfs_trans_brelse(tp, bp);
*rtblock = start - i + 1;
return 0;
}
/*
* Searching forward from start to limit, find the first block whose
* allocated/free state is different from start's.
*/
int
xfs_rtfind_forw(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
xfs_rtblock_t start, /* starting block to look at */
xfs_rtblock_t limit, /* last block to look at */
xfs_rtblock_t *rtblock) /* out: start block found */
{
xfs_rtword_t *b; /* current word in buffer */
int bit; /* bit number in the word */
xfs_rtblock_t block; /* bitmap block number */
xfs_buf_t *bp; /* buf for the block */
xfs_rtword_t *bufp; /* starting word in buffer */
int error; /* error value */
xfs_rtblock_t i; /* current bit number rel. to start */
xfs_rtblock_t lastbit; /* last useful bit in the word */
xfs_rtblock_t len; /* length of inspected area */
xfs_rtword_t mask; /* mask of relevant bits for value */
xfs_rtword_t want; /* mask for "good" values */
xfs_rtword_t wdiff; /* difference from wanted value */
int word; /* word number in the buffer */
/*
* Compute and read in starting bitmap block for starting block.
*/
block = XFS_BITTOBLOCK(mp, start);
error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
if (error) {
return error;
}
bufp = bp->b_addr;
/*
* Get the first word's index & point to it.
*/
word = XFS_BITTOWORD(mp, start);
b = &bufp[word];
bit = (int)(start & (XFS_NBWORD - 1));
len = limit - start + 1;
/*
* Compute match value, based on the bit at start: if 1 (free)
* then all-ones, else all-zeroes.
*/
want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
/*
* If the starting position is not word-aligned, deal with the
* partial word.
*/
if (bit) {
/*
* Calculate last (rightmost) bit number to look at,
* and mask for all the relevant bits in this word.
*/
lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
/*
* Calculate the difference between the value there
* and what we're looking for.
*/
if ((wdiff = (*b ^ want) & mask)) {
/*
* Different. Mark where we are and return.
*/
xfs_trans_brelse(tp, bp);
i = XFS_RTLOBIT(wdiff) - bit;
*rtblock = start + i - 1;
return 0;
}
i = lastbit - bit;
/*
* Go on to next block if that's where the next word is
* and we need the next word.
*/
if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
/*
* If done with this block, get the previous one.
*/
xfs_trans_brelse(tp, bp);
error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
if (error) {
return error;
}
b = bufp = bp->b_addr;
word = 0;
} else {
/*
* Go on to the previous word in the buffer.
*/
b++;
}
} else {
/*
* Starting on a word boundary, no partial word.
*/
i = 0;
}
/*
* Loop over whole words in buffers. When we use up one buffer
* we move on to the next one.
*/
while (len - i >= XFS_NBWORD) {
/*
* Compute difference between actual and desired value.
*/
if ((wdiff = *b ^ want)) {
/*
* Different, mark where we are and return.
*/
xfs_trans_brelse(tp, bp);
i += XFS_RTLOBIT(wdiff);
*rtblock = start + i - 1;
return 0;
}
i += XFS_NBWORD;
/*
* Go on to next block if that's where the next word is
* and we need the next word.
*/
if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
/*
* If done with this block, get the next one.
*/
xfs_trans_brelse(tp, bp);
error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
if (error) {
return error;
}
b = bufp = bp->b_addr;
word = 0;
} else {
/*
* Go on to the next word in the buffer.
*/
b++;
}
}
/*
* If not ending on a word boundary, deal with the last
* (partial) word.
*/
if ((lastbit = len - i)) {
/*
* Calculate mask for all the relevant bits in this word.
*/
mask = ((xfs_rtword_t)1 << lastbit) - 1;
/*
* Compute difference between actual and desired value.
*/
if ((wdiff = (*b ^ want) & mask)) {
/*
* Different, mark where we are and return.
*/
xfs_trans_brelse(tp, bp);
i += XFS_RTLOBIT(wdiff);
*rtblock = start + i - 1;
return 0;
} else
i = len;
}
/*
* No match, return that we scanned the whole area.
*/
xfs_trans_brelse(tp, bp);
*rtblock = start + i - 1;
return 0;
}
/*
* Read and modify the summary information for a given extent size,
* bitmap block combination.
* Keeps track of a current summary block, so we don't keep reading
* it from the buffer cache.
*/
int
xfs_rtmodify_summary(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
int log, /* log2 of extent size */
xfs_rtblock_t bbno, /* bitmap block number */
int delta, /* change to make to summary info */
xfs_buf_t **rbpp, /* in/out: summary block buffer */
xfs_fsblock_t *rsb) /* in/out: summary block number */
{
xfs_buf_t *bp; /* buffer for the summary block */
int error; /* error value */
xfs_fsblock_t sb; /* summary fsblock */
int so; /* index into the summary file */
xfs_suminfo_t *sp; /* pointer to returned data */
/*
* Compute entry number in the summary file.
*/
so = XFS_SUMOFFS(mp, log, bbno);
/*
* Compute the block number in the summary file.
*/
sb = XFS_SUMOFFSTOBLOCK(mp, so);
/*
* If we have an old buffer, and the block number matches, use that.
*/
if (rbpp && *rbpp && *rsb == sb)
bp = *rbpp;
/*
* Otherwise we have to get the buffer.
*/
else {
/*
* If there was an old one, get rid of it first.
*/
if (rbpp && *rbpp)
xfs_trans_brelse(tp, *rbpp);
error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
if (error) {
return error;
}
/*
* Remember this buffer and block for the next call.
*/
if (rbpp) {
*rbpp = bp;
*rsb = sb;
}
}
/*
* Point to the summary information, modify and log it.
*/
sp = XFS_SUMPTR(mp, bp, so);
*sp += delta;
xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
(uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
return 0;
}
/*
* Set the given range of bitmap bits to the given value.
* Do whatever I/O and logging is required.
*/
int
xfs_rtmodify_range(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
xfs_rtblock_t start, /* starting block to modify */
xfs_extlen_t len, /* length of extent to modify */
int val) /* 1 for free, 0 for allocated */
{
xfs_rtword_t *b; /* current word in buffer */
int bit; /* bit number in the word */
xfs_rtblock_t block; /* bitmap block number */
xfs_buf_t *bp; /* buf for the block */
xfs_rtword_t *bufp; /* starting word in buffer */
int error; /* error value */
xfs_rtword_t *first; /* first used word in the buffer */
int i; /* current bit number rel. to start */
int lastbit; /* last useful bit in word */
xfs_rtword_t mask; /* mask o frelevant bits for value */
int word; /* word number in the buffer */
/*
* Compute starting bitmap block number.
*/
block = XFS_BITTOBLOCK(mp, start);
/*
* Read the bitmap block, and point to its data.
*/
error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
if (error) {
return error;
}
bufp = bp->b_addr;
/*
* Compute the starting word's address, and starting bit.
*/
word = XFS_BITTOWORD(mp, start);
first = b = &bufp[word];
bit = (int)(start & (XFS_NBWORD - 1));
/*
* 0 (allocated) => all zeroes; 1 (free) => all ones.
*/
val = -val;
/*
* If not starting on a word boundary, deal with the first
* (partial) word.
*/
if (bit) {
/*
* Compute first bit not changed and mask of relevant bits.
*/
lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
/*
* Set/clear the active bits.
*/
if (val)
*b |= mask;
else
*b &= ~mask;
i = lastbit - bit;
/*
* Go on to the next block if that's where the next word is
* and we need the next word.
*/
if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
/*
* Log the changed part of this block.
* Get the next one.
*/
xfs_trans_log_buf(tp, bp,
(uint)((char *)first - (char *)bufp),
(uint)((char *)b - (char *)bufp));
error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
if (error) {
return error;
}
first = b = bufp = bp->b_addr;
word = 0;
} else {
/*
* Go on to the next word in the buffer
*/
b++;
}
} else {
/*
* Starting on a word boundary, no partial word.
*/
i = 0;
}
/*
* Loop over whole words in buffers. When we use up one buffer
* we move on to the next one.
*/
while (len - i >= XFS_NBWORD) {
/*
* Set the word value correctly.
*/
*b = val;
i += XFS_NBWORD;
/*
* Go on to the next block if that's where the next word is
* and we need the next word.
*/
if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
/*
* Log the changed part of this block.
* Get the next one.
*/
xfs_trans_log_buf(tp, bp,
(uint)((char *)first - (char *)bufp),
(uint)((char *)b - (char *)bufp));
error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
if (error) {
return error;
}
first = b = bufp = bp->b_addr;
word = 0;
} else {
/*
* Go on to the next word in the buffer
*/
b++;
}
}
/*
* If not ending on a word boundary, deal with the last
* (partial) word.
*/
if ((lastbit = len - i)) {
/*
* Compute a mask of relevant bits.
*/
bit = 0;
mask = ((xfs_rtword_t)1 << lastbit) - 1;
/*
* Set/clear the active bits.
*/
if (val)
*b |= mask;
else
*b &= ~mask;
b++;
}
/*
* Log any remaining changed bytes.
*/
if (b > first)
xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp),
(uint)((char *)b - (char *)bufp - 1));
return 0;
}
/*
* Mark an extent specified by start and len freed.
* Updates all the summary information as well as the bitmap.
*/
int
xfs_rtfree_range(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
xfs_rtblock_t start, /* starting block to free */
xfs_extlen_t len, /* length to free */
xfs_buf_t **rbpp, /* in/out: summary block buffer */
xfs_fsblock_t *rsb) /* in/out: summary block number */
{
xfs_rtblock_t end; /* end of the freed extent */
int error; /* error value */
xfs_rtblock_t postblock; /* first block freed > end */
xfs_rtblock_t preblock; /* first block freed < start */
end = start + len - 1;
/*
* Modify the bitmap to mark this extent freed.
*/
error = xfs_rtmodify_range(mp, tp, start, len, 1);
if (error) {
return error;
}
/*
* Assume we're freeing out of the middle of an allocated extent.
* We need to find the beginning and end of the extent so we can
* properly update the summary.
*/
error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
if (error) {
return error;
}
/*
* Find the next allocated block (end of allocated extent).
*/
error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
&postblock);
if (error)
return error;
/*
* If there are blocks not being freed at the front of the
* old extent, add summary data for them to be allocated.
*/
if (preblock < start) {
error = xfs_rtmodify_summary(mp, tp,
XFS_RTBLOCKLOG(start - preblock),
XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
if (error) {
return error;
}
}
/*
* If there are blocks not being freed at the end of the
* old extent, add summary data for them to be allocated.
*/
if (postblock > end) {
error = xfs_rtmodify_summary(mp, tp,
XFS_RTBLOCKLOG(postblock - end),
XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb);
if (error) {
return error;
}
}
/*
* Increment the summary information corresponding to the entire
* (new) free extent.
*/
error = xfs_rtmodify_summary(mp, tp,
XFS_RTBLOCKLOG(postblock + 1 - preblock),
XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
return error;
}
/*
* Check that the given range is either all allocated (val = 0) or
* all free (val = 1).
*/
int
xfs_rtcheck_range(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
xfs_rtblock_t start, /* starting block number of extent */
xfs_extlen_t len, /* length of extent */
int val, /* 1 for free, 0 for allocated */
xfs_rtblock_t *new, /* out: first block not matching */
int *stat) /* out: 1 for matches, 0 for not */
{
xfs_rtword_t *b; /* current word in buffer */
int bit; /* bit number in the word */
xfs_rtblock_t block; /* bitmap block number */
xfs_buf_t *bp; /* buf for the block */
xfs_rtword_t *bufp; /* starting word in buffer */
int error; /* error value */
xfs_rtblock_t i; /* current bit number rel. to start */
xfs_rtblock_t lastbit; /* last useful bit in word */
xfs_rtword_t mask; /* mask of relevant bits for value */
xfs_rtword_t wdiff; /* difference from wanted value */
int word; /* word number in the buffer */
/*
* Compute starting bitmap block number
*/
block = XFS_BITTOBLOCK(mp, start);
/*
* Read the bitmap block.
*/
error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
if (error) {
return error;
}
bufp = bp->b_addr;
/*
* Compute the starting word's address, and starting bit.
*/
word = XFS_BITTOWORD(mp, start);
b = &bufp[word];
bit = (int)(start & (XFS_NBWORD - 1));
/*
* 0 (allocated) => all zero's; 1 (free) => all one's.
*/
val = -val;
/*
* If not starting on a word boundary, deal with the first
* (partial) word.
*/
if (bit) {
/*
* Compute first bit not examined.
*/
lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
/*
* Mask of relevant bits.
*/
mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
/*
* Compute difference between actual and desired value.
*/
if ((wdiff = (*b ^ val) & mask)) {
/*
* Different, compute first wrong bit and return.
*/
xfs_trans_brelse(tp, bp);
i = XFS_RTLOBIT(wdiff) - bit;
*new = start + i;
*stat = 0;
return 0;
}
i = lastbit - bit;
/*
* Go on to next block if that's where the next word is
* and we need the next word.
*/
if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
/*
* If done with this block, get the next one.
*/
xfs_trans_brelse(tp, bp);
error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
if (error) {
return error;
}
b = bufp = bp->b_addr;
word = 0;
} else {
/*
* Go on to the next word in the buffer.
*/
b++;
}
} else {
/*
* Starting on a word boundary, no partial word.
*/
i = 0;
}
/*
* Loop over whole words in buffers. When we use up one buffer
* we move on to the next one.
*/
while (len - i >= XFS_NBWORD) {
/*
* Compute difference between actual and desired value.
*/
if ((wdiff = *b ^ val)) {
/*
* Different, compute first wrong bit and return.
*/
xfs_trans_brelse(tp, bp);
i += XFS_RTLOBIT(wdiff);
*new = start + i;
*stat = 0;
return 0;
}
i += XFS_NBWORD;
/*
* Go on to next block if that's where the next word is
* and we need the next word.
*/
if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
/*
* If done with this block, get the next one.
*/
xfs_trans_brelse(tp, bp);
error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
if (error) {
return error;
}
b = bufp = bp->b_addr;
word = 0;
} else {
/*
* Go on to the next word in the buffer.
*/
b++;
}
}
/*
* If not ending on a word boundary, deal with the last
* (partial) word.
*/
if ((lastbit = len - i)) {
/*
* Mask of relevant bits.
*/
mask = ((xfs_rtword_t)1 << lastbit) - 1;
/*
* Compute difference between actual and desired value.
*/
if ((wdiff = (*b ^ val) & mask)) {
/*
* Different, compute first wrong bit and return.
*/
xfs_trans_brelse(tp, bp);
i += XFS_RTLOBIT(wdiff);
*new = start + i;
*stat = 0;
return 0;
} else
i = len;
}
/*
* Successful, return.
*/
xfs_trans_brelse(tp, bp);
*new = start + i;
*stat = 1;
return 0;
}
#ifdef DEBUG
/*
* Check that the given extent (block range) is allocated already.
*/
STATIC int /* error */
xfs_rtcheck_alloc_range(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
xfs_rtblock_t bno, /* starting block number of extent */
xfs_extlen_t len) /* length of extent */
{
xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */
int stat;
int error;
error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat);
if (error)
return error;
ASSERT(stat);
return 0;
}
#else
#define xfs_rtcheck_alloc_range(m,t,b,l) (0)
#endif
/*
* Free an extent in the realtime subvolume. Length is expressed in
* realtime extents, as is the block number.
*/
int /* error */
xfs_rtfree_extent(
xfs_trans_t *tp, /* transaction pointer */
xfs_rtblock_t bno, /* starting block number to free */
xfs_extlen_t len) /* length of extent freed */
{
int error; /* error value */
xfs_mount_t *mp; /* file system mount structure */
xfs_fsblock_t sb; /* summary file block number */
xfs_buf_t *sumbp = NULL; /* summary file block buffer */
mp = tp->t_mountp;
ASSERT(mp->m_rbmip->i_itemp != NULL);
ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
error = xfs_rtcheck_alloc_range(mp, tp, bno, len);
if (error)
return error;
/*
* Free the range of realtime blocks.
*/
error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb);
if (error) {
return error;
}
/*
* Mark more blocks free in the superblock.
*/
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
/*
* If we've now freed all the blocks, reset the file sequence
* number to 0.
*/
if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
mp->m_sb.sb_rextents) {
if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
*(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
}
return 0;
}

836
fs/xfs/libxfs/xfs_sb.c Normal file
View File

@@ -0,0 +1,836 @@
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_dinode.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
*/
static const struct {
short offset;
short type; /* 0 = integer
* 1 = binary / string (no translation)
*/
} xfs_sb_info[] = {
{ offsetof(xfs_sb_t, sb_magicnum), 0 },
{ offsetof(xfs_sb_t, sb_blocksize), 0 },
{ offsetof(xfs_sb_t, sb_dblocks), 0 },
{ offsetof(xfs_sb_t, sb_rblocks), 0 },
{ offsetof(xfs_sb_t, sb_rextents), 0 },
{ offsetof(xfs_sb_t, sb_uuid), 1 },
{ offsetof(xfs_sb_t, sb_logstart), 0 },
{ offsetof(xfs_sb_t, sb_rootino), 0 },
{ offsetof(xfs_sb_t, sb_rbmino), 0 },
{ offsetof(xfs_sb_t, sb_rsumino), 0 },
{ offsetof(xfs_sb_t, sb_rextsize), 0 },
{ offsetof(xfs_sb_t, sb_agblocks), 0 },
{ offsetof(xfs_sb_t, sb_agcount), 0 },
{ offsetof(xfs_sb_t, sb_rbmblocks), 0 },
{ offsetof(xfs_sb_t, sb_logblocks), 0 },
{ offsetof(xfs_sb_t, sb_versionnum), 0 },
{ offsetof(xfs_sb_t, sb_sectsize), 0 },
{ offsetof(xfs_sb_t, sb_inodesize), 0 },
{ offsetof(xfs_sb_t, sb_inopblock), 0 },
{ offsetof(xfs_sb_t, sb_fname[0]), 1 },
{ offsetof(xfs_sb_t, sb_blocklog), 0 },
{ offsetof(xfs_sb_t, sb_sectlog), 0 },
{ offsetof(xfs_sb_t, sb_inodelog), 0 },
{ offsetof(xfs_sb_t, sb_inopblog), 0 },
{ offsetof(xfs_sb_t, sb_agblklog), 0 },
{ offsetof(xfs_sb_t, sb_rextslog), 0 },
{ offsetof(xfs_sb_t, sb_inprogress), 0 },
{ offsetof(xfs_sb_t, sb_imax_pct), 0 },
{ offsetof(xfs_sb_t, sb_icount), 0 },
{ offsetof(xfs_sb_t, sb_ifree), 0 },
{ offsetof(xfs_sb_t, sb_fdblocks), 0 },
{ offsetof(xfs_sb_t, sb_frextents), 0 },
{ offsetof(xfs_sb_t, sb_uquotino), 0 },
{ offsetof(xfs_sb_t, sb_gquotino), 0 },
{ offsetof(xfs_sb_t, sb_qflags), 0 },
{ offsetof(xfs_sb_t, sb_flags), 0 },
{ offsetof(xfs_sb_t, sb_shared_vn), 0 },
{ offsetof(xfs_sb_t, sb_inoalignmt), 0 },
{ offsetof(xfs_sb_t, sb_unit), 0 },
{ offsetof(xfs_sb_t, sb_width), 0 },
{ offsetof(xfs_sb_t, sb_dirblklog), 0 },
{ offsetof(xfs_sb_t, sb_logsectlog), 0 },
{ offsetof(xfs_sb_t, sb_logsectsize), 0 },
{ offsetof(xfs_sb_t, sb_logsunit), 0 },
{ offsetof(xfs_sb_t, sb_features2), 0 },
{ offsetof(xfs_sb_t, sb_bad_features2), 0 },
{ offsetof(xfs_sb_t, sb_features_compat), 0 },
{ offsetof(xfs_sb_t, sb_features_ro_compat), 0 },
{ offsetof(xfs_sb_t, sb_features_incompat), 0 },
{ offsetof(xfs_sb_t, sb_features_log_incompat), 0 },
{ offsetof(xfs_sb_t, sb_crc), 0 },
{ offsetof(xfs_sb_t, sb_pad), 0 },
{ offsetof(xfs_sb_t, sb_pquotino), 0 },
{ offsetof(xfs_sb_t, sb_lsn), 0 },
{ sizeof(xfs_sb_t), 0 }
};
/*
* Reference counting access wrappers to the perag structures.
* Because we never free per-ag structures, the only thing we
* have to protect against changes is the tree structure itself.
*/
struct xfs_perag *
xfs_perag_get(
struct xfs_mount *mp,
xfs_agnumber_t agno)
{
struct xfs_perag *pag;
int ref = 0;
rcu_read_lock();
pag = radix_tree_lookup(&mp->m_perag_tree, agno);
if (pag) {
ASSERT(atomic_read(&pag->pag_ref) >= 0);
ref = atomic_inc_return(&pag->pag_ref);
}
rcu_read_unlock();
trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
return pag;
}
/*
* search from @first to find the next perag with the given tag set.
*/
struct xfs_perag *
xfs_perag_get_tag(
struct xfs_mount *mp,
xfs_agnumber_t first,
int tag)
{
struct xfs_perag *pag;
int found;
int ref;
rcu_read_lock();
found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
(void **)&pag, first, 1, tag);
if (found <= 0) {
rcu_read_unlock();
return NULL;
}
ref = atomic_inc_return(&pag->pag_ref);
rcu_read_unlock();
trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
return pag;
}
void
xfs_perag_put(
struct xfs_perag *pag)
{
int ref;
ASSERT(atomic_read(&pag->pag_ref) > 0);
ref = atomic_dec_return(&pag->pag_ref);
trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
}
/*
* Check the validity of the SB found.
*/
STATIC int
xfs_mount_validate_sb(
xfs_mount_t *mp,
xfs_sb_t *sbp,
bool check_inprogress,
bool check_version)
{
/*
* If the log device and data device have the
* same device number, the log is internal.
* Consequently, the sb_logstart should be non-zero. If
* we have a zero sb_logstart in this case, we may be trying to mount
* a volume filesystem in a non-volume manner.
*/
if (sbp->sb_magicnum != XFS_SB_MAGIC) {
xfs_warn(mp, "bad magic number");
return -EWRONGFS;
}
if (!xfs_sb_good_version(sbp)) {
xfs_warn(mp, "bad version");
return -EWRONGFS;
}
/*
* Version 5 superblock feature mask validation. Reject combinations the
* kernel cannot support up front before checking anything else. For
* write validation, we don't need to check feature masks.
*/
if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
if (xfs_sb_has_compat_feature(sbp,
XFS_SB_FEAT_COMPAT_UNKNOWN)) {
xfs_warn(mp,
"Superblock has unknown compatible features (0x%x) enabled.\n"
"Using a more recent kernel is recommended.",
(sbp->sb_features_compat &
XFS_SB_FEAT_COMPAT_UNKNOWN));
}
if (xfs_sb_has_ro_compat_feature(sbp,
XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
xfs_alert(mp,
"Superblock has unknown read-only compatible features (0x%x) enabled.",
(sbp->sb_features_ro_compat &
XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
xfs_warn(mp,
"Attempted to mount read-only compatible filesystem read-write.\n"
"Filesystem can only be safely mounted read only.");
return -EINVAL;
}
}
if (xfs_sb_has_incompat_feature(sbp,
XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
xfs_warn(mp,
"Superblock has unknown incompatible features (0x%x) enabled.\n"
"Filesystem can not be safely mounted by this kernel.",
(sbp->sb_features_incompat &
XFS_SB_FEAT_INCOMPAT_UNKNOWN));
return -EINVAL;
}
}
if (xfs_sb_version_has_pquotino(sbp)) {
if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {
xfs_notice(mp,
"Version 5 of Super block has XFS_OQUOTA bits.");
return -EFSCORRUPTED;
}
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
xfs_notice(mp,
"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.");
return -EFSCORRUPTED;
}
if (unlikely(
sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
xfs_warn(mp,
"filesystem is marked as having an external log; "
"specify logdev on the mount command line.");
return -EINVAL;
}
if (unlikely(
sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
xfs_warn(mp,
"filesystem is marked as having an internal log; "
"do not specify logdev on the mount command line.");
return -EINVAL;
}
/*
* More sanity checking. Most of these were stolen directly from
* xfs_repair.
*/
if (unlikely(
sbp->sb_agcount <= 0 ||
sbp->sb_sectsize < XFS_MIN_SECTORSIZE ||
sbp->sb_sectsize > XFS_MAX_SECTORSIZE ||
sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG ||
sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG ||
sbp->sb_sectsize != (1 << sbp->sb_sectlog) ||
sbp->sb_blocksize < XFS_MIN_BLOCKSIZE ||
sbp->sb_blocksize > XFS_MAX_BLOCKSIZE ||
sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG ||
sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
sbp->sb_blocksize != (1 << sbp->sb_blocklog) ||
sbp->sb_inodesize < XFS_DINODE_MIN_SIZE ||
sbp->sb_inodesize > XFS_DINODE_MAX_SIZE ||
sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
(sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) ||
sbp->sb_dblocks == 0 ||
sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) ||
sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) ||
sbp->sb_shared_vn != 0)) {
xfs_notice(mp, "SB sanity check failed");
return -EFSCORRUPTED;
}
/*
* Until this is fixed only page-sized or smaller data blocks work.
*/
if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
xfs_warn(mp,
"File system with blocksize %d bytes. "
"Only pagesize (%ld) or less will currently work.",
sbp->sb_blocksize, PAGE_SIZE);
return -ENOSYS;
}
/*
* Currently only very few inode sizes are supported.
*/
switch (sbp->sb_inodesize) {
case 256:
case 512:
case 1024:
case 2048:
break;
default:
xfs_warn(mp, "inode size of %d bytes not supported",
sbp->sb_inodesize);
return -ENOSYS;
}
if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
xfs_warn(mp,
"file system too large to be mounted on this system.");
return -EFBIG;
}
if (check_inprogress && sbp->sb_inprogress) {
xfs_warn(mp, "Offline file system operation in progress!");
return -EFSCORRUPTED;
}
return 0;
}
void
xfs_sb_quota_from_disk(struct xfs_sb *sbp)
{
/*
* older mkfs doesn't initialize quota inodes to NULLFSINO. This
* leads to in-core values having two different values for a quota
* inode to be invalid: 0 and NULLFSINO. Change it to a single value
* NULLFSINO.
*
* Note that this change affect only the in-core values. These
* values are not written back to disk unless any quota information
* is written to the disk. Even in that case, sb_pquotino field is
* not written to disk unless the superblock supports pquotino.
*/
if (sbp->sb_uquotino == 0)
sbp->sb_uquotino = NULLFSINO;
if (sbp->sb_gquotino == 0)
sbp->sb_gquotino = NULLFSINO;
if (sbp->sb_pquotino == 0)
sbp->sb_pquotino = NULLFSINO;
/*
* We need to do these manipilations only if we are working
* with an older version of on-disk superblock.
*/
if (xfs_sb_version_has_pquotino(sbp))
return;
if (sbp->sb_qflags & XFS_OQUOTA_ENFD)
sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ?
XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD;
if (sbp->sb_qflags & XFS_OQUOTA_CHKD)
sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ?
XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD;
sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
if (sbp->sb_qflags & XFS_PQUOTA_ACCT) {
/*
* In older version of superblock, on-disk superblock only
* has sb_gquotino, and in-core superblock has both sb_gquotino
* and sb_pquotino. But, only one of them is supported at any
* point of time. So, if PQUOTA is set in disk superblock,
* copy over sb_gquotino to sb_pquotino.
*/
sbp->sb_pquotino = sbp->sb_gquotino;
sbp->sb_gquotino = NULLFSINO;
}
}
void
xfs_sb_from_disk(
struct xfs_sb *to,
xfs_dsb_t *from)
{
to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
to->sb_rblocks = be64_to_cpu(from->sb_rblocks);
to->sb_rextents = be64_to_cpu(from->sb_rextents);
memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid));
to->sb_logstart = be64_to_cpu(from->sb_logstart);
to->sb_rootino = be64_to_cpu(from->sb_rootino);
to->sb_rbmino = be64_to_cpu(from->sb_rbmino);
to->sb_rsumino = be64_to_cpu(from->sb_rsumino);
to->sb_rextsize = be32_to_cpu(from->sb_rextsize);
to->sb_agblocks = be32_to_cpu(from->sb_agblocks);
to->sb_agcount = be32_to_cpu(from->sb_agcount);
to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks);
to->sb_logblocks = be32_to_cpu(from->sb_logblocks);
to->sb_versionnum = be16_to_cpu(from->sb_versionnum);
to->sb_sectsize = be16_to_cpu(from->sb_sectsize);
to->sb_inodesize = be16_to_cpu(from->sb_inodesize);
to->sb_inopblock = be16_to_cpu(from->sb_inopblock);
memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname));
to->sb_blocklog = from->sb_blocklog;
to->sb_sectlog = from->sb_sectlog;
to->sb_inodelog = from->sb_inodelog;
to->sb_inopblog = from->sb_inopblog;
to->sb_agblklog = from->sb_agblklog;
to->sb_rextslog = from->sb_rextslog;
to->sb_inprogress = from->sb_inprogress;
to->sb_imax_pct = from->sb_imax_pct;
to->sb_icount = be64_to_cpu(from->sb_icount);
to->sb_ifree = be64_to_cpu(from->sb_ifree);
to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks);
to->sb_frextents = be64_to_cpu(from->sb_frextents);
to->sb_uquotino = be64_to_cpu(from->sb_uquotino);
to->sb_gquotino = be64_to_cpu(from->sb_gquotino);
to->sb_qflags = be16_to_cpu(from->sb_qflags);
to->sb_flags = from->sb_flags;
to->sb_shared_vn = from->sb_shared_vn;
to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt);
to->sb_unit = be32_to_cpu(from->sb_unit);
to->sb_width = be32_to_cpu(from->sb_width);
to->sb_dirblklog = from->sb_dirblklog;
to->sb_logsectlog = from->sb_logsectlog;
to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
to->sb_features2 = be32_to_cpu(from->sb_features2);
to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
to->sb_features_compat = be32_to_cpu(from->sb_features_compat);
to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat);
to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
to->sb_features_log_incompat =
be32_to_cpu(from->sb_features_log_incompat);
to->sb_pad = 0;
to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
to->sb_lsn = be64_to_cpu(from->sb_lsn);
}
static inline void
xfs_sb_quota_to_disk(
xfs_dsb_t *to,
xfs_sb_t *from,
__int64_t *fields)
{
__uint16_t qflags = from->sb_qflags;
/*
* We need to do these manipilations only if we are working
* with an older version of on-disk superblock.
*/
if (xfs_sb_version_has_pquotino(from))
return;
if (*fields & XFS_SB_QFLAGS) {
/*
* The in-core version of sb_qflags do not have
* XFS_OQUOTA_* flags, whereas the on-disk version
* does. So, convert incore XFS_{PG}QUOTA_* flags
* to on-disk XFS_OQUOTA_* flags.
*/
qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);
if (from->sb_qflags &
(XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD))
qflags |= XFS_OQUOTA_ENFD;
if (from->sb_qflags &
(XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))
qflags |= XFS_OQUOTA_CHKD;
to->sb_qflags = cpu_to_be16(qflags);
*fields &= ~XFS_SB_QFLAGS;
}
/*
* GQUOTINO and PQUOTINO cannot be used together in versions of
* superblock that do not have pquotino. from->sb_flags tells us which
* quota is active and should be copied to disk. If neither are active,
* make sure we write NULLFSINO to the sb_gquotino field as a quota
* inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
* bit is set.
*
* Note that we don't need to handle the sb_uquotino or sb_pquotino here
* as they do not require any translation. Hence the main sb field loop
* will write them appropriately from the in-core superblock.
*/
if ((*fields & XFS_SB_GQUOTINO) &&
(from->sb_qflags & XFS_GQUOTA_ACCT))
to->sb_gquotino = cpu_to_be64(from->sb_gquotino);
else if ((*fields & XFS_SB_PQUOTINO) &&
(from->sb_qflags & XFS_PQUOTA_ACCT))
to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
else {
/*
* We can't rely on just the fields being logged to tell us
* that it is safe to write NULLFSINO - we should only do that
* if quotas are not actually enabled. Hence only write
* NULLFSINO if both in-core quota inodes are NULL.
*/
if (from->sb_gquotino == NULLFSINO &&
from->sb_pquotino == NULLFSINO)
to->sb_gquotino = cpu_to_be64(NULLFSINO);
}
*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
}
/*
* Copy in core superblock to ondisk one.
*
* The fields argument is mask of superblock fields to copy.
*/
void
xfs_sb_to_disk(
xfs_dsb_t *to,
xfs_sb_t *from,
__int64_t fields)
{
xfs_caddr_t to_ptr = (xfs_caddr_t)to;
xfs_caddr_t from_ptr = (xfs_caddr_t)from;
xfs_sb_field_t f;
int first;
int size;
ASSERT(fields);
if (!fields)
return;
xfs_sb_quota_to_disk(to, from, &fields);
while (fields) {
f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
first = xfs_sb_info[f].offset;
size = xfs_sb_info[f + 1].offset - first;
ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);
if (size == 1 || xfs_sb_info[f].type == 1) {
memcpy(to_ptr + first, from_ptr + first, size);
} else {
switch (size) {
case 2:
*(__be16 *)(to_ptr + first) =
cpu_to_be16(*(__u16 *)(from_ptr + first));
break;
case 4:
*(__be32 *)(to_ptr + first) =
cpu_to_be32(*(__u32 *)(from_ptr + first));
break;
case 8:
*(__be64 *)(to_ptr + first) =
cpu_to_be64(*(__u64 *)(from_ptr + first));
break;
default:
ASSERT(0);
}
}
fields &= ~(1LL << f);
}
}
static int
xfs_sb_verify(
struct xfs_buf *bp,
bool check_version)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_sb sb;
xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
/*
* Only check the in progress field for the primary superblock as
* mkfs.xfs doesn't clear it from secondary superblocks.
*/
return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
check_version);
}
/*
* If the superblock has the CRC feature bit set or the CRC field is non-null,
* check that the CRC is valid. We check the CRC field is non-null because a
* single bit error could clear the feature bit and unused parts of the
* superblock are supposed to be zero. Hence a non-null crc field indicates that
* we've potentially lost a feature bit and we should check it anyway.
*
* However, past bugs (i.e. in growfs) left non-zeroed regions beyond the
* last field in V4 secondary superblocks. So for secondary superblocks,
* we are more forgiving, and ignore CRC failures if the primary doesn't
* indicate that the fs version is V5.
*/
static void
xfs_sb_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
int error;
/*
* open code the version check to avoid needing to convert the entire
* superblock from disk order just to check the version number
*/
if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
(((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) ==
XFS_SB_VERSION_5) ||
dsb->sb_crc != 0)) {
if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
/* Only fail bad secondaries on a known V5 filesystem */
if (bp->b_bn == XFS_SB_DADDR ||
xfs_sb_version_hascrc(&mp->m_sb)) {
error = -EFSBADCRC;
goto out_error;
}
}
}
error = xfs_sb_verify(bp, true);
out_error:
if (error) {
xfs_buf_ioerror(bp, error);
if (error == -EFSCORRUPTED || error == -EFSBADCRC)
xfs_verifier_error(bp);
}
}
/*
* We may be probed for a filesystem match, so we may not want to emit
* messages when the superblock buffer is not actually an XFS superblock.
* If we find an XFS superblock, then run a normal, noisy mount because we are
* really going to mount it and want to know about errors.
*/
static void
xfs_sb_quiet_read_verify(
struct xfs_buf *bp)
{
struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
/* XFS filesystem, verify noisily! */
xfs_sb_read_verify(bp);
return;
}
/* quietly fail */
xfs_buf_ioerror(bp, -EWRONGFS);
}
static void
xfs_sb_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_buf_log_item *bip = bp->b_fspriv;
int error;
error = xfs_sb_verify(bp, false);
if (error) {
xfs_buf_ioerror(bp, error);
xfs_verifier_error(bp);
return;
}
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
if (bip)
XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
}
const struct xfs_buf_ops xfs_sb_buf_ops = {
.verify_read = xfs_sb_read_verify,
.verify_write = xfs_sb_write_verify,
};
const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
.verify_read = xfs_sb_quiet_read_verify,
.verify_write = xfs_sb_write_verify,
};
/*
* xfs_mount_common
*
* Mount initialization code establishing various mount
* fields from the superblock associated with the given
* mount structure
*/
void
xfs_sb_mount_common(
struct xfs_mount *mp,
struct xfs_sb *sbp)
{
mp->m_agfrotor = mp->m_agirotor = 0;
spin_lock_init(&mp->m_agirotor_lock);
mp->m_maxagi = mp->m_sb.sb_agcount;
mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
mp->m_blockmask = sbp->sb_blocksize - 1;
mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
mp->m_blockwmask = mp->m_blockwsize - 1;
mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
}
/*
* xfs_initialize_perag_data
*
* Read in each per-ag structure so we can count up the number of
* allocated inodes, free inodes and used filesystem blocks as this
* information is no longer persistent in the superblock. Once we have
* this information, write it into the in-core superblock structure.
*/
int
xfs_initialize_perag_data(
struct xfs_mount *mp,
xfs_agnumber_t agcount)
{
xfs_agnumber_t index;
xfs_perag_t *pag;
xfs_sb_t *sbp = &mp->m_sb;
uint64_t ifree = 0;
uint64_t ialloc = 0;
uint64_t bfree = 0;
uint64_t bfreelst = 0;
uint64_t btree = 0;
int error;
for (index = 0; index < agcount; index++) {
/*
* read the agf, then the agi. This gets us
* all the information we need and populates the
* per-ag structures for us.
*/
error = xfs_alloc_pagf_init(mp, NULL, index, 0);
if (error)
return error;
error = xfs_ialloc_pagi_init(mp, NULL, index);
if (error)
return error;
pag = xfs_perag_get(mp, index);
ifree += pag->pagi_freecount;
ialloc += pag->pagi_count;
bfree += pag->pagf_freeblks;
bfreelst += pag->pagf_flcount;
btree += pag->pagf_btreeblks;
xfs_perag_put(pag);
}
/*
* Overwrite incore superblock counters with just-read data
*/
spin_lock(&mp->m_sb_lock);
sbp->sb_ifree = ifree;
sbp->sb_icount = ialloc;
sbp->sb_fdblocks = bfree + bfreelst + btree;
spin_unlock(&mp->m_sb_lock);
/* Fixup the per-cpu counters as well. */
xfs_icsb_reinit_counters(mp);
return 0;
}
/*
* xfs_mod_sb() can be used to copy arbitrary changes to the
* in-core superblock into the superblock buffer to be logged.
* It does not provide the higher level of locking that is
* needed to protect the in-core superblock from concurrent
* access.
*/
void
xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
{
xfs_buf_t *bp;
int first;
int last;
xfs_mount_t *mp;
xfs_sb_field_t f;
ASSERT(fields);
if (!fields)
return;
mp = tp->t_mountp;
bp = xfs_trans_getsb(tp, mp, 0);
first = sizeof(xfs_sb_t);
last = 0;
/* translate/copy */
xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
/* find modified range */
f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
ASSERT((1LL << f) & XFS_SB_MOD_BITS);
last = xfs_sb_info[f + 1].offset - 1;
f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
ASSERT((1LL << f) & XFS_SB_MOD_BITS);
first = xfs_sb_info[f].offset;
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(tp, bp, first, last);
}

621
fs/xfs/libxfs/xfs_sb.h Normal file
View File

@@ -0,0 +1,621 @@
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_SB_H__
#define __XFS_SB_H__
/*
* Super block
* Fits into a sector-sized buffer at address 0 of each allocation group.
* Only the first of these is ever updated except during growfs.
*/
struct xfs_buf;
struct xfs_mount;
struct xfs_trans;
#define XFS_SB_MAGIC 0x58465342 /* 'XFSB' */
#define XFS_SB_VERSION_1 1 /* 5.3, 6.0.1, 6.1 */
#define XFS_SB_VERSION_2 2 /* 6.2 - attributes */
#define XFS_SB_VERSION_3 3 /* 6.2 - new inode version */
#define XFS_SB_VERSION_4 4 /* 6.2+ - bitmask version */
#define XFS_SB_VERSION_5 5 /* CRC enabled filesystem */
#define XFS_SB_VERSION_NUMBITS 0x000f
#define XFS_SB_VERSION_ALLFBITS 0xfff0
#define XFS_SB_VERSION_ATTRBIT 0x0010
#define XFS_SB_VERSION_NLINKBIT 0x0020
#define XFS_SB_VERSION_QUOTABIT 0x0040
#define XFS_SB_VERSION_ALIGNBIT 0x0080
#define XFS_SB_VERSION_DALIGNBIT 0x0100
#define XFS_SB_VERSION_SHAREDBIT 0x0200
#define XFS_SB_VERSION_LOGV2BIT 0x0400
#define XFS_SB_VERSION_SECTORBIT 0x0800
#define XFS_SB_VERSION_EXTFLGBIT 0x1000
#define XFS_SB_VERSION_DIRV2BIT 0x2000
#define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */
#define XFS_SB_VERSION_MOREBITSBIT 0x8000
/*
* Supported feature bit list is just all bits in the versionnum field because
* we've used them all up and understand them all. Except, of course, for the
* shared superblock bit, which nobody knows what it does and so is unsupported.
*/
#define XFS_SB_VERSION_OKBITS \
((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \
~XFS_SB_VERSION_SHAREDBIT)
/*
* There are two words to hold XFS "feature" bits: the original
* word, sb_versionnum, and sb_features2. Whenever a bit is set in
* sb_features2, the feature bit XFS_SB_VERSION_MOREBITSBIT must be set.
*
* These defines represent bits in sb_features2.
*/
#define XFS_SB_VERSION2_RESERVED1BIT 0x00000001
#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */
#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */
#define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */
#define XFS_SB_VERSION2_CRCBIT 0x00000100 /* metadata CRCs */
#define XFS_SB_VERSION2_FTYPE 0x00000200 /* inode type in dir */
#define XFS_SB_VERSION2_OKBITS \
(XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
XFS_SB_VERSION2_ATTR2BIT | \
XFS_SB_VERSION2_PROJID32BIT | \
XFS_SB_VERSION2_FTYPE)
/*
* Superblock - in core version. Must match the ondisk version below.
* Must be padded to 64 bit alignment.
*/
typedef struct xfs_sb {
__uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
__uint32_t sb_blocksize; /* logical block size, bytes */
xfs_drfsbno_t sb_dblocks; /* number of data blocks */
xfs_drfsbno_t sb_rblocks; /* number of realtime blocks */
xfs_drtbno_t sb_rextents; /* number of realtime extents */
uuid_t sb_uuid; /* file system unique id */
xfs_dfsbno_t sb_logstart; /* starting block of log if internal */
xfs_ino_t sb_rootino; /* root inode number */
xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */
xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */
xfs_agblock_t sb_rextsize; /* realtime extent size, blocks */
xfs_agblock_t sb_agblocks; /* size of an allocation group */
xfs_agnumber_t sb_agcount; /* number of allocation groups */
xfs_extlen_t sb_rbmblocks; /* number of rt bitmap blocks */
xfs_extlen_t sb_logblocks; /* number of log blocks */
__uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */
__uint16_t sb_sectsize; /* volume sector size, bytes */
__uint16_t sb_inodesize; /* inode size, bytes */
__uint16_t sb_inopblock; /* inodes per block */
char sb_fname[12]; /* file system name */
__uint8_t sb_blocklog; /* log2 of sb_blocksize */
__uint8_t sb_sectlog; /* log2 of sb_sectsize */
__uint8_t sb_inodelog; /* log2 of sb_inodesize */
__uint8_t sb_inopblog; /* log2 of sb_inopblock */
__uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */
__uint8_t sb_rextslog; /* log2 of sb_rextents */
__uint8_t sb_inprogress; /* mkfs is in progress, don't mount */
__uint8_t sb_imax_pct; /* max % of fs for inode space */
/* statistics */
/*
* These fields must remain contiguous. If you really
* want to change their layout, make sure you fix the
* code in xfs_trans_apply_sb_deltas().
*/
__uint64_t sb_icount; /* allocated inodes */
__uint64_t sb_ifree; /* free inodes */
__uint64_t sb_fdblocks; /* free data blocks */
__uint64_t sb_frextents; /* free realtime extents */
/*
* End contiguous fields.
*/
xfs_ino_t sb_uquotino; /* user quota inode */
xfs_ino_t sb_gquotino; /* group quota inode */
__uint16_t sb_qflags; /* quota flags */
__uint8_t sb_flags; /* misc. flags */
__uint8_t sb_shared_vn; /* shared version number */
xfs_extlen_t sb_inoalignmt; /* inode chunk alignment, fsblocks */
__uint32_t sb_unit; /* stripe or raid unit */
__uint32_t sb_width; /* stripe or raid width */
__uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */
__uint8_t sb_logsectlog; /* log2 of the log sector size */
__uint16_t sb_logsectsize; /* sector size for the log, bytes */
__uint32_t sb_logsunit; /* stripe unit size for the log */
__uint32_t sb_features2; /* additional feature bits */
/*
* bad features2 field as a result of failing to pad the sb
* structure to 64 bits. Some machines will be using this field
* for features2 bits. Easiest just to mark it bad and not use
* it for anything else.
*/
__uint32_t sb_bad_features2;
/* version 5 superblock fields start here */
/* feature masks */
__uint32_t sb_features_compat;
__uint32_t sb_features_ro_compat;
__uint32_t sb_features_incompat;
__uint32_t sb_features_log_incompat;
__uint32_t sb_crc; /* superblock crc */
__uint32_t sb_pad;
xfs_ino_t sb_pquotino; /* project quota inode */
xfs_lsn_t sb_lsn; /* last write sequence */
/* must be padded to 64 bit alignment */
} xfs_sb_t;
#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
/*
* Superblock - on disk version. Must match the in core version above.
* Must be padded to 64 bit alignment.
*/
typedef struct xfs_dsb {
__be32 sb_magicnum; /* magic number == XFS_SB_MAGIC */
__be32 sb_blocksize; /* logical block size, bytes */
__be64 sb_dblocks; /* number of data blocks */
__be64 sb_rblocks; /* number of realtime blocks */
__be64 sb_rextents; /* number of realtime extents */
uuid_t sb_uuid; /* file system unique id */
__be64 sb_logstart; /* starting block of log if internal */
__be64 sb_rootino; /* root inode number */
__be64 sb_rbmino; /* bitmap inode for realtime extents */
__be64 sb_rsumino; /* summary inode for rt bitmap */
__be32 sb_rextsize; /* realtime extent size, blocks */
__be32 sb_agblocks; /* size of an allocation group */
__be32 sb_agcount; /* number of allocation groups */
__be32 sb_rbmblocks; /* number of rt bitmap blocks */
__be32 sb_logblocks; /* number of log blocks */
__be16 sb_versionnum; /* header version == XFS_SB_VERSION */
__be16 sb_sectsize; /* volume sector size, bytes */
__be16 sb_inodesize; /* inode size, bytes */
__be16 sb_inopblock; /* inodes per block */
char sb_fname[12]; /* file system name */
__u8 sb_blocklog; /* log2 of sb_blocksize */
__u8 sb_sectlog; /* log2 of sb_sectsize */
__u8 sb_inodelog; /* log2 of sb_inodesize */
__u8 sb_inopblog; /* log2 of sb_inopblock */
__u8 sb_agblklog; /* log2 of sb_agblocks (rounded up) */
__u8 sb_rextslog; /* log2 of sb_rextents */
__u8 sb_inprogress; /* mkfs is in progress, don't mount */
__u8 sb_imax_pct; /* max % of fs for inode space */
/* statistics */
/*
* These fields must remain contiguous. If you really
* want to change their layout, make sure you fix the
* code in xfs_trans_apply_sb_deltas().
*/
__be64 sb_icount; /* allocated inodes */
__be64 sb_ifree; /* free inodes */
__be64 sb_fdblocks; /* free data blocks */
__be64 sb_frextents; /* free realtime extents */
/*
* End contiguous fields.
*/
__be64 sb_uquotino; /* user quota inode */
__be64 sb_gquotino; /* group quota inode */
__be16 sb_qflags; /* quota flags */
__u8 sb_flags; /* misc. flags */
__u8 sb_shared_vn; /* shared version number */
__be32 sb_inoalignmt; /* inode chunk alignment, fsblocks */
__be32 sb_unit; /* stripe or raid unit */
__be32 sb_width; /* stripe or raid width */
__u8 sb_dirblklog; /* log2 of dir block size (fsbs) */
__u8 sb_logsectlog; /* log2 of the log sector size */
__be16 sb_logsectsize; /* sector size for the log, bytes */
__be32 sb_logsunit; /* stripe unit size for the log */
__be32 sb_features2; /* additional feature bits */
/*
* bad features2 field as a result of failing to pad the sb
* structure to 64 bits. Some machines will be using this field
* for features2 bits. Easiest just to mark it bad and not use
* it for anything else.
*/
__be32 sb_bad_features2;
/* version 5 superblock fields start here */
/* feature masks */
__be32 sb_features_compat;
__be32 sb_features_ro_compat;
__be32 sb_features_incompat;
__be32 sb_features_log_incompat;
__le32 sb_crc; /* superblock crc */
__be32 sb_pad;
__be64 sb_pquotino; /* project quota inode */
__be64 sb_lsn; /* last write sequence */
/* must be padded to 64 bit alignment */
} xfs_dsb_t;
/*
* Sequence number values for the fields.
*/
typedef enum {
XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
XFS_SBS_PQUOTINO, XFS_SBS_LSN,
XFS_SBS_FIELDCOUNT
} xfs_sb_field_t;
/*
* Mask values, defined based on the xfs_sb_field_t values.
* Only define the ones we're using.
*/
#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x)
#define XFS_SB_UUID XFS_SB_MVAL(UUID)
#define XFS_SB_FNAME XFS_SB_MVAL(FNAME)
#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO)
#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO)
#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO)
#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM)
#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO)
#define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO)
#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS)
#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2)
#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2)
#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
#define XFS_SB_CRC XFS_SB_MVAL(CRC)
#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
#define XFS_SB_MOD_BITS \
(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \
XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \
XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO)
/*
* Misc. Flags - warning - these will be cleared by xfs_repair unless
* a feature bit is set when the flag is used.
*/
#define XFS_SBF_NOFLAGS 0x00 /* no flags set */
#define XFS_SBF_READONLY 0x01 /* only read-only mounts allowed */
/*
* define max. shared version we can interoperate with
*/
#define XFS_SB_MAX_SHARED_VN 0
#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
/*
* The first XFS version we support is a v4 superblock with V2 directories.
*/
static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp)
{
if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
return false;
/* check for unknown features in the fs */
if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
(sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS)))
return false;
return true;
}
static inline bool xfs_sb_good_version(struct xfs_sb *sbp)
{
if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
return true;
if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
return xfs_sb_good_v4_features(sbp);
return false;
}
/*
* Detect a mismatched features2 field. Older kernels read/wrote
* this into the wrong slot, so to be safe we keep them in sync.
*/
static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)
{
return sbp->sb_bad_features2 != sbp->sb_features2;
}
static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp)
{
return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT);
}
static inline void xfs_sb_version_addattr(struct xfs_sb *sbp)
{
sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
}
static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp)
{
return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
}
static inline void xfs_sb_version_addquota(struct xfs_sb *sbp)
{
sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
}
static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
(sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT));
}
static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp)
{
return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
}
static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
(sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
}
static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
}
static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp)
{
return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
}
static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp)
{
return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
}
static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
}
/*
* sb_features2 bit version macros.
*/
static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
(xfs_sb_version_hasmorebits(sbp) &&
(sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
}
static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
(xfs_sb_version_hasmorebits(sbp) &&
(sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT));
}
static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp)
{
sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT;
sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT;
}
static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp)
{
sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
if (!sbp->sb_features2)
sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
}
static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
(xfs_sb_version_hasmorebits(sbp) &&
(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
}
static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
{
sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT;
sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT;
}
/*
* Extended v5 superblock feature masks. These are to be used for new v5
* superblock features only.
*
* Compat features are new features that old kernels will not notice or affect
* and so can mount read-write without issues.
*
* RO-Compat (read only) are features that old kernels can read but will break
* if they write. Hence only read-only mounts of such filesystems are allowed on
* kernels that don't support the feature bit.
*
* InCompat features are features which old kernels will not understand and so
* must not mount.
*
* Log-InCompat features are for changes to log formats or new transactions that
* can't be replayed on older kernels. The fields are set when the filesystem is
* mounted, and a clean unmount clears the fields.
*/
#define XFS_SB_FEAT_COMPAT_ALL 0
#define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL
static inline bool
xfs_sb_has_compat_feature(
struct xfs_sb *sbp,
__uint32_t feature)
{
return (sbp->sb_features_compat & feature) != 0;
}
#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
(XFS_SB_FEAT_RO_COMPAT_FINOBT)
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
struct xfs_sb *sbp,
__uint32_t feature)
{
return (sbp->sb_features_ro_compat & feature) != 0;
}
#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
#define XFS_SB_FEAT_INCOMPAT_ALL \
(XFS_SB_FEAT_INCOMPAT_FTYPE)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
xfs_sb_has_incompat_feature(
struct xfs_sb *sbp,
__uint32_t feature)
{
return (sbp->sb_features_incompat & feature) != 0;
}
#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
static inline bool
xfs_sb_has_incompat_log_feature(
struct xfs_sb *sbp,
__uint32_t feature)
{
return (sbp->sb_features_log_incompat & feature) != 0;
}
/*
* V5 superblock specific feature checks
*/
static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
}
static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
}
static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_FTYPE)) ||
(xfs_sb_version_hasmorebits(sbp) &&
(sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
}
static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
}
/*
* end of superblock version macros
*/
static inline bool
xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
{
return (ino == sbp->sb_uquotino ||
ino == sbp->sb_gquotino ||
ino == sbp->sb_pquotino);
}
#define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */
#define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr))
#define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d))
#define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \
xfs_daddr_to_agno(mp,d), xfs_daddr_to_agbno(mp,d))
#define XFS_FSB_TO_DADDR(mp,fsbno) XFS_AGB_TO_DADDR(mp, \
XFS_FSB_TO_AGNO(mp,fsbno), XFS_FSB_TO_AGBNO(mp,fsbno))
/*
* File system sector to basic block conversions.
*/
#define XFS_FSS_TO_BB(mp,sec) ((sec) << (mp)->m_sectbb_log)
/*
* File system block to basic block conversions.
*/
#define XFS_FSB_TO_BB(mp,fsbno) ((fsbno) << (mp)->m_blkbb_log)
#define XFS_BB_TO_FSB(mp,bb) \
(((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
#define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log)
/*
* File system block to byte conversions.
*/
#define XFS_FSB_TO_B(mp,fsbno) ((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog)
#define XFS_B_TO_FSB(mp,b) \
((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
#define XFS_B_TO_FSBT(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
#define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask)
/*
* perag get/put wrappers for ref counting
*/
extern struct xfs_perag *xfs_perag_get(struct xfs_mount *, xfs_agnumber_t);
extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
int tag);
extern void xfs_perag_put(struct xfs_perag *pag);
extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
extern void xfs_sb_calc_crc(struct xfs_buf *);
extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
extern void xfs_sb_mount_common(struct xfs_mount *, struct xfs_sb *);
extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp);
#endif /* __XFS_SB_H__ */

246
fs/xfs/libxfs/xfs_shared.h Normal file
View File

@@ -0,0 +1,246 @@
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_SHARED_H__
#define __XFS_SHARED_H__
/*
* Definitions shared between kernel and userspace that don't fit into any other
* header file that is shared with userspace.
*/
struct xfs_ifork;
struct xfs_buf;
struct xfs_buf_ops;
struct xfs_mount;
struct xfs_trans;
struct xfs_inode;
/*
* Buffer verifier operations are widely used, including userspace tools
*/
extern const struct xfs_buf_ops xfs_agf_buf_ops;
extern const struct xfs_buf_ops xfs_agi_buf_ops;
extern const struct xfs_buf_ops xfs_agf_buf_ops;
extern const struct xfs_buf_ops xfs_agfl_buf_ops;
extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
extern const struct xfs_buf_ops xfs_da3_node_buf_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
extern const struct xfs_buf_ops xfs_agi_buf_ops;
extern const struct xfs_buf_ops xfs_inobt_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
/*
* Transaction types. Used to distinguish types of buffers. These never reach
* the log.
*/
#define XFS_TRANS_SETATTR_NOT_SIZE 1
#define XFS_TRANS_SETATTR_SIZE 2
#define XFS_TRANS_INACTIVE 3
#define XFS_TRANS_CREATE 4
#define XFS_TRANS_CREATE_TRUNC 5
#define XFS_TRANS_TRUNCATE_FILE 6
#define XFS_TRANS_REMOVE 7
#define XFS_TRANS_LINK 8
#define XFS_TRANS_RENAME 9
#define XFS_TRANS_MKDIR 10
#define XFS_TRANS_RMDIR 11
#define XFS_TRANS_SYMLINK 12
#define XFS_TRANS_SET_DMATTRS 13
#define XFS_TRANS_GROWFS 14
#define XFS_TRANS_STRAT_WRITE 15
#define XFS_TRANS_DIOSTRAT 16
/* 17 was XFS_TRANS_WRITE_SYNC */
#define XFS_TRANS_WRITEID 18
#define XFS_TRANS_ADDAFORK 19
#define XFS_TRANS_ATTRINVAL 20
#define XFS_TRANS_ATRUNCATE 21
#define XFS_TRANS_ATTR_SET 22
#define XFS_TRANS_ATTR_RM 23
#define XFS_TRANS_ATTR_FLAG 24
#define XFS_TRANS_CLEAR_AGI_BUCKET 25
#define XFS_TRANS_QM_SBCHANGE 26
/*
* Dummy entries since we use the transaction type to index into the
* trans_type[] in xlog_recover_print_trans_head()
*/
#define XFS_TRANS_DUMMY1 27
#define XFS_TRANS_DUMMY2 28
#define XFS_TRANS_QM_QUOTAOFF 29
#define XFS_TRANS_QM_DQALLOC 30
#define XFS_TRANS_QM_SETQLIM 31
#define XFS_TRANS_QM_DQCLUSTER 32
#define XFS_TRANS_QM_QINOCREATE 33
#define XFS_TRANS_QM_QUOTAOFF_END 34
#define XFS_TRANS_SB_UNIT 35
#define XFS_TRANS_FSYNC_TS 36
#define XFS_TRANS_GROWFSRT_ALLOC 37
#define XFS_TRANS_GROWFSRT_ZERO 38
#define XFS_TRANS_GROWFSRT_FREE 39
#define XFS_TRANS_SWAPEXT 40
#define XFS_TRANS_SB_COUNT 41
#define XFS_TRANS_CHECKPOINT 42
#define XFS_TRANS_ICREATE 43
#define XFS_TRANS_CREATE_TMPFILE 44
#define XFS_TRANS_TYPE_MAX 44
/* new transaction types need to be reflected in xfs_logprint(8) */
#define XFS_TRANS_TYPES \
{ XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
{ XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
{ XFS_TRANS_INACTIVE, "INACTIVE" }, \
{ XFS_TRANS_CREATE, "CREATE" }, \
{ XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
{ XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
{ XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
{ XFS_TRANS_REMOVE, "REMOVE" }, \
{ XFS_TRANS_LINK, "LINK" }, \
{ XFS_TRANS_RENAME, "RENAME" }, \
{ XFS_TRANS_MKDIR, "MKDIR" }, \
{ XFS_TRANS_RMDIR, "RMDIR" }, \
{ XFS_TRANS_SYMLINK, "SYMLINK" }, \
{ XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
{ XFS_TRANS_GROWFS, "GROWFS" }, \
{ XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
{ XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
{ XFS_TRANS_WRITEID, "WRITEID" }, \
{ XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
{ XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
{ XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
{ XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
{ XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
{ XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
{ XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
{ XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \
{ XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
{ XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
{ XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
{ XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
{ XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
{ XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
{ XFS_TRANS_SB_UNIT, "SB_UNIT" }, \
{ XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
{ XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
{ XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
{ XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
{ XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
{ XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
{ XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
{ XFS_TRANS_DUMMY1, "DUMMY1" }, \
{ XFS_TRANS_DUMMY2, "DUMMY2" }, \
{ XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
/*
* This structure is used to track log items associated with
* a transaction. It points to the log item and keeps some
* flags to track the state of the log item. It also tracks
* the amount of space needed to log the item it describes
* once we get to commit processing (see xfs_trans_commit()).
*/
struct xfs_log_item_desc {
struct xfs_log_item *lid_item;
struct list_head lid_trans;
unsigned char lid_flags;
};
#define XFS_LID_DIRTY 0x1
/* log size calculation functions */
int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
int xfs_log_calc_minimum_size(struct xfs_mount *);
/*
* Values for t_flags.
*/
#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */
#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */
#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */
#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
count in superblock */
/*
* Values for call flags parameter.
*/
#define XFS_TRANS_RELEASE_LOG_RES 0x4
#define XFS_TRANS_ABORT 0x8
/*
* Field values for xfs_trans_mod_sb.
*/
#define XFS_TRANS_SB_ICOUNT 0x00000001
#define XFS_TRANS_SB_IFREE 0x00000002
#define XFS_TRANS_SB_FDBLOCKS 0x00000004
#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008
#define XFS_TRANS_SB_FREXTENTS 0x00000010
#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020
#define XFS_TRANS_SB_DBLOCKS 0x00000040
#define XFS_TRANS_SB_AGCOUNT 0x00000080
#define XFS_TRANS_SB_IMAXPCT 0x00000100
#define XFS_TRANS_SB_REXTSIZE 0x00000200
#define XFS_TRANS_SB_RBMBLOCKS 0x00000400
#define XFS_TRANS_SB_RBLOCKS 0x00000800
#define XFS_TRANS_SB_REXTENTS 0x00001000
#define XFS_TRANS_SB_REXTSLOG 0x00002000
/*
* Here we centralize the specification of XFS meta-data buffer reference count
* values. This determines how hard the buffer cache tries to hold onto the
* buffer.
*/
#define XFS_AGF_REF 4
#define XFS_AGI_REF 4
#define XFS_AGFL_REF 3
#define XFS_INO_BTREE_REF 3
#define XFS_ALLOC_BTREE_REF 2
#define XFS_BMAP_BTREE_REF 2
#define XFS_DIR_BTREE_REF 2
#define XFS_INO_REF 2
#define XFS_ATTR_BTREE_REF 1
#define XFS_DQUOT_REF 1
/*
* Flags for xfs_trans_ichgtime().
*/
#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
/*
* Symlink decoding/encoding functions
*/
int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
uint32_t size, struct xfs_buf *bp);
bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
uint32_t size, struct xfs_buf *bp);
void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_inode *ip, struct xfs_ifork *ifp);
#endif /* __XFS_SHARED_H__ */

View File

@@ -0,0 +1,201 @@
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* Copyright (c) 2012-2013 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_ag.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_symlink.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
/*
* Each contiguous block has a header, so it is not just a simple pathlen
* to FSB conversion.
*/
int
xfs_symlink_blocks(
struct xfs_mount *mp,
int pathlen)
{
int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
return (pathlen + buflen - 1) / buflen;
}
int
xfs_symlink_hdr_set(
struct xfs_mount *mp,
xfs_ino_t ino,
uint32_t offset,
uint32_t size,
struct xfs_buf *bp)
{
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return 0;
dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
dsl->sl_offset = cpu_to_be32(offset);
dsl->sl_bytes = cpu_to_be32(size);
uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
dsl->sl_owner = cpu_to_be64(ino);
dsl->sl_blkno = cpu_to_be64(bp->b_bn);
bp->b_ops = &xfs_symlink_buf_ops;
return sizeof(struct xfs_dsymlink_hdr);
}
/*
* Checking of the symlink header is split into two parts. the verifier does
* CRC, location and bounds checking, the unpacking function checks the path
* parameters and owner.
*/
bool
xfs_symlink_hdr_ok(
xfs_ino_t ino,
uint32_t offset,
uint32_t size,
struct xfs_buf *bp)
{
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
if (offset != be32_to_cpu(dsl->sl_offset))
return false;
if (size != be32_to_cpu(dsl->sl_bytes))
return false;
if (ino != be64_to_cpu(dsl->sl_owner))
return false;
/* ok */
return true;
}
static bool
xfs_symlink_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
return false;
if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
return false;
if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
return false;
if (be32_to_cpu(dsl->sl_offset) +
be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
return false;
if (dsl->sl_owner == 0)
return false;
return true;
}
static void
xfs_symlink_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_symlink_verify(bp))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
}
static void
xfs_symlink_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_buf_log_item *bip = bp->b_fspriv;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
if (!xfs_symlink_verify(bp)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
if (bip) {
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
}
xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
}
const struct xfs_buf_ops xfs_symlink_buf_ops = {
.verify_read = xfs_symlink_read_verify,
.verify_write = xfs_symlink_write_verify,
};
void
xfs_symlink_local_to_remote(
struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
struct xfs_ifork *ifp)
{
struct xfs_mount *mp = ip->i_mount;
char *buf;
if (!xfs_sb_version_hascrc(&mp->m_sb)) {
bp->b_ops = NULL;
memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
return;
}
/*
* As this symlink fits in an inode literal area, it must also fit in
* the smallest buffer the filesystem supports.
*/
ASSERT(BBTOB(bp->b_length) >=
ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
bp->b_ops = &xfs_symlink_buf_ops;
buf = bp->b_addr;
buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
}

View File

@@ -0,0 +1,894 @@
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* Copyright (C) 2010 Red Hat, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
/*
* A buffer has a format structure overhead in the log in addition
* to the data, so we need to take this into account when reserving
* space in a transaction for a buffer. Round the space required up
* to a multiple of 128 bytes so that we don't change the historical
* reservation that has been used for this overhead.
*/
STATIC uint
xfs_buf_log_overhead(void)
{
return round_up(sizeof(struct xlog_op_header) +
sizeof(struct xfs_buf_log_format), 128);
}
/*
* Calculate out transaction log reservation per item in bytes.
*
* The nbufs argument is used to indicate the number of items that
* will be changed in a transaction. size is used to tell how many
* bytes should be reserved per item.
*/
STATIC uint
xfs_calc_buf_res(
uint nbufs,
uint size)
{
return nbufs * (size + xfs_buf_log_overhead());
}
/*
* Logging inodes is really tricksy. They are logged in memory format,
* which means that what we write into the log doesn't directly translate into
* the amount of space they use on disk.
*
* Case in point - btree format forks in memory format use more space than the
* on-disk format. In memory, the buffer contains a normal btree block header so
* the btree code can treat it as though it is just another generic buffer.
* However, when we write it to the inode fork, we don't write all of this
* header as it isn't needed. e.g. the root is only ever in the inode, so
* there's no need for sibling pointers which would waste 16 bytes of space.
*
* Hence when we have an inode with a maximally sized btree format fork, then
* amount of information we actually log is greater than the size of the inode
* on disk. Hence we need an inode reservation function that calculates all this
* correctly. So, we log:
*
* - 4 log op headers for object
* - for the ilf, the inode core and 2 forks
* - inode log format object
* - the inode core
* - two inode forks containing bmap btree root blocks.
* - the btree data contained by both forks will fit into the inode size,
* hence when combined with the inode core above, we have a total of the
* actual inode size.
* - the BMBT headers need to be accounted separately, as they are
* additional to the records and pointers that fit inside the inode
* forks.
*/
STATIC uint
xfs_calc_inode_res(
struct xfs_mount *mp,
uint ninodes)
{
return ninodes *
(4 * sizeof(struct xlog_op_header) +
sizeof(struct xfs_inode_log_format) +
mp->m_sb.sb_inodesize +
2 * XFS_BMBT_BLOCK_LEN(mp));
}
/*
* The free inode btree is a conditional feature and the log reservation
* requirements differ slightly from that of the traditional inode allocation
* btree. The finobt tracks records for inode chunks with at least one free
* inode. A record can be removed from the tree for an inode allocation
* or free and thus the finobt reservation is unconditional across:
*
* - inode allocation
* - inode free
* - inode chunk allocation
*
* The 'modify' param indicates to include the record modification scenario. The
* 'alloc' param indicates to include the reservation for free space btree
* modifications on behalf of finobt modifications. This is required only for
* transactions that do not already account for free space btree modifications.
*
* the free inode btree: max depth * block size
* the allocation btrees: 2 trees * (max depth - 1) * block size
* the free inode btree entry: block size
*/
STATIC uint
xfs_calc_finobt_res(
struct xfs_mount *mp,
int alloc,
int modify)
{
uint res;
if (!xfs_sb_version_hasfinobt(&mp->m_sb))
return 0;
res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
if (alloc)
res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1));
if (modify)
res += (uint)XFS_FSB_TO_B(mp, 1);
return res;
}
/*
* Various log reservation values.
*
* These are based on the size of the file system block because that is what
* most transactions manipulate. Each adds in an additional 128 bytes per
* item logged to try to account for the overhead of the transaction mechanism.
*
* Note: Most of the reservations underestimate the number of allocation
* groups into which they could free extents in the xfs_bmap_finish() call.
* This is because the number in the worst case is quite high and quite
* unusual. In order to fix this we need to change xfs_bmap_finish() to free
* extents in only a single AG at a time. This will require changes to the
* EFI code as well, however, so that the EFI for the extents not freed is
* logged again in each transaction. See SGI PV #261917.
*
* Reservation functions here avoid a huge stack in xfs_trans_init due to
* register overflow from temporaries in the calculations.
*/
/*
* In a write transaction we can allocate a maximum of 2
* extents. This gives:
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
* And the bmap_finish transaction can free bmap blocks in a join:
* the agfs of the ags containing the blocks: 2 * sector size
* the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_write_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
MAX((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
/*
* In truncating a file we free up to two extents at once. We can modify:
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
* And the bmap_finish transaction can free the blocks and bmap blocks:
* the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
MAX((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(5, 0) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(2 + mp->m_ialloc_blks +
mp->m_in_maxlevels, 0)));
}
/*
* In renaming a files we can modify:
* the four inodes involved: 4 * inode size
* the two directory btrees: 2 * (max depth + v2) * dir block size
* the two directory bmap btrees: 2 * max depth * block size
* And the bmap_finish transaction can free dir and bmap blocks (two sets
* of bmap blocks) giving:
* the agf for the ags in which the blocks live: 3 * sector size
* the agfl for the ags in which the blocks live: 3 * sector size
* the superblock for the free block count: sector size
* the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
MAX((xfs_calc_inode_res(mp, 4) +
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
XFS_FSB_TO_B(mp, 1))));
}
/*
* For removing an inode from unlinked list at first, we can modify:
* the agi hash list and counters: sector size
* the on disk inode before ours in the agi hash list: inode cluster size
*/
STATIC uint
xfs_calc_iunlink_remove_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
}
/*
* For creating a link to an inode:
* the parent directory inode: inode size
* the linked inode: inode size
* the directory btree could split: (max depth + v2) * dir block size
* the directory bmap btree could join or split: (max depth + v2) * blocksize
* And the bmap_finish transaction can free some bmap blocks giving:
* the agf for the ag in which the blocks live: sector size
* the agfl for the ag in which the blocks live: sector size
* the superblock for the free block count: sector size
* the allocation btrees: 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_link_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_iunlink_remove_reservation(mp) +
MAX((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1))));
}
/*
* For adding an inode to unlinked list we can modify:
* the agi hash list: sector size
* the unlinked inode: inode size
*/
STATIC uint
xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_inode_res(mp, 1);
}
/*
* For removing a directory entry we can modify:
* the parent directory inode: inode size
* the removed inode: inode size
* the directory btree could join: (max depth + v2) * dir block size
* the directory bmap btree could join or split: (max depth + v2) * blocksize
* And the bmap_finish transaction can free the dir and bmap blocks giving:
* the agf for the ag in which the blocks live: 2 * sector size
* the agfl for the ag in which the blocks live: 2 * sector size
* the superblock for the free block count: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_iunlink_add_reservation(mp) +
MAX((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
/*
* For create, break it in to the two cases that the transaction
* covers. We start with the modify case - allocation done by modification
* of the state of existing inodes - and the allocation case.
*/
/*
* For create we can modify:
* the parent directory inode: inode size
* the new inode: inode size
* the inode btree entry: block size
* the superblock for the nlink flag: sector size
* the directory btree: (max depth + v2) * dir block size
* the directory inode's bmap btree: (max depth + v2) * block size
* the finobt (record modification and allocation btrees)
*/
STATIC uint
xfs_calc_create_resv_modify(
struct xfs_mount *mp)
{
return xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
(uint)XFS_FSB_TO_B(mp, 1) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
xfs_calc_finobt_res(mp, 1, 1);
}
/*
* For create we can allocate some inodes giving:
* the agi and agf of the ag getting the new inodes: 2 * sectorsize
* the superblock for the nlink flag: sector size
* the inode blocks allocated: mp->m_ialloc_blks * blocksize
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
*/
STATIC uint
xfs_calc_create_resv_alloc(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
mp->m_sb.sb_sectsize +
xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
STATIC uint
__xfs_calc_create_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
MAX(xfs_calc_create_resv_alloc(mp),
xfs_calc_create_resv_modify(mp));
}
/*
* For icreate we can allocate some inodes giving:
* the agi and agf of the ag getting the new inodes: 2 * sectorsize
* the superblock for the nlink flag: sector size
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
* the finobt (record insertion)
*/
STATIC uint
xfs_calc_icreate_resv_alloc(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
mp->m_sb.sb_sectsize +
xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_finobt_res(mp, 0, 0);
}
STATIC uint
xfs_calc_icreate_reservation(xfs_mount_t *mp)
{
return XFS_DQUOT_LOGRES(mp) +
MAX(xfs_calc_icreate_resv_alloc(mp),
xfs_calc_create_resv_modify(mp));
}
STATIC uint
xfs_calc_create_reservation(
struct xfs_mount *mp)
{
if (xfs_sb_version_hascrc(&mp->m_sb))
return xfs_calc_icreate_reservation(mp);
return __xfs_calc_create_reservation(mp);
}
STATIC uint
xfs_calc_create_tmpfile_reservation(
struct xfs_mount *mp)
{
uint res = XFS_DQUOT_LOGRES(mp);
if (xfs_sb_version_hascrc(&mp->m_sb))
res += xfs_calc_icreate_resv_alloc(mp);
else
res += xfs_calc_create_resv_alloc(mp);
return res + xfs_calc_iunlink_add_reservation(mp);
}
/*
* Making a new directory is the same as creating a new file.
*/
STATIC uint
xfs_calc_mkdir_reservation(
struct xfs_mount *mp)
{
return xfs_calc_create_reservation(mp);
}
/*
* Making a new symplink is the same as creating a new file, but
* with the added blocks for remote symlink data which can be up to 1kB in
* length (MAXPATHLEN).
*/
STATIC uint
xfs_calc_symlink_reservation(
struct xfs_mount *mp)
{
return xfs_calc_create_reservation(mp) +
xfs_calc_buf_res(1, MAXPATHLEN);
}
/*
* In freeing an inode we can modify:
* the inode being freed: inode size
* the super block free inode counter: sector size
* the agi hash list and counters: sector size
* the inode btree entry: block size
* the on disk inode before ours in the agi hash list: inode cluster size
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
* the finobt (record insertion, removal or modification)
*/
STATIC uint
xfs_calc_ifree_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
xfs_calc_iunlink_remove_reservation(mp) +
xfs_calc_buf_res(1, 0) +
xfs_calc_buf_res(2 + mp->m_ialloc_blks +
mp->m_in_maxlevels, 0) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_finobt_res(mp, 0, 1);
}
/*
* When only changing the inode we log the inode and possibly the superblock
* We also add a bit of slop for the transaction stuff.
*/
STATIC uint
xfs_calc_ichange_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
/*
* Growing the data section of the filesystem.
* superblock
* agi and agf
* allocation btrees
*/
STATIC uint
xfs_calc_growdata_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
/*
* Growing the rt section of the filesystem.
* In the first set of transactions (ALLOC) we allocate space to the
* bitmap or summary files.
* superblock: sector size
* agf of the ag from which the extent is allocated: sector size
* bmap btree for bitmap/summary inode: max depth * blocksize
* bitmap/summary inode: inode size
* allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
*/
STATIC uint
xfs_calc_growrtalloc_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
/*
* Growing the rt section of the filesystem.
* In the second set of transactions (ZERO) we zero the new metadata blocks.
* one bitmap/summary block: blocksize
*/
STATIC uint
xfs_calc_growrtzero_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
}
/*
* Growing the rt section of the filesystem.
* In the third set of transactions (FREE) we update metadata without
* allocating any new blocks.
* superblock: sector size
* bitmap inode: inode size
* summary inode: inode size
* one bitmap block: blocksize
* summary blocks: new summary size
*/
STATIC uint
xfs_calc_growrtfree_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
xfs_calc_buf_res(1, mp->m_rsumsize);
}
/*
* Logging the inode modification timestamp on a synchronous write.
* inode
*/
STATIC uint
xfs_calc_swrite_reservation(
struct xfs_mount *mp)
{
return xfs_calc_inode_res(mp, 1);
}
/*
* Logging the inode mode bits when writing a setuid/setgid file
* inode
*/
STATIC uint
xfs_calc_writeid_reservation(
struct xfs_mount *mp)
{
return xfs_calc_inode_res(mp, 1);
}
/*
* Converting the inode from non-attributed to attributed.
* the inode being converted: inode size
* agf block and superblock (for block allocation)
* the new block (directory sized)
* bmap blocks for the new directory block
* allocation btrees
*/
STATIC uint
xfs_calc_addafork_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
/*
* Removing the attribute fork of a file
* the inode being truncated: inode size
* the inode's bmap btree: max depth * block size
* And the bmap_finish transaction can free the blocks and bmap blocks:
* the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_attrinval_reservation(
struct xfs_mount *mp)
{
return MAX((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
XFS_FSB_TO_B(mp, 1))));
}
/*
* Setting an attribute at mount time.
* the inode getting the attribute
* the superblock for allocations
* the agfs extents are allocated from
* the attribute btree * max depth
* the inode allocation btree
* Since attribute transaction space is dependent on the size of the attribute,
* the calculation is done partially at mount time and partially at runtime(see
* below).
*/
STATIC uint
xfs_calc_attrsetm_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
}
/*
* Setting an attribute at runtime, transaction space unit per block.
* the superblock for allocations: sector size
* the inode bmap btree could join or split: max depth * block size
* Since the runtime attribute transaction space is dependent on the total
* blocks needed for the 1st bmap, here we calculate out the space unit for
* one block so that the caller could figure out the total space according
* to the attibute extent length in blocks by:
* ext * M_RES(mp)->tr_attrsetrt.tr_logres
*/
STATIC uint
xfs_calc_attrsetrt_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
XFS_FSB_TO_B(mp, 1));
}
/*
* Removing an attribute.
* the inode: inode size
* the attribute btree could join: max depth * block size
* the inode bmap btree could join or split: max depth * block size
* And the bmap_finish transaction can free the attr blocks freed giving:
* the agf for the ag in which the blocks live: 2 * sector size
* the agfl for the ag in which the blocks live: 2 * sector size
* the superblock for the free block count: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_attrrm_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
MAX((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
XFS_FSB_TO_B(mp, 1)) +
(uint)XFS_FSB_TO_B(mp,
XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
(xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
/*
* Clearing a bad agino number in an agi hash bucket.
*/
STATIC uint
xfs_calc_clear_agi_bucket_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
/*
* Clearing the quotaflags in the superblock.
* the super block for changing quota flags: sector size
*/
STATIC uint
xfs_calc_qm_sbchange_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
/*
* Adjusting quota limits.
* the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
*/
STATIC uint
xfs_calc_qm_setqlim_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
}
/*
* Allocating quota on disk if needed.
* the write transaction log space for quota file extent allocation
* the unit of quota allocation: one system block size
*/
STATIC uint
xfs_calc_qm_dqalloc_reservation(
struct xfs_mount *mp)
{
return xfs_calc_write_reservation(mp) +
xfs_calc_buf_res(1,
XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
}
/*
* Turning off quotas.
* the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
* the superblock for the quota flags: sector size
*/
STATIC uint
xfs_calc_qm_quotaoff_reservation(
struct xfs_mount *mp)
{
return sizeof(struct xfs_qoff_logitem) * 2 +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
/*
* End of turning off quotas.
* the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
*/
STATIC uint
xfs_calc_qm_quotaoff_end_reservation(
struct xfs_mount *mp)
{
return sizeof(struct xfs_qoff_logitem) * 2;
}
/*
* Syncing the incore super block changes to disk.
* the super block to reflect the changes: sector size
*/
STATIC uint
xfs_calc_sb_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
void
xfs_trans_resv_calc(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
/*
* The following transactions are logged in physical format and
* require a permanent reservation on space.
*/
resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_create.tr_logres = xfs_calc_create_reservation(mp);
resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_create_tmpfile.tr_logres =
xfs_calc_create_tmpfile_reservation(mp);
resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
/*
* The following transactions are logged in logical format with
* a default log count.
*/
resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp);
resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT;
resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp);
resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp);
resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
resp->tr_qm_equotaoff.tr_logres =
xfs_calc_qm_quotaoff_end_reservation(mp);
resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;
/* The following transaction are logged in logical format */
resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
}

View File

@@ -0,0 +1,117 @@
/*
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_TRANS_RESV_H__
#define __XFS_TRANS_RESV_H__
struct xfs_mount;
/*
* structure for maintaining pre-calculated transaction reservations.
*/
struct xfs_trans_res {
uint tr_logres; /* log space unit in bytes per log ticket */
int tr_logcount; /* number of log operations per log ticket */
int tr_logflags; /* log flags, currently only used for indicating
* a reservation request is permanent or not */
};
struct xfs_trans_resv {
struct xfs_trans_res tr_write; /* extent alloc trans */
struct xfs_trans_res tr_itruncate; /* truncate trans */
struct xfs_trans_res tr_rename; /* rename trans */
struct xfs_trans_res tr_link; /* link trans */
struct xfs_trans_res tr_remove; /* unlink trans */
struct xfs_trans_res tr_symlink; /* symlink trans */
struct xfs_trans_res tr_create; /* create trans */
struct xfs_trans_res tr_create_tmpfile; /* create O_TMPFILE trans */
struct xfs_trans_res tr_mkdir; /* mkdir trans */
struct xfs_trans_res tr_ifree; /* inode free trans */
struct xfs_trans_res tr_ichange; /* inode update trans */
struct xfs_trans_res tr_growdata; /* fs data section grow trans */
struct xfs_trans_res tr_addafork; /* add inode attr fork trans */
struct xfs_trans_res tr_writeid; /* write setuid/setgid file */
struct xfs_trans_res tr_attrinval; /* attr fork buffer
* invalidation */
struct xfs_trans_res tr_attrsetm; /* set/create an attribute at
* mount time */
struct xfs_trans_res tr_attrsetrt; /* set/create an attribute at
* runtime */
struct xfs_trans_res tr_attrrm; /* remove an attribute */
struct xfs_trans_res tr_clearagi; /* clear agi unlinked bucket */
struct xfs_trans_res tr_growrtalloc; /* grow realtime allocations */
struct xfs_trans_res tr_growrtzero; /* grow realtime zeroing */
struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */
struct xfs_trans_res tr_qm_sbchange; /* change quota flags */
struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */
struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */
struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */
struct xfs_trans_res tr_qm_equotaoff;/* end of turn quota off */
struct xfs_trans_res tr_sb; /* modify superblock */
struct xfs_trans_res tr_fsyncts; /* update timestamps on fsync */
};
/* shorthand way of accessing reservation structure */
#define M_RES(mp) (&(mp)->m_resv)
/*
* Per-extent log reservation for the allocation btree changes
* involved in freeing or allocating an extent.
* 2 trees * (2 blocks/level * max depth - 1) * block size
*/
#define XFS_ALLOCFREE_LOG_RES(mp,nx) \
((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))
#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
/*
* Per-directory log reservation for any directory change.
* dir blocks: (1 btree block per level + data block + free block) * dblock size
* bmap btree: (levels + 2) * max depth * block size
* v2 directory blocks can be fragmented below the dirblksize down to the fsb
* size, so account for that in the DAENTER macros.
*/
#define XFS_DIROP_LOG_RES(mp) \
(XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \
(XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)))
#define XFS_DIROP_LOG_COUNT(mp) \
(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
/*
* Various log count values.
*/
#define XFS_DEFAULT_LOG_COUNT 1
#define XFS_DEFAULT_PERM_LOG_COUNT 2
#define XFS_ITRUNCATE_LOG_COUNT 2
#define XFS_INACTIVE_LOG_COUNT 2
#define XFS_CREATE_LOG_COUNT 2
#define XFS_CREATE_TMPFILE_LOG_COUNT 2
#define XFS_MKDIR_LOG_COUNT 3
#define XFS_SYMLINK_LOG_COUNT 3
#define XFS_REMOVE_LOG_COUNT 2
#define XFS_LINK_LOG_COUNT 2
#define XFS_RENAME_LOG_COUNT 2
#define XFS_WRITE_LOG_COUNT 2
#define XFS_ADDAFORK_LOG_COUNT 2
#define XFS_ATTRINVAL_LOG_COUNT 1
#define XFS_ATTRSET_LOG_COUNT 3
#define XFS_ATTRRM_LOG_COUNT 3
void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp);
#endif /* __XFS_TRANS_RESV_H__ */

View File

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_TRANS_SPACE_H__
#define __XFS_TRANS_SPACE_H__
/*
* Components of space reservations.
*/
#define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \
(((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
#define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1)
#define XFS_NEXTENTADD_SPACE_RES(mp,b,w)\
(((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
XFS_EXTENTADD_SPACE_RES(mp,w))
#define XFS_DAENTER_1B(mp,w) \
((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
#define XFS_DAENTER_DBS(mp,w) \
(XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0))
#define XFS_DAENTER_BLOCKS(mp,w) \
(XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w))
#define XFS_DAENTER_BMAP1B(mp,w) \
XFS_NEXTENTADD_SPACE_RES(mp, XFS_DAENTER_1B(mp, w), w)
#define XFS_DAENTER_BMAPS(mp,w) \
(XFS_DAENTER_DBS(mp,w) * XFS_DAENTER_BMAP1B(mp,w))
#define XFS_DAENTER_SPACE_RES(mp,w) \
(XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w))
#define XFS_DAREMOVE_SPACE_RES(mp,w) XFS_DAENTER_BMAPS(mp,w)
#define XFS_DIRENTER_MAX_SPLIT(mp,nl) 1
#define XFS_DIRENTER_SPACE_RES(mp,nl) \
(XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \
XFS_DIRENTER_MAX_SPLIT(mp,nl))
#define XFS_DIRREMOVE_SPACE_RES(mp) \
XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
#define XFS_IALLOC_SPACE_RES(mp) \
((mp)->m_ialloc_blks + \
(xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
((mp)->m_in_maxlevels - 1)))
/*
* Space reservation values for various transactions.
*/
#define XFS_ADDAFORK_SPACE_RES(mp) \
((mp)->m_dir_geo->fsbcount + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
#define XFS_ATTRRM_SPACE_RES(mp) \
XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK)
/* This macro is not used - see inline code in xfs_attr_set */
#define XFS_ATTRSET_SPACE_RES(mp, v) \
(XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v))
#define XFS_CREATE_SPACE_RES(mp,nl) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_DIOSTRAT_SPACE_RES(mp, v) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
#define XFS_GROWFS_SPACE_RES(mp) \
(2 * XFS_AG_MAXLEVELS(mp))
#define XFS_GROWFSRT_SPACE_RES(mp,b) \
((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
#define XFS_LINK_SPACE_RES(mp,nl) \
XFS_DIRENTER_SPACE_RES(mp,nl)
#define XFS_MKDIR_SPACE_RES(mp,nl) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_QM_DQALLOC_SPACE_RES(mp) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \
XFS_DQUOT_CLUSTER_SIZE_FSB)
#define XFS_QM_QINOCREATE_SPACE_RES(mp) \
XFS_IALLOC_SPACE_RES(mp)
#define XFS_REMOVE_SPACE_RES(mp) \
XFS_DIRREMOVE_SPACE_RES(mp)
#define XFS_RENAME_SPACE_RES(mp,nl) \
(XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
#define XFS_IFREE_SPACE_RES(mp) \
(xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0)
#endif /* __XFS_TRANS_SPACE_H__ */