asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 31 Jan 2018 18:18:00 +0000 (10:18 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 31 Jan 2018 18:18:00 +0000 (10:18 -0800)
Pull xfs updates from Darrick Wong:
 "This merge cycle, we're again making some substantive changes to XFS.

  Metadata verifiers have been restructured to provide more detail about
  which part of a metadata structure failed checks, and we've enhanced
  the new online fsck feature to cross-reference extent allocation
  information with the other metadata structures. With this pull, the
  metadata verification part of online fsck is more or less finished,
  though the feature is still experimental and still disabled by
  default.

  We're also preparing to remove the EXPERIMENTAL tag from a couple of
  features this cycle. This week we're committing a bunch of space
  accounting fixes for reflink and removing the EXPERIMENTAL tag from
  reflink; I anticipate that we'll be ready to do the same for the
  reverse mapping feature next week. (I don't have any pending fixes for
  rmap; however I wish to remove the tags one at a time.)

  This giant pile of patches has been run through a full xfstests run
  over the weekend and through a quick xfstests run against this
  morning's master, with no major failures reported. Let me know if
  there are any merge problems -- git merge reported that one of our
  patches touched the same function as the i_version series, but it
  resolved things cleanly.

  Summary:

   - Log faulting code locations when verifiers fail, for improved
     diagnosis of corrupt filesystems.

   - Implement metadata verifiers for local format inode fork data.

   - Online scrub now cross-references metadata records with other
     metadata.

   - Refactor the fs geometry ioctl generation functions.

   - Harden various metadata verifiers.

   - Fix various accounting problems.

   - Fix uncancelled transactions leaking when xattr functions fail.

   - Prevent the copy-on-write speculative preallocation garbage
     collector from racing with writeback.

   - Emit log reservation type information as trace data so that we can
     compare against xfsprogs.

   - Fix some erroneous asserts in the online scrub code.

   - Clean up the transaction reservation calculations.

   - Fix various minor bugs in online scrub.

   - Log complaints about mixed dio/buffered writes once per day and
     less noisily than before.

   - Refactor buffer log item lists to use list_head.

   - Break PNFS leases before reflinking blocks.

   - Reduce lock contention on reflink source files.

   - Fix some quota accounting problems with reflink.

   - Fix a serious corruption problem in the direct cow write code where
     we fed bad iomaps to the vfs iomap consumers.

   - Various other refactorings.

   - Remove EXPERIMENTAL tag from reflink!"

* tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (94 commits)
  xfs: remove experimental tag for reflinks
  xfs: don't screw up direct writes when freesp is fragmented
  xfs: check reflink allocation mappings
  iomap: warn on zero-length mappings
  xfs: treat CoW fork operations as delalloc for quota accounting
  xfs: only grab shared inode locks for source file during reflink
  xfs: allow xfs_lock_two_inodes to take different EXCL/SHARED modes
  xfs: reflink should break pnfs leases before sharing blocks
  xfs: don't clobber inobt/finobt cursors when xref with rmap
  xfs: skip CoW writes past EOF when writeback races with truncate
  xfs: preserve i_rdev when recycling a reclaimable inode
  xfs: refactor accounting updates out of xfs_bmap_btalloc
  xfs: refactor inode verifier corruption error printing
  xfs: make tracepoint inode number format consistent
  xfs: always zero di_flags2 when we free the inode
  xfs: call xfs_qm_dqattach before performing reflink operations
  xfs: bmap code cleanup
  Use list_head infra-structure for buffer's log items list
  Split buffer's b_fspriv field
  Get rid of xfs_buf_log_item_t typedef
  ...

97 files changed:
fs/direct-io.c
fs/iomap.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_alloc.h
fs/xfs/libxfs/xfs_alloc_btree.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_attr_remote.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap_btree.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_btree.h
fs/xfs/libxfs/xfs_da_btree.c
fs/xfs/libxfs/xfs_da_format.h
fs/xfs/libxfs/xfs_dir2.c
fs/xfs/libxfs/xfs_dir2.h
fs/xfs/libxfs/xfs_dir2_block.c
fs/xfs/libxfs/xfs_dir2_data.c
fs/xfs/libxfs/xfs_dir2_leaf.c
fs/xfs/libxfs/xfs_dir2_node.c
fs/xfs/libxfs/xfs_dir2_priv.h
fs/xfs/libxfs/xfs_dir2_sf.c
fs/xfs/libxfs/xfs_dquot_buf.c
fs/xfs/libxfs/xfs_fs.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/libxfs/xfs_ialloc_btree.c
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_inode_buf.h
fs/xfs/libxfs/xfs_inode_fork.c
fs/xfs/libxfs/xfs_inode_fork.h
fs/xfs/libxfs/xfs_log_rlimit.c
fs/xfs/libxfs/xfs_quota_defs.h
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_refcount.h
fs/xfs/libxfs/xfs_refcount_btree.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rmap.h
fs/xfs/libxfs/xfs_rmap_btree.c
fs/xfs/libxfs/xfs_rtbitmap.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/libxfs/xfs_sb.h
fs/xfs/libxfs/xfs_shared.h
fs/xfs/libxfs/xfs_symlink_remote.c
fs/xfs/libxfs/xfs_trans_resv.c
fs/xfs/scrub/agheader.c
fs/xfs/scrub/alloc.c
fs/xfs/scrub/bmap.c
fs/xfs/scrub/btree.c
fs/xfs/scrub/btree.h
fs/xfs/scrub/common.c
fs/xfs/scrub/common.h
fs/xfs/scrub/dabtree.c
fs/xfs/scrub/dir.c
fs/xfs/scrub/ialloc.c
fs/xfs/scrub/inode.c
fs/xfs/scrub/parent.c
fs/xfs/scrub/quota.c
fs/xfs/scrub/refcount.c
fs/xfs/scrub/rmap.c
fs/xfs/scrub/rtbitmap.c
fs/xfs/scrub/scrub.c
fs/xfs/scrub/scrub.h
fs/xfs/scrub/trace.h
fs/xfs/xfs_aops.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_buf_item.h
fs/xfs/xfs_dir2_readdir.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot_item.c
fs/xfs/xfs_error.c
fs/xfs/xfs_error.h
fs/xfs/xfs_fsops.c
fs/xfs/xfs_fsops.h
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl32.c
fs/xfs/xfs_linux.h
fs/xfs/xfs_log.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_reflink.c
fs/xfs/xfs_rtalloc.h
fs/xfs/xfs_super.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_buf.c
include/linux/fs.h

index 3aafb3343a65c76fb66211228564d73ce9097485..a0ca9e48e9937da671739e24d6d7dc2a4867d7ac 100644 (file)
@@ -219,6 +219,27 @@ static inline struct page *dio_get_page(struct dio *dio,
        return dio->pages[sdio->head];
 }
 
+/*
+ * Warn about a page cache invalidation failure during a direct io write.
+ */
+void dio_warn_stale_pagecache(struct file *filp)
+{
+       static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
+       char pathname[128];
+       struct inode *inode = file_inode(filp);
+       char *path;
+
+       errseq_set(&inode->i_mapping->wb_err, -EIO);
+       if (__ratelimit(&_rs)) {
+               path = file_path(filp, pathname, sizeof(pathname));
+               if (IS_ERR(path))
+                       path = "(unknown)";
+               pr_crit("Page cache invalidation failure on direct I/O.  Possible data corruption due to collision with buffered I/O!\n");
+               pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
+                       current->comm);
+       }
+}
+
 /**
  * dio_complete() - called when all DIO BIO I/O has been completed
  * @offset: the byte offset in the file of the completed operation
@@ -290,7 +311,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
                err = invalidate_inode_pages2_range(dio->inode->i_mapping,
                                        offset >> PAGE_SHIFT,
                                        (offset + ret - 1) >> PAGE_SHIFT);
-               WARN_ON_ONCE(err);
+               if (err)
+                       dio_warn_stale_pagecache(dio->iocb->ki_filp);
        }
 
        if (!(dio->flags & DIO_SKIP_DIO_COUNT))
index 47d29ccffaef4d0f1aeb48c174dc3898344cd9b1..afd163586aa079f25ea86e5f56f8d85f25f2af68 100644 (file)
@@ -65,6 +65,8 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
                return ret;
        if (WARN_ON(iomap.offset > pos))
                return -EIO;
+       if (WARN_ON(iomap.length == 0))
+               return -EIO;
 
        /*
         * Cut down the length to the one actually provided by the filesystem,
@@ -753,7 +755,8 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
                err = invalidate_inode_pages2_range(inode->i_mapping,
                                offset >> PAGE_SHIFT,
                                (offset + dio->size - 1) >> PAGE_SHIFT);
-               WARN_ON_ONCE(err);
+               if (err)
+                       dio_warn_stale_pagecache(iocb->ki_filp);
        }
 
        inode_dio_end(file_inode(iocb->ki_filp));
@@ -1018,9 +1021,16 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        if (ret)
                goto out_free_dio;
 
+       /*
+        * Try to invalidate cache pages for the range we're direct
+        * writing.  If this invalidation fails, tough, the write will
+        * still work, but racing two incompatible write paths is a
+        * pretty crazy thing to do, so we don't support it 100%.
+        */
        ret = invalidate_inode_pages2_range(mapping,
                        start >> PAGE_SHIFT, end >> PAGE_SHIFT);
-       WARN_ON_ONCE(ret);
+       if (ret)
+               dio_warn_stale_pagecache(iocb->ki_filp);
        ret = 0;
 
        if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
index 83ed7715f856d2025509c308583436e63d0043a5..c02781a4c091b7cdc51cb422da40f64b70b29b7e 100644 (file)
@@ -167,7 +167,7 @@ xfs_alloc_lookup_ge(
  * Lookup the first record less than or equal to [bno, len]
  * in the btree given by cur.
  */
-static int                             /* error */
+int                                    /* error */
 xfs_alloc_lookup_le(
        struct xfs_btree_cur    *cur,   /* btree cursor */
        xfs_agblock_t           bno,    /* starting block of extent */
@@ -520,7 +520,7 @@ xfs_alloc_fixup_trees(
        return 0;
 }
 
-static bool
+static xfs_failaddr_t
 xfs_agfl_verify(
        struct xfs_buf  *bp)
 {
@@ -528,10 +528,19 @@ xfs_agfl_verify(
        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
        int             i;
 
+       /*
+        * There is no verification of non-crc AGFLs because mkfs does not
+        * initialise the AGFL to zero or NULL. Hence the only valid part of the
+        * AGFL is what the AGF says is active. We can't get to the AGF, so we
+        * can't verify just those entries are valid.
+        */
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return NULL;
+
        if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
-               return false;
+               return __this_address;
        if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
-               return false;
+               return __this_address;
        /*
         * during growfs operations, the perag is not fully initialised,
         * so we can't use it for any useful checking. growfs ensures we can't
@@ -539,16 +548,17 @@ xfs_agfl_verify(
         * so we can detect and avoid this problem.
         */
        if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
-               return false;
+               return __this_address;
 
        for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
                if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
-                       return false;
+                       return __this_address;
        }
 
-       return xfs_log_check_lsn(mp,
-                                be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
+       if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
+               return __this_address;
+       return NULL;
 }
 
 static void
@@ -556,6 +566,7 @@ xfs_agfl_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t  fa;
 
        /*
         * There is no verification of non-crc AGFLs because mkfs does not
@@ -567,28 +578,29 @@ xfs_agfl_read_verify(
                return;
 
        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_agfl_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_agfl_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 static void
 xfs_agfl_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
+       xfs_failaddr_t          fa;
 
        /* no verification of non-crc AGFLs */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
 
-       if (!xfs_agfl_verify(bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_agfl_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -602,6 +614,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = {
        .name = "xfs_agfl",
        .verify_read = xfs_agfl_read_verify,
        .verify_write = xfs_agfl_write_verify,
+       .verify_struct = xfs_agfl_verify,
 };
 
 /*
@@ -2397,19 +2410,19 @@ xfs_alloc_put_freelist(
        return 0;
 }
 
-static bool
+static xfs_failaddr_t
 xfs_agf_verify(
-       struct xfs_mount *mp,
-       struct xfs_buf  *bp)
- {
-       struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(bp);
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
+                       return __this_address;
                if (!xfs_log_check_lsn(mp,
                                be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
-                       return false;
+                       return __this_address;
        }
 
        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
@@ -2418,18 +2431,18 @@ xfs_agf_verify(
              be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
              be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
              be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
-               return false;
+               return __this_address;
 
        if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
            be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
-               return false;
+               return __this_address;
 
        if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
            (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
             be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
-               return false;
+               return __this_address;
 
        /*
         * during growfs operations, the perag is not fully initialised,
@@ -2438,18 +2451,18 @@ xfs_agf_verify(
         * so we can detect and avoid this problem.
         */
        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
-               return false;
+               return __this_address;
 
        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
-               return false;
+               return __this_address;
 
        if (xfs_sb_version_hasreflink(&mp->m_sb) &&
            (be32_to_cpu(agf->agf_refcount_level) < 1 ||
             be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
-               return false;
+               return __this_address;
 
-       return true;;
+       return NULL;
 
 }
 
@@ -2458,28 +2471,29 @@ xfs_agf_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t  fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
-                               XFS_ERRTAG_ALLOC_READ_AGF))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_agf_verify(bp);
+               if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 static void
 xfs_agf_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_agf_verify(mp, bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_agf_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -2496,6 +2510,7 @@ const struct xfs_buf_ops xfs_agf_buf_ops = {
        .name = "xfs_agf",
        .verify_read = xfs_agf_read_verify,
        .verify_write = xfs_agf_write_verify,
+       .verify_struct = xfs_agf_verify,
 };
 
 /*
@@ -2981,3 +2996,22 @@ xfs_verify_fsbno(
                return false;
        return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
 }
+
+/* Is there a record covering a given extent? */
+int
+xfs_alloc_has_record(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       bool                    *exists)
+{
+       union xfs_btree_irec    low;
+       union xfs_btree_irec    high;
+
+       memset(&low, 0, sizeof(low));
+       low.a.ar_startblock = bno;
+       memset(&high, 0xFF, sizeof(high));
+       high.a.ar_startblock = bno + len - 1;
+
+       return xfs_btree_has_record(cur, &low, &high, exists);
+}
index 7ba2d129d50453689b887f8b1bee5686a2c0ccb8..65a0cafe06e4abcaff0d971aff5fbadc2e388945 100644 (file)
@@ -197,6 +197,13 @@ xfs_free_extent(
        struct xfs_owner_info   *oinfo, /* extent owner */
        enum xfs_ag_resv_type   type);  /* block reservation type */
 
+int                            /* error */
+xfs_alloc_lookup_le(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat); /* success/failure */
+
 int                            /* error */
 xfs_alloc_lookup_ge(
        struct xfs_btree_cur    *cur,   /* btree cursor */
@@ -237,4 +244,7 @@ bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno,
                xfs_agblock_t agbno);
 bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
 
+int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+               xfs_extlen_t len, bool *exist);
+
 #endif /* __XFS_ALLOC_H__ */
index cfde0a0f97061ec9e57973553910ca8adbcd66c8..6840b588187e5e27f8b330e6a71dd593be430d82 100644 (file)
@@ -307,13 +307,14 @@ xfs_cntbt_diff_two_keys(
                be32_to_cpu(k2->alloc.ar_startblock);
 }
 
-static bool
+static xfs_failaddr_t
 xfs_allocbt_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_perag        *pag = bp->b_pag;
+       xfs_failaddr_t          fa;
        unsigned int            level;
 
        /*
@@ -331,29 +332,31 @@ xfs_allocbt_verify(
        level = be16_to_cpu(block->bb_level);
        switch (block->bb_magic) {
        case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
-               if (!xfs_btree_sblock_v5hdr_verify(bp))
-                       return false;
+               fa = xfs_btree_sblock_v5hdr_verify(bp);
+               if (fa)
+                       return fa;
                /* fall through */
        case cpu_to_be32(XFS_ABTB_MAGIC):
                if (pag && pag->pagf_init) {
                        if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
-                               return false;
+                               return __this_address;
                } else if (level >= mp->m_ag_maxlevels)
-                       return false;
+                       return __this_address;
                break;
        case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
-               if (!xfs_btree_sblock_v5hdr_verify(bp))
-                       return false;
+               fa = xfs_btree_sblock_v5hdr_verify(bp);
+               if (fa)
+                       return fa;
                /* fall through */
        case cpu_to_be32(XFS_ABTC_MAGIC):
                if (pag && pag->pagf_init) {
                        if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
-                               return false;
+                               return __this_address;
                } else if (level >= mp->m_ag_maxlevels)
-                       return false;
+                       return __this_address;
                break;
        default:
-               return false;
+               return __this_address;
        }
 
        return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
@@ -363,25 +366,30 @@ static void
 xfs_allocbt_read_verify(
        struct xfs_buf  *bp)
 {
+       xfs_failaddr_t  fa;
+
        if (!xfs_btree_sblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_allocbt_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_allocbt_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 
-       if (bp->b_error) {
+       if (bp->b_error)
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
 }
 
 static void
 xfs_allocbt_write_verify(
        struct xfs_buf  *bp)
 {
-       if (!xfs_allocbt_verify(bp)) {
+       xfs_failaddr_t  fa;
+
+       fa = xfs_allocbt_verify(bp);
+       if (fa) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
        xfs_btree_sblock_calc_crc(bp);
@@ -392,6 +400,7 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
        .name = "xfs_allocbt",
        .verify_read = xfs_allocbt_read_verify,
        .verify_write = xfs_allocbt_write_verify,
+       .verify_struct = xfs_allocbt_verify,
 };
 
 
index a76914db72ef11094cd8d74e5bd6cd6add2a3fde..ce4a34a2751d6ebe3ee0806d88ff58adf1023061 100644 (file)
@@ -717,7 +717,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
        return error;
 out_defer_cancel:
        xfs_defer_cancel(args->dfops);
-       args->trans = NULL;
        return error;
 }
 
@@ -770,7 +769,6 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
        return 0;
 out_defer_cancel:
        xfs_defer_cancel(args->dfops);
-       args->trans = NULL;
        return error;
 }
 
@@ -1045,7 +1043,6 @@ xfs_attr_node_addname(xfs_da_args_t *args)
        return retval;
 out_defer_cancel:
        xfs_defer_cancel(args->dfops);
-       args->trans = NULL;
        goto out;
 }
 
@@ -1186,7 +1183,6 @@ xfs_attr_node_removename(xfs_da_args_t *args)
        return error;
 out_defer_cancel:
        xfs_defer_cancel(args->dfops);
-       args->trans = NULL;
        goto out;
 }
 
index 601eaa36f1ada22e2213f9178bcb5cdb5868034d..2135b8e67dcc15fdb969939c5fd6b18d09d0fdda 100644 (file)
@@ -247,14 +247,15 @@ xfs_attr3_leaf_hdr_to_disk(
        }
 }
 
-static bool
+static xfs_failaddr_t
 xfs_attr3_leaf_verify(
-       struct xfs_buf          *bp)
+       struct xfs_buf                  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_attr_leafblock *leaf = bp->b_addr;
-       struct xfs_perag *pag = bp->b_pag;
-       struct xfs_attr3_icleaf_hdr ichdr;
+       struct xfs_attr3_icleaf_hdr     ichdr;
+       struct xfs_mount                *mp = bp->b_target->bt_mount;
+       struct xfs_attr_leafblock       *leaf = bp->b_addr;
+       struct xfs_perag                *pag = bp->b_pag;
+       struct xfs_attr_leaf_entry      *entries;
 
        xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
 
@@ -262,17 +263,17 @@ xfs_attr3_leaf_verify(
                struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
 
                if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC)
-                       return false;
+                       return __this_address;
 
                if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
+                       return __this_address;
                if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
-                       return false;
+                       return __this_address;
                if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
-                       return false;
+                       return __this_address;
        } else {
                if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
-                       return false;
+                       return __this_address;
        }
        /*
         * In recovery there is a transient state where count == 0 is valid
@@ -280,12 +281,27 @@ xfs_attr3_leaf_verify(
         * if the attr didn't fit in shortform.
         */
        if (pag && pag->pagf_init && ichdr.count == 0)
-               return false;
+               return __this_address;
+
+       /*
+        * firstused is the block offset of the first name info structure.
+        * Make sure it doesn't go off the block or crash into the header.
+        */
+       if (ichdr.firstused > mp->m_attr_geo->blksize)
+               return __this_address;
+       if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf))
+               return __this_address;
+
+       /* Make sure the entries array doesn't crash into the name info. */
+       entries = xfs_attr3_leaf_entryp(bp->b_addr);
+       if ((char *)&entries[ichdr.count] >
+           (char *)bp->b_addr + ichdr.firstused)
+               return __this_address;
 
        /* XXX: need to range check rest of attr header values */
        /* XXX: hash order check? */
 
-       return true;
+       return NULL;
 }
 
 static void
@@ -293,12 +309,13 @@ xfs_attr3_leaf_write_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_attr3_leaf_verify(bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_attr3_leaf_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -322,21 +339,23 @@ xfs_attr3_leaf_read_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
             !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_attr3_leaf_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_attr3_leaf_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
        .name = "xfs_attr3_leaf",
        .verify_read = xfs_attr3_leaf_read_verify,
        .verify_write = xfs_attr3_leaf_write_verify,
+       .verify_struct = xfs_attr3_leaf_verify,
 };
 
 int
@@ -870,6 +889,80 @@ xfs_attr_shortform_allfit(
        return xfs_attr_shortform_bytesfit(dp, bytes);
 }
 
+/* Verify the consistency of an inline attribute fork. */
+xfs_failaddr_t
+xfs_attr_shortform_verify(
+       struct xfs_inode                *ip)
+{
+       struct xfs_attr_shortform       *sfp;
+       struct xfs_attr_sf_entry        *sfep;
+       struct xfs_attr_sf_entry        *next_sfep;
+       char                            *endp;
+       struct xfs_ifork                *ifp;
+       int                             i;
+       int                             size;
+
+       ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
+       ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
+       sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
+       size = ifp->if_bytes;
+
+       /*
+        * Give up if the attribute is way too short.
+        */
+       if (size < sizeof(struct xfs_attr_sf_hdr))
+               return __this_address;
+
+       endp = (char *)sfp + size;
+
+       /* Check all reported entries */
+       sfep = &sfp->list[0];
+       for (i = 0; i < sfp->hdr.count; i++) {
+               /*
+                * struct xfs_attr_sf_entry has a variable length.
+                * Check the fixed-offset parts of the structure are
+                * within the data buffer.
+                */
+               if (((char *)sfep + sizeof(*sfep)) >= endp)
+                       return __this_address;
+
+               /* Don't allow names with known bad length. */
+               if (sfep->namelen == 0)
+                       return __this_address;
+
+               /*
+                * Check that the variable-length part of the structure is
+                * within the data buffer.  The next entry starts after the
+                * name component, so nextentry is an acceptable test.
+                */
+               next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep);
+               if ((char *)next_sfep > endp)
+                       return __this_address;
+
+               /*
+                * Check for unknown flags.  Short form doesn't support
+                * the incomplete or local bits, so we can use the namespace
+                * mask here.
+                */
+               if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK)
+                       return __this_address;
+
+               /*
+                * Check for invalid namespace combinations.  We only allow
+                * one namespace flag per xattr, so we can just count the
+                * bits (i.e. hweight) here.
+                */
+               if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1)
+                       return __this_address;
+
+               sfep = next_sfep;
+       }
+       if ((void *)sfep != (void *)endp)
+               return __this_address;
+
+       return NULL;
+}
+
 /*
  * Convert a leaf attribute list to shortform attribute list
  */
@@ -2173,7 +2266,8 @@ xfs_attr3_leaf_lookup_int(
        leaf = bp->b_addr;
        xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
        entries = xfs_attr3_leaf_entryp(leaf);
-       ASSERT(ichdr.count < args->geo->blksize / 8);
+       if (ichdr.count >= args->geo->blksize / 8)
+               return -EFSCORRUPTED;
 
        /*
         * Binary search.  (note: small blocks will skip this loop)
@@ -2189,8 +2283,10 @@ xfs_attr3_leaf_lookup_int(
                else
                        break;
        }
-       ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count));
-       ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval);
+       if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count)))
+               return -EFSCORRUPTED;
+       if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval))
+               return -EFSCORRUPTED;
 
        /*
         * Since we may have duplicate hashval's, find the first matching
index 894124efb421e0d0674b0f39bf63dabfe7916937..4da08af5b13401a08e454f60620f4e1cde44c7a5 100644 (file)
@@ -53,6 +53,7 @@ int   xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
 int    xfs_attr_shortform_remove(struct xfs_da_args *args);
 int    xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int    xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
+xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip);
 void   xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
 
 /*
index d56caf037ca0e7948035f5e987caff7a1a0d8e07..21be186067a2b4cf6049f1a0dbb0b150469242d6 100644 (file)
@@ -65,7 +65,7 @@ xfs_attr3_rmt_blocks(
  * does CRC, location and bounds checking, the unpacking function checks the
  * attribute parameters and owner.
  */
-static bool
+static xfs_failaddr_t
 xfs_attr3_rmt_hdr_ok(
        void                    *ptr,
        xfs_ino_t               ino,
@@ -76,19 +76,19 @@ xfs_attr3_rmt_hdr_ok(
        struct xfs_attr3_rmt_hdr *rmt = ptr;
 
        if (bno != be64_to_cpu(rmt->rm_blkno))
-               return false;
+               return __this_address;
        if (offset != be32_to_cpu(rmt->rm_offset))
-               return false;
+               return __this_address;
        if (size != be32_to_cpu(rmt->rm_bytes))
-               return false;
+               return __this_address;
        if (ino != be64_to_cpu(rmt->rm_owner))
-               return false;
+               return __this_address;
 
        /* ok */
-       return true;
+       return NULL;
 }
 
-static bool
+static xfs_failaddr_t
 xfs_attr3_rmt_verify(
        struct xfs_mount        *mp,
        void                    *ptr,
@@ -98,27 +98,29 @@ xfs_attr3_rmt_verify(
        struct xfs_attr3_rmt_hdr *rmt = ptr;
 
        if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return false;
+               return __this_address;
        if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
-               return false;
+               return __this_address;
        if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
-               return false;
+               return __this_address;
        if (be64_to_cpu(rmt->rm_blkno) != bno)
-               return false;
+               return __this_address;
        if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
-               return false;
+               return __this_address;
        if (be32_to_cpu(rmt->rm_offset) +
                                be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
-               return false;
+               return __this_address;
        if (rmt->rm_owner == 0)
-               return false;
+               return __this_address;
 
-       return true;
+       return NULL;
 }
 
-static void
-xfs_attr3_rmt_read_verify(
-       struct xfs_buf  *bp)
+static int
+__xfs_attr3_rmt_read_verify(
+       struct xfs_buf  *bp,
+       bool            check_crc,
+       xfs_failaddr_t  *failaddr)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
        char            *ptr;
@@ -128,7 +130,7 @@ xfs_attr3_rmt_read_verify(
 
        /* no verification of non-crc buffers */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
+               return 0;
 
        ptr = bp->b_addr;
        bno = bp->b_bn;
@@ -136,23 +138,48 @@ xfs_attr3_rmt_read_verify(
        ASSERT(len >= blksize);
 
        while (len > 0) {
-               if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
-                       xfs_buf_ioerror(bp, -EFSBADCRC);
-                       break;
-               }
-               if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
-                       xfs_buf_ioerror(bp, -EFSCORRUPTED);
-                       break;
+               if (check_crc &&
+                   !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
+                       *failaddr = __this_address;
+                       return -EFSBADCRC;
                }
+               *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
+               if (*failaddr)
+                       return -EFSCORRUPTED;
                len -= blksize;
                ptr += blksize;
                bno += BTOBB(blksize);
        }
 
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-       else
-               ASSERT(len == 0);
+       if (len != 0) {
+               *failaddr = __this_address;
+               return -EFSCORRUPTED;
+       }
+
+       return 0;
+}
+
+/* Buffer read verifier for remote attr blocks: CRC plus header checks. */
+static void
+xfs_attr3_rmt_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_failaddr_t  failaddr;
+       int             err = __xfs_attr3_rmt_read_verify(bp, true, &failaddr);
+
+       if (err != 0)
+               xfs_verifier_error(bp, err, failaddr);
+}
+
+/* Structure-only verifier: same checks as the read path, minus the CRC. */
+static xfs_failaddr_t
+xfs_attr3_rmt_verify_struct(
+       struct xfs_buf  *bp)
+{
+       xfs_failaddr_t  failaddr;
+
+       return __xfs_attr3_rmt_read_verify(bp, false, &failaddr) ?
+                       failaddr : NULL;
+}
 
 static void
@@ -160,6 +187,7 @@ xfs_attr3_rmt_write_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t  fa;
        int             blksize = mp->m_attr_geo->blksize;
        char            *ptr;
        int             len;
@@ -177,9 +205,9 @@ xfs_attr3_rmt_write_verify(
        while (len > 0) {
                struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
 
-               if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
-                       xfs_buf_ioerror(bp, -EFSCORRUPTED);
-                       xfs_verifier_error(bp);
+               fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
+               if (fa) {
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                        return;
                }
 
@@ -188,8 +216,7 @@ xfs_attr3_rmt_write_verify(
                 * xfs_attr3_rmt_hdr_set() for the explanation.
                 */
                if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
-                       xfs_buf_ioerror(bp, -EFSCORRUPTED);
-                       xfs_verifier_error(bp);
+                       xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
                        return;
                }
                xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
@@ -198,13 +225,16 @@ xfs_attr3_rmt_write_verify(
                ptr += blksize;
                bno += BTOBB(blksize);
        }
-       ASSERT(len == 0);
+
+       if (len != 0)
+               xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
 }
 
 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
        .name = "xfs_attr3_rmt",
        .verify_read = xfs_attr3_rmt_read_verify,
        .verify_write = xfs_attr3_rmt_write_verify,
+       .verify_struct = xfs_attr3_rmt_verify_struct,
 };
 
 STATIC int
@@ -269,7 +299,7 @@ xfs_attr_rmtval_copyout(
                byte_cnt = min(*valuelen, byte_cnt);
 
                if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                       if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
+                       if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
                                                  byte_cnt, bno)) {
                                xfs_alert(mp,
 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
index 1bddbba6b80c960bdcc10c9a30210c119e1b2f77..daae00ed30c5d8a8eb7f6f30018aec43355642ec 100644 (file)
@@ -400,7 +400,7 @@ xfs_bmap_check_leaf_extents(
                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
                bno = be64_to_cpu(*pp);
                XFS_WANT_CORRUPTED_GOTO(mp,
-                                       XFS_FSB_SANITY_CHECK(mp, bno), error0);
+                                       xfs_verify_fsbno(mp, bno), error0);
                if (bp_release) {
                        bp_release = 0;
                        xfs_trans_brelse(NULL, bp);
@@ -1220,7 +1220,7 @@ xfs_iread_extents(
                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
                bno = be64_to_cpu(*pp);
                XFS_WANT_CORRUPTED_GOTO(mp,
-                       XFS_FSB_SANITY_CHECK(mp, bno), out_brelse);
+                       xfs_verify_fsbno(mp, bno), out_brelse);
                xfs_trans_brelse(tp, bp);
        }
 
@@ -3337,6 +3337,49 @@ xfs_bmap_btalloc_filestreams(
        return 0;
 }
 
+/* Adjust inode block counts and quota for the allocation just completed. */
+static void
+xfs_bmap_btalloc_accounting(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args)
+{
+       if (!(ap->flags & XFS_BMAPI_COWFORK)) {
+               /*
+                * Data/attr fork only: add the blocks to di_nblocks and log
+                * the inode core.  A formerly delalloc extent also leaves
+                * i_delayed_blks and is charged via the delalloc counter.
+                */
+               ap->ip->i_d.di_nblocks += args->len;
+               xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+               if (ap->wasdel) {
+                       ap->ip->i_delayed_blks -= args->len;
+                       xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+                                       XFS_TRANS_DQ_DELBCOUNT, args->len);
+               } else {
+                       xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+                                       XFS_TRANS_DQ_BCOUNT, args->len);
+               }
+               return;
+       }
+
+       /*
+        * CoW fork blocks are in-core only, so even as real blocks they count
+        * as in-core quota reservation (like delalloc) until remapped to the
+        * data fork.  A formerly delalloc extent is therefore already reserved.
+        */
+       if (ap->wasdel)
+               return;
+
+       /*
+        * Allocation into a hole: the transaction holds the in-core quota
+        * reservation.  Count the blocks as delayed and shrink the transaction
+        * reservation by the same amount, handing it over to the extent.
+        */
+       ap->ip->i_delayed_blks += args->len;
+       xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
+                       -(long)args->len);
+}
+
 STATIC int
 xfs_bmap_btalloc(
        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
@@ -3347,6 +3390,8 @@ xfs_bmap_btalloc(
        xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
        xfs_agnumber_t  ag;
        xfs_alloc_arg_t args;
+       xfs_fileoff_t   orig_offset;
+       xfs_extlen_t    orig_length;
        xfs_extlen_t    blen;
        xfs_extlen_t    nextminlen = 0;
        int             nullfb;         /* true if ap->firstblock isn't set */
@@ -3356,6 +3401,8 @@ xfs_bmap_btalloc(
        int             stripe_align;
 
        ASSERT(ap->length);
+       orig_offset = ap->offset;
+       orig_length = ap->length;
 
        mp = ap->ip->i_mount;
 
@@ -3571,19 +3618,23 @@ xfs_bmap_btalloc(
                        *ap->firstblock = args.fsbno;
                ASSERT(nullfb || fb_agno <= args.agno);
                ap->length = args.len;
-               if (!(ap->flags & XFS_BMAPI_COWFORK))
-                       ap->ip->i_d.di_nblocks += args.len;
-               xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
-               if (ap->wasdel)
-                       ap->ip->i_delayed_blks -= args.len;
                /*
-                * Adjust the disk quota also. This was reserved
-                * earlier.
+                * If the extent size hint is active, we tried to round the
+                * caller's allocation request offset down to extsz and the
+                * length up to another extsz boundary.  If we found a free
+                * extent we mapped it in starting at this new offset.  If the
+                * newly mapped space isn't long enough to cover any of the
+                * range of offsets that was originally requested, move the
+                * mapping up so that we can fill as much of the caller's
+                * original request as possible.  Free space is apparently
+                * very fragmented so we're unlikely to be able to satisfy the
+                * hints anyway.
                 */
-               xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
-                       ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
-                                       XFS_TRANS_DQ_BCOUNT,
-                       (long) args.len);
+               if (ap->length <= orig_length)
+                       ap->offset = orig_offset;
+               else if (ap->offset + ap->length < orig_offset + orig_length)
+                       ap->offset = orig_offset + orig_length - ap->length;
+               xfs_bmap_btalloc_accounting(ap, &args);
        } else {
                ap->blkno = NULLFSBLOCK;
                ap->length = 0;
@@ -3876,8 +3927,6 @@ xfs_bmapi_reserve_delalloc(
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
        xfs_extlen_t            alen;
        xfs_extlen_t            indlen;
-       char                    rt = XFS_IS_REALTIME_INODE(ip);
-       xfs_extlen_t            extsz;
        int                     error;
        xfs_fileoff_t           aoff = off;
 
@@ -3892,31 +3941,25 @@ xfs_bmapi_reserve_delalloc(
                prealloc = alen - len;
 
        /* Figure out the extent size, adjust alen */
-       if (whichfork == XFS_COW_FORK)
-               extsz = xfs_get_cowextsz_hint(ip);
-       else
-               extsz = xfs_get_extsz_hint(ip);
-       if (extsz) {
+       if (whichfork == XFS_COW_FORK) {
                struct xfs_bmbt_irec    prev;
+               xfs_extlen_t            extsz = xfs_get_cowextsz_hint(ip);
 
                if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
                        prev.br_startoff = NULLFILEOFF;
 
-               error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
+               error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
                                               1, 0, &aoff, &alen);
                ASSERT(!error);
        }
 
-       if (rt)
-               extsz = alen / mp->m_sb.sb_rextsize;
-
        /*
         * Make a transaction-less quota reservation for delayed allocation
         * blocks.  This number gets adjusted later.  We return if we haven't
         * allocated blocks already inside this loop.
         */
        error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
-                       rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+                                               XFS_QMOPT_RES_REGBLKS);
        if (error)
                return error;
 
@@ -3927,12 +3970,7 @@ xfs_bmapi_reserve_delalloc(
        indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
        ASSERT(indlen > 0);
 
-       if (rt) {
-               error = xfs_mod_frextents(mp, -((int64_t)extsz));
-       } else {
-               error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
-       }
-
+       error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
        if (error)
                goto out_unreserve_quota;
 
@@ -3963,14 +4001,11 @@ xfs_bmapi_reserve_delalloc(
        return 0;
 
 out_unreserve_blocks:
-       if (rt)
-               xfs_mod_frextents(mp, extsz);
-       else
-               xfs_mod_fdblocks(mp, alen, false);
+       xfs_mod_fdblocks(mp, alen, false);
 out_unreserve_quota:
        if (XFS_IS_QUOTA_ON(mp))
-               xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
-                               XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+               xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
+                                               XFS_QMOPT_RES_REGBLKS);
        return error;
 }
 
@@ -4304,8 +4339,16 @@ xfs_bmapi_write(
        while (bno < end && n < *nmap) {
                bool                    need_alloc = false, wasdelay = false;
 
-               /* in hole or beyoned EOF? */
+               /* in hole or beyond EOF? */
                if (eof || bma.got.br_startoff > bno) {
+                       /*
+                        * CoW fork conversions should /never/ hit EOF or
+                        * holes.  There should always be something for us
+                        * to work on.
+                        */
+                       ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
+                                (flags & XFS_BMAPI_COWFORK)));
+
                        if (flags & XFS_BMAPI_DELALLOC) {
                                /*
                                 * For the COW fork we can reasonably get a
@@ -4824,6 +4867,7 @@ xfs_bmap_del_extent_cow(
                xfs_iext_insert(ip, icur, &new, state);
                break;
        }
+       ip->i_delayed_blks -= del->br_blockcount;
 }
 
 /*
index c10aecaaae441866013ef88a769f7dc06070837d..9faf479aba497ec4a4297f44700a6f809ea4ad59 100644 (file)
@@ -425,33 +425,29 @@ xfs_bmbt_diff_two_keys(
                          be64_to_cpu(k2->bmbt.br_startoff);
 }
 
-static bool
+static xfs_failaddr_t
 xfs_bmbt_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       xfs_failaddr_t          fa;
        unsigned int            level;
 
        switch (block->bb_magic) {
        case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
-               if (!xfs_sb_version_hascrc(&mp->m_sb))
-                       return false;
-               if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
-               if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
-                       return false;
                /*
                 * XXX: need a better way of verifying the owner here. Right now
                 * just make sure there has been one set.
                 */
-               if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
-                       return false;
+               fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
+               if (fa)
+                       return fa;
                /* fall through */
        case cpu_to_be32(XFS_BMAP_MAGIC):
                break;
        default:
-               return false;
+               return __this_address;
        }
 
        /*
@@ -463,46 +459,39 @@ xfs_bmbt_verify(
         */
        level = be16_to_cpu(block->bb_level);
        if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
-               return false;
-       if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
-               return false;
-
-       /* sibling pointer verification */
-       if (!block->bb_u.l.bb_leftsib ||
-           (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
-            !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
-               return false;
-       if (!block->bb_u.l.bb_rightsib ||
-           (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
-            !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
-               return false;
-
-       return true;
+               return __this_address;
+
+       return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]);
 }
 
 static void
 xfs_bmbt_read_verify(
        struct xfs_buf  *bp)
 {
+       xfs_failaddr_t  fa;
+
        if (!xfs_btree_lblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_bmbt_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_bmbt_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 
-       if (bp->b_error) {
+       if (bp->b_error)
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
 }
 
 static void
 xfs_bmbt_write_verify(
        struct xfs_buf  *bp)
 {
-       if (!xfs_bmbt_verify(bp)) {
+       xfs_failaddr_t  fa;
+
+       fa = xfs_bmbt_verify(bp);
+       if (fa) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
        xfs_btree_lblock_calc_crc(bp);
@@ -512,6 +501,7 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
        .name = "xfs_bmbt",
        .verify_read = xfs_bmbt_read_verify,
        .verify_write = xfs_bmbt_write_verify,
+       .verify_struct = xfs_bmbt_verify,
 };
 
 
index 5f33adf8eecb91f0a49c2b3081921404be00230d..79ee4a1951d14b6649585167cd6cb5cc05977d68 100644 (file)
@@ -273,7 +273,7 @@ xfs_btree_lblock_calc_crc(
        struct xfs_buf          *bp)
 {
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
                return;
@@ -311,7 +311,7 @@ xfs_btree_sblock_calc_crc(
        struct xfs_buf          *bp)
 {
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
                return;
@@ -329,7 +329,7 @@ xfs_btree_sblock_verify_crc(
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
-                       return false;
+                       return __this_address;
                return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
        }
 
@@ -853,7 +853,7 @@ xfs_btree_read_bufl(
        xfs_daddr_t             d;              /* real disk block address */
        int                     error;
 
-       if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
+       if (!xfs_verify_fsbno(mp, fsbno))
                return -EFSCORRUPTED;
        d = XFS_FSB_TO_DADDR(mp, fsbno);
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
@@ -4529,6 +4529,51 @@ xfs_btree_change_owner(
                        &bbcoi);
 }
 
+/* Check the v5 header of a long-format btree block; NULL means all passed. */
+xfs_failaddr_t
+xfs_btree_lblock_v5hdr_verify(
+       struct xfs_buf          *bp,
+       uint64_t                owner)
+{
+       struct xfs_btree_block  *blk = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return __this_address;
+       if (!uuid_equal(&blk->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
+               return __this_address;
+       if (cpu_to_be64(bp->b_bn) != blk->bb_u.l.bb_blkno)
+               return __this_address;
+       if (owner != XFS_RMAP_OWN_UNKNOWN &&
+           owner != be64_to_cpu(blk->bb_u.l.bb_owner))
+               return __this_address;
+       return NULL;
+}
+
+/* Sanity-check the record count and sibling links of a long-format block. */
+xfs_failaddr_t
+xfs_btree_lblock_verify(
+       struct xfs_buf          *bp,
+       unsigned int            max_recs)
+{
+       struct xfs_btree_block  *blk = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       /* The block may not claim more records than the caller allows. */
+       if (max_recs < be16_to_cpu(blk->bb_numrecs))
+               return __this_address;
+
+       /* Each sibling link must be the null marker or pass xfs_verify_fsbno. */
+       if (cpu_to_be64(NULLFSBLOCK) != blk->bb_u.l.bb_leftsib &&
+           !xfs_verify_fsbno(mp, be64_to_cpu(blk->bb_u.l.bb_leftsib)))
+               return __this_address;
+       if (cpu_to_be64(NULLFSBLOCK) != blk->bb_u.l.bb_rightsib &&
+           !xfs_verify_fsbno(mp, be64_to_cpu(blk->bb_u.l.bb_rightsib)))
+               return __this_address;
+
+       return NULL;
+}
+
 /**
  * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
  *                                   btree block
@@ -4537,7 +4582,7 @@ xfs_btree_change_owner(
  * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
  * @pag_max_level: pointer to the per-ag max level field
  */
-bool
+xfs_failaddr_t
 xfs_btree_sblock_v5hdr_verify(
        struct xfs_buf          *bp)
 {
@@ -4546,14 +4591,14 @@ xfs_btree_sblock_v5hdr_verify(
        struct xfs_perag        *pag = bp->b_pag;
 
        if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return false;
+               return __this_address;
        if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
-               return false;
+               return __this_address;
        if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
-               return false;
+               return __this_address;
        if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
-               return false;
-       return true;
+               return __this_address;
+       return NULL;
 }
 
 /**
@@ -4562,29 +4607,29 @@ xfs_btree_sblock_v5hdr_verify(
  * @bp: buffer containing the btree block
  * @max_recs: maximum records allowed in this btree node
  */
-bool
+xfs_failaddr_t
 xfs_btree_sblock_verify(
        struct xfs_buf          *bp,
        unsigned int            max_recs)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       xfs_agnumber_t          agno;   /* AG holding this buffer */
 
        /* numrecs verification */
        if (be16_to_cpu(block->bb_numrecs) > max_recs)
-               return false;
+               return __this_address;
 
        /* sibling pointer verification */
-       if (!block->bb_u.s.bb_leftsib ||
-           (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
-            block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
-               return false;
-       if (!block->bb_u.s.bb_rightsib ||
-           (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
-            block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
-               return false;
+       agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
+       if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
+           !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
+               return __this_address;
+       if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
+           !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
+               return __this_address;
 
-       return true;
+       return NULL;
 }
 
 /*
@@ -4953,3 +4998,33 @@ xfs_btree_diff_two_ptrs(
                return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
        return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
 }
+
+/* Range-query callback: flag any record it is handed by returning ABORT. */
+STATIC int
+xfs_btree_has_record_helper(
+       struct xfs_btree_cur            *cur,
+       union xfs_btree_rec             *rec,
+       void                            *priv)
+{
+       return XFS_BTREE_QUERY_RANGE_ABORT;
+}
+
+/* Report via *exists whether any record falls in the low..high key range. */
+int
+xfs_btree_has_record(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_irec    *low,
+       union xfs_btree_irec    *high,
+       bool                    *exists)
+{
+       int                     ret;
+
+       ret = xfs_btree_query_range(cur, low, high,
+                       &xfs_btree_has_record_helper, NULL);
+
+       /* ABORT means the helper was handed a record; it is not a failure. */
+       *exists = (ret == XFS_BTREE_QUERY_RANGE_ABORT);
+       if (*exists)
+               return 0;
+       return ret;
+}
index b57501c6f71ded2c611ed1024793186dcecc4f93..50440b5618e8fe8ff2762d6f2b2c007a1963b0c9 100644 (file)
@@ -473,10 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
 #define        XFS_FILBLKS_MIN(a,b)    min_t(xfs_filblks_t, (a), (b))
 #define        XFS_FILBLKS_MAX(a,b)    max_t(xfs_filblks_t, (a), (b))
 
-#define        XFS_FSB_SANITY_CHECK(mp,fsb)    \
-       (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
-               XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
-
 /*
  * Trace hooks.  Currently not implemented as they need to be ported
  * over to the generic tracing functionality, which is some effort.
@@ -496,8 +492,14 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
 #define XFS_BTREE_TRACE_ARGR(c, r)
 #define        XFS_BTREE_TRACE_CURSOR(c, t)
 
-bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
-bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
+xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
+xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp,
+               unsigned int max_recs);
+xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp,
+               uint64_t owner);
+xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
+               unsigned int max_recs);
+
 uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
                                 unsigned long len);
 xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
@@ -545,5 +547,7 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur,
                struct xfs_btree_block *block, union xfs_btree_key *key);
 union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
                union xfs_btree_key *key);
+int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
+               union xfs_btree_irec *high, bool *exists);
 
 #endif /* __XFS_BTREE_H__ */
index 651611530d2ff91f77a569a77a944d9f9569c9dd..ea187b4a7991c15343d1405af8720e20b2166abc 100644 (file)
@@ -128,7 +128,7 @@ xfs_da_state_free(xfs_da_state_t *state)
        kmem_zone_free(xfs_da_state_zone, state);
 }
 
-static bool
+static xfs_failaddr_t
 xfs_da3_node_verify(
        struct xfs_buf          *bp)
 {
@@ -145,24 +145,24 @@ xfs_da3_node_verify(
                struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
 
                if (ichdr.magic != XFS_DA3_NODE_MAGIC)
-                       return false;
+                       return __this_address;
 
                if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
+                       return __this_address;
                if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
-                       return false;
+                       return __this_address;
                if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
-                       return false;
+                       return __this_address;
        } else {
                if (ichdr.magic != XFS_DA_NODE_MAGIC)
-                       return false;
+                       return __this_address;
        }
        if (ichdr.level == 0)
-               return false;
+               return __this_address;
        if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
-               return false;
+               return __this_address;
        if (ichdr.count == 0)
-               return false;
+               return __this_address;
 
        /*
         * we don't know if the node is for and attribute or directory tree,
@@ -170,11 +170,11 @@ xfs_da3_node_verify(
         */
        if (ichdr.count > mp->m_dir_geo->node_ents &&
            ichdr.count > mp->m_attr_geo->node_ents)
-               return false;
+               return __this_address;
 
        /* XXX: hash order check? */
 
-       return true;
+       return NULL;
 }
 
 static void
@@ -182,12 +182,13 @@ xfs_da3_node_write_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_da3_node_verify(bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_da3_node_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -211,19 +212,20 @@ xfs_da3_node_read_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_da_blkinfo   *info = bp->b_addr;
+       xfs_failaddr_t          fa;
 
        switch (be16_to_cpu(info->magic)) {
                case XFS_DA3_NODE_MAGIC:
                        if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
-                               xfs_buf_ioerror(bp, -EFSBADCRC);
+                               xfs_verifier_error(bp, -EFSBADCRC,
+                                               __this_address);
                                break;
                        }
                        /* fall through */
                case XFS_DA_NODE_MAGIC:
-                       if (!xfs_da3_node_verify(bp)) {
-                               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-                               break;
-                       }
+                       fa = xfs_da3_node_verify(bp);
+                       if (fa)
+                               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                        return;
                case XFS_ATTR_LEAF_MAGIC:
                case XFS_ATTR3_LEAF_MAGIC:
@@ -236,18 +238,40 @@ xfs_da3_node_read_verify(
                        bp->b_ops->verify_read(bp);
                        return;
                default:
-                       xfs_buf_ioerror(bp, -EFSCORRUPTED);
+                       xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
                        break;
        }
+}
+
+/* Verify the structure of a da3 block; NULL if good, else failing address. */
+static xfs_failaddr_t
+xfs_da3_node_verify_struct(
+       struct xfs_buf          *bp)    /* buffer to check; b_ops may be retargeted */
+{
+       struct xfs_da_blkinfo   *info = bp->b_addr;     /* magic selects the verifier */
 
-       /* corrupt block */
-       xfs_verifier_error(bp);
+       switch (be16_to_cpu(info->magic)) {
+       case XFS_DA3_NODE_MAGIC:
+       case XFS_DA_NODE_MAGIC:         /* da node: checked by this file's verifier */
+               return xfs_da3_node_verify(bp);
+       case XFS_ATTR_LEAF_MAGIC:
+       case XFS_ATTR3_LEAF_MAGIC:      /* switch to the attr leaf ops, then delegate */
+               bp->b_ops = &xfs_attr3_leaf_buf_ops;
+               return bp->b_ops->verify_struct(bp);
+       case XFS_DIR2_LEAFN_MAGIC:
+       case XFS_DIR3_LEAFN_MAGIC:      /* switch to the dir leafn ops, then delegate */
+               bp->b_ops = &xfs_dir3_leafn_buf_ops;
+               return bp->b_ops->verify_struct(bp);
+       default:                        /* magic matches none of the cases above */
+               return __this_address;
+       }
 }
 
 const struct xfs_buf_ops xfs_da3_node_buf_ops = {
        .name = "xfs_da3_node",
        .verify_read = xfs_da3_node_read_verify,
        .verify_write = xfs_da3_node_write_verify,
+       .verify_struct = xfs_da3_node_verify_struct,    /* returns failure address or NULL */
 };
 
 int
index 3771edcb301d5ab748f15c8b62b5cd772128ce41..7e77299b778950127cbbcbe9b1dea5feb2ca6e10 100644 (file)
@@ -875,4 +875,10 @@ struct xfs_attr3_rmt_hdr {
        ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
                        sizeof(struct xfs_attr3_rmt_hdr) : 0))
 
+/* Bytes in a directory block: 1 << (sb_blocklog + sb_dirblklog) of @sbp. */
+static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
+{
+       return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog);     /* shifted as int */
+}
+
 #endif /* __XFS_DA_FORMAT_H__ */
index e10778c102ea71e0f4b4727f6acfe28a30aaa4a1..92f94e190f04474953105c049b62a8367fbc7d1e 100644 (file)
@@ -119,8 +119,7 @@ xfs_da_mount(
 
 
        ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
-       ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
-              XFS_MAX_BLOCKSIZE);
+       ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE);
 
        mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
        mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
@@ -140,7 +139,7 @@ xfs_da_mount(
        dageo = mp->m_dir_geo;
        dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
        dageo->fsblog = mp->m_sb.sb_blocklog;
-       dageo->blksize = 1 << dageo->blklog;
+       dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb);
        dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
 
        /*
index 1a8f2cf977caa28812ab1372055dd4dc62409e5a..388d67c5c9036926a7bcf9e8bd9badfbdf5efa89 100644 (file)
@@ -340,5 +340,7 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
 #define XFS_READDIR_BUFSIZE    (32768)
 
 unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype);
+void *xfs_dir3_data_endp(struct xfs_da_geometry *geo,
+               struct xfs_dir2_data_hdr *hdr);
 
 #endif /* __XFS_DIR2_H__ */
index 43c902f7a68d4da0c6eb9853aeb14b5fbbe52340..2da86a394bcf05c4091db70b3651a4c52158bc81 100644 (file)
@@ -58,7 +58,7 @@ xfs_dir_startup(void)
        xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
 }
 
-static bool
+static xfs_failaddr_t
 xfs_dir3_block_verify(
        struct xfs_buf          *bp)
 {
@@ -67,20 +67,18 @@ xfs_dir3_block_verify(
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
-                       return false;
+                       return __this_address;
                if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
+                       return __this_address;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
-                       return false;
+                       return __this_address;
                if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
-                       return false;
+                       return __this_address;
        } else {
                if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
-                       return false;
+                       return __this_address;
        }
-       if (__xfs_dir3_data_check(NULL, bp))
-               return false;
-       return true;
+       return __xfs_dir3_data_check(NULL, bp);
 }
 
 static void
@@ -88,15 +86,16 @@ xfs_dir3_block_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
             !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_dir3_block_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_dir3_block_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 static void
@@ -104,12 +103,13 @@ xfs_dir3_block_write_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_dir3_block_verify(bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_dir3_block_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -126,6 +126,7 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
        .name = "xfs_dir3_block",
        .verify_read = xfs_dir3_block_read_verify,
        .verify_write = xfs_dir3_block_write_verify,
+       .verify_struct = xfs_dir3_block_verify,
 };
 
 int
index 8727a43115efd54757e89862d6e38dce3b35da2e..920279485275cf658f31cc5ae023f5cc05cb62c2 100644 (file)
@@ -36,9 +36,9 @@
 /*
  * Check the consistency of the data block.
  * The input can also be a block-format directory.
- * Return 0 is the buffer is good, otherwise an error.
+ * Return NULL if the buffer is good, otherwise the address of the error.
  */
-int
+xfs_failaddr_t
 __xfs_dir3_data_check(
        struct xfs_inode        *dp,            /* incore inode pointer */
        struct xfs_buf          *bp)            /* data block's buffer */
@@ -73,6 +73,14 @@ __xfs_dir3_data_check(
         */
        ops = xfs_dir_get_ops(mp, dp);
 
+       /*
+        * If this isn't a directory, or we don't get handed the dir ops,
+        * something is seriously wrong.  Bail out.
+        */
+       if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) ||
+           ops != xfs_dir_get_ops(mp, NULL))
+               return __this_address;
+
        hdr = bp->b_addr;
        p = (char *)ops->data_entry_p(hdr);
 
@@ -81,7 +89,6 @@ __xfs_dir3_data_check(
        case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
                btp = xfs_dir2_block_tail_p(geo, hdr);
                lep = xfs_dir2_block_leaf_p(btp);
-               endp = (char *)lep;
 
                /*
                 * The number of leaf entries is limited by the size of the
@@ -90,17 +97,19 @@ __xfs_dir3_data_check(
                 * so just ensure that the count falls somewhere inside the
                 * block right now.
                 */
-               XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) <
-                       ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
+               if (be32_to_cpu(btp->count) >=
+                   ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry))
+                       return __this_address;
                break;
        case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
        case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
-               endp = (char *)hdr + geo->blksize;
                break;
        default:
-               XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
-               return -EFSCORRUPTED;
+               return __this_address;
        }
+       endp = xfs_dir3_data_endp(geo, hdr);
+       if (!endp)
+               return __this_address;
 
        /*
         * Account for zero bestfree entries.
@@ -108,22 +117,25 @@ __xfs_dir3_data_check(
        bf = ops->data_bestfree_p(hdr);
        count = lastfree = freeseen = 0;
        if (!bf[0].length) {
-               XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset);
+               if (bf[0].offset)
+                       return __this_address;
                freeseen |= 1 << 0;
        }
        if (!bf[1].length) {
-               XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset);
+               if (bf[1].offset)
+                       return __this_address;
                freeseen |= 1 << 1;
        }
        if (!bf[2].length) {
-               XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset);
+               if (bf[2].offset)
+                       return __this_address;
                freeseen |= 1 << 2;
        }
 
-       XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
-                                               be16_to_cpu(bf[1].length));
-       XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
-                                               be16_to_cpu(bf[2].length));
+       if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length))
+               return __this_address;
+       if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length))
+               return __this_address;
        /*
         * Loop over the data/unused entries.
         */
@@ -135,22 +147,23 @@ __xfs_dir3_data_check(
                 * doesn't need to be there.
                 */
                if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0);
-                       XFS_WANT_CORRUPTED_RETURN(mp, endp >=
-                                       p + be16_to_cpu(dup->length));
-                       XFS_WANT_CORRUPTED_RETURN(mp,
-                               be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
-                                              (char *)dup - (char *)hdr);
+                       if (lastfree != 0)
+                               return __this_address;
+                       if (endp < p + be16_to_cpu(dup->length))
+                               return __this_address;
+                       if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
+                           (char *)dup - (char *)hdr)
+                               return __this_address;
                        dfp = xfs_dir2_data_freefind(hdr, bf, dup);
                        if (dfp) {
                                i = (int)(dfp - bf);
-                               XFS_WANT_CORRUPTED_RETURN(mp,
-                                       (freeseen & (1 << i)) == 0);
+                               if ((freeseen & (1 << i)) != 0)
+                                       return __this_address;
                                freeseen |= 1 << i;
                        } else {
-                               XFS_WANT_CORRUPTED_RETURN(mp,
-                                       be16_to_cpu(dup->length) <=
-                                               be16_to_cpu(bf[2].length));
+                               if (be16_to_cpu(dup->length) >
+                                   be16_to_cpu(bf[2].length))
+                                       return __this_address;
                        }
                        p += be16_to_cpu(dup->length);
                        lastfree = 1;
@@ -163,16 +176,17 @@ __xfs_dir3_data_check(
                 * The linear search is crude but this is DEBUG code.
                 */
                dep = (xfs_dir2_data_entry_t *)p;
-               XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0);
-               XFS_WANT_CORRUPTED_RETURN(mp,
-                       !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
-               XFS_WANT_CORRUPTED_RETURN(mp, endp >=
-                               p + ops->data_entsize(dep->namelen));
-               XFS_WANT_CORRUPTED_RETURN(mp,
-                       be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
-                                              (char *)dep - (char *)hdr);
-               XFS_WANT_CORRUPTED_RETURN(mp,
-                               ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
+               if (dep->namelen == 0)
+                       return __this_address;
+               if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)))
+                       return __this_address;
+               if (endp < p + ops->data_entsize(dep->namelen))
+                       return __this_address;
+               if (be16_to_cpu(*ops->data_entry_tag_p(dep)) !=
+                   (char *)dep - (char *)hdr)
+                       return __this_address;
+               if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX)
+                       return __this_address;
                count++;
                lastfree = 0;
                if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
@@ -188,34 +202,52 @@ __xfs_dir3_data_check(
                                    be32_to_cpu(lep[i].hashval) == hash)
                                        break;
                        }
-                       XFS_WANT_CORRUPTED_RETURN(mp,
-                                                 i < be32_to_cpu(btp->count));
+                       if (i >= be32_to_cpu(btp->count))
+                               return __this_address;
                }
                p += ops->data_entsize(dep->namelen);
        }
        /*
         * Need to have seen all the entries and all the bestfree slots.
         */
-       XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7);
+       if (freeseen != 7)
+               return __this_address;
        if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
            hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
                for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
                        if (lep[i].address ==
                            cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                                stale++;
-                       if (i > 0)
-                               XFS_WANT_CORRUPTED_RETURN(mp,
-                                       be32_to_cpu(lep[i].hashval) >=
-                                               be32_to_cpu(lep[i - 1].hashval));
+                       if (i > 0 && be32_to_cpu(lep[i].hashval) <
+                                    be32_to_cpu(lep[i - 1].hashval))
+                               return __this_address;
                }
-               XFS_WANT_CORRUPTED_RETURN(mp, count ==
-                       be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
-               XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale));
+               if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale))
+                       return __this_address;
+               if (stale != be32_to_cpu(btp->stale))
+                       return __this_address;
        }
-       return 0;
+       return NULL;
+}
+
+#ifdef DEBUG   /* wrapper: run __xfs_dir3_data_check(), report and ASSERT on failure */
+void
+xfs_dir3_data_check(
+       struct xfs_inode        *dp,    /* incore inode; dp->i_mount is read on failure */
+       struct xfs_buf          *bp)    /* data block's buffer */
+{
+       xfs_failaddr_t          fa;     /* address of the failed check, NULL if good */
+
+       fa = __xfs_dir3_data_check(dp, bp);
+       if (!fa)
+               return;                 /* block passed every check */
+       xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
+                       bp->b_addr, __FILE__, __LINE__, fa);
+       ASSERT(0);                      /* reached only after the error is reported */
 }
+#endif
 
-static bool
+static xfs_failaddr_t
 xfs_dir3_data_verify(
        struct xfs_buf          *bp)
 {
@@ -224,20 +256,18 @@ xfs_dir3_data_verify(
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
-                       return false;
+                       return __this_address;
                if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
+                       return __this_address;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
-                       return false;
+                       return __this_address;
                if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
-                       return false;
+                       return __this_address;
        } else {
                if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
-                       return false;
+                       return __this_address;
        }
-       if (__xfs_dir3_data_check(NULL, bp))
-               return false;
-       return true;
+       return __xfs_dir3_data_check(NULL, bp);
 }
 
 /*
@@ -263,8 +293,7 @@ xfs_dir3_data_reada_verify(
                bp->b_ops->verify_read(bp);
                return;
        default:
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
                break;
        }
 }
@@ -274,15 +303,16 @@ xfs_dir3_data_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
-                xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_dir3_data_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+           !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_dir3_data_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 static void
@@ -290,12 +320,13 @@ xfs_dir3_data_write_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_dir3_data_verify(bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_dir3_data_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -312,6 +343,7 @@ const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
        .name = "xfs_dir3_data",
        .verify_read = xfs_dir3_data_read_verify,
        .verify_write = xfs_dir3_data_write_verify,
+       .verify_struct = xfs_dir3_data_verify,
 };
 
 static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
@@ -515,7 +547,6 @@ xfs_dir2_data_freescan_int(
        struct xfs_dir2_data_hdr *hdr,
        int                     *loghead)
 {
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
        xfs_dir2_data_entry_t   *dep;           /* active data entry */
        xfs_dir2_data_unused_t  *dup;           /* unused data entry */
        struct xfs_dir2_data_free *bf;
@@ -537,12 +568,7 @@ xfs_dir2_data_freescan_int(
         * Set up pointers.
         */
        p = (char *)ops->data_entry_p(hdr);
-       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-           hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
-               btp = xfs_dir2_block_tail_p(geo, hdr);
-               endp = (char *)xfs_dir2_block_leaf_p(btp);
-       } else
-               endp = (char *)hdr + geo->blksize;
+       endp = xfs_dir3_data_endp(geo, hdr);
        /*
         * Loop over the block's entries.
         */
@@ -755,17 +781,9 @@ xfs_dir2_data_make_free(
        /*
         * Figure out where the end of the data area is.
         */
-       if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-           hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
-               endptr = (char *)hdr + args->geo->blksize;
-       else {
-               xfs_dir2_block_tail_t   *btp;   /* block tail */
+       endptr = xfs_dir3_data_endp(args->geo, hdr);
+       ASSERT(endptr != NULL);
 
-               ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-                       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-               btp = xfs_dir2_block_tail_p(args->geo, hdr);
-               endptr = (char *)xfs_dir2_block_leaf_p(btp);
-       }
        /*
         * If this isn't the start of the block, then back up to
         * the previous entry and see if it's free.
@@ -1067,3 +1085,21 @@ xfs_dir2_data_use_free(
        }
        *needscanp = needscan;
 }
+
+/* Find the end of the entry data in a data/block format dir block. */
+void *                                 /* NULL if hdr->magic is not handled below */
+xfs_dir3_data_endp(
+       struct xfs_da_geometry          *geo,   /* supplies blksize / block tail */
+       struct xfs_dir2_data_hdr        *hdr)   /* header at the start of the block */
+{
+       switch (hdr->magic) {
+       case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
+       case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):  /* data ends at the leaf entries */
+               return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr));
+       case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
+       case cpu_to_be32(XFS_DIR2_DATA_MAGIC):   /* data ends blksize bytes past hdr */
+               return (char *)hdr + geo->blksize;
+       default:                                 /* callers must handle NULL */
+               return NULL;
+       }
+}
index 27297a689d9c26acea0a10e643b8c0694867b8b4..d7e630f41f9cd20a23a2986f65beedad4c0fe87a 100644 (file)
@@ -50,13 +50,7 @@ static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
  * Pop an assert if something is wrong.
  */
 #ifdef DEBUG
-#define        xfs_dir3_leaf_check(dp, bp) \
-do { \
-       if (!xfs_dir3_leaf1_check((dp), (bp))) \
-               ASSERT(0); \
-} while (0);
-
-STATIC bool
+static xfs_failaddr_t
 xfs_dir3_leaf1_check(
        struct xfs_inode        *dp,
        struct xfs_buf          *bp)
@@ -69,17 +63,32 @@ xfs_dir3_leaf1_check(
        if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
                struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
                if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
-                       return false;
+                       return __this_address;
        } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
-               return false;
+               return __this_address;
 
        return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
 }
+
+static inline void                     /* DEBUG only; the #else branch is an empty macro */
+xfs_dir3_leaf_check(
+       struct xfs_inode        *dp,    /* incore inode; dp->i_mount is read on failure */
+       struct xfs_buf          *bp)    /* leaf block buffer to check */
+{
+       xfs_failaddr_t          fa;     /* address of the failed check, NULL if good */
+
+       fa = xfs_dir3_leaf1_check(dp, bp);
+       if (!fa)
+               return;                 /* leaf1 block passed every check */
+       xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
+                       bp->b_addr, __FILE__, __LINE__, fa);
+       ASSERT(0);                      /* reached only after the error is reported */
+}
 #else
 #define        xfs_dir3_leaf_check(dp, bp)
 #endif
 
-bool
+xfs_failaddr_t
 xfs_dir3_leaf_check_int(
        struct xfs_mount        *mp,
        struct xfs_inode        *dp,
@@ -114,27 +123,27 @@ xfs_dir3_leaf_check_int(
         * We can deduce a value for that from di_size.
         */
        if (hdr->count > ops->leaf_max_ents(geo))
-               return false;
+               return __this_address;
 
        /* Leaves and bests don't overlap in leaf format. */
        if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
             hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
            (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
-               return false;
+               return __this_address;
 
        /* Check hash value order, count stale entries.  */
        for (i = stale = 0; i < hdr->count; i++) {
                if (i + 1 < hdr->count) {
                        if (be32_to_cpu(ents[i].hashval) >
                                        be32_to_cpu(ents[i + 1].hashval))
-                               return false;
+                               return __this_address;
                }
                if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                        stale++;
        }
        if (hdr->stale != stale)
-               return false;
-       return true;
+               return __this_address;
+       return NULL;
 }
 
 /*
@@ -142,7 +151,7 @@ xfs_dir3_leaf_check_int(
  * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
  * to incorrect magic numbers.
  */
-static bool
+static xfs_failaddr_t
 xfs_dir3_leaf_verify(
        struct xfs_buf          *bp,
        uint16_t                magic)
@@ -160,16 +169,16 @@ xfs_dir3_leaf_verify(
                                                         : XFS_DIR3_LEAFN_MAGIC;
 
                if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
-                       return false;
+                       return __this_address;
                if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
+                       return __this_address;
                if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
-                       return false;
+                       return __this_address;
                if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
-                       return false;
+                       return __this_address;
        } else {
                if (leaf->hdr.info.magic != cpu_to_be16(magic))
-                       return false;
+                       return __this_address;
        }
 
        return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
@@ -181,15 +190,16 @@ __read_verify(
        uint16_t        magic)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
             !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_dir3_leaf_verify(bp, magic))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_dir3_leaf_verify(bp, magic);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 static void
@@ -198,12 +208,13 @@ __write_verify(
        uint16_t        magic)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_dir3_leaf_verify(bp, magic)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_dir3_leaf_verify(bp, magic);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -216,6 +227,13 @@ __write_verify(
        xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
 }
 
+static xfs_failaddr_t                  /* NULL if good, else failing check address */
+xfs_dir3_leaf1_verify(
+       struct xfs_buf  *bp)            /* leaf block buffer to check */
+{      /* .verify_struct hook: xfs_dir3_leaf_verify() with the LEAF1 magic */
+       return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC);
+}
+
 static void
 xfs_dir3_leaf1_read_verify(
        struct xfs_buf  *bp)
@@ -230,6 +248,13 @@ xfs_dir3_leaf1_write_verify(
        __write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
 }
 
+static xfs_failaddr_t                  /* NULL if good, else failing check address */
+xfs_dir3_leafn_verify(
+       struct xfs_buf  *bp)            /* leaf block buffer to check */
+{      /* .verify_struct hook: xfs_dir3_leaf_verify() with the LEAFN magic */
+       return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC);
+}
+
 static void
 xfs_dir3_leafn_read_verify(
        struct xfs_buf  *bp)
@@ -248,12 +273,14 @@ const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
        .name = "xfs_dir3_leaf1",
        .verify_read = xfs_dir3_leaf1_read_verify,
        .verify_write = xfs_dir3_leaf1_write_verify,
+       .verify_struct = xfs_dir3_leaf1_verify,
 };
 
 const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
        .name = "xfs_dir3_leafn",
        .verify_read = xfs_dir3_leafn_read_verify,
        .verify_write = xfs_dir3_leafn_write_verify,
+       .verify_struct = xfs_dir3_leafn_verify, /* returns failure address or NULL */
 };
 
 int
index 682e2bf370c72923b43ee7da7d778ba774f66551..239d97a6429606ae1b37bcd5a1fb11763c341bd2 100644 (file)
@@ -53,13 +53,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
  * Check internal consistency of a leafn block.
  */
 #ifdef DEBUG
-#define        xfs_dir3_leaf_check(dp, bp) \
-do { \
-       if (!xfs_dir3_leafn_check((dp), (bp))) \
-               ASSERT(0); \
-} while (0);
-
-static bool
+static xfs_failaddr_t
 xfs_dir3_leafn_check(
        struct xfs_inode        *dp,
        struct xfs_buf          *bp)
@@ -72,17 +66,32 @@ xfs_dir3_leafn_check(
        if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
                struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
                if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
-                       return false;
+                       return __this_address;
        } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
-               return false;
+               return __this_address;
 
        return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
 }
+
+static inline void                     /* DEBUG only; the #else branch is an empty macro */
+xfs_dir3_leaf_check(
+       struct xfs_inode        *dp,    /* incore inode; dp->i_mount is read on failure */
+       struct xfs_buf          *bp)    /* leafn block buffer to check */
+{
+       xfs_failaddr_t          fa;     /* address of the failed check, NULL if good */
+
+       fa = xfs_dir3_leafn_check(dp, bp);
+       if (!fa)
+               return;                 /* leafn block passed every check */
+       xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
+                       bp->b_addr, __FILE__, __LINE__, fa);
+       ASSERT(0);                      /* reached only after the error is reported */
+}
 #else
 #define        xfs_dir3_leaf_check(dp, bp)
 #endif
 
-static bool
+static xfs_failaddr_t
 xfs_dir3_free_verify(
        struct xfs_buf          *bp)
 {
@@ -93,21 +102,21 @@ xfs_dir3_free_verify(
                struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
                if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC))
-                       return false;
+                       return __this_address;
                if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
+                       return __this_address;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
-                       return false;
+                       return __this_address;
                if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
-                       return false;
+                       return __this_address;
        } else {
                if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
-                       return false;
+                       return __this_address;
        }
 
        /* XXX: should bounds check the xfs_dir3_icfree_hdr here */
 
-       return true;
+       return NULL;
 }
 
 static void
@@ -115,15 +124,16 @@ xfs_dir3_free_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_dir3_free_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_dir3_free_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 static void
@@ -131,12 +141,13 @@ xfs_dir3_free_write_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_dir3_free_verify(bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_dir3_free_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -153,10 +164,11 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
        .name = "xfs_dir3_free",
        .verify_read = xfs_dir3_free_read_verify,
        .verify_write = xfs_dir3_free_write_verify,
+       .verify_struct = xfs_dir3_free_verify,
 };
 
 /* Everything ok in the free block header? */
-static bool
+static xfs_failaddr_t
 xfs_dir3_free_header_check(
        struct xfs_inode        *dp,
        xfs_dablk_t             fbno,
@@ -174,22 +186,22 @@ xfs_dir3_free_header_check(
                struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
 
                if (be32_to_cpu(hdr3->firstdb) != firstdb)
-                       return false;
+                       return __this_address;
                if (be32_to_cpu(hdr3->nvalid) > maxbests)
-                       return false;
+                       return __this_address;
                if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
-                       return false;
+                       return __this_address;
        } else {
                struct xfs_dir2_free_hdr *hdr = bp->b_addr;
 
                if (be32_to_cpu(hdr->firstdb) != firstdb)
-                       return false;
+                       return __this_address;
                if (be32_to_cpu(hdr->nvalid) > maxbests)
-                       return false;
+                       return __this_address;
                if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
-                       return false;
+                       return __this_address;
        }
-       return true;
+       return NULL;
 }
 
 static int
@@ -200,6 +212,7 @@ __xfs_dir3_free_read(
        xfs_daddr_t             mappedbno,
        struct xfs_buf          **bpp)
 {
+       xfs_failaddr_t          fa;
        int                     err;
 
        err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
@@ -208,9 +221,9 @@ __xfs_dir3_free_read(
                return err;
 
        /* Check things that we can't do in the verifier. */
-       if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) {
-               xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
-               xfs_verifier_error(*bpp);
+       fa = xfs_dir3_free_header_check(dp, fbno, *bpp);
+       if (fa) {
+               xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
                xfs_trans_brelse(tp, *bpp);
                return -EFSCORRUPTED;
        }
@@ -1906,7 +1919,7 @@ xfs_dir2_node_addname_int(
                                        (unsigned long long)ifbno, lastfbno);
                                if (fblk) {
                                        xfs_alert(mp,
-                               " fblk 0x%p blkno %llu index %d magic 0x%x",
+                               " fblk "PTR_FMT" blkno %llu index %d magic 0x%x",
                                                fblk,
                                                (unsigned long long)fblk->blkno,
                                                fblk->index,
index 4badd26c47e6455c200d7088ff296f261386d3ff..753aeeeffc1833a72c2a12277157324342b4652d 100644 (file)
@@ -39,12 +39,13 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
 
 /* xfs_dir2_data.c */
 #ifdef DEBUG
-#define        xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp);
+extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
 #else
 #define        xfs_dir3_data_check(dp,bp)
 #endif
 
-extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
+extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp,
+               struct xfs_buf *bp);
 extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
                xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
 extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
@@ -89,8 +90,9 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
                int lowstale, int highstale, int *lfloglow, int *lfloghigh);
 extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
 
-extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
-               struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
+extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp,
+               struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr,
+               struct xfs_dir2_leaf *leaf);
 
 /* xfs_dir2_node.c */
 extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
@@ -127,7 +129,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
+extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
index be8b9755f66a0152cdca6a309b4c486aefcbe166..0c75a7f00883ff4b9bb2808c8d7c9229b4616088 100644 (file)
@@ -156,7 +156,6 @@ xfs_dir2_block_to_sf(
        xfs_dir2_sf_hdr_t       *sfhp)          /* shortform directory hdr */
 {
        xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
        xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
        xfs_inode_t             *dp;            /* incore directory inode */
        xfs_dir2_data_unused_t  *dup;           /* unused data pointer */
@@ -192,9 +191,8 @@ xfs_dir2_block_to_sf(
        /*
         * Set up to loop over the block's entries.
         */
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
        ptr = (char *)dp->d_ops->data_entry_p(hdr);
-       endptr = (char *)xfs_dir2_block_leaf_p(btp);
+       endptr = xfs_dir3_data_endp(args->geo, hdr);
        sfep = xfs_dir2_sf_firstentry(sfp);
        /*
         * Loop over the active and unused entries.
@@ -630,7 +628,7 @@ xfs_dir2_sf_check(
 #endif /* DEBUG */
 
 /* Verify the consistency of an inline directory. */
-int
+xfs_failaddr_t
 xfs_dir2_sf_verify(
        struct xfs_inode                *ip)
 {
@@ -665,7 +663,7 @@ xfs_dir2_sf_verify(
         */
        if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
            size < xfs_dir2_sf_hdr_size(sfp->i8count))
-               return -EFSCORRUPTED;
+               return __this_address;
 
        endp = (char *)sfp + size;
 
@@ -674,7 +672,7 @@ xfs_dir2_sf_verify(
        i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
        error = xfs_dir_ino_validate(mp, ino);
        if (error)
-               return error;
+               return __this_address;
        offset = dops->data_first_offset;
 
        /* Check all reported entries */
@@ -686,11 +684,11 @@ xfs_dir2_sf_verify(
                 * within the data buffer.
                 */
                if (((char *)sfep + sizeof(*sfep)) >= endp)
-                       return -EFSCORRUPTED;
+                       return __this_address;
 
                /* Don't allow names with known bad length. */
                if (sfep->namelen == 0)
-                       return -EFSCORRUPTED;
+                       return __this_address;
 
                /*
                 * Check that the variable-length part of the structure is
@@ -699,23 +697,23 @@ xfs_dir2_sf_verify(
                 */
                next_sfep = dops->sf_nextentry(sfp, sfep);
                if (endp < (char *)next_sfep)
-                       return -EFSCORRUPTED;
+                       return __this_address;
 
                /* Check that the offsets always increase. */
                if (xfs_dir2_sf_get_offset(sfep) < offset)
-                       return -EFSCORRUPTED;
+                       return __this_address;
 
                /* Check the inode number. */
                ino = dops->sf_get_ino(sfp, sfep);
                i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
                error = xfs_dir_ino_validate(mp, ino);
                if (error)
-                       return error;
+                       return __this_address;
 
                /* Check the file type. */
                filetype = dops->sf_get_ftype(sfep);
                if (filetype >= XFS_DIR3_FT_MAX)
-                       return -EFSCORRUPTED;
+                       return __this_address;
 
                offset = xfs_dir2_sf_get_offset(sfep) +
                                dops->data_entsize(sfep->namelen);
@@ -723,16 +721,16 @@ xfs_dir2_sf_verify(
                sfep = next_sfep;
        }
        if (i8count != sfp->i8count)
-               return -EFSCORRUPTED;
+               return __this_address;
        if ((void *)sfep != (void *)endp)
-               return -EFSCORRUPTED;
+               return __this_address;
 
        /* Make sure this whole thing ought to be in local format. */
        if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
            (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
-               return -EFSCORRUPTED;
+               return __this_address;
 
-       return 0;
+       return NULL;
 }
 
 /*
index 747085b4ef4406d387464b40fdea302b83c25c4c..8b7a6c3cb5997a8162f9e434b1e5a3a0b7973380 100644 (file)
@@ -42,18 +42,14 @@ xfs_calc_dquots_per_chunk(
 /*
  * Do some primitive error checking on ondisk dquot data structures.
  */
-int
-xfs_dqcheck(
+xfs_failaddr_t
+xfs_dquot_verify(
        struct xfs_mount *mp,
        xfs_disk_dquot_t *ddq,
        xfs_dqid_t       id,
        uint             type,    /* used only when IO_dorepair is true */
-       uint             flags,
-       const char       *str)
+       uint             flags)
 {
-       xfs_dqblk_t      *d = (xfs_dqblk_t *)ddq;
-       int             errs = 0;
-
        /*
         * We can encounter an uninitialized dquot buffer for 2 reasons:
         * 1. If we crash while deleting the quotainode(s), and those blks got
@@ -69,87 +65,57 @@ xfs_dqcheck(
         * This is all fine; things are still consistent, and we haven't lost
         * any quota information. Just don't complain about bad dquot blks.
         */
-       if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
-               if (flags & XFS_QMOPT_DOWARN)
-                       xfs_alert(mp,
-                       "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
-                       str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
-               errs++;
-       }
-       if (ddq->d_version != XFS_DQUOT_VERSION) {
-               if (flags & XFS_QMOPT_DOWARN)
-                       xfs_alert(mp,
-                       "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
-                       str, id, ddq->d_version, XFS_DQUOT_VERSION);
-               errs++;
-       }
+       if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC))
+               return __this_address;
+       if (ddq->d_version != XFS_DQUOT_VERSION)
+               return __this_address;
 
        if (ddq->d_flags != XFS_DQ_USER &&
            ddq->d_flags != XFS_DQ_PROJ &&
-           ddq->d_flags != XFS_DQ_GROUP) {
-               if (flags & XFS_QMOPT_DOWARN)
-                       xfs_alert(mp,
-                       "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
-                       str, id, ddq->d_flags);
-               errs++;
-       }
+           ddq->d_flags != XFS_DQ_GROUP)
+               return __this_address;
 
-       if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
-               if (flags & XFS_QMOPT_DOWARN)
-                       xfs_alert(mp,
-                       "%s : ondisk-dquot 0x%p, ID mismatch: "
-                       "0x%x expected, found id 0x%x",
-                       str, ddq, id, be32_to_cpu(ddq->d_id));
-               errs++;
-       }
+       if (id != -1 && id != be32_to_cpu(ddq->d_id))
+               return __this_address;
 
-       if (!errs && ddq->d_id) {
-               if (ddq->d_blk_softlimit &&
-                   be64_to_cpu(ddq->d_bcount) >
-                               be64_to_cpu(ddq->d_blk_softlimit)) {
-                       if (!ddq->d_btimer) {
-                               if (flags & XFS_QMOPT_DOWARN)
-                                       xfs_alert(mp,
-                       "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
-                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
-                               errs++;
-                       }
-               }
-               if (ddq->d_ino_softlimit &&
-                   be64_to_cpu(ddq->d_icount) >
-                               be64_to_cpu(ddq->d_ino_softlimit)) {
-                       if (!ddq->d_itimer) {
-                               if (flags & XFS_QMOPT_DOWARN)
-                                       xfs_alert(mp,
-                       "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
-                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
-                               errs++;
-                       }
-               }
-               if (ddq->d_rtb_softlimit &&
-                   be64_to_cpu(ddq->d_rtbcount) >
-                               be64_to_cpu(ddq->d_rtb_softlimit)) {
-                       if (!ddq->d_rtbtimer) {
-                               if (flags & XFS_QMOPT_DOWARN)
-                                       xfs_alert(mp,
-                       "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
-                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
-                               errs++;
-                       }
-               }
-       }
+       if (!ddq->d_id)
+               return NULL;
+
+       if (ddq->d_blk_softlimit &&
+           be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) &&
+           !ddq->d_btimer)
+               return __this_address;
+
+       if (ddq->d_ino_softlimit &&
+           be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) &&
+           !ddq->d_itimer)
+               return __this_address;
 
-       if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
-               return errs;
+       if (ddq->d_rtb_softlimit &&
+           be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) &&
+           !ddq->d_rtbtimer)
+               return __this_address;
+
+       return NULL;
+}
+
+/*
+ * Reset a bad ondisk dquot: zero its xfs_dqblk and reinitialize it.
+ */
+int
+xfs_dquot_repair(
+       struct xfs_mount        *mp,
+       struct xfs_disk_dquot   *ddq,
+       xfs_dqid_t              id,
+       uint                    type)
+{
+       struct xfs_dqblk        *d = (struct xfs_dqblk *)ddq;
 
-       if (flags & XFS_QMOPT_DOWARN)
-               xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
 
        /*
         * Typically, a repair is only requested by quotacheck.
         */
        ASSERT(id != -1);
-       ASSERT(flags & XFS_QMOPT_DQREPAIR);
        memset(d, 0, sizeof(xfs_dqblk_t));
 
        d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
@@ -163,7 +129,7 @@ xfs_dqcheck(
                                 XFS_DQUOT_CRC_OFF);
        }
 
-       return errs;
+       return 0;
 }
 
 STATIC bool
@@ -198,13 +164,13 @@ xfs_dquot_buf_verify_crc(
        return true;
 }
 
-STATIC bool
+STATIC xfs_failaddr_t
 xfs_dquot_buf_verify(
        struct xfs_mount        *mp,
-       struct xfs_buf          *bp,
-       int                     warn)
+       struct xfs_buf          *bp)
 {
        struct xfs_dqblk        *d = (struct xfs_dqblk *)bp->b_addr;
+       xfs_failaddr_t          fa;
        xfs_dqid_t              id = 0;
        int                     ndquots;
        int                     i;
@@ -228,33 +194,43 @@ xfs_dquot_buf_verify(
         */
        for (i = 0; i < ndquots; i++) {
                struct xfs_disk_dquot   *ddq;
-               int                     error;
 
                ddq = &d[i].dd_diskdq;
 
                if (i == 0)
                        id = be32_to_cpu(ddq->d_id);
 
-               error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
-               if (error)
-                       return false;
+               fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0);
+               if (fa)
+                       return fa;
        }
-       return true;
+
+       return NULL;
+}
+
+static xfs_failaddr_t
+xfs_dquot_buf_verify_struct(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       /*
+        * Single-argument wrapper so that xfs_dquot_buf_verify() can be
+        * installed as the ->verify_struct hook of xfs_dquot_buf_ops; the
+        * mount is taken from the buffer's target.  Returns NULL if every
+        * dquot in the buffer verifies, else the failing check's address.
+        */
+       return xfs_dquot_buf_verify(mp, bp);
 }
 
 static void
 xfs_dquot_buf_read_verify(
-       struct xfs_buf  *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t          fa;
 
        if (!xfs_dquot_buf_verify_crc(mp, bp))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_dquot_buf_verify(mp, bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
+       }
 }
 
 /*
@@ -270,7 +246,7 @@ xfs_dquot_buf_readahead_verify(
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
        if (!xfs_dquot_buf_verify_crc(mp, bp) ||
-           !xfs_dquot_buf_verify(mp, bp, 0)) {
+           xfs_dquot_buf_verify(mp, bp) != NULL) {
                xfs_buf_ioerror(bp, -EIO);
                bp->b_flags &= ~XBF_DONE;
        }
@@ -283,21 +259,21 @@ xfs_dquot_buf_readahead_verify(
  */
 static void
 xfs_dquot_buf_write_verify(
-       struct xfs_buf  *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
+       fa = xfs_dquot_buf_verify(mp, bp);
+       if (fa)
+               xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
 }
 
 const struct xfs_buf_ops xfs_dquot_buf_ops = {
        .name = "xfs_dquot",
        .verify_read = xfs_dquot_buf_read_verify,
        .verify_write = xfs_dquot_buf_write_verify,
+       .verify_struct = xfs_dquot_buf_verify_struct,
 };
 
 const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
index b909241045962c13bf875086811b48505c9aac29..faf1a4edd6181d00b664f847c0650e40f7f4a3f2 100644 (file)
@@ -233,6 +233,13 @@ typedef struct xfs_fsop_resblks {
 #define XFS_MAX_LOG_BLOCKS     (1024 * 1024ULL)
 #define XFS_MIN_LOG_BYTES      (10 * 1024 * 1024ULL)
 
+/*
+ * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than
+ * 16MB or larger than 1TB.
+ */
+#define XFS_MIN_AG_BYTES       (1ULL << 24)    /* 16 MB */
+#define XFS_MAX_AG_BYTES       (1ULL << 40)    /* 1 TB */
+
 /* keep the maximum size under 2^31 by a small amount */
 #define XFS_MAX_LOG_BYTES \
        ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
index 3b57ef0f2f76c758e6a9c8b89b7a0c470cdd09a9..0e2cf5f0be1f364cb4651a4e3cf2edd64653fb79 100644 (file)
@@ -2491,7 +2491,7 @@ xfs_check_agi_unlinked(
 #define xfs_check_agi_unlinked(agi)
 #endif
 
-static bool
+static xfs_failaddr_t
 xfs_agi_verify(
        struct xfs_buf  *bp)
 {
@@ -2500,28 +2500,28 @@ xfs_agi_verify(
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
-                       return false;
+                       return __this_address;
                if (!xfs_log_check_lsn(mp,
                                be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
-                       return false;
+                       return __this_address;
        }
 
        /*
         * Validate the magic number of the agi block.
         */
        if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
-               return false;
+               return __this_address;
        if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
-               return false;
+               return __this_address;
 
        if (be32_to_cpu(agi->agi_level) < 1 ||
            be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
-               return false;
+               return __this_address;
 
        if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
            (be32_to_cpu(agi->agi_free_level) < 1 ||
             be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS))
-               return false;
+               return __this_address;
 
        /*
         * during growfs operations, the perag is not fully initialised,
@@ -2530,10 +2530,10 @@ xfs_agi_verify(
         * so we can detect and avoid this problem.
         */
        if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
-               return false;
+               return __this_address;
 
        xfs_check_agi_unlinked(agi);
-       return true;
+       return NULL;
 }
 
 static void
@@ -2541,28 +2541,29 @@ xfs_agi_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t  fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
-                               XFS_ERRTAG_IALLOC_READ_AGI))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_agi_verify(bp);
+               if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 static void
 xfs_agi_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
+       xfs_failaddr_t          fa;
 
-       if (!xfs_agi_verify(bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_agi_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -2578,6 +2579,7 @@ const struct xfs_buf_ops xfs_agi_buf_ops = {
        .name = "xfs_agi",
        .verify_read = xfs_agi_read_verify,
        .verify_write = xfs_agi_write_verify,
+       .verify_struct = xfs_agi_verify,
 };
 
 /*
@@ -2751,3 +2753,102 @@ xfs_verify_dir_ino(
                return false;
        return xfs_verify_ino(mp, ino);
 }
+
+/*
+ * Is there an inode record covering a given range of inode numbers?
+ *
+ * Starting from the record found by an XFS_LOOKUP_LE lookup of low, walk
+ * forward through the inode btree until a record starts beyond high.
+ * Each holemask bit of a record stands for XFS_INODES_PER_HOLEMASK_BIT
+ * consecutive inode numbers; chunks whose bit is set are skipped, and
+ * *exists becomes true as soon as a chunk with a clear bit overlaps the
+ * inclusive range [low, high].
+ *
+ * Returns 0 with the answer in *exists (false if nothing matched), or the
+ * error from the btree lookup, record fetch or cursor increment.
+ */
+int
+xfs_ialloc_has_inode_record(
+       struct xfs_btree_cur    *cur,
+       xfs_agino_t             low,
+       xfs_agino_t             high,
+       bool                    *exists)
+{
+       struct xfs_inobt_rec_incore     irec;
+       xfs_agino_t             agino;
+       uint16_t                holemask;
+       int                     has_record;
+       int                     i;
+       int                     error;
+
+       *exists = false;
+       error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record);
+       while (error == 0 && has_record) {
+               error = xfs_inobt_get_rec(cur, &irec, &has_record);
+               if (error || irec.ir_startino > high)
+                       break;
+               /* Scan this record one holemask bit (one chunk) at a time. */
+               agino = irec.ir_startino;
+               holemask = irec.ir_holemask;
+               for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
+                               i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
+                       if (holemask & 1)
+                               continue;
+                       if (agino + XFS_INODES_PER_HOLEMASK_BIT > low &&
+                                       agino <= high) {
+                               *exists = true;
+                               return 0;
+                       }
+               }
+
+               error = xfs_btree_increment(cur, 0, &has_record);
+       }
+       return error;
+}
+
+/*
+ * Is there an inode record covering a given extent?
+ *
+ * Converts the AG block range [bno, bno + len) into an inclusive range of
+ * AG inode numbers with XFS_OFFBNO_TO_AGINO (offset 0 within each block)
+ * and hands that range to xfs_ialloc_has_inode_record(), whose return
+ * value and *exists result are passed straight back.
+ */
+int
+xfs_ialloc_has_inodes_at_extent(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       bool                    *exists)
+{
+       xfs_agino_t             low;
+       xfs_agino_t             high;
+
+       low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0);
+       high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1;
+
+       return xfs_ialloc_has_inode_record(cur, low, high, exists);
+}
+
+struct xfs_ialloc_count_inodes {
+       xfs_agino_t                     count;          /* running sum of ir_count */
+       xfs_agino_t                     freecount;      /* running sum of ir_freecount */
+};
+
+/*
+ * Record inode counts across all inobt records.
+ *
+ * Per-record callback: decodes rec with xfs_inobt_btrec_to_irec() and adds
+ * its ir_count and ir_freecount to the struct xfs_ialloc_count_inodes
+ * passed in through priv.  Always returns 0.
+ */
+STATIC int
+xfs_ialloc_count_inodes_rec(
+       struct xfs_btree_cur            *cur,
+       union xfs_btree_rec             *rec,
+       void                            *priv)
+{
+       struct xfs_inobt_rec_incore     irec;
+       struct xfs_ialloc_count_inodes  *ci = priv;
+
+       xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
+       ci->count += irec.ir_count;
+       ci->freecount += irec.ir_freecount;
+
+       return 0;
+}
+
+/*
+ * Count allocated and free inodes under an inobt.
+ *
+ * Runs xfs_ialloc_count_inodes_rec() over the tree through
+ * xfs_btree_query_all(), accumulating into a zero-initialized total.  On
+ * success the totals are stored in *count and *freecount and 0 is
+ * returned; on failure the query's error is returned and neither output
+ * is written.  The cursor is asserted to be an XFS_BTNUM_INO cursor.
+ */
+int
+xfs_ialloc_count_inodes(
+       struct xfs_btree_cur            *cur,
+       xfs_agino_t                     *count,
+       xfs_agino_t                     *freecount)
+{
+       struct xfs_ialloc_count_inodes  ci = {0};
+       int                             error;
+
+       ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
+       error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci);
+       if (error)
+               return error;
+
+       *count = ci.count;
+       *freecount = ci.freecount;
+       return 0;
+}
index 66a8de0b1caaad8d1ba9d5ed94fccc813780dca3..c5402bb4ce0cd017e78e3bab72317cf77170d15f 100644 (file)
@@ -170,6 +170,12 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
 union xfs_btree_rec;
 void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
                struct xfs_inobt_rec_incore *irec);
+int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur,
+               xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low,
+               xfs_agino_t high, bool *exists);
+int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count,
+               xfs_agino_t *freecount);
 
 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
 void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
index 317caba9faa67d4c01d057c24015c0d8964cca60..af197a5f3a82e622c2dea38a9a2b33aaaf676b66 100644 (file)
@@ -141,21 +141,42 @@ xfs_finobt_alloc_block(
        union xfs_btree_ptr     *new,
        int                     *stat)
 {
+       if (cur->bc_mp->m_inotbt_nores)
+               return xfs_inobt_alloc_block(cur, start, new, stat);
        return __xfs_inobt_alloc_block(cur, start, new, stat,
                        XFS_AG_RESV_METADATA);
 }
 
 STATIC int
-xfs_inobt_free_block(
+__xfs_inobt_free_block(
        struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp)
+       struct xfs_buf          *bp,
+       enum xfs_ag_resv_type   resv)
 {
        struct xfs_owner_info   oinfo;
 
        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
        return xfs_free_extent(cur->bc_tp,
                        XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
-                       &oinfo, XFS_AG_RESV_NONE);
+                       &oinfo, resv);
+}
+
+STATIC int
+xfs_inobt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE);
+}
+
+STATIC int
+xfs_finobt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       if (cur->bc_mp->m_inotbt_nores)
+               return xfs_inobt_free_block(cur, bp);
+       return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA);
 }
 
 STATIC int
@@ -250,12 +271,13 @@ xfs_inobt_diff_two_keys(
                          be32_to_cpu(k2->inobt.ir_startino);
 }
 
-static int
+static xfs_failaddr_t
 xfs_inobt_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       xfs_failaddr_t          fa;
        unsigned int            level;
 
        /*
@@ -271,20 +293,21 @@ xfs_inobt_verify(
        switch (block->bb_magic) {
        case cpu_to_be32(XFS_IBT_CRC_MAGIC):
        case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
-               if (!xfs_btree_sblock_v5hdr_verify(bp))
-                       return false;
+               fa = xfs_btree_sblock_v5hdr_verify(bp);
+               if (fa)
+                       return fa;
                /* fall through */
        case cpu_to_be32(XFS_IBT_MAGIC):
        case cpu_to_be32(XFS_FIBT_MAGIC):
                break;
        default:
-               return 0;
+               return __this_address;  /* bad magic fails; NULL would mean "verified" */
        }
 
        /* level verification */
        level = be16_to_cpu(block->bb_level);
        if (level >= mp->m_in_maxlevels)
-               return false;
+               return __this_address;
 
        return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
 }
@@ -293,25 +316,30 @@ static void
 xfs_inobt_read_verify(
        struct xfs_buf  *bp)
 {
+       xfs_failaddr_t  fa;
+
        if (!xfs_btree_sblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_inobt_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_inobt_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 
-       if (bp->b_error) {
+       if (bp->b_error)
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
 }
 
 static void
 xfs_inobt_write_verify(
        struct xfs_buf  *bp)
 {
-       if (!xfs_inobt_verify(bp)) {
+       xfs_failaddr_t  fa;
+
+       fa = xfs_inobt_verify(bp);
+       if (fa) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
        xfs_btree_sblock_calc_crc(bp);
@@ -322,6 +350,7 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
        .name = "xfs_inobt",
        .verify_read = xfs_inobt_read_verify,
        .verify_write = xfs_inobt_write_verify,
+       .verify_struct = xfs_inobt_verify,
 };
 
 STATIC int
@@ -372,7 +401,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
        .dup_cursor             = xfs_inobt_dup_cursor,
        .set_root               = xfs_finobt_set_root,
        .alloc_block            = xfs_finobt_alloc_block,
-       .free_block             = xfs_inobt_free_block,
+       .free_block             = xfs_finobt_free_block,
        .get_minrecs            = xfs_inobt_get_minrecs,
        .get_maxrecs            = xfs_inobt_get_maxrecs,
        .init_key_from_rec      = xfs_inobt_init_key_from_rec,
index b9c0bf80669ccc94afb0e13cf439e6745098f6c7..4fe17b3683160b6c4f7612fb04d39be73d64e03c 100644 (file)
@@ -115,8 +115,7 @@ xfs_inode_buf_verify(
                                return;
                        }
 
-                       xfs_buf_ioerror(bp, -EFSCORRUPTED);
-                       xfs_verifier_error(bp);
+                       xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
 #ifdef DEBUG
                        xfs_alert(mp,
                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -384,7 +383,7 @@ xfs_log_dinode_to_disk(
        }
 }
 
-bool
+xfs_failaddr_t
 xfs_dinode_verify(
        struct xfs_mount        *mp,
        xfs_ino_t               ino,
@@ -393,53 +392,122 @@ xfs_dinode_verify(
        uint16_t                mode;
        uint16_t                flags;
        uint64_t                flags2;
+       uint64_t                di_size;
 
        if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
-               return false;
+               return __this_address;
+
+       /* Verify v3 integrity information first */
+       if (dip->di_version >= 3) {
+               if (!xfs_sb_version_hascrc(&mp->m_sb))
+                       return __this_address;
+               if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
+                                     XFS_DINODE_CRC_OFF))
+                       return __this_address;
+               if (be64_to_cpu(dip->di_ino) != ino)
+                       return __this_address;
+               if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
+                       return __this_address;
+       }
 
        /* don't allow invalid i_size */
-       if (be64_to_cpu(dip->di_size) & (1ULL << 63))
-               return false;
+       di_size = be64_to_cpu(dip->di_size);
+       if (di_size & (1ULL << 63))
+               return __this_address;
 
        mode = be16_to_cpu(dip->di_mode);
        if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
-               return false;
+               return __this_address;
 
        /* No zero-length symlinks/dirs. */
-       if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0)
-               return false;
+       if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
+               return __this_address;
+
+       /* Fork checks carried over from xfs_iformat_fork */
+       if (mode &&
+           be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
+                       be64_to_cpu(dip->di_nblocks))
+               return __this_address;
+
+       if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
+               return __this_address;
+
+       flags = be16_to_cpu(dip->di_flags);
+
+       if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
+               return __this_address;
+
+       /* Do we have appropriate data fork formats for the mode? */
+       switch (mode & S_IFMT) {
+       case S_IFIFO:
+       case S_IFCHR:
+       case S_IFBLK:
+       case S_IFSOCK:
+               if (dip->di_format != XFS_DINODE_FMT_DEV)
+                       return __this_address;
+               break;
+       case S_IFREG:
+       case S_IFLNK:
+       case S_IFDIR:
+               switch (dip->di_format) {
+               case XFS_DINODE_FMT_LOCAL:
+                       /*
+                        * no local regular files yet
+                        */
+                       if (S_ISREG(mode))
+                               return __this_address;
+                       if (di_size > XFS_DFORK_DSIZE(dip, mp))
+                               return __this_address;
+                       /* fall through */
+               case XFS_DINODE_FMT_EXTENTS:
+               case XFS_DINODE_FMT_BTREE:
+                       break;
+               default:
+                       return __this_address;
+               }
+               break;
+       case 0:
+               /* Uninitialized inode ok. */
+               break;
+       default:
+               return __this_address;
+       }
+
+       if (XFS_DFORK_Q(dip)) {
+               switch (dip->di_aformat) {
+               case XFS_DINODE_FMT_LOCAL:
+               case XFS_DINODE_FMT_EXTENTS:
+               case XFS_DINODE_FMT_BTREE:
+                       break;
+               default:
+                       return __this_address;
+               }
+       }
 
        /* only version 3 or greater inodes are extensively verified here */
        if (dip->di_version < 3)
-               return true;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return false;
-       if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
-                             XFS_DINODE_CRC_OFF))
-               return false;
-       if (be64_to_cpu(dip->di_ino) != ino)
-               return false;
-       if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
-               return false;
+               return NULL;
 
-       flags = be16_to_cpu(dip->di_flags);
        flags2 = be64_to_cpu(dip->di_flags2);
 
        /* don't allow reflink/cowextsize if we don't have reflink */
        if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
             !xfs_sb_version_hasreflink(&mp->m_sb))
-               return false;
+               return __this_address;
+
+       /* only regular files get reflink */
+       if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
+               return __this_address;
 
        /* don't let reflink and realtime mix */
        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
-               return false;
+               return __this_address;
 
        /* don't let reflink and dax mix */
        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
-               return false;
+               return __this_address;
 
-       return true;
+       return NULL;
 }
 
 void
@@ -479,6 +547,7 @@ xfs_iread(
 {
        xfs_buf_t       *bp;
        xfs_dinode_t    *dip;
+       xfs_failaddr_t  fa;
        int             error;
 
        /*
@@ -510,11 +579,10 @@ xfs_iread(
                return error;
 
        /* even unallocated inodes are verified */
-       if (!xfs_dinode_verify(mp, ip->i_ino, dip)) {
-               xfs_alert(mp, "%s: validation failed for inode %lld",
-                               __func__, ip->i_ino);
-
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
+       fa = xfs_dinode_verify(mp, ip->i_ino, dip);
+       if (fa) {
+               xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
+                               sizeof(*dip), fa);
                error = -EFSCORRUPTED;
                goto out_brelse;
        }
index a9c97a356c3062a39634f76d57bb156c167cac93..8a5e1da52d742b1c80ef9c76ed0f9b3e5a6a38f1 100644 (file)
@@ -82,7 +82,7 @@ void  xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #define        xfs_inobp_check(mp, bp)
 #endif /* DEBUG */
 
-bool   xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
-                         struct xfs_dinode *dip);
+xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
+                          struct xfs_dinode *dip);
 
 #endif /* __XFS_INODE_BUF_H__ */
index c79a1616b79d7530f50746aecd26800d95d4fd48..866d2861c625c7530f4f312b4f7f006ddc391580 100644 (file)
@@ -35,6 +35,8 @@
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_dir2_priv.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_shared.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
@@ -62,69 +64,11 @@ xfs_iformat_fork(
        int                     error = 0;
        xfs_fsize_t             di_size;
 
-       if (unlikely(be32_to_cpu(dip->di_nextents) +
-                    be16_to_cpu(dip->di_anextents) >
-                    be64_to_cpu(dip->di_nblocks))) {
-               xfs_warn(ip->i_mount,
-                       "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
-                       (unsigned long long)ip->i_ino,
-                       (int)(be32_to_cpu(dip->di_nextents) +
-                             be16_to_cpu(dip->di_anextents)),
-                       (unsigned long long)
-                               be64_to_cpu(dip->di_nblocks));
-               XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
-                                    ip->i_mount, dip);
-               return -EFSCORRUPTED;
-       }
-
-       if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
-               xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
-                       (unsigned long long)ip->i_ino,
-                       dip->di_forkoff);
-               XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
-                                    ip->i_mount, dip);
-               return -EFSCORRUPTED;
-       }
-
-       if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
-                    !ip->i_mount->m_rtdev_targp)) {
-               xfs_warn(ip->i_mount,
-                       "corrupt dinode %Lu, has realtime flag set.",
-                       ip->i_ino);
-               XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
-                                    XFS_ERRLEVEL_LOW, ip->i_mount, dip);
-               return -EFSCORRUPTED;
-       }
-
-       if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) {
-               xfs_warn(ip->i_mount,
-                       "corrupt dinode %llu, wrong file type for reflink.",
-                       ip->i_ino);
-               XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
-                                    XFS_ERRLEVEL_LOW, ip->i_mount, dip);
-               return -EFSCORRUPTED;
-       }
-
-       if (unlikely(xfs_is_reflink_inode(ip) &&
-           (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) {
-               xfs_warn(ip->i_mount,
-                       "corrupt dinode %llu, has reflink+realtime flag set.",
-                       ip->i_ino);
-               XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
-                                    XFS_ERRLEVEL_LOW, ip->i_mount, dip);
-               return -EFSCORRUPTED;
-       }
-
        switch (inode->i_mode & S_IFMT) {
        case S_IFIFO:
        case S_IFCHR:
        case S_IFBLK:
        case S_IFSOCK:
-               if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
-                       XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
-                                             ip->i_mount, dip);
-                       return -EFSCORRUPTED;
-               }
                ip->i_d.di_size = 0;
                inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
                break;
@@ -134,32 +78,7 @@ xfs_iformat_fork(
        case S_IFDIR:
                switch (dip->di_format) {
                case XFS_DINODE_FMT_LOCAL:
-                       /*
-                        * no local regular files yet
-                        */
-                       if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
-                               xfs_warn(ip->i_mount,
-                       "corrupt inode %Lu (local format for regular file).",
-                                       (unsigned long long) ip->i_ino);
-                               XFS_CORRUPTION_ERROR("xfs_iformat(4)",
-                                                    XFS_ERRLEVEL_LOW,
-                                                    ip->i_mount, dip);
-                               return -EFSCORRUPTED;
-                       }
-
                        di_size = be64_to_cpu(dip->di_size);
-                       if (unlikely(di_size < 0 ||
-                                    di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
-                               xfs_warn(ip->i_mount,
-                       "corrupt inode %Lu (bad size %Ld for local inode).",
-                                       (unsigned long long) ip->i_ino,
-                                       (long long) di_size);
-                               XFS_CORRUPTION_ERROR("xfs_iformat(5)",
-                                                    XFS_ERRLEVEL_LOW,
-                                                    ip->i_mount, dip);
-                               return -EFSCORRUPTED;
-                       }
-
                        size = (int)di_size;
                        error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
                        break;
@@ -170,28 +89,16 @@ xfs_iformat_fork(
                        error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
                        break;
                default:
-                       XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
-                                        ip->i_mount);
                        return -EFSCORRUPTED;
                }
                break;
 
        default:
-               XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
                return -EFSCORRUPTED;
        }
        if (error)
                return error;
 
-       /* Check inline dir contents. */
-       if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) {
-               error = xfs_dir2_sf_verify(ip);
-               if (error) {
-                       xfs_idestroy_fork(ip, XFS_DATA_FORK);
-                       return error;
-               }
-       }
-
        if (xfs_is_reflink_inode(ip)) {
                ASSERT(ip->i_cowfp == NULL);
                xfs_ifork_init_cow(ip);
@@ -208,18 +115,6 @@ xfs_iformat_fork(
                atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
                size = be16_to_cpu(atp->hdr.totsize);
 
-               if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
-                       xfs_warn(ip->i_mount,
-                               "corrupt inode %Lu (bad attr fork size %Ld).",
-                               (unsigned long long) ip->i_ino,
-                               (long long) size);
-                       XFS_CORRUPTION_ERROR("xfs_iformat(8)",
-                                            XFS_ERRLEVEL_LOW,
-                                            ip->i_mount, dip);
-                       error = -EFSCORRUPTED;
-                       break;
-               }
-
                error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
                break;
        case XFS_DINODE_FMT_EXTENTS:
@@ -403,6 +298,7 @@ xfs_iformat_btree(
         */
        if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
                                        XFS_IFORK_MAXEXT(ip, whichfork) ||
+                    nrecs == 0 ||
                     XFS_BMDR_SPACE_CALC(nrecs) >
                                        XFS_DFORK_SIZE(dip, mp, whichfork) ||
                     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
@@ -827,3 +723,45 @@ xfs_ifork_init_cow(
        ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
        ip->i_cnextents = 0;
 }
+
+/* Default fork content verifiers. */
+struct xfs_ifork_ops xfs_default_ifork_ops = {
+       .verify_attr    = xfs_attr_shortform_verify,
+       .verify_dir     = xfs_dir2_sf_verify,
+       .verify_symlink = xfs_symlink_shortform_verify,
+};
+
+/* Verify the inline contents of an inode's data fork via the ops callbacks. */
+xfs_failaddr_t
+xfs_ifork_verify_data(
+       struct xfs_inode        *ip,
+       struct xfs_ifork_ops    *ops)
+{
+       /* Only local (inline) format data forks are checked here. */
+       if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+               return NULL;
+
+       /* Dispatch on file type; only dirs and symlinks have verifiers. */
+       switch (VFS_I(ip)->i_mode & S_IFMT) {
+       case S_IFDIR:
+               return ops->verify_dir(ip);
+       case S_IFLNK:
+               return ops->verify_symlink(ip);
+       default:
+               return NULL;
+       }
+}
+
+/* Verify the inline contents of an inode's attr fork via ops->verify_attr. */
+xfs_failaddr_t
+xfs_ifork_verify_attr(
+       struct xfs_inode        *ip,
+       struct xfs_ifork_ops    *ops)
+{
+       /* Only a local-format attr fork is checked, and it must exist. */
+       if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
+               return NULL;
+       if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK))
+               return __this_address;
+       return ops->verify_attr(ip);
+}
index b9f0098e33b826cd51bc6ce15801c8f4f755703d..dd8aba0dd119cf5fa29f206734f89164150e94c7 100644 (file)
@@ -186,4 +186,18 @@ extern struct kmem_zone    *xfs_ifork_zone;
 
 extern void xfs_ifork_init_cow(struct xfs_inode *ip);
 
+typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *);
+
+struct xfs_ifork_ops {
+       xfs_ifork_verifier_t    verify_symlink;
+       xfs_ifork_verifier_t    verify_dir;
+       xfs_ifork_verifier_t    verify_attr;
+};
+extern struct xfs_ifork_ops    xfs_default_ifork_ops;
+
+xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip,
+               struct xfs_ifork_ops *ops);
+xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip,
+               struct xfs_ifork_ops *ops);
+
 #endif /* __XFS_INODE_FORK_H__ */
index c10597973333ce44487c4c1c6723dac4ed2d38a3..cc4cbe290939255c2279052ae773bea227f42d70 100644 (file)
@@ -55,7 +55,7 @@ xfs_log_calc_max_attrsetm_res(
  * the maximum one in terms of the pre-calculated values which were done
  * at mount time.
  */
-STATIC void
+void
 xfs_log_get_max_trans_res(
        struct xfs_mount        *mp,
        struct xfs_trans_res    *max_resp)
index d69c772271cb0d882ceabbc0b5754c94ec4d5367..bb1b13a9b5f48b445e6585231f53893ec4f0d45a 100644 (file)
@@ -112,8 +112,6 @@ typedef uint16_t    xfs_qwarncnt_t;
 #define XFS_QMOPT_PQUOTA       0x0000008 /* project dquot requested */
 #define XFS_QMOPT_FORCE_RES    0x0000010 /* ignore quota limits */
 #define XFS_QMOPT_SBVERSION    0x0000040 /* change superblock version num */
-#define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
-#define XFS_QMOPT_DQREPAIR     0x0001000 /* repair dquot if damaged */
 #define XFS_QMOPT_GQUOTA       0x0002000 /* group dquot requested */
 #define XFS_QMOPT_ENOSPC       0x0004000 /* enospc instead of edquot (prj) */
 #define XFS_QMOPT_DQNEXT       0x0008000 /* return next dquot >= this ID */
@@ -153,8 +151,11 @@ typedef uint16_t   xfs_qwarncnt_t;
                (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
 #define XFS_QMOPT_RESBLK_MASK  (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
 
-extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
-                      xfs_dqid_t id, uint type, uint flags, const char *str);
+extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp,
+               struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type,
+               uint flags);
 extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
+extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq,
+               xfs_dqid_t id, uint type);
 
 #endif /* __XFS_QUOTA_H__ */
index c40d26763075307b064d49bd3cb48dfce8dd5b67..bee68c23d61252742fe9cda63a3fad0f5b59e835 100644 (file)
@@ -1696,3 +1696,22 @@ xfs_refcount_recover_cow_leftovers(
        xfs_trans_brelse(tp, agbp);
        goto out_trans;
 }
+
+/* Is there a record covering a given extent?  The answer goes to *exists. */
+int
+xfs_refcount_has_record(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       bool                    *exists)
+{
+       union xfs_btree_irec    low;
+       union xfs_btree_irec    high;
+
+       memset(&low, 0, sizeof(low));           /* every other field all-zero */
+       low.rc.rc_startblock = bno;
+       memset(&high, 0xFF, sizeof(high));      /* every other field all-ones */
+       high.rc.rc_startblock = bno + len - 1; /* last block; assumes len > 0 */
+
+       return xfs_btree_has_record(cur, &low, &high, exists);
+}
index eafb9d1f3b3748bb9943560bf1e10e4dc2723204..2a731ac68fe4872376cd57e82e75e8ff4933a5b6 100644 (file)
@@ -83,4 +83,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
        return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
 }
 
+extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
+               xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+
 #endif /* __XFS_REFCOUNT_H__ */
index 3c59dd3d58d714dc3f5b586906c8cc63e0265822..8479769e470d0c87822212b577082720026ef62d 100644 (file)
@@ -223,29 +223,31 @@ xfs_refcountbt_diff_two_keys(
                          be32_to_cpu(k2->refc.rc_startblock);
 }
 
-STATIC bool
+STATIC xfs_failaddr_t
 xfs_refcountbt_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_perag        *pag = bp->b_pag;
+       xfs_failaddr_t          fa;
        unsigned int            level;
 
        if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC))
-               return false;
+               return __this_address;
 
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
-               return false;
-       if (!xfs_btree_sblock_v5hdr_verify(bp))
-               return false;
+               return __this_address;
+       fa = xfs_btree_sblock_v5hdr_verify(bp);
+       if (fa)
+               return fa;
 
        level = be16_to_cpu(block->bb_level);
        if (pag && pag->pagf_init) {
                if (level >= pag->pagf_refcount_level)
-                       return false;
+                       return __this_address;
        } else if (level >= mp->m_refc_maxlevels)
-               return false;
+               return __this_address;
 
        return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]);
 }
@@ -254,25 +256,30 @@ STATIC void
 xfs_refcountbt_read_verify(
        struct xfs_buf  *bp)
 {
+       xfs_failaddr_t  fa;
+
        if (!xfs_btree_sblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_refcountbt_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_refcountbt_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 
-       if (bp->b_error) {
+       if (bp->b_error)
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
 }
 
 STATIC void
 xfs_refcountbt_write_verify(
        struct xfs_buf  *bp)
 {
-       if (!xfs_refcountbt_verify(bp)) {
+       xfs_failaddr_t  fa;
+
+       fa = xfs_refcountbt_verify(bp);
+       if (fa) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
        xfs_btree_sblock_calc_crc(bp);
@@ -283,6 +290,7 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
        .name                   = "xfs_refcountbt",
        .verify_read            = xfs_refcountbt_read_verify,
        .verify_write           = xfs_refcountbt_write_verify,
+       .verify_struct          = xfs_refcountbt_verify,
 };
 
 STATIC int
index 50db920ceeebbf077c2b3b13690066173ba7cf4e..79822cf6ebe36d62b8d912b1b44f5204e0ec1438 100644 (file)
@@ -2387,3 +2387,70 @@ xfs_rmap_compare(
        else
                return 0;
 }
+
+/* Is there a record covering a given extent?  The answer goes to *exists. */
+int
+xfs_rmap_has_record(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       bool                    *exists)
+{
+       union xfs_btree_irec    low;
+       union xfs_btree_irec    high;
+
+       memset(&low, 0, sizeof(low));           /* every other field all-zero */
+       low.r.rm_startblock = bno;
+       memset(&high, 0xFF, sizeof(high));      /* every other field all-ones */
+       high.r.rm_startblock = bno + len - 1; /* last block; assumes len > 0 */
+
+       return xfs_btree_has_record(cur, &low, &high, exists);
+}
+
+/*
+ * Is there a record for this owner completely covering a given physical
+ * extent?  If so, *has_rmap will be set to true.  If there is no record
+ * or the record only covers part of the range, we set *has_rmap to false.
+ * This function doesn't perform range lookups or offset checks, so it is
+ * not suitable for checking data fork blocks.
+ */
+int
+xfs_rmap_record_exists(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       struct xfs_owner_info   *oinfo,
+       bool                    *has_rmap)
+{
+       uint64_t                owner;
+       uint64_t                offset;
+       unsigned int            flags;
+       int                     has_record;
+       struct xfs_rmap_irec    irec;
+       int                     error;
+
+       xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
+       ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
+              (flags & XFS_RMAP_BMBT_BLOCK));
+
+       error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
+                       &has_record);
+       if (error)
+               return error;
+       if (!has_record) {
+               *has_rmap = false;
+               return 0;
+       }
+
+       error = xfs_rmap_get_rec(cur, &irec, &has_record);
+       if (error)
+               return error;
+       if (!has_record) {
+               *has_rmap = false;
+               return 0;
+       }
+
+       *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
+                    irec.rm_startblock + irec.rm_blockcount >= bno + len);
+       return 0;
+}
index 0fcd5b1ba7295379081e0c61d230324447a8ae56..380e53be98d536f78e456892e250113c3f9bc40d 100644 (file)
@@ -233,5 +233,10 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a,
 union xfs_btree_rec;
 int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
                struct xfs_rmap_irec *irec);
+int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+               xfs_extlen_t len, bool *exists);
+int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+               xfs_extlen_t len, struct xfs_owner_info *oinfo,
+               bool *has_rmap);
 
 #endif /* __XFS_RMAP_H__ */
index 9d9c9192584c97dba7733f74e6892adb33f8b163..e829c3e489ea43d4e913166d594490161f39fe29 100644 (file)
@@ -303,13 +303,14 @@ xfs_rmapbt_diff_two_keys(
        return 0;
 }
 
-static bool
+static xfs_failaddr_t
 xfs_rmapbt_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_perag        *pag = bp->b_pag;
+       xfs_failaddr_t          fa;
        unsigned int            level;
 
        /*
@@ -325,19 +326,20 @@ xfs_rmapbt_verify(
         * in this case.
         */
        if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
-               return false;
+               return __this_address;
 
        if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
-               return false;
-       if (!xfs_btree_sblock_v5hdr_verify(bp))
-               return false;
+               return __this_address;
+       fa = xfs_btree_sblock_v5hdr_verify(bp);
+       if (fa)
+               return fa;
 
        level = be16_to_cpu(block->bb_level);
        if (pag && pag->pagf_init) {
                if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
-                       return false;
+                       return __this_address;
        } else if (level >= mp->m_rmap_maxlevels)
-               return false;
+               return __this_address;
 
        return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
 }
@@ -346,25 +348,30 @@ static void
 xfs_rmapbt_read_verify(
        struct xfs_buf  *bp)
 {
+       xfs_failaddr_t  fa;
+
        if (!xfs_btree_sblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_rmapbt_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_rmapbt_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 
-       if (bp->b_error) {
+       if (bp->b_error)
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
 }
 
 static void
 xfs_rmapbt_write_verify(
        struct xfs_buf  *bp)
 {
-       if (!xfs_rmapbt_verify(bp)) {
+       xfs_failaddr_t  fa;
+
+       fa = xfs_rmapbt_verify(bp);
+       if (fa) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
        xfs_btree_sblock_calc_crc(bp);
@@ -375,6 +382,7 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
        .name                   = "xfs_rmapbt",
        .verify_read            = xfs_rmapbt_read_verify,
        .verify_write           = xfs_rmapbt_write_verify,
+       .verify_struct          = xfs_rmapbt_verify,
 };
 
 STATIC int
index 3fb29a5ea915001276abc6938925175d76c44a2b..106be2d0bb88cb44fda2ab4979b15c39c2fd64c7 100644 (file)
@@ -1097,3 +1097,24 @@ xfs_verify_rtbno(
 {
        return rtbno < mp->m_sb.sb_rblocks;
 }
+
+/* Is the given extent all free?  On success (0), *is_free has the answer. */
+int
+xfs_rtalloc_extent_is_free(
+       struct xfs_mount                *mp,
+       struct xfs_trans                *tp,
+       xfs_rtblock_t                   start,
+       xfs_extlen_t                    len,
+       bool                            *is_free)
+{
+       xfs_rtblock_t                   end;
+       int                             matches;
+       int                             error;
+
+       error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches);
+       if (error)
+               return error;
+
+       *is_free = matches;     /* int to bool: any nonzero value is true */
+       return 0;
+}
index 9b5aae2bcc0b7817922c25f24fb6c5324bb9ac1c..46af6aa60a8edac7179cc07a2ea07537d9ae05e8 100644 (file)
@@ -40,6 +40,8 @@
 #include "xfs_rmap_btree.h"
 #include "xfs_bmap.h"
 #include "xfs_refcount_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
 
 /*
  * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -116,6 +118,9 @@ xfs_mount_validate_sb(
        bool            check_inprogress,
        bool            check_version)
 {
+       u32             agcount = 0;
+       u32             rem;
+
        if (sbp->sb_magicnum != XFS_SB_MAGIC) {
                xfs_warn(mp, "bad magic number");
                return -EWRONGFS;
@@ -226,6 +231,13 @@ xfs_mount_validate_sb(
                return -EINVAL;
        }
 
+       /* Compute agcount for this number of dblocks and agblocks */
+       if (sbp->sb_agblocks) {
+               agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem);
+               if (rem)
+                       agcount++;
+       }
+
        /*
         * More sanity checking.  Most of these were stolen directly from
         * xfs_repair.
@@ -250,6 +262,10 @@ xfs_mount_validate_sb(
            sbp->sb_inodesize != (1 << sbp->sb_inodelog)                ||
            sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE                    ||
            sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
+           XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES       ||
+           XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES       ||
+           sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 ||
+           agcount == 0 || agcount != sbp->sb_agcount                  ||
            (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)   ||
            (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)  ||
            (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)  ||
@@ -640,11 +656,10 @@ xfs_sb_read_verify(
        error = xfs_sb_verify(bp, true);
 
 out_error:
-       if (error) {
+       if (error == -EFSCORRUPTED || error == -EFSBADCRC)
+               xfs_verifier_error(bp, error, __this_address);
+       else if (error)
                xfs_buf_ioerror(bp, error);
-               if (error == -EFSCORRUPTED || error == -EFSBADCRC)
-                       xfs_verifier_error(bp);
-       }
 }
 
 /*
@@ -673,13 +688,12 @@ xfs_sb_write_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        int                     error;
 
        error = xfs_sb_verify(bp, false);
        if (error) {
-               xfs_buf_ioerror(bp, error);
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, error, __this_address);
                return;
        }
 
@@ -876,3 +890,88 @@ xfs_sync_sb(
                xfs_trans_set_sync(tp);
        return xfs_trans_commit(tp);
 }
+
+/*
+ * Fill out a filesystem geometry structure from the in-core superblock.
+ *
+ * @sbp:            superblock to read the values from
+ * @geo:            output structure; always zeroed in full first
+ * @struct_version: how much of @geo the caller wants populated:
+ *                    < 2  basic sizes, counts and the uuid only
+ *                    2    also sunit/swidth
+ *                    3    also version, feature flags, logsectsize,
+ *                         rtsectsize and dirblocksize
+ *                    >= 4 also the LOGV2 flag and logsunit
+ *
+ * Always returns 0.
+ */
+int
+xfs_fs_geometry(
+       struct xfs_sb           *sbp,
+       struct xfs_fsop_geom    *geo,
+       int                     struct_version)
+{
+       /* Fields beyond the requested version stay zero. */
+       memset(geo, 0, sizeof(struct xfs_fsop_geom));
+
+       geo->blocksize = sbp->sb_blocksize;
+       geo->rtextsize = sbp->sb_rextsize;
+       geo->agblocks = sbp->sb_agblocks;
+       geo->agcount = sbp->sb_agcount;
+       geo->logblocks = sbp->sb_logblocks;
+       geo->sectsize = sbp->sb_sectsize;
+       geo->inodesize = sbp->sb_inodesize;
+       geo->imaxpct = sbp->sb_imax_pct;
+       geo->datablocks = sbp->sb_dblocks;
+       geo->rtblocks = sbp->sb_rblocks;
+       geo->rtextents = sbp->sb_rextents;
+       geo->logstart = sbp->sb_logstart;
+       /* The raw copy below is only valid if both uuid fields are the same size. */
+       BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid));
+       memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid));
+
+       if (struct_version < 2)
+               return 0;
+
+       /* Version 2 additions: stripe geometry. */
+       geo->sunit = sbp->sb_unit;
+       geo->swidth = sbp->sb_width;
+
+       if (struct_version < 3)
+               return 0;
+
+       /* Version 3 additions: one flag per superblock feature, plus sizes. */
+       geo->version = XFS_FSOP_GEOM_VERSION;
+       geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
+                    XFS_FSOP_GEOM_FLAGS_DIRV2;
+       if (xfs_sb_version_hasattr(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
+       if (xfs_sb_version_hasquota(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA;
+       if (xfs_sb_version_hasalign(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN;
+       if (xfs_sb_version_hasdalign(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN;
+       if (xfs_sb_version_hasextflgbit(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG;
+       if (xfs_sb_version_hassector(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
+       if (xfs_sb_version_hasasciici(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI;
+       if (xfs_sb_version_haslazysbcount(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB;
+       if (xfs_sb_version_hasattr2(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2;
+       if (xfs_sb_version_hasprojid32bit(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32;
+       if (xfs_sb_version_hascrc(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB;
+       if (xfs_sb_version_hasftype(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE;
+       if (xfs_sb_version_hasfinobt(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT;
+       if (xfs_sb_version_hassparseinodes(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES;
+       if (xfs_sb_version_hasrmapbt(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT;
+       if (xfs_sb_version_hasreflink(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK;
+       /* Without the sector feature, report BBSIZE as the log sector size. */
+       if (xfs_sb_version_hassector(sbp))
+               geo->logsectsize = sbp->sb_logsectsize;
+       else
+               geo->logsectsize = BBSIZE;
+       /* Note: rtsectsize is reported as the filesystem block size. */
+       geo->rtsectsize = sbp->sb_blocksize;
+       geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
+
+       if (struct_version < 4)
+               return 0;
+
+       /* Version 4 additions: log v2 flag and log stripe unit. */
+       if (xfs_sb_version_haslogv2(sbp))
+               geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2;
+
+       geo->logsunit = sbp->sb_logsunit;
+
+       return 0;
+}
index 961e6475a3099bb9acf2c5df67f355f35ffbb3c7..63dcd2a1a65717353b539617e0480eb0fb8f2709 100644 (file)
@@ -34,4 +34,8 @@ extern void   xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
 extern void    xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
 extern void    xfs_sb_quota_from_disk(struct xfs_sb *sbp);
 
+#define XFS_FS_GEOM_MAX_STRUCT_VER     (4)
+extern int     xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo,
+                               int struct_version);
+
 #endif /* __XFS_SB_H__ */
index c6f4eb46fe263e0b3faf8e2a33e93cbf8984c976..d0b84da0cb1ecdb2a19c832e29e92128afcc35a1 100644 (file)
@@ -76,6 +76,9 @@ struct xfs_log_item_desc {
 int    xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
 int    xfs_log_calc_minimum_size(struct xfs_mount *);
 
+struct xfs_trans_res;
+void   xfs_log_get_max_trans_res(struct xfs_mount *mp,
+                                 struct xfs_trans_res *max_resp);
 
 /*
  * Values for t_flags.
@@ -143,5 +146,6 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
                        uint32_t size, struct xfs_buf *bp);
 void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
                                 struct xfs_inode *ip, struct xfs_ifork *ifp);
+xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
 
 #endif /* __XFS_SHARED_H__ */
index c484877129a0d0b55c6f94517c0a1ac1703550fc..5ef5f354587e9c0dc8b5e5042e11e7f6a873b56d 100644 (file)
@@ -98,7 +98,7 @@ xfs_symlink_hdr_ok(
        return true;
 }
 
-static bool
+static xfs_failaddr_t
 xfs_symlink_verify(
        struct xfs_buf          *bp)
 {
@@ -106,22 +106,22 @@ xfs_symlink_verify(
        struct xfs_dsymlink_hdr *dsl = bp->b_addr;
 
        if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return false;
+               return __this_address;
        if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
-               return false;
+               return __this_address;
        if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
-               return false;
+               return __this_address;
        if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
-               return false;
+               return __this_address;
        if (be32_to_cpu(dsl->sl_offset) +
                                be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN)
-               return false;
+               return __this_address;
        if (dsl->sl_owner == 0)
-               return false;
+               return __this_address;
        if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
-               return false;
+               return __this_address;
 
-       return true;
+       return NULL;
 }
 
 static void
@@ -129,18 +129,19 @@ xfs_symlink_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t  fa;
 
        /* no verification of non-crc buffers */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
 
        if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
-               xfs_buf_ioerror(bp, -EFSBADCRC);
-       else if (!xfs_symlink_verify(bp))
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
+               xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+       else {
+               fa = xfs_symlink_verify(bp);
+               if (fa)
+                       xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+       }
 }
 
 static void
@@ -148,15 +149,16 @@ xfs_symlink_write_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
+       xfs_failaddr_t          fa;
 
        /* no verification of non-crc buffers */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
 
-       if (!xfs_symlink_verify(bp)) {
-               xfs_buf_ioerror(bp, -EFSCORRUPTED);
-               xfs_verifier_error(bp);
+       fa = xfs_symlink_verify(bp);
+       if (fa) {
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
                return;
        }
 
@@ -171,6 +173,7 @@ const struct xfs_buf_ops xfs_symlink_buf_ops = {
        .name = "xfs_symlink",
        .verify_read = xfs_symlink_read_verify,
        .verify_write = xfs_symlink_write_verify,
+       .verify_struct = xfs_symlink_verify,
 };
 
 void
@@ -207,3 +210,37 @@ xfs_symlink_local_to_remote(
        xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
                                        ifp->if_bytes - 1);
 }
+
+/*
+ * Verify the consistency of an inline (local format) symlink target stored
+ * in the inode's data fork.  Returns NULL when the target passes every check,
+ * otherwise the code address of the check that failed.
+ */
+xfs_failaddr_t
+xfs_symlink_shortform_verify(
+       struct xfs_inode        *ip)
+{
+       struct xfs_ifork        *ifp;
+       char                    *target;
+       int                     len;
+
+       ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       target = (char *)ifp->if_u1.if_data;
+       len = ifp->if_bytes;
+
+       /* Zero length symlinks can exist while we're deleting a remote one. */
+       if (len == 0)
+               return NULL;
+
+       /* Reject negative lengths and targets longer than the maximum. */
+       if (len < 0 || len > XFS_SYMLINK_MAXLEN)
+               return __this_address;
+
+       /* No NUL bytes may appear within the first len - 1 bytes. */
+       if (memchr(target, 0, len - 1))
+               return __this_address;
+
+       /* The byte just past the target must be the NUL terminator. */
+       if (target[len] != 0)
+               return __this_address;
+       return NULL;
+}
index 6bd916bd35e24a6144940bbd34a1cbab63afc09b..5f17641f040f3162dba9f9c763b35a7c69391b6d 100644 (file)
@@ -34,6 +34,9 @@
 #include "xfs_trans_space.h"
 #include "xfs_trace.h"
 
+#define _ALLOC true
+#define _FREE  false
+
 /*
  * A buffer has a format structure overhead in the log in addition
  * to the data, so we need to take this into account when reserving
@@ -132,43 +135,77 @@ xfs_calc_inode_res(
 }
 
 /*
- * The free inode btree is a conditional feature and the log reservation
- * requirements differ slightly from that of the traditional inode allocation
- * btree. The finobt tracks records for inode chunks with at least one free
- * inode. A record can be removed from the tree for an inode allocation
- * or free and thus the finobt reservation is unconditional across:
+ * Inode btree record insertion/removal modifies the inode btree and free space
+ * btrees (since the inobt does not use the agfl). This requires the following
+ * reservation:
  *
- *     - inode allocation
- *     - inode free
- *     - inode chunk allocation
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
  *
- * The 'modify' param indicates to include the record modification scenario. The
- * 'alloc' param indicates to include the reservation for free space btree
- * modifications on behalf of finobt modifications. This is required only for
- * transactions that do not already account for free space btree modifications.
+ * The caller must account for SB and AG header modifications, etc.
+ */
+STATIC uint
+xfs_calc_inobt_res(
+       struct xfs_mount        *mp)
+{
+       uint                    blksz = XFS_FSB_TO_B(mp, 1);
+       uint                    inobt_res;
+       uint                    allocbt_res;
+
+       /* the inode btree: m_in_maxlevels buffers of one block each */
+       inobt_res = xfs_calc_buf_res(mp->m_in_maxlevels, blksz);
+
+       /* the allocation btrees: xfs_allocfree_log_count(mp, 1) blocks */
+       allocbt_res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
+
+       return inobt_res + allocbt_res;
+}
+
+/*
+ * The free inode btree is a conditional feature. The behavior differs slightly
+ * from that of the traditional inode btree in that the finobt tracks records
+ * for inode chunks with at least one free inode. A record can be removed from
+ * the tree during individual inode allocation. Therefore the finobt
+ * reservation is unconditional for both the inode chunk allocation and
+ * individual inode allocation (modify) cases.
  *
- * the free inode btree: max depth * block size
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the free inode btree entry: block size
+ * Behavior aside, the reservation for finobt modification is equivalent to the
+ * traditional inobt: cover a full finobt shape change plus block allocation.
  */
 STATIC uint
 xfs_calc_finobt_res(
-       struct xfs_mount        *mp,
-       int                     alloc,
-       int                     modify)
+       struct xfs_mount        *mp)
 {
-       uint res;
-
        if (!xfs_sb_version_hasfinobt(&mp->m_sb))
                return 0;
 
-       res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
-       if (alloc)
-               res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
-                                       XFS_FSB_TO_B(mp, 1));
-       if (modify)
-               res += (uint)XFS_FSB_TO_B(mp, 1);
+       return xfs_calc_inobt_res(mp);
+}
 
+/*
+ * Calculate the reservation required to allocate or free an inode chunk. This
+ * includes:
+ *
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the inode chunk: m_ialloc_blks * N
+ *
+ * The size N of the inode chunk reservation depends on whether it is for
+ * allocation or free and which type of create transaction is in use. An inode
+ * chunk free always invalidates the buffers and only requires reservation for
+ * headers (N == 0). An inode chunk allocation requires a chunk sized
+ * reservation on v4 and older superblocks to initialize the chunk. No chunk
+ * reservation is required for allocation on v5 supers, which use ordered
+ * buffers to initialize.
+ */
+STATIC uint
+xfs_calc_inode_chunk_res(
+       struct xfs_mount        *mp,
+       bool                    alloc)
+{
+       /* size stays 0 for a chunk free, so only buffer headers are counted. */
+       uint                    res, size = 0;
+
+       /* Both the alloc and the free case reserve for the allocation btrees. */
+       res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+                              XFS_FSB_TO_B(mp, 1));
+       if (alloc) {
+               /* icreate tx uses ordered buffers */
+               if (xfs_sb_version_hascrc(&mp->m_sb))
+                       return res;
+               /* No CRC feature: reserve a full block per chunk block. */
+               size = XFS_FSB_TO_B(mp, 1);
+       }
+
+       /* One buffer per inode chunk block, each logging 'size' bytes. */
+       res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
        return res;
 }
 
@@ -232,8 +269,6 @@ xfs_calc_write_reservation(
  *    the super block to reflect the freed blocks: sector size
  *    worst case split in allocation btrees per extent assuming 4 extents:
  *             4 exts * 2 trees * (2 * max depth - 1) * block size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
  */
 STATIC uint
 xfs_calc_itruncate_reservation(
@@ -245,12 +280,7 @@ xfs_calc_itruncate_reservation(
                                      XFS_FSB_TO_B(mp, 1))),
                    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
                     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
-                                     XFS_FSB_TO_B(mp, 1)) +
-                   xfs_calc_buf_res(5, 0) +
-                   xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
-                                    XFS_FSB_TO_B(mp, 1)) +
-                   xfs_calc_buf_res(2 + mp->m_ialloc_blks +
-                                    mp->m_in_maxlevels, 0)));
+                                     XFS_FSB_TO_B(mp, 1))));
 }
 
 /*
@@ -282,13 +312,14 @@ xfs_calc_rename_reservation(
  * For removing an inode from unlinked list at first, we can modify:
  *    the agi hash list and counters: sector size
  *    the on disk inode before ours in the agi hash list: inode cluster size
+ *    the on disk inode in the agi hash list: inode cluster size
  */
 STATIC uint
 xfs_calc_iunlink_remove_reservation(
        struct xfs_mount        *mp)
 {
        return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-              max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+              2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
 }
 
 /*
@@ -320,13 +351,13 @@ xfs_calc_link_reservation(
 /*
  * For adding an inode to unlinked list we can modify:
  *    the agi hash list: sector size
- *    the unlinked inode: inode size
+ *    the on disk inode: inode cluster size
  */
 STATIC uint
 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
 {
        return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               xfs_calc_inode_res(mp, 1);
+               max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
 }
 
 /*
@@ -379,45 +410,16 @@ xfs_calc_create_resv_modify(
                xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
                (uint)XFS_FSB_TO_B(mp, 1) +
                xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_finobt_res(mp, 1, 1);
-}
-
-/*
- * For create we can allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the superblock for the nlink flag: sector size
- *    the inode blocks allocated: mp->m_ialloc_blks * blocksize
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_create_resv_alloc(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-               mp->m_sb.sb_sectsize +
-               xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
-                                XFS_FSB_TO_B(mp, 1));
-}
-
-STATIC uint
-__xfs_calc_create_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               MAX(xfs_calc_create_resv_alloc(mp),
-                   xfs_calc_create_resv_modify(mp));
+               xfs_calc_finobt_res(mp);
 }
 
 /*
  * For icreate we can allocate some inodes giving:
  *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
  *    the superblock for the nlink flag: sector size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- *    the finobt (record insertion)
+ *    the inode chunk (allocation, optional init)
+ *    the inobt (record insertion)
+ *    the finobt (optional, record insertion)
  */
 STATIC uint
 xfs_calc_icreate_resv_alloc(
@@ -425,10 +427,9 @@ xfs_calc_icreate_resv_alloc(
 {
        return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
                mp->m_sb.sb_sectsize +
-               xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
-                                XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_finobt_res(mp, 0, 0);
+               xfs_calc_inode_chunk_res(mp, _ALLOC) +
+               xfs_calc_inobt_res(mp) +
+               xfs_calc_finobt_res(mp);
 }
 
 STATIC uint
@@ -439,27 +440,13 @@ xfs_calc_icreate_reservation(xfs_mount_t *mp)
                    xfs_calc_create_resv_modify(mp));
 }
 
-STATIC uint
-xfs_calc_create_reservation(
-       struct xfs_mount        *mp)
-{
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               return xfs_calc_icreate_reservation(mp);
-       return __xfs_calc_create_reservation(mp);
-
-}
-
 STATIC uint
 xfs_calc_create_tmpfile_reservation(
        struct xfs_mount        *mp)
 {
        uint    res = XFS_DQUOT_LOGRES(mp);
 
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               res += xfs_calc_icreate_resv_alloc(mp);
-       else
-               res += xfs_calc_create_resv_alloc(mp);
-
+       res += xfs_calc_icreate_resv_alloc(mp);
        return res + xfs_calc_iunlink_add_reservation(mp);
 }
 
@@ -470,7 +457,7 @@ STATIC uint
 xfs_calc_mkdir_reservation(
        struct xfs_mount        *mp)
 {
-       return xfs_calc_create_reservation(mp);
+       return xfs_calc_icreate_reservation(mp);
 }
 
 
@@ -483,20 +470,24 @@ STATIC uint
 xfs_calc_symlink_reservation(
        struct xfs_mount        *mp)
 {
-       return xfs_calc_create_reservation(mp) +
+       return xfs_calc_icreate_reservation(mp) +
               xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
 }
 
 /*
  * In freeing an inode we can modify:
  *    the inode being freed: inode size
- *    the super block free inode counter: sector size
- *    the agi hash list and counters: sector size
- *    the inode btree entry: block size
- *    the on disk inode before ours in the agi hash list: inode cluster size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ *    the super block free inode counter, AGF and AGFL: sector size
+ *    the on disk inode (agi unlinked list removal)
+ *    the inode chunk (invalidated, headers only)
+ *    the inode btree
  *    the finobt (record insertion, removal or modification)
+ *
+ * Note that the inode chunk res. includes an allocfree res. for freeing of the
+ * inode chunk. This is technically extraneous because the inode chunk free is
+ * deferred (it occurs after a transaction roll). Include the extra reservation
+ * anyways since we've had reports of ifree transaction overruns due to too many
+ * agfl fixups during inode chunk frees.
  */
 STATIC uint
 xfs_calc_ifree_reservation(
@@ -504,15 +495,11 @@ xfs_calc_ifree_reservation(
 {
        return XFS_DQUOT_LOGRES(mp) +
                xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
                xfs_calc_iunlink_remove_reservation(mp) +
-               xfs_calc_buf_res(1, 0) +
-               xfs_calc_buf_res(2 + mp->m_ialloc_blks +
-                                mp->m_in_maxlevels, 0) +
-               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
-                                XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_finobt_res(mp, 0, 1);
+               xfs_calc_inode_chunk_res(mp, _FREE) +
+               xfs_calc_inobt_res(mp) +
+               xfs_calc_finobt_res(mp);
 }
 
 /*
@@ -842,7 +829,7 @@ xfs_trans_resv_calc(
        resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
        resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
-       resp->tr_create.tr_logres = xfs_calc_create_reservation(mp);
+       resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
        resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
        resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
index 2a9b4f9e93c64be83edad15c0e19edb45269f406..fd975524f4603387e28078d13b175b5df2443021 100644 (file)
 #include "xfs_inode.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
+#include "xfs_rmap.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
 
 /*
- * Set up scrub to check all the static metadata in each AG.
- * This means the SB, AGF, AGI, and AGFL headers.
+ * Walk all the blocks in the AGFL.  The fn function can return any negative
+ * error code or XFS_BTREE_QUERY_RANGE_ABORT.
  */
 int
-xfs_scrub_setup_ag_header(
-       struct xfs_scrub_context        *sc,
-       struct xfs_inode                *ip)
-{
-       struct xfs_mount                *mp = sc->mp;
-
-       if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
-           sc->sm->sm_ino || sc->sm->sm_gen)
-               return -EINVAL;
-       return xfs_scrub_setup_fs(sc, ip);
-}
-
-/* Walk all the blocks in the AGFL. */
-int
 xfs_scrub_walk_agfl(
        struct xfs_scrub_context        *sc,
        int                             (*fn)(struct xfs_scrub_context *,
@@ -115,6 +102,36 @@ xfs_scrub_walk_agfl(
 
 /* Superblock */
 
+/*
+ * Cross-reference with the other btrees.
+ *
+ * Does nothing if the scrub has already been flagged corrupt.  Otherwise
+ * calls xfs_scrub_ag_init() on sc->sa for the AG named in the scrub request
+ * and runs the xref helpers against the single block at XFS_SB_BLOCK(),
+ * using an XFS_RMAP_OWN_FS owner for the ownership check.
+ *
+ * NOTE(review): @bp is not referenced in this function body.
+ */
+STATIC void
+xfs_scrub_superblock_xref(
+       struct xfs_scrub_context        *sc,
+       struct xfs_buf                  *bp)
+{
+       struct xfs_owner_info           oinfo;
+       struct xfs_mount                *mp = sc->mp;
+       xfs_agnumber_t                  agno = sc->sm->sm_agno;
+       xfs_agblock_t                   agbno;
+       int                             error;
+
+       /* Already known to be corrupt; skip cross-referencing. */
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       agbno = XFS_SB_BLOCK(mp);
+
+       /* Give up on xref if the AG setup reported an error. */
+       error = xfs_scrub_ag_init(sc, agno, &sc->sa);
+       if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
+               return;
+
+       xfs_scrub_xref_is_used_space(sc, agbno, 1);
+       xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+       xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+       xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+       /* scrub teardown will take care of sc->sa for us */
+}
+
 /*
  * Scrub the filesystem superblock.
  *
@@ -143,6 +160,22 @@ xfs_scrub_superblock(
        error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
                  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
                  XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+       /*
+        * The superblock verifier can return several different error codes
+        * if it thinks the superblock doesn't look right.  For a mount these
+        * would all get bounced back to userspace, but if we're here then the
+        * fs mounted successfully, which means that this secondary superblock
+        * is simply incorrect.  Treat all these codes the same way we treat
+        * any corruption.
+        */
+       switch (error) {
+       case -EINVAL:   /* also -EWRONGFS */
+       case -ENOSYS:
+       case -EFBIG:
+               error = -EFSCORRUPTED;
+       default:
+               break;
+       }
        if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
                return error;
 
@@ -387,11 +420,175 @@ xfs_scrub_superblock(
                        BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
                xfs_scrub_block_set_corrupt(sc, bp);
 
+       xfs_scrub_superblock_xref(sc, bp);
+
        return error;
 }
 
 /* AGF */
 
+/*
+ * Freespace record callback: add this record's block count to the running
+ * total that @priv points at.  Always returns 0.
+ */
+STATIC int
+xfs_scrub_agf_record_bno_lengths(
+       struct xfs_btree_cur            *cur,
+       struct xfs_alloc_rec_incore     *rec,
+       void                            *priv)
+{
+       xfs_extlen_t                    *total = priv;
+
+       *total = *total + rec->ar_blockcount;
+       return 0;
+}
+
+/*
+ * Check agf_freeblks: sum the length of every record reachable through the
+ * bnobt cursor and flag an xref corruption on the AGF buffer if the sum
+ * differs from the header value.  No-op without a bnobt cursor.
+ */
+static inline void
+xfs_scrub_agf_xref_freeblks(
+       struct xfs_scrub_context        *sc)
+{
+       struct xfs_agf                  *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+       xfs_extlen_t                    free_sum = 0;
+       int                             error;
+
+       if (!sc->sa.bno_cur)
+               return;
+
+       error = xfs_alloc_query_all(sc->sa.bno_cur,
+                       xfs_scrub_agf_record_bno_lengths, &free_sum);
+       if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur) &&
+           free_sum != be32_to_cpu(agf->agf_freeblks))
+               xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+/*
+ * Cross reference the AGF with the cntbt (freespace by length btree).
+ *
+ * Looks up the record at or below (bno 0, len -1U) through the cntbt cursor.
+ * If nothing is found, agf_freeblks must be zero; otherwise the length of
+ * the record found must equal agf_longest.  Any mismatch flags an xref
+ * corruption on the AGF buffer.  No-op without a cntbt cursor.
+ */
+static inline void
+xfs_scrub_agf_xref_cntbt(
+       struct xfs_scrub_context        *sc)
+{
+       struct xfs_agf                  *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+       xfs_agblock_t                   agbno;
+       xfs_extlen_t                    blocks;
+       int                             have;
+       int                             error;
+
+       if (!sc->sa.cnt_cur)
+               return;
+
+       /* Any freespace at all? */
+       error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+               return;
+       if (!have) {
+               /*
+                * agf_freeblks is compared in its on-disk (big-endian) form,
+                * so the constant is converted cpu -> be, not be -> cpu.
+                */
+               if (agf->agf_freeblks != cpu_to_be32(0))
+                       xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+               return;
+       }
+
+       /* Check agf_longest */
+       error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+               return;
+       if (!have || blocks != be32_to_cpu(agf->agf_longest))
+               xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+/*
+ * Check the btree block counts in the AGF against the btrees.
+ *
+ * agf_rmap_blocks is compared with the block count of the rmapbt when an
+ * rmap cursor is available.  agf_btreeblks is compared with the sum of
+ * (block count - 1) over the rmapbt, bnobt and cntbt; that check is skipped
+ * if any needed cursor is missing.
+ * NOTE(review): the "- 1" presumably excludes each tree's root block from
+ * agf_btreeblks -- confirm against the on-disk format definition.
+ */
+STATIC void
+xfs_scrub_agf_xref_btreeblks(
+       struct xfs_scrub_context        *sc)
+{
+       struct xfs_agf                  *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+       struct xfs_mount                *mp = sc->mp;
+       xfs_agblock_t                   blocks;
+       xfs_agblock_t                   btreeblks;
+       int                             error;
+
+       /* Check agf_rmap_blocks; set up for agf_btreeblks check */
+       if (sc->sa.rmap_cur) {
+               error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks);
+               if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+                       return;
+               btreeblks = blocks - 1;
+               if (blocks != be32_to_cpu(agf->agf_rmap_blocks))
+                       xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+       } else {
+               /* No rmapbt contribution to the running total. */
+               btreeblks = 0;
+       }
+
+       /*
+        * No rmap cursor; we can't xref if we have the rmapbt feature.
+        * We also can't do it if we're missing the free space btree cursors.
+        */
+       if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) ||
+           !sc->sa.bno_cur || !sc->sa.cnt_cur)
+               return;
+
+       /* Check agf_btreeblks */
+       error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+               return;
+       btreeblks += blocks - 1;
+
+       error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+               return;
+       btreeblks += blocks - 1;
+
+       /* Accumulated total must match the header's count. */
+       if (btreeblks != be32_to_cpu(agf->agf_btreeblks))
+               xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+/*
+ * Check agf_refcount_blocks against the number of blocks counted through
+ * the refcount btree cursor; flag an xref corruption on the AGF buffer when
+ * they differ.  No-op without a refcount cursor.
+ */
+static inline void
+xfs_scrub_agf_xref_refcblks(
+       struct xfs_scrub_context        *sc)
+{
+       struct xfs_agf                  *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+       xfs_agblock_t                   nr_blocks;
+       int                             error;
+
+       if (!sc->sa.refc_cur)
+               return;
+
+       error = xfs_btree_count_blocks(sc->sa.refc_cur, &nr_blocks);
+       if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur) &&
+           nr_blocks != be32_to_cpu(agf->agf_refcount_blocks))
+               xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+/*
+ * Cross-reference with the other btrees.
+ *
+ * Does nothing if the scrub has already been flagged corrupt, or if
+ * xfs_scrub_ag_btcur_init() on sc->sa returns an error.  Otherwise runs the
+ * generic xref helpers against the single block at XFS_AGF_BLOCK()
+ * and the AGF-specific counter checks (freeblks, cntbt, btreeblks, refcblks).
+ */
+STATIC void
+xfs_scrub_agf_xref(
+       struct xfs_scrub_context        *sc)
+{
+       struct xfs_owner_info           oinfo;
+       struct xfs_mount                *mp = sc->mp;
+       xfs_agblock_t                   agbno;
+       int                             error;
+
+       /* Already known to be corrupt; skip cross-referencing. */
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       agbno = XFS_AGF_BLOCK(mp);
+
+       /* The error is dropped here: xref is simply skipped on failure. */
+       error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+       if (error)
+               return;
+
+       xfs_scrub_xref_is_used_space(sc, agbno, 1);
+       xfs_scrub_agf_xref_freeblks(sc);
+       xfs_scrub_agf_xref_cntbt(sc);
+       xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+       xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+       xfs_scrub_agf_xref_btreeblks(sc);
+       xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+       xfs_scrub_agf_xref_refcblks(sc);
+
+       /* scrub teardown will take care of sc->sa for us */
+}
+
 /* Scrub the AGF. */
 int
 xfs_scrub_agf(
@@ -414,6 +611,7 @@ xfs_scrub_agf(
                        &sc->sa.agf_bp, &sc->sa.agfl_bp);
        if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
                goto out;
+       xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp);
 
        agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 
@@ -470,6 +668,7 @@ xfs_scrub_agf(
        if (agfl_count != 0 && fl_count != agfl_count)
                xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
 
+       xfs_scrub_agf_xref(sc);
 out:
        return error;
 }
@@ -477,11 +676,28 @@ xfs_scrub_agf(
 /* AGFL */
 
 struct xfs_scrub_agfl_info {
+       struct xfs_owner_info           oinfo;
        unsigned int                    sz_entries;
        unsigned int                    nr_entries;
        xfs_agblock_t                   *entries;
 };
 
+/*
+ * Cross-reference with the other btrees.
+ *
+ * Checks a single AGFL entry (agbno, length 1): the block must be
+ * in-use space, must not belong to an inode chunk, must be owned by
+ * the owner described in @oinfo, and must not be shared.  Nothing is
+ * checked once XFS_SCRUB_OFLAG_CORRUPT has been set.
+ */
+STATIC void
+xfs_scrub_agfl_block_xref(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       struct xfs_owner_info           *oinfo)
+{
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       xfs_scrub_xref_is_used_space(sc, agbno, 1);
+       xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+       xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo);
+       xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+}
+
 /* Scrub an AGFL block. */
 STATIC int
 xfs_scrub_agfl_block(
@@ -499,6 +715,8 @@ xfs_scrub_agfl_block(
        else
                xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp);
 
+       xfs_scrub_agfl_block_xref(sc, agbno, priv);
+
        return 0;
 }
 
@@ -513,6 +731,37 @@ xfs_scrub_agblock_cmp(
        return (int)*a - (int)*b;
 }
 
+/*
+ * Cross-reference with the other btrees.
+ *
+ * Checks the block that holds the AGFL itself (XFS_AGFL_BLOCK): it
+ * must be in-use space, not part of an inode chunk, owned by
+ * XFS_RMAP_OWN_FS and not shared.  Skipped when corruption has already
+ * been flagged or when the AG btree cursors cannot be initialised; the
+ * cursor-init error is not reported to the caller.
+ */
+STATIC void
+xfs_scrub_agfl_xref(
+       struct xfs_scrub_context        *sc)
+{
+       struct xfs_owner_info           oinfo;
+       struct xfs_mount                *mp = sc->mp;
+       xfs_agblock_t                   agbno;
+       int                             error;
+
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       agbno = XFS_AGFL_BLOCK(mp);
+
+       error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+       if (error)
+               return;
+
+       xfs_scrub_xref_is_used_space(sc, agbno, 1);
+       xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+       xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+       xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+       /*
+        * Scrub teardown will take care of sc->sa for us.  Leave sc->sa
+        * active so that the agfl block xref can use it too.
+        */
+}
+
 /* Scrub the AGFL. */
 int
 xfs_scrub_agfl(
@@ -532,6 +781,12 @@ xfs_scrub_agfl(
                goto out;
        if (!sc->sa.agf_bp)
                return -EFSCORRUPTED;
+       xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp);
+
+       xfs_scrub_agfl_xref(sc);
+
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               goto out;
 
        /* Allocate buffer to ensure uniqueness of AGFL entries. */
        agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -548,6 +803,7 @@ xfs_scrub_agfl(
        }
 
        /* Check the blocks in the AGFL. */
+       xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG);
        error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai);
        if (error)
                goto out_free;
@@ -575,6 +831,56 @@ xfs_scrub_agfl(
 
 /* AGI */
 
+/*
+ * Check agi_count/agi_freecount.
+ *
+ * Asks xfs_ialloc_count_inodes() for the inode and free-inode totals
+ * seen through sc->sa.ino_cur and flags a cross-referencing
+ * discrepancy against the AGI buffer if either total differs from the
+ * on-disk (big-endian) agi_count / agi_freecount.  Does nothing when
+ * there is no inobt cursor, and gives up without comparing when
+ * xfs_scrub_should_check_xref() rejects the result of the count.
+ */
+static inline void
+xfs_scrub_agi_xref_icounts(
+       struct xfs_scrub_context        *sc)
+{
+       struct xfs_agi                  *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+       xfs_agino_t                     icount;
+       xfs_agino_t                     freecount;
+       int                             error;
+
+       if (!sc->sa.ino_cur)
+               return;
+
+       error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur))
+               return;
+       if (be32_to_cpu(agi->agi_count) != icount ||
+           be32_to_cpu(agi->agi_freecount) != freecount)
+               xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp);
+}
+
+/*
+ * Cross-reference with the other btrees.
+ *
+ * Checks the block that holds the AGI (XFS_AGI_BLOCK): it must be
+ * in-use space, not part of an inode chunk, owned by XFS_RMAP_OWN_FS
+ * and not shared; the AGI inode counters are also compared against the
+ * inode btree.  Skipped when corruption has already been flagged or
+ * when the AG btree cursors cannot be initialised; the cursor-init
+ * error is not reported to the caller.
+ */
+STATIC void
+xfs_scrub_agi_xref(
+       struct xfs_scrub_context        *sc)
+{
+       struct xfs_owner_info           oinfo;
+       struct xfs_mount                *mp = sc->mp;
+       xfs_agblock_t                   agbno;
+       int                             error;
+
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       agbno = XFS_AGI_BLOCK(mp);
+
+       error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+       if (error)
+               return;
+
+       xfs_scrub_xref_is_used_space(sc, agbno, 1);
+       xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+       xfs_scrub_agi_xref_icounts(sc);
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+       xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+       xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+       /* scrub teardown will take care of sc->sa for us */
+}
+
 /* Scrub the AGI. */
 int
 xfs_scrub_agi(
@@ -598,6 +904,7 @@ xfs_scrub_agi(
                        &sc->sa.agf_bp, &sc->sa.agfl_bp);
        if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
                goto out;
+       xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp);
 
        agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
 
@@ -653,6 +960,7 @@ xfs_scrub_agi(
        if (agi->agi_pad32 != cpu_to_be32(0))
                xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
 
+       xfs_scrub_agi_xref(sc);
 out:
        return error;
 }
index 059663e134144d6bd80b7a601f17f1eebe4e748e..517c079d3f68db704a803469bbcdc742c2a42851 100644 (file)
@@ -31,6 +31,7 @@
 #include "xfs_sb.h"
 #include "xfs_alloc.h"
 #include "xfs_rmap.h"
+#include "xfs_alloc.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
@@ -49,6 +50,64 @@ xfs_scrub_setup_ag_allocbt(
 }
 
 /* Free space btree scrubber. */
+/*
+ * Ensure there's a corresponding cntbt/bnobt record matching this
+ * bnobt/cntbt record, respectively.
+ *
+ * The "other" cursor is sc->sa.cnt_cur when the scrub type is
+ * XFS_SCRUB_TYPE_BNOBT and sc->sa.bno_cur otherwise; nothing is checked
+ * if that cursor is absent.  A less-than-or-equal lookup of
+ * (agbno, len) must find a record, and the record read back must have
+ * exactly the same start block and length; any miss or mismatch is
+ * flagged as a cross-referencing discrepancy on the other btree.
+ */
+STATIC void
+xfs_scrub_allocbt_xref_other(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len)
+{
+       struct xfs_btree_cur            **pcur;
+       xfs_agblock_t                   fbno;
+       xfs_extlen_t                    flen;
+       int                             has_otherrec;
+       int                             error;
+
+       if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
+               pcur = &sc->sa.cnt_cur;
+       else
+               pcur = &sc->sa.bno_cur;
+       if (!*pcur)
+               return;
+
+       error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec);
+       if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+               return;
+       if (!has_otherrec) {
+               xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+               return;
+       }
+
+       error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec);
+       if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+               return;
+       if (!has_otherrec) {
+               xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+               return;
+       }
+
+       if (fbno != agbno || flen != len)
+               xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+}
+
+/*
+ * Cross-reference with the other btrees.
+ *
+ * For one free-space record (agbno, len): the sibling free-space btree
+ * must hold a matching record, and the extent must not overlap an
+ * inode chunk, must have no owner, and must not be shared.  Nothing is
+ * checked once XFS_SCRUB_OFLAG_CORRUPT has been set.
+ */
+STATIC void
+xfs_scrub_allocbt_xref(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len)
+{
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       xfs_scrub_allocbt_xref_other(sc, agbno, len);
+       xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+       xfs_scrub_xref_has_no_owner(sc, agbno, len);
+       xfs_scrub_xref_is_not_shared(sc, agbno, len);
+}
 
 /* Scrub a bnobt/cntbt record. */
 STATIC int
@@ -70,6 +129,8 @@ xfs_scrub_allocbt_rec(
            !xfs_verify_agbno(mp, agno, bno + len - 1))
                xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
 
+       xfs_scrub_allocbt_xref(bs->sc, bno, len);
+
        return error;
 }
 
@@ -100,3 +161,23 @@ xfs_scrub_cntbt(
 {
        return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
 }
+
+/*
+ * xref check that the extent is not free.
+ *
+ * Asks xfs_alloc_has_record() whether sc->sa.bno_cur has a record for
+ * (agbno, len); if it reports one, the extent is considered free space
+ * and a cross-referencing discrepancy is flagged on the bnobt.  Does
+ * nothing when there is no bnobt cursor, and gives up without flagging
+ * when xfs_scrub_should_check_xref() rejects the lookup result.
+ */
+void
+xfs_scrub_xref_is_used_space(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len)
+{
+       bool                            is_freesp;
+       int                             error;
+
+       if (!sc->sa.bno_cur)
+               return;
+
+       error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+               return;
+       if (is_freesp)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0);
+}
index 42fec0bcd9e18d34760222671e43cbcb35c2f5f5..d002821304920f0b2ba1f09e4b9f98e3fd0133e5 100644 (file)
@@ -37,6 +37,7 @@
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_rmap.h"
+#include "xfs_refcount.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
@@ -99,6 +100,201 @@ struct xfs_scrub_bmap_info {
        int                             whichfork;
 };
 
+/*
+ * Look for a corresponding rmap for this irec.
+ *
+ * Looks up the reverse mapping for @agbno with the given @owner, using
+ * the attr-fork flag for attr fork scrubs and offset 0 for the CoW
+ * fork.  Returns true with *@rmap filled in when a record was found.
+ * Returns false when a lookup step is rejected by
+ * xfs_scrub_should_check_xref(), or when no record exists; in the
+ * latter case the file offset is also flagged as a cross-referencing
+ * discrepancy.
+ */
+static inline bool
+xfs_scrub_bmap_get_rmap(
+       struct xfs_scrub_bmap_info      *info,
+       struct xfs_bmbt_irec            *irec,
+       xfs_agblock_t                   agbno,
+       uint64_t                        owner,
+       struct xfs_rmap_irec            *rmap)
+{
+       xfs_fileoff_t                   offset;
+       unsigned int                    rflags = 0;
+       int                             has_rmap;
+       int                             error;
+
+       if (info->whichfork == XFS_ATTR_FORK)
+               rflags |= XFS_RMAP_ATTR_FORK;
+
+       /*
+        * CoW staging extents are owned (on disk) by the refcountbt, so
+        * their rmaps do not have offsets.
+        */
+       if (info->whichfork == XFS_COW_FORK)
+               offset = 0;
+       else
+               offset = irec->br_startoff;
+
+       /*
+        * If the caller thinks this could be a shared bmbt extent (IOWs,
+        * any data fork extent of a reflink inode) then we have to use the
+        * range rmap lookup to make sure we get the correct owner/offset.
+        */
+       if (info->is_shared) {
+               error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
+                               owner, offset, rflags, rmap, &has_rmap);
+               if (!xfs_scrub_should_check_xref(info->sc, &error,
+                               &info->sc->sa.rmap_cur))
+                       return false;
+               goto out;
+       }
+
+       /*
+        * Otherwise, use the (faster) regular lookup.
+        */
+       error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
+                       offset, rflags, &has_rmap);
+       if (!xfs_scrub_should_check_xref(info->sc, &error,
+                       &info->sc->sa.rmap_cur))
+               return false;
+       if (!has_rmap)
+               goto out;
+
+       error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
+       if (!xfs_scrub_should_check_xref(info->sc, &error,
+                       &info->sc->sa.rmap_cur))
+               return false;
+
+out:
+       if (!has_rmap)
+               xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+                       irec->br_startoff);
+       return has_rmap;
+}
+
+/*
+ * Make sure that we have rmapbt records for this extent.
+ *
+ * Does nothing without an rmapbt cursor.  The expected owner is
+ * XFS_RMAP_OWN_COW for the CoW fork and the inode number otherwise.
+ * Once a record is found, each of the following is checked and any
+ * failure flags the file offset as a cross-referencing discrepancy:
+ * the rmap covers the whole physical extent, the rmap covers the whole
+ * logical range (not for the CoW fork), the owner matches, an
+ * unwritten non-CoW extent has XFS_RMAP_UNWRITTEN set, an attr fork
+ * extent has XFS_RMAP_ATTR_FORK set, and XFS_RMAP_BMBT_BLOCK is clear.
+ */
+STATIC void
+xfs_scrub_bmap_xref_rmap(
+       struct xfs_scrub_bmap_info      *info,
+       struct xfs_bmbt_irec            *irec,
+       xfs_agblock_t                   agbno)
+{
+       struct xfs_rmap_irec            rmap;
+       unsigned long long              rmap_end;
+       uint64_t                        owner;
+
+       if (!info->sc->sa.rmap_cur)
+               return;
+
+       if (info->whichfork == XFS_COW_FORK)
+               owner = XFS_RMAP_OWN_COW;
+       else
+               owner = info->sc->ip->i_ino;
+
+       /* Find the rmap record for this irec. */
+       if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap))
+               return;
+
+       /* Check the rmap. */
+       rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
+       if (rmap.rm_startblock > agbno ||
+           agbno + irec->br_blockcount > rmap_end)
+               xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+                               irec->br_startoff);
+
+       /*
+        * Check the logical offsets if applicable.  CoW staging extents
+        * don't track logical offsets since the mappings only exist in
+        * memory.
+        */
+       if (info->whichfork != XFS_COW_FORK) {
+               rmap_end = (unsigned long long)rmap.rm_offset +
+                               rmap.rm_blockcount;
+               if (rmap.rm_offset > irec->br_startoff ||
+                   irec->br_startoff + irec->br_blockcount > rmap_end)
+                       xfs_scrub_fblock_xref_set_corrupt(info->sc,
+                                       info->whichfork, irec->br_startoff);
+       }
+
+       if (rmap.rm_owner != owner)
+               xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+                               irec->br_startoff);
+
+       /*
+        * Check for discrepancies between the unwritten flag in the irec and
+        * the rmap.  Note that the (in-memory) CoW fork distinguishes between
+        * unwritten and written extents, but we don't track that in the rmap
+        * records because the blocks are owned (on-disk) by the refcountbt,
+        * which doesn't track unwritten state.
+        */
+       if (owner != XFS_RMAP_OWN_COW &&
+           irec->br_state == XFS_EXT_UNWRITTEN &&
+           !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
+               xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+                               irec->br_startoff);
+
+       if (info->whichfork == XFS_ATTR_FORK &&
+           !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
+               xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+                               irec->br_startoff);
+       if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
+               xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+                               irec->br_startoff);
+}
+
+/*
+ * Cross-reference a single rtdev extent record.
+ *
+ * Checks that the extent's blocks are in use according to
+ * xfs_scrub_xref_is_used_rt_space().  Nothing is checked once
+ * XFS_SCRUB_OFLAG_CORRUPT has been set.  @ip and @cur are not
+ * referenced by the body.
+ */
+STATIC void
+xfs_scrub_bmap_rt_extent_xref(
+       struct xfs_scrub_bmap_info      *info,
+       struct xfs_inode                *ip,
+       struct xfs_btree_cur            *cur,
+       struct xfs_bmbt_irec            *irec)
+{
+       if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock,
+                       irec->br_blockcount);
+}
+
+/*
+ * Cross-reference a single datadev extent record.
+ *
+ * Sets up the AG state for the AG containing the extent, then checks
+ * that the extent is in-use space, is not part of an inode chunk, and
+ * has a matching rmap.  Attr fork extents and data fork extents of
+ * non-reflink inodes must additionally not be shared; CoW fork extents
+ * must be CoW staging extents.  The AG state is released before
+ * returning.  Nothing is checked once XFS_SCRUB_OFLAG_CORRUPT has been
+ * set, or if AG setup fails.  @ip and @cur are not referenced by the
+ * body.
+ */
+STATIC void
+xfs_scrub_bmap_extent_xref(
+       struct xfs_scrub_bmap_info      *info,
+       struct xfs_inode                *ip,
+       struct xfs_btree_cur            *cur,
+       struct xfs_bmbt_irec            *irec)
+{
+       struct xfs_mount                *mp = info->sc->mp;
+       xfs_agnumber_t                  agno;
+       xfs_agblock_t                   agbno;
+       xfs_extlen_t                    len;
+       int                             error;
+
+       if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+       agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
+       len = irec->br_blockcount;
+
+       error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa);
+       if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork,
+                       irec->br_startoff, &error))
+               return;
+
+       xfs_scrub_xref_is_used_space(info->sc, agbno, len);
+       xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len);
+       xfs_scrub_bmap_xref_rmap(info, irec, agbno);
+       switch (info->whichfork) {
+       case XFS_DATA_FORK:
+               if (xfs_is_reflink_inode(info->sc->ip))
+                       break;
+               /* fall through */
+       case XFS_ATTR_FORK:
+               xfs_scrub_xref_is_not_shared(info->sc, agbno,
+                               irec->br_blockcount);
+               break;
+       case XFS_COW_FORK:
+               xfs_scrub_xref_is_cow_staging(info->sc, agbno,
+                               irec->br_blockcount);
+               break;
+       }
+
+       xfs_scrub_ag_free(info->sc, &info->sc->sa);
+}
+
 /* Scrub a single extent record. */
 STATIC int
 xfs_scrub_bmap_extent(
@@ -109,6 +305,7 @@ xfs_scrub_bmap_extent(
 {
        struct xfs_mount                *mp = info->sc->mp;
        struct xfs_buf                  *bp = NULL;
+       xfs_filblks_t                   end;
        int                             error = 0;
 
        if (cur)
@@ -136,19 +333,23 @@ xfs_scrub_bmap_extent(
                                irec->br_startoff);
 
        /* Make sure the extent points to a valid place. */
+       if (irec->br_blockcount > MAXEXTLEN)
+               xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+                               irec->br_startoff);
        if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
                xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
+       end = irec->br_startblock + irec->br_blockcount - 1;
        if (info->is_rt &&
            (!xfs_verify_rtbno(mp, irec->br_startblock) ||
-            !xfs_verify_rtbno(mp, irec->br_startblock +
-                               irec->br_blockcount - 1)))
+            !xfs_verify_rtbno(mp, end)))
                xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
        if (!info->is_rt &&
            (!xfs_verify_fsbno(mp, irec->br_startblock) ||
-            !xfs_verify_fsbno(mp, irec->br_startblock +
-                               irec->br_blockcount - 1)))
+            !xfs_verify_fsbno(mp, end) ||
+            XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
+                               XFS_FSB_TO_AGNO(mp, end)))
                xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
 
@@ -158,6 +359,11 @@ xfs_scrub_bmap_extent(
                xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
 
+       if (info->is_rt)
+               xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec);
+       else
+               xfs_scrub_bmap_extent_xref(info, ip, cur, irec);
+
        info->lastoff = irec->br_startoff + irec->br_blockcount;
        return error;
 }
@@ -235,7 +441,6 @@ xfs_scrub_bmap(
        struct xfs_ifork                *ifp;
        xfs_fileoff_t                   endoff;
        struct xfs_iext_cursor          icur;
-       bool                            found;
        int                             error = 0;
 
        ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -314,9 +519,7 @@ xfs_scrub_bmap(
        /* Scrub extent records. */
        info.lastoff = 0;
        ifp = XFS_IFORK_PTR(ip, whichfork);
-       for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec);
-            found != 0;
-            found = xfs_iext_next_extent(ifp, &icur, &irec)) {
+       for_each_xfs_iext(ifp, &icur, &irec) {
                if (xfs_scrub_should_terminate(sc, &error))
                        break;
                if (isnullstartblock(irec.br_startblock))
index df0766132acea690e0d883e87a02365539810168..54218168c8f969f06f26acb6520647a6621fb1b8 100644 (file)
  * Check for btree operation errors.  See the section about handling
  * operational errors in common.c.
  */
-bool
-xfs_scrub_btree_process_error(
+static bool
+__xfs_scrub_btree_process_error(
        struct xfs_scrub_context        *sc,
        struct xfs_btree_cur            *cur,
        int                             level,
-       int                             *error)
+       int                             *error,
+       __u32                           errflag,
+       void                            *ret_ip)
 {
        if (*error == 0)
                return true;
@@ -60,36 +62,80 @@ xfs_scrub_btree_process_error(
        case -EFSBADCRC:
        case -EFSCORRUPTED:
                /* Note the badness but don't abort. */
-               sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+               sc->sm->sm_flags |= errflag;
                *error = 0;
                /* fall through */
        default:
                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
                        trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
-                                       *error, __return_address);
+                                       *error, ret_ip);
                else
                        trace_xfs_scrub_btree_op_error(sc, cur, level,
-                                       *error, __return_address);
+                                       *error, ret_ip);
                break;
        }
        return false;
 }
 
+/*
+ * Check for btree operation errors found while scrubbing the btree
+ * itself.  Thin wrapper around __xfs_scrub_btree_process_error() that
+ * selects XFS_SCRUB_OFLAG_CORRUPT as the flag to set and passes
+ * __return_address along for the tracepoints.
+ */
+bool
+xfs_scrub_btree_process_error(
+       struct xfs_scrub_context        *sc,
+       struct xfs_btree_cur            *cur,
+       int                             level,
+       int                             *error)
+{
+       return __xfs_scrub_btree_process_error(sc, cur, level, error,
+                       XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+/*
+ * Check for btree operation errors hit while cross-referencing.  Thin
+ * wrapper around __xfs_scrub_btree_process_error() that selects
+ * XFS_SCRUB_OFLAG_XFAIL as the flag to set and passes __return_address
+ * along for the tracepoints.
+ */
+bool
+xfs_scrub_btree_xref_process_error(
+       struct xfs_scrub_context        *sc,
+       struct xfs_btree_cur            *cur,
+       int                             level,
+       int                             *error)
+{
+       return __xfs_scrub_btree_process_error(sc, cur, level, error,
+                       XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
 /* Record btree block corruption. */
-void
-xfs_scrub_btree_set_corrupt(
+static void
+__xfs_scrub_btree_set_corrupt(
        struct xfs_scrub_context        *sc,
        struct xfs_btree_cur            *cur,
-       int                             level)
+       int                             level,
+       __u32                           errflag,
+       void                            *ret_ip)
 {
-       sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+       sc->sm->sm_flags |= errflag;
 
        if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
                trace_xfs_scrub_ifork_btree_error(sc, cur, level,
-                               __return_address);
+                               ret_ip);
        else
                trace_xfs_scrub_btree_error(sc, cur, level,
-                               __return_address);
+                               ret_ip);
+}
+
+/*
+ * Record corruption found in the btree being scrubbed: sets
+ * XFS_SCRUB_OFLAG_CORRUPT via __xfs_scrub_btree_set_corrupt() and
+ * passes __return_address along for the tracepoints.
+ */
+void
+xfs_scrub_btree_set_corrupt(
+       struct xfs_scrub_context        *sc,
+       struct xfs_btree_cur            *cur,
+       int                             level)
+{
+       __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
+                       __return_address);
+}
+
+/*
+ * Record a discrepancy found while cross-referencing against a btree:
+ * sets XFS_SCRUB_OFLAG_XCORRUPT via __xfs_scrub_btree_set_corrupt() and
+ * passes __return_address along for the tracepoints.
+ */
+void
+xfs_scrub_btree_xref_set_corrupt(
+       struct xfs_scrub_context        *sc,
+       struct xfs_btree_cur            *cur,
+       int                             level)
+{
+       __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
+                       __return_address);
 }
 
 /*
@@ -268,6 +314,8 @@ xfs_scrub_btree_block_check_sibling(
        pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
        if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
                goto out;
+       if (pbp)
+               xfs_scrub_buffer_recheck(bs->sc, pbp);
 
        if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
                xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
@@ -315,6 +363,97 @@ xfs_scrub_btree_block_check_siblings(
        return error;
 }
 
+/*
+ * One deferred btree block owner check.  Entries are queued on the
+ * scrub's to_check list by xfs_scrub_btree_check_owner() and carry the
+ * disk address and btree level of the block to be checked later.
+ */
+struct check_owner {
+       struct list_head        list;
+       xfs_daddr_t             daddr;
+       int                     level;
+};
+
+/*
+ * Make sure this btree block isn't in the free list and that there's
+ * an rmap record for it.
+ *
+ * Returns 0 immediately when bs->cur has been cleared.  For btrees
+ * with XFS_BTREE_LONG_PTRS set, the AG state for the block's AG is set
+ * up here and released again before returning; a setup failure is
+ * routed through xfs_scrub_btree_xref_process_error() and the
+ * resulting error value is returned.  bs->cur is set to NULL when the
+ * cursor of the btree being scrubbed (bnobt or rmapbt) is found to be
+ * gone after the corresponding cross-reference call.
+ */
+STATIC int
+xfs_scrub_btree_check_block_owner(
+       struct xfs_scrub_btree          *bs,
+       int                             level,
+       xfs_daddr_t                     daddr)
+{
+       xfs_agnumber_t                  agno;
+       xfs_agblock_t                   agbno;
+       xfs_btnum_t                     btnum;
+       bool                            init_sa;
+       int                             error = 0;
+
+       if (!bs->cur)
+               return 0;
+
+       btnum = bs->cur->bc_btnum;
+       agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
+       agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
+
+       init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
+       if (init_sa) {
+               error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa);
+               if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur,
+                               level, &error))
+                       return error;
+       }
+
+       xfs_scrub_xref_is_used_space(bs->sc, agbno, 1);
+       /*
+        * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
+        * have to nullify it (to shut down further block owner checks) if
+        * self-xref encounters problems.
+        */
+       if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
+               bs->cur = NULL;
+
+       xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
+       if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
+               bs->cur = NULL;
+
+       if (init_sa)
+               xfs_scrub_ag_free(bs->sc, &bs->sc->sa);
+
+       return error;
+}
+
+/*
+ * Check the owner of a btree block.
+ *
+ * A NULL @bp on a btree with XFS_BTREE_ROOT_IN_INODE set is skipped
+ * and returns 0.  For the bnobt and rmapbt the check is deferred: the
+ * block's disk address and level are queued on bs->to_check and 0 is
+ * returned, or -ENOMEM if the queue entry cannot be allocated.  For
+ * every other btree the block is checked immediately and the result of
+ * xfs_scrub_btree_check_block_owner() is returned.
+ */
+STATIC int
+xfs_scrub_btree_check_owner(
+       struct xfs_scrub_btree          *bs,
+       int                             level,
+       struct xfs_buf                  *bp)
+{
+       struct xfs_btree_cur            *cur = bs->cur;
+       struct check_owner              *co;
+
+       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
+               return 0;
+
+       /*
+        * We want to cross-reference each btree block with the bnobt
+        * and the rmapbt.  We cannot cross-reference the bnobt or
+        * rmapbt while scanning the bnobt or rmapbt, respectively,
+        * because we cannot alter the cursor and we'd prefer not to
+        * duplicate cursors.  Therefore, save the buffer daddr for
+        * later scanning.
+        */
+       if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
+               co = kmem_alloc(sizeof(struct check_owner),
+                               KM_MAYFAIL | KM_NOFS);
+               if (!co)
+                       return -ENOMEM;
+               co->level = level;
+               co->daddr = XFS_BUF_ADDR(bp);
+               list_add_tail(&co->list, &bs->to_check);
+               return 0;
+       }
+
+       return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
+}
+
 /*
  * Grab and scrub a btree block given a btree pointer.  Returns block
  * and buffer pointers (if applicable) if they're ok to use.
@@ -349,6 +488,16 @@ xfs_scrub_btree_get_block(
                xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
                return 0;
        }
+       if (*pbp)
+               xfs_scrub_buffer_recheck(bs->sc, *pbp);
+
+       /*
+        * Check the block's owner; this function absorbs error codes
+        * for us.
+        */
+       error = xfs_scrub_btree_check_owner(bs, level, *pbp);
+       if (error)
+               return error;
 
        /*
         * Check the block's siblings; this function absorbs error codes
@@ -421,6 +570,8 @@ xfs_scrub_btree(
        struct xfs_btree_block          *block;
        int                             level;
        struct xfs_buf                  *bp;
+       struct check_owner              *co;
+       struct check_owner              *n;
        int                             i;
        int                             error = 0;
 
@@ -512,5 +663,14 @@ xfs_scrub_btree(
        }
 
 out:
+       /* Process deferred owner checks on btree blocks. */
+       list_for_each_entry_safe(co, n, &bs.to_check, list) {
+               if (!error && bs.cur)
+                       error = xfs_scrub_btree_check_block_owner(&bs,
+                                       co->level, co->daddr);
+               list_del(&co->list);
+               kmem_free(co);
+       }
+
        return error;
 }
index 4de825a626d19404122ec9e527aef91a06a98560..e2b868ede70beb9174bafe9d2d6bf4874d47c5f3 100644 (file)
 bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc,
                struct xfs_btree_cur *cur, int level, int *error);
 
+/* Check for btree xref operation errors. */
+bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc,
+                               struct xfs_btree_cur *cur, int level,
+                               int *error);
+
 /* Check for btree corruption. */
 void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc,
                struct xfs_btree_cur *cur, int level);
 
+/* Check for btree xref discrepancies. */
+void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc,
+               struct xfs_btree_cur *cur, int level);
+
 struct xfs_scrub_btree;
 typedef int (*xfs_scrub_btree_rec_fn)(
        struct xfs_scrub_btree  *bs,
index ac95fe911d96aacfd545aaa0ff594f3c783e5009..8033ab9d8f47d51f63e64ac168f3a9ac82d23055 100644 (file)
  */
 
 /* Check for operational errors. */
-bool
-xfs_scrub_process_error(
+static bool
+__xfs_scrub_process_error(
        struct xfs_scrub_context        *sc,
        xfs_agnumber_t                  agno,
        xfs_agblock_t                   bno,
-       int                             *error)
+       int                             *error,
+       __u32                           errflag,
+       void                            *ret_ip)
 {
        switch (*error) {
        case 0:
@@ -95,24 +97,48 @@ xfs_scrub_process_error(
        case -EFSBADCRC:
        case -EFSCORRUPTED:
                /* Note the badness but don't abort. */
-               sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+               sc->sm->sm_flags |= errflag;
                *error = 0;
                /* fall through */
        default:
                trace_xfs_scrub_op_error(sc, agno, bno, *error,
-                               __return_address);
+                               ret_ip);
                break;
        }
        return false;
 }
 
-/* Check for operational errors for a file offset. */
 bool
-xfs_scrub_fblock_process_error(
+xfs_scrub_process_error(
+       struct xfs_scrub_context        *sc,
+       xfs_agnumber_t                  agno,
+       xfs_agblock_t                   bno,
+       int                             *error)
+{
+       return __xfs_scrub_process_error(sc, agno, bno, error,
+                       XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+/*
+ * Check for operational errors hit while cross-referencing.  Thin
+ * wrapper around __xfs_scrub_process_error() that selects
+ * XFS_SCRUB_OFLAG_XFAIL as the flag to set and passes __return_address
+ * along for the tracepoint.
+ */
+bool
+xfs_scrub_xref_process_error(
+       struct xfs_scrub_context        *sc,
+       xfs_agnumber_t                  agno,
+       xfs_agblock_t                   bno,
+       int                             *error)
+{
+       return __xfs_scrub_process_error(sc, agno, bno, error,
+                       XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
+/* Check for operational errors for a file offset. */
+static bool
+__xfs_scrub_fblock_process_error(
        struct xfs_scrub_context        *sc,
        int                             whichfork,
        xfs_fileoff_t                   offset,
-       int                             *error)
+       int                             *error,
+       __u32                           errflag,
+       void                            *ret_ip)
 {
        switch (*error) {
        case 0:
@@ -124,17 +150,39 @@ xfs_scrub_fblock_process_error(
        case -EFSBADCRC:
        case -EFSCORRUPTED:
                /* Note the badness but don't abort. */
-               sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+               sc->sm->sm_flags |= errflag;
                *error = 0;
                /* fall through */
        default:
                trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
-                               __return_address);
+                               ret_ip);
                break;
        }
        return false;
 }
 
+bool
+xfs_scrub_fblock_process_error(
+       struct xfs_scrub_context        *sc,
+       int                             whichfork,
+       xfs_fileoff_t                   offset,
+       int                             *error)
+{      /* File-offset check, non-xref: the helper is handed CORRUPT. */
+       return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
+                       XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
+xfs_scrub_fblock_xref_process_error(
+       struct xfs_scrub_context        *sc,
+       int                             whichfork,
+       xfs_fileoff_t                   offset,
+       int                             *error)
+{      /* File-offset check, xref variant: the helper is handed XFAIL. */
+       return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
+                       XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
 /*
  * Handling scrub corruption/optimization/warning checks.
  *
@@ -183,6 +231,16 @@ xfs_scrub_block_set_corrupt(
        trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
 }
 
+/* Record an xref corruption: set XCORRUPT and trace the block at bp->b_bn. */
+void
+xfs_scrub_block_xref_set_corrupt(
+       struct xfs_scrub_context        *sc,
+       struct xfs_buf                  *bp)
+{
+       sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+       trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
+}
+
 /*
  * Record a corrupt inode.  The trace data will include the block given
  * by bp if bp is given; otherwise it will use the block location of the
@@ -198,6 +256,17 @@ xfs_scrub_ino_set_corrupt(
        trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
 }
 
+/* Record an xref corruption for inode ino; a NULL bp is traced as block 0. */
+void
+xfs_scrub_ino_xref_set_corrupt(
+       struct xfs_scrub_context        *sc,
+       xfs_ino_t                       ino,
+       struct xfs_buf                  *bp)
+{
+       sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+       trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
+}
+
 /* Record corruption in a block indexed by a file fork. */
 void
 xfs_scrub_fblock_set_corrupt(
@@ -209,6 +278,17 @@ xfs_scrub_fblock_set_corrupt(
        trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
 }
 
+/* Record an xref corruption (XCORRUPT) at a file fork offset and trace it. */
+void
+xfs_scrub_fblock_xref_set_corrupt(
+       struct xfs_scrub_context        *sc,
+       int                             whichfork,
+       xfs_fileoff_t                   offset)
+{
+       sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+       trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
+}
+
 /*
  * Warn about inodes that need administrative review but is not
  * incorrect.
@@ -244,6 +324,59 @@ xfs_scrub_set_incomplete(
        trace_xfs_scrub_incomplete(sc, __return_address);
 }
 
+/*
+ * rmap scrubbing -- compute the number of blocks with a given owner,
+ * at least according to the reverse mapping data.
+ */
+
+struct xfs_scrub_rmap_ownedby_info {
+       struct xfs_owner_info   *oinfo;         /* owner and fork flag to match */
+       xfs_filblks_t           *blocks;        /* caller's running block total */
+};
+
+STATIC int
+xfs_scrub_count_rmap_ownedby_irec(
+       struct xfs_btree_cur                    *cur,   /* unused here */
+       struct xfs_rmap_irec                    *rec,
+       void                                    *priv)
+{      /* Per-record callback: tally blocks of records that match oinfo. */
+       struct xfs_scrub_rmap_ownedby_info      *sroi = priv;
+       bool                                    irec_attr;
+       bool                                    oinfo_attr;
+
+       irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
+       oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
+
+       if (rec->rm_owner != sroi->oinfo->oi_owner)
+               return 0;       /* different owner: not counted */
+
+       if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
+               (*sroi->blocks) += rec->rm_blockcount;  /* fork flag must agree for inode owners */
+
+       return 0;       /* this callback never reports an error */
+}
+
+/*
+ * Sum rm_blockcount over the rmap records that match oinfo, walking every
+ * record via the caller's rmapbt cursor.  *blocks is zeroed before the walk.
+ */
+int
+xfs_scrub_count_rmap_ownedby_ag(
+       struct xfs_scrub_context                *sc,
+       struct xfs_btree_cur                    *cur,
+       struct xfs_owner_info                   *oinfo,
+       xfs_filblks_t                           *blocks)
+{
+       struct xfs_scrub_rmap_ownedby_info      sroi;
+
+       sroi.oinfo = oinfo;
+       *blocks = 0;
+       sroi.blocks = blocks;   /* callback accumulates directly into *blocks */
+
+       return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec,
+                       &sroi);
+}
+
 /*
  * AG scrubbing
  *
@@ -302,7 +435,7 @@ xfs_scrub_ag_read_headers(
        error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
        if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
                goto out;
-
+       error = 0;
 out:
        return error;
 }
@@ -472,7 +605,7 @@ xfs_scrub_setup_ag_btree(
                        return error;
        }
 
-       error = xfs_scrub_setup_ag_header(sc, ip);
+       error = xfs_scrub_setup_fs(sc, ip);
        if (error)
                return error;
 
@@ -503,18 +636,11 @@ xfs_scrub_get_inode(
        struct xfs_scrub_context        *sc,
        struct xfs_inode                *ip_in)
 {
+       struct xfs_imap                 imap;
        struct xfs_mount                *mp = sc->mp;
        struct xfs_inode                *ip = NULL;
        int                             error;
 
-       /*
-        * If userspace passed us an AG number or a generation number
-        * without an inode number, they haven't got a clue so bail out
-        * immediately.
-        */
-       if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino))
-               return -EINVAL;
-
        /* We want to scan the inode we already had opened. */
        if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
                sc->ip = ip_in;
@@ -526,10 +652,33 @@ xfs_scrub_get_inode(
                return -ENOENT;
        error = xfs_iget(mp, NULL, sc->sm->sm_ino,
                        XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
-       if (error == -ENOENT || error == -EINVAL) {
-               /* inode doesn't exist... */
-               return -ENOENT;
-       } else if (error) {
+       switch (error) {
+       case -ENOENT:
+               /* Inode doesn't exist, just bail out. */
+               return error;
+       case 0:
+               /* Got an inode, continue. */
+               break;
+       case -EINVAL:
+               /*
+                * -EINVAL with IGET_UNTRUSTED could mean one of several
+                * things: userspace gave us an inode number that doesn't
+                * correspond to fs space, or doesn't have an inobt entry;
+                * or it could simply mean that the inode buffer failed the
+                * read verifiers.
+                *
+                * Try just the inode mapping lookup -- if it succeeds, then
+                * the inode buffer verifier failed and something needs fixing.
+                * Otherwise, we really couldn't find it so tell userspace
+                * that it no longer exists.
+                */
+               error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
+                               XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
+               if (error)
+                       return -ENOENT;
+               error = -EFSCORRUPTED;
+               /* fall through */
+       default:
                trace_xfs_scrub_op_error(sc,
                                XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
                                XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
@@ -572,3 +721,61 @@ xfs_scrub_setup_inode_contents(
        /* scrub teardown will unlock and release the inode for us */
        return error;
 }
+
+/*
+ * Decide whether to run a cross-reference check: true only if *error == 0.
+ * On error return false; if curpp is set and *curpp is already NULL, do no
+ * more.  Else delete any cursor, set XFAIL, trace, and zero *error.
+ */
+bool
+xfs_scrub_should_check_xref(
+       struct xfs_scrub_context        *sc,
+       int                             *error,
+       struct xfs_btree_cur            **curpp)
+{
+       if (*error == 0)
+               return true;
+
+       if (curpp) {
+               /* If we've already given up on xref, just bail out. */
+               if (!*curpp)
+                       return false;
+
+               /* xref error, delete cursor and bail out. */
+               xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
+               *curpp = NULL;
+       }
+
+       sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
+       trace_xfs_scrub_xref_error(sc, *error, __return_address);
+
+       /*
+        * Errors encountered during cross-referencing with another
+        * data structure should not cause this scrubber to abort.
+        */
+       *error = 0;
+       return false;
+}
+
+/* Run the structure verifiers on in-memory buffers to detect bad memory. */
+void
+xfs_scrub_buffer_recheck(
+       struct xfs_scrub_context        *sc,
+       struct xfs_buf                  *bp)
+{
+       xfs_failaddr_t                  fa;
+
+       if (bp->b_ops == NULL) {        /* no buffer ops attached */
+               xfs_scrub_block_set_corrupt(sc, bp);
+               return;
+       }
+       if (bp->b_ops->verify_struct == NULL) { /* nothing to recheck with */
+               xfs_scrub_set_incomplete(sc);
+               return;
+       }
+       fa = bp->b_ops->verify_struct(bp);
+       if (!fa)
+               return;         /* no failure address: nothing to report */
+       sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+       trace_xfs_scrub_block_error(sc, bp->b_bn, fa);  /* fa traced as the ip */
+}
index 5c043855570e43b0eaf11f52e22e57d985040ab0..ddb65d22c76acdf4ad447164efa2ffc2c66c76c5 100644 (file)
@@ -56,6 +56,11 @@ bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
 bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
                xfs_fileoff_t offset, int *error);
 
+bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc,
+               xfs_agnumber_t agno, xfs_agblock_t bno, int *error);
+bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc,
+               int whichfork, xfs_fileoff_t offset, int *error);
+
 void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
                struct xfs_buf *bp);
 void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
@@ -68,6 +73,13 @@ void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
 void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
                xfs_fileoff_t offset);
 
+void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc,
+               struct xfs_buf *bp);
+void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
+               struct xfs_buf *bp);
+void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
+               int whichfork, xfs_fileoff_t offset);
+
 void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
                struct xfs_buf *bp);
 void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
@@ -76,10 +88,12 @@ void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
 void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc);
 int xfs_scrub_checkpoint_log(struct xfs_mount *mp);
 
+/* Are we set up for a cross-referencing check? */
+bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error,
+                          struct xfs_btree_cur **curpp);
+
 /* Setup functions */
 int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
-int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
-                             struct xfs_inode *ip);
 int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc,
                               struct xfs_inode *ip);
 int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
@@ -134,11 +148,16 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
                        int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
                                  void *),
                        void *priv);
+int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc,
+                                   struct xfs_btree_cur *cur,
+                                   struct xfs_owner_info *oinfo,
+                                   xfs_filblks_t *blocks);
 
 int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc,
                             struct xfs_inode *ip, bool force_log);
 int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
 int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc,
                                   struct xfs_inode *ip, unsigned int resblks);
+void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp);
 
 #endif /* __XFS_SCRUB_COMMON_H__ */
index d94edd93cba833b602da7dfc3f510161d6cd62b9..bffdb7dc09bf8db89b9702013bfbdbe614b6cf8b 100644 (file)
@@ -233,11 +233,28 @@ xfs_scrub_da_btree_write_verify(
                return;
        }
 }
+static void *
+xfs_scrub_da_btree_verify(
+       struct xfs_buf          *bp)
+{      /* Struct verifier: retarget bp->b_ops by block magic, then delegate. */
+       struct xfs_da_blkinfo   *info = bp->b_addr;
+
+       switch (be16_to_cpu(info->magic)) {
+       case XFS_DIR2_LEAF1_MAGIC:
+       case XFS_DIR3_LEAF1_MAGIC:
+               bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+               return bp->b_ops->verify_struct(bp);
+       default:        /* every other magic is verified as a da3 node */
+               bp->b_ops = &xfs_da3_node_buf_ops;
+               return bp->b_ops->verify_struct(bp);
+       }
+}
 
 static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
        .name = "xfs_scrub_da_btree",
        .verify_read = xfs_scrub_da_btree_read_verify,
        .verify_write = xfs_scrub_da_btree_write_verify,
+       .verify_struct = xfs_scrub_da_btree_verify,
 };
 
 /* Check a block's sibling. */
@@ -276,6 +293,9 @@ xfs_scrub_da_btree_block_check_sibling(
                xfs_scrub_da_set_corrupt(ds, level);
                return error;
        }
+       if (ds->state->altpath.blk[level].bp)
+               xfs_scrub_buffer_recheck(ds->sc,
+                               ds->state->altpath.blk[level].bp);
 
        /* Compare upper level pointer to sibling pointer. */
        if (ds->state->altpath.blk[level].blkno != sibling)
@@ -358,6 +378,8 @@ xfs_scrub_da_btree_block(
                        &xfs_scrub_da_btree_buf_ops);
        if (!xfs_scrub_da_process_error(ds, level, &error))
                goto out_nobuf;
+       if (blk->bp)
+               xfs_scrub_buffer_recheck(ds->sc, blk->bp);
 
        /*
         * We didn't find a dir btree root block, which means that
index 69e1efdd4019141d17965915ff883e2fc517758f..50b6a26b02998489b2056315cf676d5c00894c55 100644 (file)
@@ -92,7 +92,7 @@ xfs_scrub_dir_check_ftype(
         * inodes can trigger immediate inactive cleanup of the inode.
         */
        error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
-       if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
+       if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
                        &error))
                goto out;
 
@@ -200,6 +200,7 @@ xfs_scrub_dir_rec(
        struct xfs_inode                *dp = ds->dargs.dp;
        struct xfs_dir2_data_entry      *dent;
        struct xfs_buf                  *bp;
+       char                            *p, *endp;
        xfs_ino_t                       ino;
        xfs_dablk_t                     rec_bno;
        xfs_dir2_db_t                   db;
@@ -237,9 +238,37 @@ xfs_scrub_dir_rec(
                xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
                goto out;
        }
+       xfs_scrub_buffer_recheck(ds->sc, bp);
 
-       /* Retrieve the entry, sanity check it, and compare hashes. */
        dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
+
+       /* Make sure we got a real directory entry. */
+       p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr);
+       endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
+       if (!endp) {
+               xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+               goto out_relse;
+       }
+       while (p < endp) {
+               struct xfs_dir2_data_entry      *dep;
+               struct xfs_dir2_data_unused     *dup;
+
+               dup = (struct xfs_dir2_data_unused *)p;
+               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+                       p += be16_to_cpu(dup->length);
+                       continue;
+               }
+               dep = (struct xfs_dir2_data_entry *)p;
+               if (dep == dent)
+                       break;
+               p += mp->m_dir_inode_ops->data_entsize(dep->namelen);
+       }
+       if (p >= endp) {
+               xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+               goto out_relse;
+       }
+
+       /* Retrieve the entry, sanity check it, and compare hashes. */
        ino = be64_to_cpu(dent->inumber);
        hash = be32_to_cpu(ent->hashval);
        tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
@@ -324,6 +353,7 @@ xfs_scrub_directory_data_bestfree(
        }
        if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
                goto out;
+       xfs_scrub_buffer_recheck(sc, bp);
 
        /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
 
@@ -361,13 +391,7 @@ xfs_scrub_directory_data_bestfree(
 
        /* Make sure the bestfrees are actually the best free spaces. */
        ptr = (char *)d_ops->data_entry_p(bp->b_addr);
-       if (is_block) {
-               struct xfs_dir2_block_tail      *btp;
-
-               btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr);
-               endptr = (char *)xfs_dir2_block_leaf_p(btp);
-       } else
-               endptr = (char *)bp->b_addr + BBTOB(bp->b_length);
+       endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
 
        /* Iterate the entries, stopping when we hit or go past the end. */
        while (ptr < endptr) {
@@ -474,6 +498,7 @@ xfs_scrub_directory_leaf1_bestfree(
        error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
        if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
                goto out;
+       xfs_scrub_buffer_recheck(sc, bp);
 
        leaf = bp->b_addr;
        d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
@@ -559,6 +584,7 @@ xfs_scrub_directory_free_bestfree(
        error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
        if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
                goto out;
+       xfs_scrub_buffer_recheck(sc, bp);
 
        if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
                struct xfs_dir3_free_hdr        *hdr3 = bp->b_addr;
index 496d6f2fbb9e3cd974eefccaadaf394a6032526f..63ab3f98430d7f3cd3c18ba4696e105253d490a0 100644 (file)
@@ -58,6 +58,56 @@ xfs_scrub_setup_ag_iallocbt(
 
 /* Inode btree scrubber. */
 
+/*
+ * Cross-check a chunk against the other inode btree: the inobt if we are
+ * scrubbing the finobt, else the finobt (skipped when that cursor is NULL).
+ * That tree must have a record at agino iff ir_freecount > 0.
+ */
+static inline void
+xfs_scrub_iallocbt_chunk_xref_other(
+       struct xfs_scrub_context        *sc,
+       struct xfs_inobt_rec_incore     *irec,
+       xfs_agino_t                     agino)
+{
+       struct xfs_btree_cur            **pcur;
+       bool                            has_irec;
+       int                             error;
+
+       if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
+               pcur = &sc->sa.ino_cur;
+       else
+               pcur = &sc->sa.fino_cur;
+       if (!(*pcur))
+               return;         /* no cursor for the other btree */
+       error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec);
+       if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+               return;         /* lookup failed; *pcur may now be NULL */
+       if (((irec->ir_freecount > 0 && !has_irec) ||
+            (irec->ir_freecount == 0 && has_irec)))
+               xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+}
+
+/* Cross-reference an inode chunk extent with the other btrees. */
+STATIC void
+xfs_scrub_iallocbt_chunk_xref(
+       struct xfs_scrub_context        *sc,
+       struct xfs_inobt_rec_incore     *irec,
+       xfs_agino_t                     agino,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len)
+{
+       struct xfs_owner_info           oinfo;
+
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;         /* already flagged corrupt: skip all xref */
+
+       xfs_scrub_xref_is_used_space(sc, agbno, len);
+       xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino);
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+       xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo);
+       xfs_scrub_xref_is_not_shared(sc, agbno, len);
+}
+
 /* Is this chunk worth checking? */
 STATIC bool
 xfs_scrub_iallocbt_chunk(
@@ -76,6 +126,8 @@ xfs_scrub_iallocbt_chunk(
            !xfs_verify_agbno(mp, agno, bno + len - 1))
                xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
 
+       xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
+
        return true;
 }
 
@@ -190,8 +242,14 @@ xfs_scrub_iallocbt_check_freemask(
                }
 
                /* If any part of this is a hole, skip it. */
-               if (ir_holemask)
+               if (ir_holemask) {
+                       xfs_scrub_xref_is_not_owned_by(bs->sc, agbno,
+                                       blks_per_cluster, &oinfo);
                        continue;
+               }
+
+               xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster,
+                               &oinfo);
 
                /* Grab the inode cluster buffer. */
                imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
@@ -227,6 +285,7 @@ xfs_scrub_iallocbt_rec(
        union xfs_btree_rec             *rec)
 {
        struct xfs_mount                *mp = bs->cur->bc_mp;
+       xfs_filblks_t                   *inode_blocks = bs->private;
        struct xfs_inobt_rec_incore     irec;
        uint64_t                        holes;
        xfs_agnumber_t                  agno = bs->cur->bc_private.a.agno;
@@ -264,6 +323,9 @@ xfs_scrub_iallocbt_rec(
            (agbno & (xfs_icluster_size_fsb(mp) - 1)))
                xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
 
+       *inode_blocks += XFS_B_TO_FSB(mp,
+                       irec.ir_count * mp->m_sb.sb_inodesize);
+
        /* Handle non-sparse inodes */
        if (!xfs_inobt_issparse(irec.ir_holemask)) {
                len = XFS_B_TO_FSB(mp,
@@ -308,6 +370,72 @@ xfs_scrub_iallocbt_rec(
        return error;
 }
 
+/*
+ * Make sure the inode btrees are as large as the rmap thinks they are.
+ * Don't bother if we're missing btree cursors, as we're already corrupt.
+ */
+STATIC void
+xfs_scrub_iallocbt_xref_rmap_btreeblks(
+       struct xfs_scrub_context        *sc,
+       int                             which)  /* unused here */
+{
+       struct xfs_owner_info           oinfo;
+       xfs_filblks_t                   blocks;
+       xfs_extlen_t                    inobt_blocks = 0;
+       xfs_extlen_t                    finobt_blocks = 0;
+       int                             error;
+
+       if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
+           (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur))
+               return;
+
+       /* Check that we saw as many inobt blocks as the rmap says. */
+       error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks);
+       if (!xfs_scrub_process_error(sc, 0, 0, &error))
+               return;
+
+       if (sc->sa.fino_cur) {
+               error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks);
+               if (!xfs_scrub_process_error(sc, 0, 0, &error))
+                       return;
+       }
+
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
+       error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
+                       &blocks);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+               return;
+       if (blocks != inobt_blocks + finobt_blocks)     /* rmap vs. both trees */
+               xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+}
+
+/*
+ * Make sure that the inobt records point to the same number of blocks as
+ * the rmap says are owned by inodes.
+ */
+STATIC void
+xfs_scrub_iallocbt_xref_rmap_inodes(
+       struct xfs_scrub_context        *sc,
+       int                             which,  /* unused here */
+       xfs_filblks_t                   inode_blocks)
+{
+       struct xfs_owner_info           oinfo;
+       xfs_filblks_t                   blocks;
+       int                             error;
+
+       if (!sc->sa.rmap_cur)
+               return;         /* no rmapbt cursor to compare against */
+
+       /* Check that we saw as many inode blocks as the rmap knows about. */
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+       error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
+                       &blocks);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+               return;
+       if (blocks != inode_blocks)
+               xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+}
+
 /* Scrub the inode btrees for some AG. */
 STATIC int
 xfs_scrub_iallocbt(
@@ -316,10 +444,29 @@ xfs_scrub_iallocbt(
 {
        struct xfs_btree_cur            *cur;
        struct xfs_owner_info           oinfo;
+       xfs_filblks_t                   inode_blocks = 0;
+       int                             error;
 
        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
        cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
-       return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL);
+       error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo,
+                       &inode_blocks);
+       if (error)
+               return error;
+
+       xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which);
+
+       /*
+        * If we're scrubbing the inode btree, inode_blocks is the number of
+        * blocks pointed to by all the inode chunk records.  Therefore, we
+        * should compare to the number of inode chunk blocks that the rmap
+        * knows about.  We can't do this for the finobt since it only points
+        * to inode chunks with free inodes.
+        */
+       if (which == XFS_BTNUM_INO)
+               xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks);
+
+       return error;
 }
 
 int
@@ -335,3 +482,46 @@ xfs_scrub_finobt(
 {
        return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
 }
+
+/* See if an inode btree has (or doesn't have) an inode chunk record. */
+static inline void
+xfs_scrub_xref_inode_check(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len,
+       struct xfs_btree_cur            **icur,
+       bool                            should_have_inodes)
+{
+       bool                            has_inodes;
+       int                             error;
+
+       if (!(*icur))
+               return;         /* no cursor: nothing to compare */
+
+       error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes);
+       if (!xfs_scrub_should_check_xref(sc, &error, icur))
+               return;         /* lookup failed; *icur may now be NULL */
+       if (has_inodes != should_have_inodes)   /* result differs from expectation */
+               xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0);
+}
+
+/* xref check that neither the inobt nor the finobt has inodes in the extent */
+void
+xfs_scrub_xref_is_not_inode_chunk(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len)
+{
+       xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false);
+       xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false);
+}
+
+/* xref check that the inobt has inodes in the extent (finobt not consulted) */
+void
+xfs_scrub_xref_is_inode_chunk(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len)
+{
+       xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true);
+}
index f120fb20452f493ecd6fd29bfa09732770f28015..21297bef8df1649416345e4103a31ff6c426e49b 100644 (file)
 #include "xfs_ialloc.h"
 #include "xfs_da_format.h"
 #include "xfs_reflink.h"
+#include "xfs_rmap.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
+#include "scrub/btree.h"
 #include "scrub/trace.h"
 
 /*
@@ -64,7 +68,7 @@ xfs_scrub_setup_inode(
                break;
        case -EFSCORRUPTED:
        case -EFSBADCRC:
-               return 0;
+               return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
        default:
                return error;
        }
@@ -392,6 +396,14 @@ xfs_scrub_dinode(
                break;
        }
 
+       /* di_[amc]time.nsec */
+       if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
+               xfs_scrub_ino_set_corrupt(sc, ino, bp);
+       if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
+               xfs_scrub_ino_set_corrupt(sc, ino, bp);
+       if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
+               xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
        /*
         * di_size.  xfs_dinode_verify checks for things that screw up
         * the VFS such as the upper bit being set and zero-length
@@ -495,6 +507,8 @@ xfs_scrub_dinode(
        }
 
        if (dip->di_version >= 3) {
+               if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
+                       xfs_scrub_ino_set_corrupt(sc, ino, bp);
                xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
                xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
                                flags2);
@@ -546,7 +560,7 @@ xfs_scrub_inode_map_raw(
         */
        bp->b_ops = &xfs_inode_buf_ops;
        dip = xfs_buf_offset(bp, imap.im_boffset);
-       if (!xfs_dinode_verify(mp, ino, dip) ||
+       if (xfs_dinode_verify(mp, ino, dip) != NULL ||
            !xfs_dinode_good_version(mp, dip->di_version)) {
                xfs_scrub_ino_set_corrupt(sc, ino, bp);
                goto out_buf;
@@ -567,18 +581,155 @@ xfs_scrub_inode_map_raw(
        return error;
 }
 
+/*
+ * Make sure the finobt doesn't think this inode is free.
+ * We don't have to check the inobt ourselves because we got the inode via
+ * IGET_UNTRUSTED, which checks the inobt for us.
+ */
+static void
+xfs_scrub_inode_xref_finobt(
+       struct xfs_scrub_context        *sc,
+       xfs_ino_t                       ino)
+{
+       struct xfs_inobt_rec_incore     rec;
+       xfs_agino_t                     agino;
+       int                             has_record;
+       int                             error;
+
+       if (!sc->sa.fino_cur)
+               return;
+
+       agino = XFS_INO_TO_AGINO(sc->mp, ino);
+
+       /*
+        * Try to get the finobt record.  If we can't get it, then we're
+        * in good shape.
+        */
+       error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
+                       &has_record);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
+           !has_record)
+               return;
+
+       error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
+           !has_record)
+               return;
+
+       /*
+        * Otherwise, make sure this record either doesn't cover this inode,
+        * or that it does but it's marked present.
+        */
+       if (rec.ir_startino > agino ||
+           rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
+               return;
+
+       if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
+}
+
+/* Cross reference the inode fields with the forks. */
+STATIC void
+xfs_scrub_inode_xref_bmap(
+       struct xfs_scrub_context        *sc,
+       struct xfs_dinode               *dip)
+{
+       xfs_extnum_t                    nextents;
+       xfs_filblks_t                   count;
+       xfs_filblks_t                   acount;
+       int                             error;
+
+       /* Walk all the extents to check nextents/naextents/nblocks. */
+       error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
+                       &nextents, &count);
+       if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+               return;
+       if (nextents < be32_to_cpu(dip->di_nextents))
+               xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+
+       error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
+                       &nextents, &acount);
+       if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+               return;
+       if (nextents != be16_to_cpu(dip->di_anextents))
+               xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+
+       /* Check nblocks against the inode. */
+       if (count + acount != be64_to_cpu(dip->di_nblocks))
+               xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_inode_xref(
+       struct xfs_scrub_context        *sc,
+       xfs_ino_t                       ino,
+       struct xfs_dinode               *dip)
+{
+       struct xfs_owner_info           oinfo;
+       xfs_agnumber_t                  agno;
+       xfs_agblock_t                   agbno;
+       int                             error;
+
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       agno = XFS_INO_TO_AGNO(sc->mp, ino);
+       agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
+
+       error = xfs_scrub_ag_init(sc, agno, &sc->sa);
+       if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
+               return;
+
+       xfs_scrub_xref_is_used_space(sc, agbno, 1);
+       xfs_scrub_inode_xref_finobt(sc, ino);
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+       xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+       xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+       xfs_scrub_inode_xref_bmap(sc, dip);
+
+       xfs_scrub_ag_free(sc, &sc->sa);
+}
+
+/*
+ * If the reflink iflag disagrees with a scan for shared data fork extents,
+ * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
+ * any shared extents).  We already checked for reflink iflag set on a non
+ * reflink filesystem.
+ */
+static void
+xfs_scrub_inode_check_reflink_iflag(
+       struct xfs_scrub_context        *sc,
+       xfs_ino_t                       ino,
+       struct xfs_buf                  *bp)
+{
+       struct xfs_mount                *mp = sc->mp;
+       bool                            has_shared;
+       int                             error;
+
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return;
+
+       error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
+                       &has_shared);
+       if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
+                       XFS_INO_TO_AGBNO(mp, ino), &error))
+               return;
+       if (xfs_is_reflink_inode(sc->ip) && !has_shared)
+               xfs_scrub_ino_set_preen(sc, ino, bp);
+       else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
+               xfs_scrub_ino_set_corrupt(sc, ino, bp);
+}
+
 /* Scrub an inode. */
 int
 xfs_scrub_inode(
        struct xfs_scrub_context        *sc)
 {
        struct xfs_dinode               di;
-       struct xfs_mount                *mp = sc->mp;
        struct xfs_buf                  *bp = NULL;
        struct xfs_dinode               *dip;
        xfs_ino_t                       ino;
-
-       bool                            has_shared;
        int                             error = 0;
 
        /* Did we get the in-core inode, or are we doing this manually? */
@@ -603,19 +754,14 @@ xfs_scrub_inode(
                goto out;
 
        /*
-        * Does this inode have the reflink flag set but no shared extents?
-        * Set the preening flag if this is the case.
+        * Look for discrepancies between file's data blocks and the reflink
+        * iflag.  We already checked the iflag against the file mode when
+        * we scrubbed the dinode.
         */
-       if (xfs_is_reflink_inode(sc->ip)) {
-               error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
-                               &has_shared);
-               if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
-                               XFS_INO_TO_AGBNO(mp, ino), &error))
-                       goto out;
-               if (!has_shared)
-                       xfs_scrub_ino_set_preen(sc, ino, bp);
-       }
+       if (S_ISREG(VFS_I(sc->ip)->i_mode))
+               xfs_scrub_inode_check_reflink_iflag(sc, ino, bp);
 
+       xfs_scrub_inode_xref(sc, ino, dip);
 out:
        if (bp)
                xfs_trans_brelse(sc->tp, bp);
index 63a25334fc831acc63524d5bab777415c8cd6980..0d3851410c74feae8ffb5001a34568f47e177c4b 100644 (file)
@@ -169,9 +169,9 @@ xfs_scrub_parent_validate(
         * immediate inactive cleanup of the inode.
         */
        error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
-       if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+       if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
                goto out;
-       if (dp == sc->ip) {
+       if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
                xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
                goto out_rele;
        }
@@ -185,7 +185,7 @@ xfs_scrub_parent_validate(
         */
        if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
                error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
-               if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
+               if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
                                &error))
                        goto out_unlock;
                if (nlink != expected_nlink)
@@ -205,7 +205,7 @@ xfs_scrub_parent_validate(
 
        /* Go looking for our dentry. */
        error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
-       if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+       if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
                goto out_unlock;
 
        /* Drop the parent lock, relock this inode. */
index 3d9037eceaf1b81c2848e056bdc8e9309bc4d87f..51daa4ae2627af7b4f8988ecf423d0680b9f519a 100644 (file)
@@ -67,13 +67,6 @@ xfs_scrub_setup_quota(
 {
        uint                            dqtype;
 
-       /*
-        * If userspace gave us an AG number or inode data, they don't
-        * know what they're doing.  Get out.
-        */
-       if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
-               return -EINVAL;
-
        dqtype = xfs_scrub_quota_to_dqtype(sc);
        if (dqtype == 0)
                return -EINVAL;
index 2f88a8d44bd057c9344999ab66e468394d4500ad..400f1561cd3db5ec9975b4db15dd188450aaa622 100644 (file)
@@ -31,6 +31,7 @@
 #include "xfs_sb.h"
 #include "xfs_alloc.h"
 #include "xfs_rmap.h"
+#include "xfs_refcount.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
@@ -50,6 +51,307 @@ xfs_scrub_setup_ag_refcountbt(
 
 /* Reference count btree scrubber. */
 
+/*
+ * Confirming Reference Counts via Reverse Mappings
+ *
+ * We want to count the reverse mappings overlapping a refcount record
+ * (bno, len, refcount), allowing for the possibility that some of the
+ * overlap may come from smaller adjoining reverse mappings, while some
+ * comes from single extents which overlap the range entirely.  The
+ * outer loop is as follows:
+ *
+ * 1. For all reverse mappings overlapping the refcount extent,
+ *    a. If a given rmap completely overlaps, mark it as seen.
+ *    b. Otherwise, record the fragment (in agbno order) for later
+ *       processing.
+ *
+ * Once we've seen all the rmaps, we know that for all blocks in the
+ * refcount record we want to find $refcount owners and we've already
+ * visited $seen extents that overlap all the blocks.  Therefore, we
+ * need to find ($refcount - $seen) owners for every block in the
+ * extent; call that quantity $target_nr.  Proceed as follows:
+ *
+ * 2. Pull the first $target_nr fragments from the list; all of them
+ *    should start at or before the start of the extent.
+ *    Call this subset of fragments the working set.
+ * 3. Until there are no more unprocessed fragments,
+ *    a. Find the shortest fragments in the set and remove them.
+ *    b. Note the block number of the end of these fragments.
+ *    c. Pull the same number of fragments from the list.  All of these
+ *       fragments should start at the block number recorded in the
+ *       previous step.
+ *    d. Put those fragments in the set.
+ * 4. Check that there are $target_nr fragments remaining in the working
+ *    set, and that they all end at or beyond the end of the refcount extent.
+ *
+ * If the refcount is correct, all the check conditions in the algorithm
+ * should always hold true.  If not, the refcount is incorrect.
+ */
+struct xfs_scrub_refcnt_frag {
+       struct list_head                list;
+       struct xfs_rmap_irec            rm;
+};
+
+struct xfs_scrub_refcnt_check {
+       struct xfs_scrub_context        *sc;
+       struct list_head                fragments;
+
+       /* refcount extent we're examining */
+       xfs_agblock_t                   bno;
+       xfs_extlen_t                    len;
+       xfs_nlink_t                     refcount;
+
+       /* number of owners seen */
+       xfs_nlink_t                     seen;
+};
+
+/*
+ * Decide if the given rmap is large enough that we can redeem it
+ * towards refcount verification now, or if it's a fragment, in
+ * which case we'll hang onto it in the hopes that we'll later
+ * discover that we've collected exactly as many fragments as
+ * the refcountbt says we should have.
+ */
+STATIC int
+xfs_scrub_refcountbt_rmap_check(
+       struct xfs_btree_cur            *cur,
+       struct xfs_rmap_irec            *rec,
+       void                            *priv)
+{
+       struct xfs_scrub_refcnt_check   *refchk = priv;
+       struct xfs_scrub_refcnt_frag    *frag;
+       xfs_agblock_t                   rm_last;
+       xfs_agblock_t                   rc_last;
+       int                             error = 0;
+
+       if (xfs_scrub_should_terminate(refchk->sc, &error))
+               return error;
+
+       rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
+       rc_last = refchk->bno + refchk->len - 1;
+
+       /* Confirm that a single-owner refc extent is a CoW stage. */
+       if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
+               xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0);
+               return 0;
+       }
+
+       if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
+               /*
+                * The rmap covers the entire refcount record, so we can
+                * confirm one refcount owner seen.
+                */
+               refchk->seen++;
+       } else {
+               /*
+                * This rmap covers only part of the refcount record, so
+                * save the fragment for later processing.  If the rmapbt
+                * is healthy each rmap_irec we see will be in agbno order
+                * so we don't need insertion sort here.
+                */
+               frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
+                               KM_MAYFAIL | KM_NOFS);
+               if (!frag)
+                       return -ENOMEM;
+               memcpy(&frag->rm, rec, sizeof(frag->rm));
+               list_add_tail(&frag->list, &refchk->fragments);
+       }
+
+       return 0;
+}
+
+/*
+ * Given a bunch of rmap fragments, iterate through them, keeping
+ * a running tally of the refcount.  If this ever deviates from
+ * what we expect (which is the refcountbt's refcount minus the
+ * number of extents that totally covered the refcountbt extent),
+ * we have a refcountbt error.
+ */
+STATIC void
+xfs_scrub_refcountbt_process_rmap_fragments(
+       struct xfs_scrub_refcnt_check   *refchk)
+{
+       struct list_head                worklist;
+       struct xfs_scrub_refcnt_frag    *frag;
+       struct xfs_scrub_refcnt_frag    *n;
+       xfs_agblock_t                   bno;
+       xfs_agblock_t                   rbno;
+       xfs_agblock_t                   next_rbno;
+       xfs_nlink_t                     nr;
+       xfs_nlink_t                     target_nr;
+
+       target_nr = refchk->refcount - refchk->seen;
+       if (target_nr == 0)
+               return;
+
+       /*
+        * There are (refchk->refcount - refchk->seen)
+        * references we haven't found yet.  Pull that many off the
+        * fragment list and figure out where the smallest rmap ends
+        * (and therefore the next rmap should start).  All the rmaps
+        * we pull off should start at or before the beginning of the
+        * refcount record's range.
+        */
+       INIT_LIST_HEAD(&worklist);
+       rbno = NULLAGBLOCK;
+       nr = 1;
+
+       /* Make sure the fragments actually /are/ in agbno order. */
+       bno = 0;
+       list_for_each_entry(frag, &refchk->fragments, list) {
+               if (frag->rm.rm_startblock < bno)
+                       goto done;
+               bno = frag->rm.rm_startblock;
+       }
+
+       /*
+        * Find all the rmaps that start at or before the refc extent,
+        * and put them on the worklist.
+        */
+       list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+               if (frag->rm.rm_startblock > refchk->bno)
+                       goto done;
+               bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+               if (bno < rbno)
+                       rbno = bno;
+               list_move_tail(&frag->list, &worklist);
+               if (nr == target_nr)
+                       break;
+               nr++;
+       }
+
+       /*
+        * We should have found exactly $target_nr rmap fragments starting
+        * at or before the refcount extent.
+        */
+       if (nr != target_nr)
+               goto done;
+
+       while (!list_empty(&refchk->fragments)) {
+               /* Discard any fragments ending at rbno from the worklist. */
+               nr = 0;
+               next_rbno = NULLAGBLOCK;
+               list_for_each_entry_safe(frag, n, &worklist, list) {
+                       bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+                       if (bno != rbno) {
+                               if (bno < next_rbno)
+                                       next_rbno = bno;
+                               continue;
+                       }
+                       list_del(&frag->list);
+                       kmem_free(frag);
+                       nr++;
+               }
+
+               /* Try to add nr rmaps starting at rbno to the worklist. */
+               list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+                       bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+                       if (frag->rm.rm_startblock != rbno)
+                               goto done;
+                       list_move_tail(&frag->list, &worklist);
+                       if (next_rbno > bno)
+                               next_rbno = bno;
+                       nr--;
+                       if (nr == 0)
+                               break;
+               }
+
+               /*
+                * If we get here and nr > 0, this means that we added fewer
+                * items to the worklist than we discarded because the fragment
+                * list ran out of items.  Therefore, we cannot maintain the
+                * required refcount.  Something is wrong, so we're done.
+                */
+               if (nr)
+                       goto done;
+
+               rbno = next_rbno;
+       }
+
+       /*
+        * Make sure the last extent we processed ends at or beyond
+        * the end of the refcount extent.
+        */
+       if (rbno < refchk->bno + refchk->len)
+               goto done;
+
+       /* Actually record us having seen the remaining refcount. */
+       refchk->seen = refchk->refcount;
+done:
+       /* Delete fragments and work list. */
+       list_for_each_entry_safe(frag, n, &worklist, list) {
+               list_del(&frag->list);
+               kmem_free(frag);
+       }
+       list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+               list_del(&frag->list);
+               kmem_free(frag);
+       }
+}
+
+/* Use the rmap entries covering this extent to verify the refcount. */
+STATIC void
+xfs_scrub_refcountbt_xref_rmap(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   bno,
+       xfs_extlen_t                    len,
+       xfs_nlink_t                     refcount)
+{
+       struct xfs_scrub_refcnt_check   refchk = {
+               .sc = sc,
+               .bno = bno,
+               .len = len,
+               .refcount = refcount,
+               .seen = 0,
+       };
+       struct xfs_rmap_irec            low;
+       struct xfs_rmap_irec            high;
+       struct xfs_scrub_refcnt_frag    *frag;
+       struct xfs_scrub_refcnt_frag    *n;
+       int                             error;
+
+       if (!sc->sa.rmap_cur)
+               return;
+
+       /* Cross-reference with the rmapbt to confirm the refcount. */
+       memset(&low, 0, sizeof(low));
+       low.rm_startblock = bno;
+       memset(&high, 0xFF, sizeof(high));
+       high.rm_startblock = bno + len - 1;
+
+       INIT_LIST_HEAD(&refchk.fragments);
+       error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
+                       &xfs_scrub_refcountbt_rmap_check, &refchk);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+               goto out_free;
+
+       xfs_scrub_refcountbt_process_rmap_fragments(&refchk);
+       if (refcount != refchk.seen)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+out_free:
+       list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
+               list_del(&frag->list);
+               kmem_free(frag);
+       }
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_refcountbt_xref(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len,
+       xfs_nlink_t                     refcount)
+{
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       xfs_scrub_xref_is_used_space(sc, agbno, len);
+       xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+       xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount);
+}
+
 /* Scrub a refcountbt record. */
 STATIC int
 xfs_scrub_refcountbt_rec(
@@ -57,6 +359,7 @@ xfs_scrub_refcountbt_rec(
        union xfs_btree_rec             *rec)
 {
        struct xfs_mount                *mp = bs->cur->bc_mp;
+       xfs_agblock_t                   *cow_blocks = bs->private;
        xfs_agnumber_t                  agno = bs->cur->bc_private.a.agno;
        xfs_agblock_t                   bno;
        xfs_extlen_t                    len;
@@ -72,6 +375,8 @@ xfs_scrub_refcountbt_rec(
        has_cowflag = (bno & XFS_REFC_COW_START);
        if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
                xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+       if (has_cowflag)
+               (*cow_blocks) += len;
 
        /* Check the extent. */
        bno &= ~XFS_REFC_COW_START;
@@ -83,17 +388,128 @@ xfs_scrub_refcountbt_rec(
        if (refcount == 0)
                xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
 
+       xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount);
+
        return error;
 }
 
+/* Make sure we have as many refc blocks as the rmap says. */
+STATIC void
+xfs_scrub_refcount_xref_rmap(
+       struct xfs_scrub_context        *sc,
+       struct xfs_owner_info           *oinfo,
+       xfs_filblks_t                   cow_blocks)
+{
+       xfs_extlen_t                    refcbt_blocks = 0;
+       xfs_filblks_t                   blocks;
+       int                             error;
+
+       if (!sc->sa.rmap_cur)
+               return;
+
+       /* Check that we saw as many refcbt blocks as the rmap knows about. */
+       error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
+       if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
+               return;
+       error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+                       &blocks);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+               return;
+       if (blocks != refcbt_blocks)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+       /* Check that we saw as many cow blocks as the rmap knows about. */
+       xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
+       error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+                       &blocks);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+               return;
+       if (blocks != cow_blocks)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
+
 /* Scrub the refcount btree for some AG. */
 int
 xfs_scrub_refcountbt(
        struct xfs_scrub_context        *sc)
 {
        struct xfs_owner_info           oinfo;
+       xfs_agblock_t                   cow_blocks = 0;
+       int                             error;
 
        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
-       return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
-                       &oinfo, NULL);
+       error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
+                       &oinfo, &cow_blocks);
+       if (error)
+               return error;
+
+       xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);
+
+       return 0;
+}
+
+/* xref check that a cow staging extent is marked in the refcountbt. */
+void
+xfs_scrub_xref_is_cow_staging(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len)
+{
+       struct xfs_refcount_irec        rc;
+       bool                            has_cowflag;
+       int                             has_refcount;
+       int                             error;
+
+       if (!sc->sa.refc_cur)
+               return;
+
+       /* Find the CoW staging extent. */
+       error = xfs_refcount_lookup_le(sc->sa.refc_cur,
+                       agbno + XFS_REFC_COW_START, &has_refcount);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+               return;
+       if (!has_refcount) {
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+               return;
+       }
+
+       error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+               return;
+       if (!has_refcount) {
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+               return;
+       }
+
+       /* CoW flag must be set, refcount must be 1. */
+       has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
+       if (!has_cowflag || rc.rc_refcount != 1)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+
+       /* Must be at least as long as what was passed in */
+       if (rc.rc_blockcount < len)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+}
+
+/*
+ * xref check that the extent is not shared.  Only file data blocks
+ * can have multiple owners.
+ */
+void
+xfs_scrub_xref_is_not_shared(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len)
+{
+       bool                            shared;
+       int                             error;
+
+       if (!sc->sa.refc_cur)
+               return;
+
+       error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+               return;
+       if (shared)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
 }
index 97846c4246907b325b8f4940d3501ef1c0195058..8f2a7c3ff455181b01aac1d6620b623af3c6e865 100644 (file)
@@ -32,6 +32,7 @@
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_rmap.h"
+#include "xfs_refcount.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
@@ -51,6 +52,61 @@ xfs_scrub_setup_ag_rmapbt(
 
 /* Reverse-mapping scrubber. */
 
+/* Cross-reference a rmap against the refcount btree. */
+STATIC void
+xfs_scrub_rmapbt_xref_refc(
+       struct xfs_scrub_context        *sc,
+       struct xfs_rmap_irec            *irec)
+{
+       xfs_agblock_t                   fbno;
+       xfs_extlen_t                    flen;
+       bool                            non_inode;
+       bool                            is_bmbt;
+       bool                            is_attr;
+       bool                            is_unwritten;
+       int                             error;
+
+       if (!sc->sa.refc_cur)
+               return;
+
+       non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
+       is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
+       is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
+       is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;
+
+       /* If this is shared, must be a data fork extent. */
+       error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock,
+                       irec->rm_blockcount, &fbno, &flen, false);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+               return;
+       if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten))
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_rmapbt_xref(
+       struct xfs_scrub_context        *sc,
+       struct xfs_rmap_irec            *irec)
+{
+       xfs_agblock_t                   agbno = irec->rm_startblock;
+       xfs_extlen_t                    len = irec->rm_blockcount;
+
+       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               return;
+
+       xfs_scrub_xref_is_used_space(sc, agbno, len);
+       if (irec->rm_owner == XFS_RMAP_OWN_INODES)
+               xfs_scrub_xref_is_inode_chunk(sc, agbno, len);
+       else
+               xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+       if (irec->rm_owner == XFS_RMAP_OWN_COW)
+               xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock,
+                               irec->rm_blockcount);
+       else
+               xfs_scrub_rmapbt_xref_refc(sc, irec);
+}
+
 /* Scrub an rmapbt record. */
 STATIC int
 xfs_scrub_rmapbt_rec(
@@ -121,6 +177,8 @@ xfs_scrub_rmapbt_rec(
                    irec.rm_owner > XFS_RMAP_OWN_FS)
                        xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
        }
+
+       xfs_scrub_rmapbt_xref(bs->sc, &irec);
 out:
        return error;
 }
@@ -136,3 +194,68 @@ xfs_scrub_rmapbt(
        return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec,
                        &oinfo, NULL);
 }
+
+/* xref check that the extent is (or isn't) owned by a given owner */
+static inline void
+xfs_scrub_xref_check_owner(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   bno,
+       xfs_extlen_t                    len,
+       struct xfs_owner_info           *oinfo,
+       bool                            should_have_rmap)
+{
+       bool                            has_rmap;
+       int                             error;
+
+       if (!sc->sa.rmap_cur)
+               return;
+
+       error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo,
+                       &has_rmap);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+               return;
+       if (has_rmap != should_have_rmap)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
+
+/* xref check that the extent is owned by a given owner */
+void
+xfs_scrub_xref_is_owned_by(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   bno,
+       xfs_extlen_t                    len,
+       struct xfs_owner_info           *oinfo)
+{
+       xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true);
+}
+
+/* xref check that the extent is not owned by a given owner */
+void
+xfs_scrub_xref_is_not_owned_by(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   bno,
+       xfs_extlen_t                    len,
+       struct xfs_owner_info           *oinfo)
+{
+       xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false);
+}
+
+/* xref check that the extent has no reverse mapping at all */
+void
+xfs_scrub_xref_has_no_owner(
+       struct xfs_scrub_context        *sc,
+       xfs_agblock_t                   bno,
+       xfs_extlen_t                    len)
+{
+       bool                            has_rmap;
+       int                             error;
+
+       if (!sc->sa.rmap_cur)
+               return;
+
+       error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap);
+       if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+               return;
+       if (has_rmap)
+               xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
index c6fedb698008146b6842a511d49c6f7dbb1f9eb2..26390991369ad7427aa33466c28614aa1dc1b7c5 100644 (file)
@@ -43,22 +43,14 @@ xfs_scrub_setup_rt(
        struct xfs_scrub_context        *sc,
        struct xfs_inode                *ip)
 {
-       struct xfs_mount                *mp = sc->mp;
-       int                             error = 0;
-
-       /*
-        * If userspace gave us an AG number or inode data, they don't
-        * know what they're doing.  Get out.
-        */
-       if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
-               return -EINVAL;
+       int                             error;
 
        error = xfs_scrub_setup_fs(sc, ip);
        if (error)
                return error;
 
        sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP;
-       sc->ip = mp->m_rbmip;
+       sc->ip = sc->mp->m_rbmip;
        xfs_ilock(sc->ip, sc->ilock_flags);
 
        return 0;
@@ -106,3 +98,26 @@ xfs_scrub_rtsummary(
        /* XXX: implement this some day */
        return -ENOENT;
 }
+
+
+/* xref check that the extent is not free in the rtbitmap */
+void
+xfs_scrub_xref_is_used_rt_space(
+       struct xfs_scrub_context        *sc,
+       xfs_rtblock_t                   fsbno,
+       xfs_extlen_t                    len)
+{
+       bool                            is_free;
+       int                             error;
+
+       xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+       error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len,
+                       &is_free);
+       if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+               goto out_unlock;
+       if (is_free)
+               xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino,
+                               NULL);
+out_unlock:
+       xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+}
index ab3aef2ae8233350f42647b77c0dff833521fe2f..26c75967a0724f23966c8b80df7f06455193c6e4 100644 (file)
  * structure itself is corrupt, the CORRUPT flag will be set.  If
  * the metadata is correct but otherwise suboptimal, the PREEN flag
  * will be set.
+ *
+ * We perform secondary validation of filesystem metadata by
+ * cross-referencing every record with all other available metadata.
+ * For example, for block mapping extents, we verify that there are no
+ * records in the free space and inode btrees corresponding to that
+ * space extent and that there is a corresponding entry in the reverse
+ * mapping btree.  Inconsistent metadata is noted by setting the
+ * XCORRUPT flag; btree query function errors are noted by setting the
+ * XFAIL flag and deleting the cursor to prevent further attempts to
+ * cross-reference with a defective btree.
  */
 
 /*
@@ -128,8 +138,6 @@ xfs_scrub_probe(
 {
        int                             error = 0;
 
-       if (sc->sm->sm_ino || sc->sm->sm_agno)
-               return -EINVAL;
        if (xfs_scrub_should_terminate(sc, &error))
                return error;
 
@@ -151,7 +159,8 @@ xfs_scrub_teardown(
                sc->tp = NULL;
        }
        if (sc->ip) {
-               xfs_iunlock(sc->ip, sc->ilock_flags);
+               if (sc->ilock_flags)
+                       xfs_iunlock(sc->ip, sc->ilock_flags);
                if (sc->ip != ip_in &&
                    !xfs_internal_inum(sc->mp, sc->ip->i_ino))
                        iput(VFS_I(sc->ip));
@@ -167,106 +176,130 @@ xfs_scrub_teardown(
 /* Scrubbing dispatch. */
 
 static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
-       { /* ioctl presence test */
+       [XFS_SCRUB_TYPE_PROBE] = {      /* ioctl presence test */
+               .type   = ST_NONE,
                .setup  = xfs_scrub_setup_fs,
                .scrub  = xfs_scrub_probe,
        },
-       { /* superblock */
-               .setup  = xfs_scrub_setup_ag_header,
+       [XFS_SCRUB_TYPE_SB] = {         /* superblock */
+               .type   = ST_PERAG,
+               .setup  = xfs_scrub_setup_fs,
                .scrub  = xfs_scrub_superblock,
        },
-       { /* agf */
-               .setup  = xfs_scrub_setup_ag_header,
+       [XFS_SCRUB_TYPE_AGF] = {        /* agf */
+               .type   = ST_PERAG,
+               .setup  = xfs_scrub_setup_fs,
                .scrub  = xfs_scrub_agf,
        },
-       { /* agfl */
-               .setup  = xfs_scrub_setup_ag_header,
+       [XFS_SCRUB_TYPE_AGFL]= {        /* agfl */
+               .type   = ST_PERAG,
+               .setup  = xfs_scrub_setup_fs,
                .scrub  = xfs_scrub_agfl,
        },
-       { /* agi */
-               .setup  = xfs_scrub_setup_ag_header,
+       [XFS_SCRUB_TYPE_AGI] = {        /* agi */
+               .type   = ST_PERAG,
+               .setup  = xfs_scrub_setup_fs,
                .scrub  = xfs_scrub_agi,
        },
-       { /* bnobt */
+       [XFS_SCRUB_TYPE_BNOBT] = {      /* bnobt */
+               .type   = ST_PERAG,
                .setup  = xfs_scrub_setup_ag_allocbt,
                .scrub  = xfs_scrub_bnobt,
        },
-       { /* cntbt */
+       [XFS_SCRUB_TYPE_CNTBT] = {      /* cntbt */
+               .type   = ST_PERAG,
                .setup  = xfs_scrub_setup_ag_allocbt,
                .scrub  = xfs_scrub_cntbt,
        },
-       { /* inobt */
+       [XFS_SCRUB_TYPE_INOBT] = {      /* inobt */
+               .type   = ST_PERAG,
                .setup  = xfs_scrub_setup_ag_iallocbt,
                .scrub  = xfs_scrub_inobt,
        },
-       { /* finobt */
+       [XFS_SCRUB_TYPE_FINOBT] = {     /* finobt */
+               .type   = ST_PERAG,
                .setup  = xfs_scrub_setup_ag_iallocbt,
                .scrub  = xfs_scrub_finobt,
                .has    = xfs_sb_version_hasfinobt,
        },
-       { /* rmapbt */
+       [XFS_SCRUB_TYPE_RMAPBT] = {     /* rmapbt */
+               .type   = ST_PERAG,
                .setup  = xfs_scrub_setup_ag_rmapbt,
                .scrub  = xfs_scrub_rmapbt,
                .has    = xfs_sb_version_hasrmapbt,
        },
-       { /* refcountbt */
+       [XFS_SCRUB_TYPE_REFCNTBT] = {   /* refcountbt */
+               .type   = ST_PERAG,
                .setup  = xfs_scrub_setup_ag_refcountbt,
                .scrub  = xfs_scrub_refcountbt,
                .has    = xfs_sb_version_hasreflink,
        },
-       { /* inode record */
+       [XFS_SCRUB_TYPE_INODE] = {      /* inode record */
+               .type   = ST_INODE,
                .setup  = xfs_scrub_setup_inode,
                .scrub  = xfs_scrub_inode,
        },
-       { /* inode data fork */
+       [XFS_SCRUB_TYPE_BMBTD] = {      /* inode data fork */
+               .type   = ST_INODE,
                .setup  = xfs_scrub_setup_inode_bmap,
                .scrub  = xfs_scrub_bmap_data,
        },
-       { /* inode attr fork */
+       [XFS_SCRUB_TYPE_BMBTA] = {      /* inode attr fork */
+               .type   = ST_INODE,
                .setup  = xfs_scrub_setup_inode_bmap,
                .scrub  = xfs_scrub_bmap_attr,
        },
-       { /* inode CoW fork */
+       [XFS_SCRUB_TYPE_BMBTC] = {      /* inode CoW fork */
+               .type   = ST_INODE,
                .setup  = xfs_scrub_setup_inode_bmap,
                .scrub  = xfs_scrub_bmap_cow,
        },
-       { /* directory */
+       [XFS_SCRUB_TYPE_DIR] = {        /* directory */
+               .type   = ST_INODE,
                .setup  = xfs_scrub_setup_directory,
                .scrub  = xfs_scrub_directory,
        },
-       { /* extended attributes */
+       [XFS_SCRUB_TYPE_XATTR] = {      /* extended attributes */
+               .type   = ST_INODE,
                .setup  = xfs_scrub_setup_xattr,
                .scrub  = xfs_scrub_xattr,
        },
-       { /* symbolic link */
+       [XFS_SCRUB_TYPE_SYMLINK] = {    /* symbolic link */
+               .type   = ST_INODE,
                .setup  = xfs_scrub_setup_symlink,
                .scrub  = xfs_scrub_symlink,
        },
-       { /* parent pointers */
+       [XFS_SCRUB_TYPE_PARENT] = {     /* parent pointers */
+               .type   = ST_INODE,
                .setup  = xfs_scrub_setup_parent,
                .scrub  = xfs_scrub_parent,
        },
-       { /* realtime bitmap */
+       [XFS_SCRUB_TYPE_RTBITMAP] = {   /* realtime bitmap */
+               .type   = ST_FS,
                .setup  = xfs_scrub_setup_rt,
                .scrub  = xfs_scrub_rtbitmap,
                .has    = xfs_sb_version_hasrealtime,
        },
-       { /* realtime summary */
+       [XFS_SCRUB_TYPE_RTSUM] = {      /* realtime summary */
+               .type   = ST_FS,
                .setup  = xfs_scrub_setup_rt,
                .scrub  = xfs_scrub_rtsummary,
                .has    = xfs_sb_version_hasrealtime,
        },
-       { /* user quota */
-               .setup = xfs_scrub_setup_quota,
-               .scrub = xfs_scrub_quota,
+       [XFS_SCRUB_TYPE_UQUOTA] = {     /* user quota */
+               .type   = ST_FS,
+               .setup  = xfs_scrub_setup_quota,
+               .scrub  = xfs_scrub_quota,
        },
-       { /* group quota */
-               .setup = xfs_scrub_setup_quota,
-               .scrub = xfs_scrub_quota,
+       [XFS_SCRUB_TYPE_GQUOTA] = {     /* group quota */
+               .type   = ST_FS,
+               .setup  = xfs_scrub_setup_quota,
+               .scrub  = xfs_scrub_quota,
        },
-       { /* project quota */
-               .setup = xfs_scrub_setup_quota,
-               .scrub = xfs_scrub_quota,
+       [XFS_SCRUB_TYPE_PQUOTA] = {     /* project quota */
+               .type   = ST_FS,
+               .setup  = xfs_scrub_setup_quota,
+               .scrub  = xfs_scrub_quota,
        },
 };
 
@@ -284,44 +317,56 @@ xfs_scrub_experimental_warning(
 "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
 }
 
-/* Dispatch metadata scrubbing. */
-int
-xfs_scrub_metadata(
-       struct xfs_inode                *ip,
+static int
+xfs_scrub_validate_inputs(
+       struct xfs_mount                *mp,
        struct xfs_scrub_metadata       *sm)
 {
-       struct xfs_scrub_context        sc;
-       struct xfs_mount                *mp = ip->i_mount;
+       int                             error;
        const struct xfs_scrub_meta_ops *ops;
-       bool                            try_harder = false;
-       int                             error = 0;
-
-       trace_xfs_scrub_start(ip, sm, error);
-
-       /* Forbidden if we are shut down or mounted norecovery. */
-       error = -ESHUTDOWN;
-       if (XFS_FORCED_SHUTDOWN(mp))
-               goto out;
-       error = -ENOTRECOVERABLE;
-       if (mp->m_flags & XFS_MOUNT_NORECOVERY)
-               goto out;
 
-       /* Check our inputs. */
        error = -EINVAL;
+       /* Check our inputs. */
        sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
        if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
                goto out;
+       /* sm_reserved[] must be zero */
        if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
                goto out;
 
-       /* Do we know about this type of metadata? */
        error = -ENOENT;
+       /* Do we know about this type of metadata? */
        if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
                goto out;
        ops = &meta_scrub_ops[sm->sm_type];
-       if (ops->scrub == NULL)
+       if (ops->setup == NULL || ops->scrub == NULL)
                goto out;
+       /* Does this fs even support this type of metadata? */
+       if (ops->has && !ops->has(&mp->m_sb))
+               goto out;
+
+       error = -EINVAL;
+       /* restricting fields must be appropriate for type */
+       switch (ops->type) {
+       case ST_NONE:
+       case ST_FS:
+               if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
+                       goto out;
+               break;
+       case ST_PERAG:
+               if (sm->sm_ino || sm->sm_gen ||
+                   sm->sm_agno >= mp->m_sb.sb_agcount)
+                       goto out;
+               break;
+       case ST_INODE:
+               if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
+                       goto out;
+               break;
+       default:
+               goto out;
+       }
 
+       error = -EOPNOTSUPP;
        /*
         * We won't scrub any filesystem that doesn't have the ability
         * to record unwritten extents.  The option was made default in
@@ -331,20 +376,46 @@ xfs_scrub_metadata(
         * We also don't support v1-v3 filesystems, which aren't
         * mountable.
         */
-       error = -EOPNOTSUPP;
        if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
                goto out;
 
-       /* Does this fs even support this type of metadata? */
-       error = -ENOENT;
-       if (ops->has && !ops->has(&mp->m_sb))
-               goto out;
-
        /* We don't know how to repair anything yet. */
-       error = -EOPNOTSUPP;
        if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
                goto out;
 
+       error = 0;
+out:
+       return error;
+}
+
+/* Dispatch metadata scrubbing. */
+int
+xfs_scrub_metadata(
+       struct xfs_inode                *ip,
+       struct xfs_scrub_metadata       *sm)
+{
+       struct xfs_scrub_context        sc;
+       struct xfs_mount                *mp = ip->i_mount;
+       bool                            try_harder = false;
+       int                             error = 0;
+
+       BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
+               (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR));
+
+       trace_xfs_scrub_start(ip, sm, error);
+
+       /* Forbidden if we are shut down or mounted norecovery. */
+       error = -ESHUTDOWN;
+       if (XFS_FORCED_SHUTDOWN(mp))
+               goto out;
+       error = -ENOTRECOVERABLE;
+       if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+               goto out;
+
+       error = xfs_scrub_validate_inputs(mp, sm);
+       if (error)
+               goto out;
+
        xfs_scrub_experimental_warning(mp);
 
 retry_op:
@@ -352,7 +423,7 @@ xfs_scrub_metadata(
        memset(&sc, 0, sizeof(sc));
        sc.mp = ip->i_mount;
        sc.sm = sm;
-       sc.ops = ops;
+       sc.ops = &meta_scrub_ops[sm->sm_type];
        sc.try_harder = try_harder;
        sc.sa.agno = NULLAGNUMBER;
        error = sc.ops->setup(&sc, ip);
index e9ec041cf71374cfc80bec221089f4321e93a95d..0d92af86f67ae6a0343ac001f5ed160dcf6f24f2 100644 (file)
 
 struct xfs_scrub_context;
 
+/* Input class of a scrub type: which of agno/ino/gen it may be given. */
+enum xfs_scrub_type {
+       ST_NONE = 1,    /* takes no agno/ino/gen (probe) */
+       ST_PERAG,       /* per-AG metadata */
+       ST_FS,          /* per-FS metadata */
+       ST_INODE,       /* per-inode metadata */
+};
+
 struct xfs_scrub_meta_ops {
        /* Acquire whatever resources are needed for the operation. */
        int             (*setup)(struct xfs_scrub_context *,
@@ -32,6 +40,9 @@ struct xfs_scrub_meta_ops {
 
        /* Decide if we even have this piece of metadata. */
        bool            (*has)(struct xfs_sb *);
+
+       /* type describing required/allowed inputs */
+       enum xfs_scrub_type     type;
 };
 
 /* Buffer pointers and btree cursors for an entire AG. */
@@ -112,4 +123,30 @@ xfs_scrub_quota(struct xfs_scrub_context *sc)
 }
 #endif
 
+/* cross-referencing helpers */
+void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc,
+               xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc,
+               xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc,
+               xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc,
+               xfs_agblock_t agbno, xfs_extlen_t len,
+               struct xfs_owner_info *oinfo);
+void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc,
+               xfs_agblock_t agbno, xfs_extlen_t len,
+               struct xfs_owner_info *oinfo);
+void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc,
+               xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc,
+               xfs_agblock_t bno, xfs_extlen_t len);
+void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc,
+               xfs_agblock_t bno, xfs_extlen_t len);
+#ifdef CONFIG_XFS_RT
+void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc,
+               xfs_rtblock_t rtbno, xfs_extlen_t len);
+#else
+# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
+#endif
+
 #endif /* __XFS_SCRUB_SCRUB_H__ */
index c4ebfb5c1ee8c241a84c20b8ca0f18777f0f2324..4dc896852bf024470ac25a049e89abb50d04622f 100644 (file)
@@ -50,7 +50,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
                __entry->flags = sm->sm_flags;
                __entry->error = error;
        ),
-       TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d",
+       TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->type,
@@ -90,7 +90,7 @@ TRACE_EVENT(xfs_scrub_op_error,
                __entry->error = error;
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF",
+       TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->type,
                  __entry->agno,
@@ -121,7 +121,7 @@ TRACE_EVENT(xfs_scrub_file_op_error,
                __entry->error = error;
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF",
+       TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->whichfork,
@@ -156,7 +156,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
                __entry->bno = bno;
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF",
+       TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->type,
                  __entry->agno,
@@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
                __entry->bno = bno;
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF",
+       TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->type,
@@ -246,7 +246,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class,
                __entry->offset = offset;
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF",
+       TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->whichfork,
@@ -277,7 +277,7 @@ TRACE_EVENT(xfs_scrub_incomplete,
                __entry->type = sc->sm->sm_type;
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d type %u ret_ip %pF",
+       TP_printk("dev %d:%d type %u ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->type,
                  __entry->ret_ip)
@@ -311,7 +311,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error,
                __entry->error = error;
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
+       TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->type,
                  __entry->btnum,
@@ -354,7 +354,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
                __entry->error = error;
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
+       TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->whichfork,
@@ -393,7 +393,7 @@ TRACE_EVENT(xfs_scrub_btree_error,
                __entry->ptr = cur->bc_ptrs[level];
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
+       TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->type,
                  __entry->btnum,
@@ -433,7 +433,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error,
                __entry->ptr = cur->bc_ptrs[level];
                __entry->ret_ip = ret_ip;
        ),
-       TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
+       TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->whichfork,
@@ -491,6 +491,28 @@ DEFINE_EVENT(xfs_scrub_sbtree_class, name, \
 DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec);
 DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key);
 
+TRACE_EVENT(xfs_scrub_xref_error,
+       TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip),
+       TP_ARGS(sc, error, ret_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, type)
+               __field(int, error)
+               __field(void *, ret_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = sc->mp->m_super->s_dev;
+               __entry->type = sc->sm->sm_type;
+               __entry->error = error;
+               __entry->ret_ip = ret_ip;
+       ),
+       TP_printk("dev %d:%d type %u xref error %d ret_ip %pS",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->type,
+                 __entry->error,
+                 __entry->ret_ip)
+);
+
 #endif /* _TRACE_XFS_SCRUB_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
index 4fc526a27a94fe4594508b707f0a9ca976c1dd98..9c6a830da0eec4663c04b5c58606962139598890 100644 (file)
@@ -390,6 +390,19 @@ xfs_map_blocks(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
+       /*
+        * Truncate can race with writeback since writeback doesn't take the
+        * iolock and truncate decreases the file size before it starts
+        * truncating the pages between new_size and old_size.  Therefore, we
+        * can end up in the situation where writeback gets a CoW fork mapping
+        * but the truncate makes the mapping invalid and we end up in here
+        * trying to get a new mapping.  Bail out here so that we simply never
+        * get a valid mapping and so we drop the write altogether.  The page
+        * truncation will kill the contents anyway.
+        */
+       if (type == XFS_IO_COW && offset > i_size_read(inode))
+               return 0;
+
        ASSERT(type != XFS_IO_COW);
        if (type == XFS_IO_UNWRITTEN)
                bmapi_flags |= XFS_BMAPI_IGSTATE;
@@ -791,7 +804,7 @@ xfs_aops_discard_page(
                goto out_invalidate;
 
        xfs_alert(ip->i_mount,
-               "page discard on page %p, inode 0x%llx, offset %llu.",
+               "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
                        page, ip->i_ino, offset);
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
index 6d37ab43195f6d4bb04812e8b7c90bd322ce4820..c83f549dc17b9dcf9e22631e099a23b2d377b7da 100644 (file)
@@ -1872,7 +1872,7 @@ xfs_swap_extents(
         */
        lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
        lock_flags = XFS_MMAPLOCK_EXCL;
-       xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
+       xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL);
 
        /* Verify that both files have the same format */
        if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
@@ -1919,7 +1919,7 @@ xfs_swap_extents(
         * Lock and join the inodes to the transaction so that transaction commit
         * or cancel will unlock the inodes from this point onwards.
         */
-       xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
+       xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
        lock_flags |= XFS_ILOCK_EXCL;
        xfs_trans_ijoin(tp, ip, 0);
        xfs_trans_ijoin(tp, tip, 0);
index 4c6e86d861fda1a452dbeb20c1d3d64018627a20..d1da2ee9e6dbced92e2aaaddb0ae635f5b4d9bab 100644 (file)
@@ -236,6 +236,7 @@ _xfs_buf_alloc(
        init_completion(&bp->b_iowait);
        INIT_LIST_HEAD(&bp->b_lru);
        INIT_LIST_HEAD(&bp->b_list);
+       INIT_LIST_HEAD(&bp->b_li_list);
        sema_init(&bp->b_sema, 0); /* held, no waiters */
        spin_lock_init(&bp->b_lock);
        XB_SET_OWNER(bp);
@@ -585,7 +586,7 @@ _xfs_buf_find(
                 * returning a specific error on buffer lookup failures.
                 */
                xfs_alert(btp->bt_mount,
-                         "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
+                         "%s: daddr 0x%llx out of range, EOFS 0x%llx",
                          __func__, cmap.bm_bn, eofs);
                WARN_ON(1);
                return NULL;
@@ -1180,13 +1181,14 @@ xfs_buf_ioend_async(
 }
 
 void
-xfs_buf_ioerror(
+__xfs_buf_ioerror(
        xfs_buf_t               *bp,
-       int                     error)
+       int                     error,
+       xfs_failaddr_t          failaddr)
 {
        ASSERT(error <= 0 && error >= -1000);
        bp->b_error = error;
-       trace_xfs_buf_ioerror(bp, error, _RET_IP_);
+       trace_xfs_buf_ioerror(bp, error, failaddr);
 }
 
 void
@@ -1195,8 +1197,9 @@ xfs_buf_ioerror_alert(
        const char              *func)
 {
        xfs_alert(bp->b_target->bt_mount,
-"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
-               (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
+"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
+                       func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
+                       -bp->b_error);
 }
 
 int
@@ -1378,9 +1381,10 @@ _xfs_buf_ioapply(
                         */
                        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                                xfs_warn(mp,
-                                       "%s: no ops on block 0x%llx/0x%x",
+                                       "%s: no buf ops on daddr 0x%llx len %d",
                                        __func__, bp->b_bn, bp->b_length);
-                               xfs_hex_dump(bp->b_addr, 64);
+                               xfs_hex_dump(bp->b_addr,
+                                               XFS_CORRUPTION_DUMP_LEN);
                                dump_stack();
                        }
                }
@@ -1671,7 +1675,7 @@ xfs_wait_buftarg(
                        list_del_init(&bp->b_lru);
                        if (bp->b_flags & XBF_WRITE_FAIL) {
                                xfs_alert(btp->bt_mount,
-"Corruption Alert: Buffer at block 0x%llx had permanent write failures!",
+"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
                                        (long long)bp->b_bn);
                                xfs_alert(btp->bt_mount,
 "Please run xfs_repair to determine the extent of the problem.");
index f873bb786824280f01eeb90bf6e891f5d5ae7165..2f4c91452861953cb5a4953f66d7f4d36e50bb50 100644 (file)
@@ -140,6 +140,7 @@ struct xfs_buf_ops {
        char *name;
        void (*verify_read)(struct xfs_buf *);
        void (*verify_write)(struct xfs_buf *);
+       xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp);
 };
 
 typedef struct xfs_buf {
@@ -175,7 +176,8 @@ typedef struct xfs_buf {
        struct workqueue_struct *b_ioend_wq;    /* I/O completion wq */
        xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
        struct completion       b_iowait;       /* queue for I/O waiters */
-       void                    *b_fspriv;
+       void                    *b_log_item;
+       struct list_head        b_li_list;      /* Log items list head */
        struct xfs_trans        *b_transp;
        struct page             **b_pages;      /* array of page pointers */
        struct page             *b_page_array[XB_PAGES]; /* inline pages */
@@ -315,7 +317,9 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 /* Buffer Read and Write Routines */
 extern int xfs_bwrite(struct xfs_buf *bp);
 extern void xfs_buf_ioend(struct xfs_buf *bp);
-extern void xfs_buf_ioerror(xfs_buf_t *, int);
+extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
+               xfs_failaddr_t failaddr);
+#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
 extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
 extern void xfs_buf_submit(struct xfs_buf *bp);
 extern int xfs_buf_submit_wait(struct xfs_buf *bp);
index e0a0af0946f23bd84944256141967526044b2468..270ddb4d23131be0ffa19ea8a52a6c42afcd1d59 100644 (file)
@@ -61,14 +61,14 @@ xfs_buf_log_format_size(
  */
 STATIC void
 xfs_buf_item_size_segment(
-       struct xfs_buf_log_item *bip,
-       struct xfs_buf_log_format *blfp,
-       int                     *nvecs,
-       int                     *nbytes)
+       struct xfs_buf_log_item         *bip,
+       struct xfs_buf_log_format       *blfp,
+       int                             *nvecs,
+       int                             *nbytes)
 {
-       struct xfs_buf          *bp = bip->bli_buf;
-       int                     next_bit;
-       int                     last_bit;
+       struct xfs_buf                  *bp = bip->bli_buf;
+       int                             next_bit;
+       int                             last_bit;
 
        last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
        if (last_bit == -1)
@@ -218,12 +218,12 @@ xfs_buf_item_format_segment(
        uint                    offset,
        struct xfs_buf_log_format *blfp)
 {
-       struct xfs_buf  *bp = bip->bli_buf;
-       uint            base_size;
-       int             first_bit;
-       int             last_bit;
-       int             next_bit;
-       uint            nbits;
+       struct xfs_buf          *bp = bip->bli_buf;
+       uint                    base_size;
+       int                     first_bit;
+       int                     last_bit;
+       int                     next_bit;
+       uint                    nbits;
 
        /* copy the flags across from the base format item */
        blfp->blf_flags = bip->__bli_format.blf_flags;
@@ -406,12 +406,12 @@ xfs_buf_item_unpin(
        int                     remove)
 {
        struct xfs_buf_log_item *bip = BUF_ITEM(lip);
-       xfs_buf_t       *bp = bip->bli_buf;
-       struct xfs_ail  *ailp = lip->li_ailp;
-       int             stale = bip->bli_flags & XFS_BLI_STALE;
-       int             freed;
+       xfs_buf_t               *bp = bip->bli_buf;
+       struct xfs_ail          *ailp = lip->li_ailp;
+       int                     stale = bip->bli_flags & XFS_BLI_STALE;
+       int                     freed;
 
-       ASSERT(bp->b_fspriv == bip);
+       ASSERT(bp->b_log_item == bip);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
        trace_xfs_buf_item_unpin(bip);
@@ -456,13 +456,14 @@ xfs_buf_item_unpin(
                 */
                if (bip->bli_flags & XFS_BLI_STALE_INODE) {
                        xfs_buf_do_callbacks(bp);
-                       bp->b_fspriv = NULL;
+                       bp->b_log_item = NULL;
+                       list_del_init(&bp->b_li_list);
                        bp->b_iodone = NULL;
                } else {
                        spin_lock(&ailp->xa_lock);
                        xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
                        xfs_buf_item_relse(bp);
-                       ASSERT(bp->b_fspriv == NULL);
+                       ASSERT(bp->b_log_item == NULL);
                }
                xfs_buf_relse(bp);
        } else if (freed && remove) {
@@ -722,18 +723,15 @@ xfs_buf_item_free_format(
 
 /*
  * Allocate a new buf log item to go with the given buffer.
- * Set the buffer's b_fsprivate field to point to the new
- * buf log item.  If there are other item's attached to the
- * buffer (see xfs_buf_attach_iodone() below), then put the
- * buf log item at the front.
+ * Set the buffer's b_log_item field to point to the new
+ * buf log item.
  */
 int
 xfs_buf_item_init(
        struct xfs_buf  *bp,
        struct xfs_mount *mp)
 {
-       struct xfs_log_item     *lip = bp->b_fspriv;
-       struct xfs_buf_log_item *bip;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        int                     chunks;
        int                     map_size;
        int                     error;
@@ -741,13 +739,14 @@ xfs_buf_item_init(
 
        /*
         * Check to see if there is already a buf log item for
-        * this buffer.  If there is, it is guaranteed to be
-        * the first.  If we do already have one, there is
+        * this buffer. If we do already have one, there is
         * nothing to do here so return.
         */
        ASSERT(bp->b_target->bt_mount == mp);
-       if (lip != NULL && lip->li_type == XFS_LI_BUF)
+       if (bip != NULL) {
+               ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
                return 0;
+       }
 
        bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
        xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
@@ -781,13 +780,7 @@ xfs_buf_item_init(
                bip->bli_formats[i].blf_map_size = map_size;
        }
 
-       /*
-        * Put the buf item into the list of items attached to the
-        * buffer at the front.
-        */
-       if (bp->b_fspriv)
-               bip->bli_item.li_bio_list = bp->b_fspriv;
-       bp->b_fspriv = bip;
+       bp->b_log_item = bip;
        xfs_buf_hold(bp);
        return 0;
 }
@@ -880,7 +873,7 @@ xfs_buf_item_log_segment(
  */
 void
 xfs_buf_item_log(
-       xfs_buf_log_item_t      *bip,
+       struct xfs_buf_log_item *bip,
        uint                    first,
        uint                    last)
 {
@@ -943,7 +936,7 @@ xfs_buf_item_dirty_format(
 
 STATIC void
 xfs_buf_item_free(
-       xfs_buf_log_item_t      *bip)
+       struct xfs_buf_log_item *bip)
 {
        xfs_buf_item_free_format(bip);
        kmem_free(bip->bli_item.li_lv_shadow);
@@ -961,13 +954,13 @@ void
 xfs_buf_item_relse(
        xfs_buf_t       *bp)
 {
-       xfs_buf_log_item_t      *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        trace_xfs_buf_item_relse(bp, _RET_IP_);
        ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
 
-       bp->b_fspriv = bip->bli_item.li_bio_list;
-       if (bp->b_fspriv == NULL)
+       bp->b_log_item = NULL;
+       if (list_empty(&bp->b_li_list))
                bp->b_iodone = NULL;
 
        xfs_buf_rele(bp);
@@ -980,9 +973,7 @@ xfs_buf_item_relse(
  * to be called when the buffer's I/O completes.  If it is not set
  * already, set the buffer's b_iodone() routine to be
  * xfs_buf_iodone_callbacks() and link the log item into the list of
- * items rooted at b_fsprivate.  Items are always added as the second
- * entry in the list if there is a first, because the buf item code
- * assumes that the buf log item is first.
+ * items rooted at b_li_list.
  */
 void
 xfs_buf_attach_iodone(
@@ -990,18 +981,10 @@ xfs_buf_attach_iodone(
        void            (*cb)(xfs_buf_t *, xfs_log_item_t *),
        xfs_log_item_t  *lip)
 {
-       xfs_log_item_t  *head_lip;
-
        ASSERT(xfs_buf_islocked(bp));
 
        lip->li_cb = cb;
-       head_lip = bp->b_fspriv;
-       if (head_lip) {
-               lip->li_bio_list = head_lip->li_bio_list;
-               head_lip->li_bio_list = lip;
-       } else {
-               bp->b_fspriv = lip;
-       }
+       list_add_tail(&lip->li_bio_list, &bp->b_li_list);
 
        ASSERT(bp->b_iodone == NULL ||
               bp->b_iodone == xfs_buf_iodone_callbacks);
@@ -1011,12 +994,12 @@ xfs_buf_attach_iodone(
 /*
  * We can have many callbacks on a buffer. Running the callbacks individually
  * can cause a lot of contention on the AIL lock, so we allow for a single
- * callback to be able to scan the remaining lip->li_bio_list for other items
- * of the same type and callback to be processed in the first call.
+ * callback to be able to scan the remaining items in bp->b_li_list for other
+ * items of the same type and callback to be processed in the first call.
  *
  * As a result, the loop walking the callback list below will also modify the
  * list. it removes the first item from the list and then runs the callback.
- * The loop then restarts from the new head of the list. This allows the
+ * The loop then restarts from the new first item in the list. This allows the
  * callback to scan and modify the list attached to the buffer and we don't
  * have to care about maintaining a next item pointer.
  */
@@ -1024,18 +1007,26 @@ STATIC void
 xfs_buf_do_callbacks(
        struct xfs_buf          *bp)
 {
+       struct xfs_buf_log_item *blip = bp->b_log_item;
        struct xfs_log_item     *lip;
 
-       while ((lip = bp->b_fspriv) != NULL) {
-               bp->b_fspriv = lip->li_bio_list;
-               ASSERT(lip->li_cb != NULL);
+       /* If there is a buf_log_item attached, run its callback */
+       if (blip) {
+               lip = &blip->bli_item;
+               lip->li_cb(bp, lip);
+       }
+
+       while (!list_empty(&bp->b_li_list)) {
+               lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
+                                      li_bio_list);
+
                /*
-                * Clear the next pointer so we don't have any
+                * Remove the item from the list, so we don't have any
                 * confusion if the item is added to another buf.
                 * Don't touch the log item after calling its
                 * callback, because it could have freed itself.
                 */
-               lip->li_bio_list = NULL;
+               list_del_init(&lip->li_bio_list);
                lip->li_cb(bp, lip);
        }
 }
@@ -1052,13 +1043,22 @@ STATIC void
 xfs_buf_do_callbacks_fail(
        struct xfs_buf          *bp)
 {
-       struct xfs_log_item     *next;
-       struct xfs_log_item     *lip = bp->b_fspriv;
-       struct xfs_ail          *ailp = lip->li_ailp;
+       struct xfs_log_item     *lip;
+       struct xfs_ail          *ailp;
 
+       /*
+        * Buffer log item errors are handled directly by xfs_buf_item_push()
+        * and xfs_buf_iodone_callback_error(), and they have no IO error
+        * callbacks. Check only for items in b_li_list.
+        */
+       if (list_empty(&bp->b_li_list))
+               return;
+
+       lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
+                       li_bio_list);
+       ailp = lip->li_ailp;
        spin_lock(&ailp->xa_lock);
-       for (; lip; lip = next) {
-               next = lip->li_bio_list;
+       list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
                if (lip->li_ops->iop_error)
                        lip->li_ops->iop_error(lip, bp);
        }
@@ -1069,12 +1069,22 @@ static bool
 xfs_buf_iodone_callback_error(
        struct xfs_buf          *bp)
 {
-       struct xfs_log_item     *lip = bp->b_fspriv;
-       struct xfs_mount        *mp = lip->li_mountp;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
+       struct xfs_log_item     *lip;
+       struct xfs_mount        *mp;
        static ulong            lasttime;
        static xfs_buftarg_t    *lasttarg;
        struct xfs_error_cfg    *cfg;
 
+       /*
+        * The failed buffer might not have a buf_log_item attached or the
+        * log_item list might be empty. Get the mp from whichever
+        * xfs_log_item is available.
+        */
+       lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item,
+                                      li_bio_list);
+       mp = lip ? lip->li_mountp : bip->bli_item.li_mountp;
+
        /*
         * If we've already decided to shutdown the filesystem because of
         * I/O errors, there's no point in giving this a retry.
@@ -1183,7 +1193,8 @@ xfs_buf_iodone_callbacks(
        bp->b_first_retry_time = 0;
 
        xfs_buf_do_callbacks(bp);
-       bp->b_fspriv = NULL;
+       bp->b_log_item = NULL;
+       list_del_init(&bp->b_li_list);
        bp->b_iodone = NULL;
        xfs_buf_ioend(bp);
 }
@@ -1228,10 +1239,9 @@ xfs_buf_iodone(
 bool
 xfs_buf_resubmit_failed_buffers(
        struct xfs_buf          *bp,
-       struct xfs_log_item     *lip,
        struct list_head        *buffer_list)
 {
-       struct xfs_log_item     *next;
+       struct xfs_log_item     *lip;
 
        /*
         * Clear XFS_LI_FAILED flag from all items before resubmit
@@ -1239,10 +1249,8 @@ xfs_buf_resubmit_failed_buffers(
         * XFS_LI_FAILED set/clear is protected by xa_lock, caller  this
         * function already have it acquired
         */
-       for (; lip; lip = next) {
-               next = lip->li_bio_list;
+       list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
                xfs_clear_li_failed(lip);
-       }
 
        /* Add this buffer back to the delayed write list */
        return xfs_buf_delwri_queue(bp, buffer_list);
index 9690ce62c9a7f63bbc2a4b485617199cc14253c0..643f53dcfe516dff1348387685106d6d72fb76e2 100644 (file)
@@ -50,7 +50,7 @@ struct xfs_buf_log_item;
  * needed to log buffers.  It tracks how many times the lock has been
  * locked, and which 128 byte chunks of the buffer are dirty.
  */
-typedef struct xfs_buf_log_item {
+struct xfs_buf_log_item {
        xfs_log_item_t          bli_item;       /* common item structure */
        struct xfs_buf          *bli_buf;       /* real buffer pointer */
        unsigned int            bli_flags;      /* misc flags */
@@ -59,11 +59,11 @@ typedef struct xfs_buf_log_item {
        int                     bli_format_count;       /* count of headers */
        struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
        struct xfs_buf_log_format __bli_format; /* embedded in-log header */
-} xfs_buf_log_item_t;
+};
 
 int    xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
 void   xfs_buf_item_relse(struct xfs_buf *);
-void   xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
+void   xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
 bool   xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
 void   xfs_buf_attach_iodone(struct xfs_buf *,
                              void(*)(struct xfs_buf *, xfs_log_item_t *),
@@ -71,7 +71,6 @@ void  xfs_buf_attach_iodone(struct xfs_buf *,
 void   xfs_buf_iodone_callbacks(struct xfs_buf *);
 void   xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
 bool   xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
-                                       struct xfs_log_item *,
                                        struct list_head *);
 
 extern kmem_zone_t     *xfs_buf_item_zone;
index 0c58918bc0ade9b6246a27605573bfc44ed442b5..b6ae3597bfb0b408041b13c785ec8492cd53198e 100644 (file)
@@ -152,7 +152,6 @@ xfs_dir2_block_getdents(
        struct xfs_inode        *dp = args->dp; /* incore directory inode */
        xfs_dir2_data_hdr_t     *hdr;           /* block header */
        struct xfs_buf          *bp;            /* buffer for block */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
        xfs_dir2_data_entry_t   *dep;           /* block data entry */
        xfs_dir2_data_unused_t  *dup;           /* block unused entry */
        char                    *endptr;        /* end of the data entries */
@@ -185,9 +184,8 @@ xfs_dir2_block_getdents(
        /*
         * Set up values for the loop.
         */
-       btp = xfs_dir2_block_tail_p(geo, hdr);
        ptr = (char *)dp->d_ops->data_entry_p(hdr);
-       endptr = (char *)xfs_dir2_block_leaf_p(btp);
+       endptr = xfs_dir3_data_endp(geo, hdr);
 
        /*
         * Loop over the data portion of the block.
index f248708c10ff7f64e61fb3cde307f42031322424..43572f8a1b8e4ac460e68051abbb540a7f38149b 100644 (file)
@@ -399,52 +399,6 @@ xfs_qm_dqalloc(
        return error;
 }
 
-STATIC int
-xfs_qm_dqrepair(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       struct xfs_dquot        *dqp,
-       xfs_dqid_t              firstid,
-       struct xfs_buf          **bpp)
-{
-       int                     error;
-       struct xfs_disk_dquot   *ddq;
-       struct xfs_dqblk        *d;
-       int                     i;
-
-       /*
-        * Read the buffer without verification so we get the corrupted
-        * buffer returned to us. make sure we verify it on write, though.
-        */
-       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
-                                  mp->m_quotainfo->qi_dqchunklen,
-                                  0, bpp, NULL);
-
-       if (error) {
-               ASSERT(*bpp == NULL);
-               return error;
-       }
-       (*bpp)->b_ops = &xfs_dquot_buf_ops;
-
-       ASSERT(xfs_buf_islocked(*bpp));
-       d = (struct xfs_dqblk *)(*bpp)->b_addr;
-
-       /* Do the actual repair of dquots in this buffer */
-       for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
-               ddq = &d[i].dd_diskdq;
-               error = xfs_dqcheck(mp, ddq, firstid + i,
-                                      dqp->dq_flags & XFS_DQ_ALLTYPES,
-                                      XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
-               if (error) {
-                       /* repair failed, we're screwed */
-                       xfs_trans_brelse(tp, *bpp);
-                       return -EIO;
-               }
-       }
-
-       return 0;
-}
-
 /*
  * Maps a dquot to the buffer containing its on-disk version.
  * This returns a ptr to the buffer containing the on-disk dquot
@@ -526,14 +480,6 @@ xfs_qm_dqtobp(
                                           dqp->q_blkno,
                                           mp->m_quotainfo->qi_dqchunklen,
                                           0, &bp, &xfs_dquot_buf_ops);
-
-               if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
-                       xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
-                                               mp->m_quotainfo->qi_dqperchunk;
-                       ASSERT(bp == NULL);
-                       error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp);
-               }
-
                if (error) {
                        ASSERT(bp == NULL);
                        return error;
@@ -1010,6 +956,7 @@ xfs_qm_dqflush(
        struct xfs_mount        *mp = dqp->q_mount;
        struct xfs_buf          *bp;
        struct xfs_disk_dquot   *ddqp;
+       xfs_failaddr_t          fa;
        int                     error;
 
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
@@ -1056,9 +1003,10 @@ xfs_qm_dqflush(
        /*
         * A simple sanity check in case we got a corrupted dquot..
         */
-       error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
-                          XFS_QMOPT_DOWARN, "dqflush (incore copy)");
-       if (error) {
+       fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 0);
+       if (fa) {
+               xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
+                               be32_to_cpu(ddqp->d_id), fa);
                xfs_buf_relse(bp);
                xfs_dqfunlock(dqp);
                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
index 664dea105e76fee564a1feeb16a05387fe6b9000..96eaa693370926a7550fd7a505130aa2f343dbc3 100644 (file)
@@ -150,10 +150,7 @@ xfs_dquot_item_error(
        struct xfs_log_item     *lip,
        struct xfs_buf          *bp)
 {
-       struct xfs_dquot        *dqp;
-
-       dqp = DQUOT_ITEM(lip)->qli_dquot;
-       ASSERT(!completion_done(&dqp->q_flush));
+       ASSERT(!completion_done(&DQUOT_ITEM(lip)->qli_dquot->q_flush));
        xfs_set_li_failed(lip, bp);
 }
 
@@ -179,7 +176,7 @@ xfs_qm_dquot_logitem_push(
                if (!xfs_buf_trylock(bp))
                        return XFS_ITEM_LOCKED;
 
-               if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+               if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
                        rval = XFS_ITEM_FLUSHING;
 
                xfs_buf_unlock(bp);
@@ -212,7 +209,7 @@ xfs_qm_dquot_logitem_push(
 
        error = xfs_qm_dqflush(dqp, &bp);
        if (error) {
-               xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
+               xfs_warn(dqp->q_mount, "%s: push error %d on dqp "PTR_FMT,
                        __func__, error, dqp);
        } else {
                if (!xfs_buf_delwri_queue(bp, buffer_list))
index 4c9f35d983b2a567aeef32565023efead34e1b25..ccf520f0b00dc62782e220adbeb738fc6f5c27c5 100644 (file)
@@ -24,6 +24,7 @@
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_sysfs.h"
+#include "xfs_inode.h"
 
 #ifdef DEBUG
 
@@ -314,12 +315,12 @@ xfs_error_report(
        struct xfs_mount        *mp,
        const char              *filename,
        int                     linenum,
-       void                    *ra)
+       xfs_failaddr_t          failaddr)
 {
        if (level <= xfs_error_level) {
                xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
                "Internal error %s at line %d of file %s.  Caller %pS",
-                           tag, linenum, filename, ra);
+                           tag, linenum, filename, failaddr);
 
                xfs_stack_trace();
        }
@@ -333,11 +334,11 @@ xfs_corruption_error(
        void                    *p,
        const char              *filename,
        int                     linenum,
-       void                    *ra)
+       xfs_failaddr_t          failaddr)
 {
        if (level <= xfs_error_level)
-               xfs_hex_dump(p, 64);
-       xfs_error_report(tag, level, mp, filename, linenum, ra);
+               xfs_hex_dump(p, XFS_CORRUPTION_DUMP_LEN);
+       xfs_error_report(tag, level, mp, filename, linenum, failaddr);
        xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
 }
 
@@ -347,19 +348,62 @@ xfs_corruption_error(
  */
 void
 xfs_verifier_error(
-       struct xfs_buf          *bp)
+       struct xfs_buf          *bp,
+       int                     error,
+       xfs_failaddr_t          failaddr)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       xfs_failaddr_t          fa;
+
+       fa = failaddr ? failaddr : __return_address;
+       __xfs_buf_ioerror(bp, error, fa);
 
        xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
                  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
-                 __return_address, bp->b_ops->name, bp->b_bn);
+                 fa, bp->b_ops->name, bp->b_bn);
 
        xfs_alert(mp, "Unmount and run xfs_repair");
 
        if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
-               xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
-               xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
+               xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
+                               XFS_CORRUPTION_DUMP_LEN);
+               xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN);
+       }
+
+       if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+               xfs_stack_trace();
+}
+
+/*
+ * Warnings for inode corruption problems.  Don't bother with the stack
+ * trace unless the error level is turned up high.
+ */
+void
+xfs_inode_verifier_error(
+       struct xfs_inode        *ip,
+       int                     error,
+       const char              *name,
+       void                    *buf,
+       size_t                  bufsz,
+       xfs_failaddr_t          failaddr)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_failaddr_t          fa;
+       int                     sz;
+
+       fa = failaddr ? failaddr : __return_address;
+
+       xfs_alert(mp, "Metadata %s detected at %pS, inode 0x%llx %s",
+                 error == -EFSBADCRC ? "CRC error" : "corruption",
+                 fa, ip->i_ino, name);
+
+       xfs_alert(mp, "Unmount and run xfs_repair");
+
+       if (buf && xfs_error_level >= XFS_ERRLEVEL_LOW) {
+               sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz);
+               xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
+                               sz);
+               xfs_hex_dump(buf, sz);
        }
 
        if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
index ea816c1bf8dbf2a83a89af5cae738a8791e06e6c..7e728c5a46b896634c4578414e2aaba8a0f4a23c 100644 (file)
 struct xfs_mount;
 
 extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
-                       const char *filename, int linenum, void *ra);
+                       const char *filename, int linenum,
+                       xfs_failaddr_t failaddr);
 extern void xfs_corruption_error(const char *tag, int level,
                        struct xfs_mount *mp, void *p, const char *filename,
-                       int linenum, void *ra);
-extern void xfs_verifier_error(struct xfs_buf *bp);
+                       int linenum, xfs_failaddr_t failaddr);
+extern void xfs_verifier_error(struct xfs_buf *bp, int error,
+                       xfs_failaddr_t failaddr);
+extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
+                       const char *name, void *buf, size_t bufsz,
+                       xfs_failaddr_t failaddr);
 
 #define        XFS_ERROR_REPORT(e, lvl, mp)    \
        xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
@@ -37,6 +42,9 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_ERRLEVEL_LOW       1
 #define XFS_ERRLEVEL_HIGH      5
 
+/* Dump 128 bytes of any corrupt buffer */
+#define XFS_CORRUPTION_DUMP_LEN                (128)
+
 /*
  * Macros to set EFSCORRUPTED & return/branch.
  */
index 60a2e128cb6a59aa7181faa5c519d511a308bc5c..8b4545623e25691f564397a471e11864c8326a1a 100644 (file)
  * File system operations
  */
 
-int
-xfs_fs_geometry(
-       xfs_mount_t             *mp,
-       xfs_fsop_geom_t         *geo,
-       int                     new_version)
-{
-
-       memset(geo, 0, sizeof(*geo));
-
-       geo->blocksize = mp->m_sb.sb_blocksize;
-       geo->rtextsize = mp->m_sb.sb_rextsize;
-       geo->agblocks = mp->m_sb.sb_agblocks;
-       geo->agcount = mp->m_sb.sb_agcount;
-       geo->logblocks = mp->m_sb.sb_logblocks;
-       geo->sectsize = mp->m_sb.sb_sectsize;
-       geo->inodesize = mp->m_sb.sb_inodesize;
-       geo->imaxpct = mp->m_sb.sb_imax_pct;
-       geo->datablocks = mp->m_sb.sb_dblocks;
-       geo->rtblocks = mp->m_sb.sb_rblocks;
-       geo->rtextents = mp->m_sb.sb_rextents;
-       geo->logstart = mp->m_sb.sb_logstart;
-       ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid));
-       memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid));
-       if (new_version >= 2) {
-               geo->sunit = mp->m_sb.sb_unit;
-               geo->swidth = mp->m_sb.sb_width;
-       }
-       if (new_version >= 3) {
-               geo->version = XFS_FSOP_GEOM_VERSION;
-               geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
-                            XFS_FSOP_GEOM_FLAGS_DIRV2 |
-                       (xfs_sb_version_hasattr(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
-                       (xfs_sb_version_hasquota(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
-                       (xfs_sb_version_hasalign(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
-                       (xfs_sb_version_hasdalign(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
-                       (xfs_sb_version_hasextflgbit(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
-                       (xfs_sb_version_hassector(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
-                       (xfs_sb_version_hasasciici(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
-                       (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
-                       (xfs_sb_version_hasattr2(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
-                       (xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
-                       (xfs_sb_version_hascrc(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
-                       (xfs_sb_version_hasftype(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
-                       (xfs_sb_version_hasfinobt(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
-                       (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_SPINODES : 0) |
-                       (xfs_sb_version_hasrmapbt(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) |
-                       (xfs_sb_version_hasreflink(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_REFLINK : 0);
-               geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
-                               mp->m_sb.sb_logsectsize : BBSIZE;
-               geo->rtsectsize = mp->m_sb.sb_blocksize;
-               geo->dirblocksize = mp->m_dir_geo->blksize;
-       }
-       if (new_version >= 4) {
-               geo->flags |=
-                       (xfs_sb_version_haslogv2(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_LOGV2 : 0);
-               geo->logsunit = mp->m_sb.sb_logsunit;
-       }
-       return 0;
-}
-
 static struct xfs_buf *
 xfs_growfs_get_hdr_buf(
        struct xfs_mount        *mp,
@@ -955,7 +878,7 @@ xfs_do_force_shutdown(
 
        if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
                xfs_notice(mp,
-       "%s(0x%x) called from line %d of file %s.  Return address = 0x%p",
+       "%s(0x%x) called from line %d of file %s.  Return address = "PTR_FMT,
                        __func__, flags, lnnum, fname, __return_address);
        }
        /*
index 2954c13a3acd2f044bf6c6a0c3988e673c900a70..20484ed5e919af18144faf9b4e639a21b50ef6f2 100644 (file)
@@ -18,7 +18,6 @@
 #ifndef __XFS_FSOPS_H__
 #define        __XFS_FSOPS_H__
 
-extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion);
 extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in);
 extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in);
 extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
index 3bcb8fd2a826f317fc018721c942940629259b06..d53a316162d6c1ea8360bf40e6c71ee882d19ed0 100644 (file)
@@ -296,6 +296,7 @@ xfs_reinit_inode(
        uint32_t        generation = inode->i_generation;
        uint64_t        version = inode_peek_iversion(inode);
        umode_t         mode = inode->i_mode;
+       dev_t           dev = inode->i_rdev;
 
        error = inode_init_always(mp->m_super, inode);
 
@@ -303,6 +304,7 @@ xfs_reinit_inode(
        inode->i_generation = generation;
        inode_set_iversion_queried(inode, version);
        inode->i_mode = mode;
+       inode->i_rdev = dev;
        return error;
 }
 
@@ -474,6 +476,11 @@ xfs_iget_cache_miss(
        if (error)
                goto out_destroy;
 
+       if (!xfs_inode_verify_forks(ip)) {
+               error = -EFSCORRUPTED;
+               goto out_destroy;
+       }
+
        trace_xfs_iget_miss(ip);
 
        if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
@@ -1651,28 +1658,15 @@ xfs_inode_clear_eofblocks_tag(
 }
 
 /*
- * Automatic CoW Reservation Freeing
- *
- * These functions automatically garbage collect leftover CoW reservations
- * that were made on behalf of a cowextsize hint when we start to run out
- * of quota or when the reservations sit around for too long.  If the file
- * has dirty pages or is undergoing writeback, its CoW reservations will
- * be retained.
- *
- * The actual garbage collection piggybacks off the same code that runs
- * the speculative EOF preallocation garbage collector.
+ * Set ourselves up to free CoW blocks from this file.  If it's already clean
+ * then we can bail out quickly, but otherwise we must back off if the file
+ * is undergoing some kind of write.
  */
-STATIC int
-xfs_inode_free_cowblocks(
+static bool
+xfs_prep_free_cowblocks(
        struct xfs_inode        *ip,
-       int                     flags,
-       void                    *args)
+       struct xfs_ifork        *ifp)
 {
-       int ret;
-       struct xfs_eofblocks *eofb = args;
-       int match;
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
-
        /*
         * Just clear the tag if we have an empty cow fork or none at all. It's
         * possible the inode was fully unshared since it was originally tagged.
@@ -1680,7 +1674,7 @@ xfs_inode_free_cowblocks(
        if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
                trace_xfs_inode_free_cowblocks_invalid(ip);
                xfs_inode_clear_cowblocks_tag(ip);
-               return 0;
+               return false;
        }
 
        /*
@@ -1691,6 +1685,35 @@ xfs_inode_free_cowblocks(
            mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
            mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
            atomic_read(&VFS_I(ip)->i_dio_count))
+               return false;
+
+       return true;
+}
+
+/*
+ * Automatic CoW Reservation Freeing
+ *
+ * These functions automatically garbage collect leftover CoW reservations
+ * that were made on behalf of a cowextsize hint when we start to run out
+ * of quota or when the reservations sit around for too long.  If the file
+ * has dirty pages or is undergoing writeback, its CoW reservations will
+ * be retained.
+ *
+ * The actual garbage collection piggybacks off the same code that runs
+ * the speculative EOF preallocation garbage collector.
+ */
+STATIC int
+xfs_inode_free_cowblocks(
+       struct xfs_inode        *ip,
+       int                     flags,
+       void                    *args)
+{
+       struct xfs_eofblocks    *eofb = args;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       int                     match;
+       int                     ret = 0;
+
+       if (!xfs_prep_free_cowblocks(ip, ifp))
                return 0;
 
        if (eofb) {
@@ -1711,7 +1734,12 @@ xfs_inode_free_cowblocks(
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
        xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
 
-       ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
+       /*
+        * Check again, nobody else should be able to dirty blocks or change
+        * the reflink iflag now that we have the first two locks held.
+        */
+       if (xfs_prep_free_cowblocks(ip, ifp))
+               ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
 
        xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
index 9f424e0aef1f9c86423a70ce74a6bf22fd821b23..604ee384a00abd6e449e2b858154313d0cc72d2d 100644 (file)
@@ -547,23 +547,36 @@ xfs_lock_inodes(
 
 /*
  * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
- * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
- * lock more than one at a time, lockdep will report false positives saying we
- * have violated locking orders.
+ * the mmaplock or the ilock, but not more than one type at a time. If we lock
+ * more than one at a time, lockdep will report false positives saying we have
+ * violated locking orders.  The iolock must be double-locked separately since
+ * we use i_rwsem for that.  We now support taking one lock EXCL and the other
+ * SHARED.
  */
 void
 xfs_lock_two_inodes(
-       xfs_inode_t             *ip0,
-       xfs_inode_t             *ip1,
-       uint                    lock_mode)
+       struct xfs_inode        *ip0,
+       uint                    ip0_mode,
+       struct xfs_inode        *ip1,
+       uint                    ip1_mode)
 {
-       xfs_inode_t             *temp;
+       struct xfs_inode        *temp;
+       uint                    mode_temp;
        int                     attempts = 0;
        xfs_log_item_t          *lp;
 
-       ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
-       if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
-               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+       ASSERT(hweight32(ip0_mode) == 1);
+       ASSERT(hweight32(ip1_mode) == 1);
+       ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
+       ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
+       ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+              !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+       ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+              !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+       ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+              !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+       ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+              !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
 
        ASSERT(ip0->i_ino != ip1->i_ino);
 
@@ -571,10 +584,13 @@ xfs_lock_two_inodes(
                temp = ip0;
                ip0 = ip1;
                ip1 = temp;
+               mode_temp = ip0_mode;
+               ip0_mode = ip1_mode;
+               ip1_mode = mode_temp;
        }
 
  again:
-       xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
+       xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));
 
        /*
         * If the first lock we have locked is in the AIL, we must TRY to get
@@ -583,18 +599,17 @@ xfs_lock_two_inodes(
         */
        lp = (xfs_log_item_t *)ip0->i_itemp;
        if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
-               if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
-                       xfs_iunlock(ip0, lock_mode);
+               if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
+                       xfs_iunlock(ip0, ip0_mode);
                        if ((++attempts % 5) == 0)
                                delay(1); /* Don't just spin the CPU */
                        goto again;
                }
        } else {
-               xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
+               xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
        }
 }
 
-
 void
 __xfs_iflock(
        struct xfs_inode        *ip)
@@ -1422,7 +1437,7 @@ xfs_link(
        if (error)
                goto std_return;
 
-       xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
+       xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
 
        xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
@@ -2215,7 +2230,7 @@ xfs_ifree_cluster(
        xfs_buf_t               *bp;
        xfs_inode_t             *ip;
        xfs_inode_log_item_t    *iip;
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
        struct xfs_perag        *pag;
        xfs_ino_t               inum;
 
@@ -2273,8 +2288,7 @@ xfs_ifree_cluster(
                 * stale first, we will not attempt to lock them in the loop
                 * below as the XFS_ISTALE flag will be set.
                 */
-               lip = bp->b_fspriv;
-               while (lip) {
+               list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
                        if (lip->li_type == XFS_LI_INODE) {
                                iip = (xfs_inode_log_item_t *)lip;
                                ASSERT(iip->ili_logged == 1);
@@ -2284,7 +2298,6 @@ xfs_ifree_cluster(
                                                        &iip->ili_item.li_lsn);
                                xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
                        }
-                       lip = lip->li_bio_list;
                }
 
 
@@ -2452,6 +2465,7 @@ xfs_ifree(
 
        VFS_I(ip)->i_mode = 0;          /* mark incore inode as free */
        ip->i_d.di_flags = 0;
+       ip->i_d.di_flags2 = 0;
        ip->i_d.di_dmevmask = 0;
        ip->i_d.di_forkoff = 0;         /* mark the attr fork not in use */
        ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
@@ -2587,7 +2601,7 @@ xfs_remove(
                goto std_return;
        }
 
-       xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
+       xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
 
        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -3480,6 +3494,36 @@ xfs_iflush(
        return error;
 }
 
+/*
+ * If there are inline format data / attr forks attached to this inode, make
+ * sure they're not corrupt.  Returns false after reporting the first bad fork.
+ */
+bool
+xfs_inode_verify_forks(
+       struct xfs_inode        *ip)
+{
+       struct xfs_ifork        *ifp;
+       xfs_failaddr_t          fa;
+
+       fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
+       if (fa) {
+               ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+               xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
+                               ifp->if_u1.if_data, ifp->if_bytes, fa);
+               return false;
+       }
+
+       fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
+       if (fa) {
+               ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); /* NULL-checked below */
+               xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
+                               ifp ? ifp->if_u1.if_data : NULL,
+                               ifp ? ifp->if_bytes : 0, fa);
+               return false;
+       }
+       return true;
+}
+
 STATIC int
 xfs_iflush_int(
        struct xfs_inode        *ip,
@@ -3502,7 +3546,7 @@ xfs_iflush_int(
        if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
                               mp, XFS_ERRTAG_IFLUSH_1)) {
                xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
-                       "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
+                       "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
                        __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
                goto corrupt_out;
        }
@@ -3512,7 +3556,7 @@ xfs_iflush_int(
                    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
                    mp, XFS_ERRTAG_IFLUSH_3)) {
                        xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
-                               "%s: Bad regular inode %Lu, ptr 0x%p",
+                               "%s: Bad regular inode %Lu, ptr "PTR_FMT,
                                __func__, ip->i_ino, ip);
                        goto corrupt_out;
                }
@@ -3523,7 +3567,7 @@ xfs_iflush_int(
                    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
                    mp, XFS_ERRTAG_IFLUSH_4)) {
                        xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
-                               "%s: Bad directory inode %Lu, ptr 0x%p",
+                               "%s: Bad directory inode %Lu, ptr "PTR_FMT,
                                __func__, ip->i_ino, ip);
                        goto corrupt_out;
                }
@@ -3532,7 +3576,7 @@ xfs_iflush_int(
                                ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
                xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
                        "%s: detected corrupt incore inode %Lu, "
-                       "total extents = %d, nblocks = %Ld, ptr 0x%p",
+                       "total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
                        __func__, ip->i_ino,
                        ip->i_d.di_nextents + ip->i_d.di_anextents,
                        ip->i_d.di_nblocks, ip);
@@ -3541,7 +3585,7 @@ xfs_iflush_int(
        if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
                                mp, XFS_ERRTAG_IFLUSH_6)) {
                xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
-                       "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
+                       "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
                        __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
                goto corrupt_out;
        }
@@ -3558,10 +3602,8 @@ xfs_iflush_int(
        if (ip->i_d.di_version < 3)
                ip->i_d.di_flushiter++;
 
-       /* Check the inline directory data. */
-       if (S_ISDIR(VFS_I(ip)->i_mode) &&
-           ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
-           xfs_dir2_sf_verify(ip))
+       /* Check the inline fork data before we write out. */
+       if (!xfs_inode_verify_forks(ip))
                goto corrupt_out;
 
        /*
@@ -3624,7 +3666,7 @@ xfs_iflush_int(
        /* generate the checksum. */
        xfs_dinode_calc_crc(mp, dip);
 
-       ASSERT(bp->b_fspriv != NULL);
+       ASSERT(!list_empty(&bp->b_li_list));
        ASSERT(bp->b_iodone != NULL);
        return 0;
 
index d383e392ec9ddcca6f552dc8c4cfe5329373d6c9..3e8dc990d41c5bc559673c637a1a33c9d9fd86e9 100644 (file)
@@ -423,7 +423,8 @@ void                xfs_iunpin_wait(xfs_inode_t *);
 #define xfs_ipincount(ip)      ((unsigned int) atomic_read(&ip->i_pincount))
 
 int            xfs_iflush(struct xfs_inode *, struct xfs_buf **);
-void           xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
+void           xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
+                               struct xfs_inode *ip1, uint ip1_mode);
 
 xfs_extlen_t   xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t   xfs_get_cowextsz_hint(struct xfs_inode *ip);
@@ -491,4 +492,6 @@ extern struct kmem_zone     *xfs_inode_zone;
 /* The default CoW extent size hint. */
 #define XFS_DEFAULT_COWEXTSZ_HINT 32
 
+bool xfs_inode_verify_forks(struct xfs_inode *ip);
+
 #endif /* __XFS_INODE_H__ */
index 7571abf5dfb385ef2009047c3ceefd0c47890c78..d5037f060d6fe60a861be30ac64eb1894bcea1d6 100644 (file)
@@ -522,7 +522,7 @@ xfs_inode_item_push(
                if (!xfs_buf_trylock(bp))
                        return XFS_ITEM_LOCKED;
 
-               if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+               if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
                        rval = XFS_ITEM_FLUSHING;
 
                xfs_buf_unlock(bp);
@@ -713,37 +713,23 @@ xfs_iflush_done(
        struct xfs_log_item     *lip)
 {
        struct xfs_inode_log_item *iip;
-       struct xfs_log_item     *blip;
-       struct xfs_log_item     *next;
-       struct xfs_log_item     *prev;
+       struct xfs_log_item     *blip, *n;
        struct xfs_ail          *ailp = lip->li_ailp;
        int                     need_ail = 0;
+       LIST_HEAD(tmp);
 
        /*
         * Scan the buffer IO completions for other inodes being completed and
         * attach them to the current inode log item.
         */
-       blip = bp->b_fspriv;
-       prev = NULL;
-       while (blip != NULL) {
-               if (blip->li_cb != xfs_iflush_done) {
-                       prev = blip;
-                       blip = blip->li_bio_list;
-                       continue;
-               }
 
-               /* remove from list */
-               next = blip->li_bio_list;
-               if (!prev) {
-                       bp->b_fspriv = next;
-               } else {
-                       prev->li_bio_list = next;
-               }
+       list_add_tail(&lip->li_bio_list, &tmp);
 
-               /* add to current list */
-               blip->li_bio_list = lip->li_bio_list;
-               lip->li_bio_list = blip;
+       list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) {
+               if (blip->li_cb != xfs_iflush_done)
+                       continue;       /* other callbacks stay on bp's list */
 
+               list_move_tail(&blip->li_bio_list, &tmp);
                /*
                 * while we have the item, do the unlocked check for needing
                 * the AIL lock.
@@ -752,8 +738,6 @@ xfs_iflush_done(
                if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
                    (blip->li_flags & XFS_LI_FAILED))
                        need_ail++;
-
-               blip = next;
        }
 
        /* make sure we capture the state of the initial inode. */
@@ -776,7 +760,7 @@ xfs_iflush_done(
 
                /* this is an opencoded batch version of xfs_trans_ail_delete */
                spin_lock(&ailp->xa_lock);
-               for (blip = lip; blip; blip = blip->li_bio_list) {
+               list_for_each_entry(blip, &tmp, li_bio_list) {
                        if (INODE_ITEM(blip)->ili_logged &&
                            blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
                                mlip_changed |= xfs_ail_delete_one(ailp, blip);
@@ -802,15 +786,14 @@ xfs_iflush_done(
         * ili_last_fields bits now that we know that the data corresponding to
         * them is safely on disk.
         */
-       for (blip = lip; blip; blip = next) {
-               next = blip->li_bio_list;
-               blip->li_bio_list = NULL;
-
+       list_for_each_entry_safe(blip, n, &tmp, li_bio_list) {
+               list_del_init(&blip->li_bio_list);
                iip = INODE_ITEM(blip);
                iip->ili_logged = 0;
                iip->ili_last_fields = 0;
                xfs_ifunlock(iip->ili_inode);
        }
+       list_del(&tmp);
 }
 
 /*
index 20dc65fef6a42fa52ad412b9cd5619aa1434245b..89fb1eb80aae88b3a47e4fd63cb64857639fbb76 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/fsmap.h>
 #include "xfs_fsmap.h"
 #include "scrub/xfs_scrub.h"
+#include "xfs_sb.h"
 
 #include <linux/capability.h>
 #include <linux/cred.h>
@@ -809,7 +810,7 @@ xfs_ioc_fsgeometry_v1(
        xfs_fsop_geom_t         fsgeo;
        int                     error;
 
-       error = xfs_fs_geometry(mp, &fsgeo, 3);
+       error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3);
        if (error)
                return error;
 
@@ -831,7 +832,7 @@ xfs_ioc_fsgeometry(
        xfs_fsop_geom_t         fsgeo;
        int                     error;
 
-       error = xfs_fs_geometry(mp, &fsgeo, 4);
+       error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 4);
        if (error)
                return error;
 
index 35c79e246fdebe092d93b020b84a52ee6d3763c0..10fbde359649dc6a52a8afdb9b26353b4152bbc5 100644 (file)
@@ -37,6 +37,7 @@
 #include "xfs_ioctl.h"
 #include "xfs_ioctl32.h"
 #include "xfs_trace.h"
+#include "xfs_sb.h"
 
 #define  _NATIVE_IOC(cmd, type) \
          _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
@@ -66,7 +67,7 @@ xfs_compat_ioc_fsgeometry_v1(
        xfs_fsop_geom_t           fsgeo;
        int                       error;
 
-       error = xfs_fs_geometry(mp, &fsgeo, 3);
+       error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3);
        if (error)
                return error;
        /* The 32-bit variant simply has some padding at the end */
index 99562ec0de56113e85aa4b60652239af671a0bd5..bee51a14a906ea71661e06b5d82fd46dd219c536 100644 (file)
@@ -285,8 +285,22 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
 #define XFS_IS_REALTIME_INODE(ip)                      \
        (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) &&  \
         (ip)->i_mount->m_rtdev_targp)
+#define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 1 : 0)
 #else
 #define XFS_IS_REALTIME_INODE(ip) (0)
+#define XFS_IS_REALTIME_MOUNT(mp) (0)
+#endif
+
+/*
+ * Starting in Linux 4.15, the %p (raw pointer value) printk modifier
+ * prints a hashed version of the pointer to avoid leaking kernel
+ * pointers into dmesg.  If we're trying to debug the kernel we want the
+ * raw values, so DEBUG builds splice %px into format strings via PTR_FMT.
+ */
+#ifdef DEBUG
+# define PTR_FMT "%px"
+#else
+# define PTR_FMT "%p"
 #endif
 
 #endif /* __XFS_LINUX__ */
index a503af96d780ecf9fd15f6c6d579bc9822abac6f..3e5ba1ecc08047904b6b5d533482632ee65098a6 100644 (file)
@@ -1047,6 +1047,7 @@ xfs_log_item_init(
 
        INIT_LIST_HEAD(&item->li_ail);
        INIT_LIST_HEAD(&item->li_cil);
+       INIT_LIST_HEAD(&item->li_bio_list);
 }
 
 /*
@@ -1242,7 +1243,7 @@ xlog_space_left(
 static void
 xlog_iodone(xfs_buf_t *bp)
 {
-       struct xlog_in_core     *iclog = bp->b_fspriv;
+       struct xlog_in_core     *iclog = bp->b_log_item;
        struct xlog             *l = iclog->ic_log;
        int                     aborted = 0;
 
@@ -1773,7 +1774,7 @@ STATIC int
 xlog_bdstrat(
        struct xfs_buf          *bp)
 {
-       struct xlog_in_core     *iclog = bp->b_fspriv;
+       struct xlog_in_core     *iclog = bp->b_log_item;
 
        xfs_buf_lock(bp);
        if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -1919,7 +1920,7 @@ xlog_sync(
        }
 
        bp->b_io_length = BTOBB(count);
-       bp->b_fspriv = iclog;
+       bp->b_log_item = iclog;
        bp->b_flags &= ~XBF_FLUSH;
        bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
 
@@ -1958,7 +1959,7 @@ xlog_sync(
                XFS_BUF_SET_ADDR(bp, 0);             /* logical 0 */
                xfs_buf_associate_memory(bp,
                                (char *)&iclog->ic_header + count, split);
-               bp->b_fspriv = iclog;
+               bp->b_log_item = iclog;
                bp->b_flags &= ~XBF_FLUSH;
                bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
 
@@ -2117,7 +2118,9 @@ xlog_print_trans(
 
        /* dump core transaction and ticket info */
        xfs_warn(mp, "transaction summary:");
-       xfs_warn(mp, "  flags   = 0x%x", tp->t_flags);
+       xfs_warn(mp, "  log res   = %d", tp->t_log_res);
+       xfs_warn(mp, "  log count = %d", tp->t_log_count);
+       xfs_warn(mp, "  flags     = 0x%x", tp->t_flags);
 
        xlog_print_tic_res(mp, tp->t_ticket);
 
@@ -2242,7 +2245,7 @@ xlog_write_setup_ophdr(
                break;
        default:
                xfs_warn(log->l_mp,
-                       "Bad XFS transaction clientid 0x%x in ticket 0x%p",
+                       "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT,
                        ophdr->oh_clientid, ticket);
                return NULL;
        }
@@ -3924,7 +3927,7 @@ xlog_verify_iclog(
                }
                if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
                        xfs_warn(log->l_mp,
-                               "%s: invalid clientid %d op 0x%p offset 0x%lx",
+                               "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx",
                                __func__, clientid, ophead,
                                (unsigned long)field_offset);
 
index 28d1abfe835eef3e9d87f7da1c7c805fef0488f4..00240c9ee72e2c07cc653c33885dea21caf1097e 100644 (file)
@@ -400,9 +400,9 @@ xlog_recover_iodone(
         * On v5 supers, a bli could be attached to update the metadata LSN.
         * Clean it up.
         */
-       if (bp->b_fspriv)
+       if (bp->b_log_item)
                xfs_buf_item_relse(bp);
-       ASSERT(bp->b_fspriv == NULL);
+       ASSERT(bp->b_log_item == NULL);
 
        bp->b_iodone = NULL;
        xfs_buf_ioend(bp);
@@ -2218,7 +2218,7 @@ xlog_recover_do_inode_buffer(
                                next_unlinked_offset - reg_buf_offset;
                if (unlikely(*logged_nextp == 0)) {
                        xfs_alert(mp,
-               "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). "
+               "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
                "Trying to replay bad (0) inode di_next_unlinked field.",
                                item, bp);
                        XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
@@ -2630,7 +2630,7 @@ xlog_recover_validate_buf_type(
                ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
                bp->b_iodone = xlog_recover_iodone;
                xfs_buf_item_init(bp, mp);
-               bip = bp->b_fspriv;
+               bip = bp->b_log_item;
                bip->bli_item.li_lsn = current_lsn;
        }
 }
@@ -2652,7 +2652,7 @@ xlog_recover_do_reg_buffer(
        int                     i;
        int                     bit;
        int                     nbits;
-       int                     error;
+       xfs_failaddr_t          fa;
 
        trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
 
@@ -2687,7 +2687,7 @@ xlog_recover_do_reg_buffer(
                 * the first dquot in the buffer should do. XXXThis is
                 * probably a good thing to do for other buf types also.
                 */
-               error = 0;
+               fa = NULL;
                if (buf_f->blf_flags &
                   (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
                        if (item->ri_buf[i].i_addr == NULL) {
@@ -2701,11 +2701,14 @@ xlog_recover_do_reg_buffer(
                                        item->ri_buf[i].i_len, __func__);
                                goto next;
                        }
-                       error = xfs_dqcheck(mp, item->ri_buf[i].i_addr,
-                                              -1, 0, XFS_QMOPT_DOWARN,
-                                              "dquot_buf_recover");
-                       if (error)
+                       fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
+                                              -1, 0, 0);
+                       if (fa) {
+                               xfs_alert(mp,
+       "dquot corrupt at %pS trying to replay into block 0x%llx",
+                                       fa, bp->b_bn);
                                goto next;
+                       }
                }
 
                memcpy(xfs_buf_offset(bp,
@@ -2957,6 +2960,10 @@ xfs_recover_inode_owner_change(
        if (error)
                goto out_free_ip;
 
+       if (!xfs_inode_verify_forks(ip)) {
+               error = -EFSCORRUPTED;
+               goto out_free_ip;
+       }
 
        if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
                ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
@@ -3042,7 +3049,7 @@ xlog_recover_inode_pass2(
         */
        if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
                xfs_alert(mp,
-       "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
+       "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
                        __func__, dip, bp, in_f->ilf_ino);
                XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
                                 XFS_ERRLEVEL_LOW, mp);
@@ -3052,7 +3059,7 @@ xlog_recover_inode_pass2(
        ldip = item->ri_buf[1].i_addr;
        if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
                xfs_alert(mp,
-                       "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
+                       "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
                        __func__, item, in_f->ilf_ino);
                XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
                                 XFS_ERRLEVEL_LOW, mp);
@@ -3110,8 +3117,8 @@ xlog_recover_inode_pass2(
                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
                                         XFS_ERRLEVEL_LOW, mp, ldip);
                        xfs_alert(mp,
-               "%s: Bad regular inode log record, rec ptr 0x%p, "
-               "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
+               "%s: Bad regular inode log record, rec ptr "PTR_FMT", "
+               "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
                                __func__, item, dip, bp, in_f->ilf_ino);
                        error = -EFSCORRUPTED;
                        goto out_release;
@@ -3123,8 +3130,8 @@ xlog_recover_inode_pass2(
                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
                                             XFS_ERRLEVEL_LOW, mp, ldip);
                        xfs_alert(mp,
-               "%s: Bad dir inode log record, rec ptr 0x%p, "
-               "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
+               "%s: Bad dir inode log record, rec ptr "PTR_FMT", "
+               "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
                                __func__, item, dip, bp, in_f->ilf_ino);
                        error = -EFSCORRUPTED;
                        goto out_release;
@@ -3134,8 +3141,8 @@ xlog_recover_inode_pass2(
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
                                     XFS_ERRLEVEL_LOW, mp, ldip);
                xfs_alert(mp,
-       "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
-       "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
+       "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
+       "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
                        __func__, item, dip, bp, in_f->ilf_ino,
                        ldip->di_nextents + ldip->di_anextents,
                        ldip->di_nblocks);
@@ -3146,8 +3153,8 @@ xlog_recover_inode_pass2(
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
                                     XFS_ERRLEVEL_LOW, mp, ldip);
                xfs_alert(mp,
-       "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
-       "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
+       "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
+       "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
                        item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
                error = -EFSCORRUPTED;
                goto out_release;
@@ -3157,7 +3164,7 @@ xlog_recover_inode_pass2(
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
                                     XFS_ERRLEVEL_LOW, mp, ldip);
                xfs_alert(mp,
-                       "%s: Bad inode log record length %d, rec ptr 0x%p",
+                       "%s: Bad inode log record length %d, rec ptr "PTR_FMT,
                        __func__, item->ri_buf[1].i_len, item);
                error = -EFSCORRUPTED;
                goto out_release;
@@ -3303,6 +3310,7 @@ xlog_recover_dquot_pass2(
        xfs_mount_t             *mp = log->l_mp;
        xfs_buf_t               *bp;
        struct xfs_disk_dquot   *ddq, *recddq;
+       xfs_failaddr_t          fa;
        int                     error;
        xfs_dq_logformat_t      *dq_f;
        uint                    type;
@@ -3345,10 +3353,12 @@ xlog_recover_dquot_pass2(
         */
        dq_f = item->ri_buf[0].i_addr;
        ASSERT(dq_f);
-       error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
-                          "xlog_recover_dquot_pass2 (log copy)");
-       if (error)
+       fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0, 0);
+       if (fa) {
+               xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
+                               dq_f->qlf_id, fa);
                return -EIO;
+       }
        ASSERT(dq_f->qlf_len == 1);
 
        /*
index c879b517cc94482e5a51506f3da1448e2404a6c1..98fd41cbb9e10a3331ede6273271d6dc32da262f 100644 (file)
@@ -162,6 +162,7 @@ xfs_free_perag(
                ASSERT(pag);
                ASSERT(atomic_read(&pag->pag_ref) == 0);
                xfs_buf_hash_destroy(pag);
+               mutex_destroy(&pag->pag_ici_reclaim_lock);
                call_rcu(&pag->rcu_head, __xfs_free_perag);
        }
 }
@@ -248,6 +249,7 @@ xfs_initialize_perag(
 out_hash_destroy:
        xfs_buf_hash_destroy(pag);
 out_free_pag:
+       mutex_destroy(&pag->pag_ici_reclaim_lock);
        kmem_free(pag);
 out_unwind_new_pags:
        /* unwind any prior newly initialized pags */
@@ -256,6 +258,7 @@ xfs_initialize_perag(
                if (!pag)
                        break;
                xfs_buf_hash_destroy(pag);
+               mutex_destroy(&pag->pag_ici_reclaim_lock);
                kmem_free(pag);
        }
        return error;
index b897b11afb2c658bebba0416739fddcc0fec5aa4..5b848f4b637ffbd72128ec5ef41592b2bcb3567e 100644 (file)
@@ -162,7 +162,7 @@ xfs_qm_dqpurge(
                 */
                error = xfs_qm_dqflush(dqp, &bp);
                if (error) {
-                       xfs_warn(mp, "%s: dquot %p flush failed",
+                       xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed",
                                __func__, dqp);
                } else {
                        error = xfs_bwrite(bp);
@@ -291,8 +291,7 @@ xfs_qm_dqattach_one(
         * exist on disk and we didn't ask it to allocate; ESRCH if quotas got
         * turned off suddenly.
         */
-       error = xfs_qm_dqget(ip->i_mount, ip, id, type,
-                            doalloc | XFS_QMOPT_DOWARN, &dqp);
+       error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc, &dqp);
        if (error)
                return error;
 
@@ -481,7 +480,7 @@ xfs_qm_dquot_isolate(
 
                error = xfs_qm_dqflush(dqp, &bp);
                if (error) {
-                       xfs_warn(dqp->q_mount, "%s: dquot %p flush failed",
+                       xfs_warn(dqp->q_mount, "%s: dquot "PTR_FMT" flush failed",
                                 __func__, dqp);
                        goto out_unlock_dirty;
                }
@@ -574,7 +573,7 @@ xfs_qm_set_defquota(
        struct xfs_def_quota    *defq;
        int                     error;
 
-       error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp);
+       error = xfs_qm_dqread(mp, 0, type, 0, &dqp);
 
        if (!error) {
                xfs_disk_dquot_t        *ddqp = &dqp->q_core;
@@ -652,7 +651,7 @@ xfs_qm_init_quotainfo(
                        XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
                         (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
                          XFS_DQ_PROJ),
-                       XFS_QMOPT_DOWARN, &dqp);
+                       0, &dqp);
 
        if (!error) {
                xfs_disk_dquot_t        *ddqp = &dqp->q_core;
@@ -843,6 +842,7 @@ xfs_qm_reset_dqcounts(
 {
        struct xfs_dqblk        *dqb;
        int                     j;
+       xfs_failaddr_t          fa;
 
        trace_xfs_reset_dqcounts(bp, _RET_IP_);
 
@@ -864,10 +864,13 @@ xfs_qm_reset_dqcounts(
                /*
                 * Do a sanity check, and if needed, repair the dqblk. Don't
                 * output any warnings because it's perfectly possible to
-                * find uninitialised dquot blks. See comment in xfs_dqcheck.
+                * find uninitialised dquot blks. See comment in
+                * xfs_dquot_verify.
                 */
-               xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
-                           "xfs_quotacheck");
+               fa = xfs_dquot_verify(mp, ddq, id + j, type, 0);
+               if (fa)
+                       xfs_dquot_repair(mp, ddq, id + j, type);
+
                /*
                 * Reset type in case we are reusing group quota file for
                 * project quotas or vice versa
@@ -1074,8 +1077,7 @@ xfs_qm_quotacheck_dqadjust(
        struct xfs_dquot        *dqp;
        int                     error;
 
-       error = xfs_qm_dqget(mp, ip, id, type,
-                            XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
+       error = xfs_qm_dqget(mp, ip, id, type, XFS_QMOPT_DQALLOC, &dqp);
        if (error) {
                /*
                 * Shouldn't be able to turn off quotas here.
@@ -1696,8 +1698,7 @@ xfs_qm_vop_dqalloc(
                        xfs_iunlock(ip, lockflags);
                        error = xfs_qm_dqget(mp, NULL, uid,
                                                 XFS_DQ_USER,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
+                                                XFS_QMOPT_DQALLOC,
                                                 &uq);
                        if (error) {
                                ASSERT(error != -ENOENT);
@@ -1723,8 +1724,7 @@ xfs_qm_vop_dqalloc(
                        xfs_iunlock(ip, lockflags);
                        error = xfs_qm_dqget(mp, NULL, gid,
                                                 XFS_DQ_GROUP,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
+                                                XFS_QMOPT_DQALLOC,
                                                 &gq);
                        if (error) {
                                ASSERT(error != -ENOENT);
@@ -1743,8 +1743,7 @@ xfs_qm_vop_dqalloc(
                        xfs_iunlock(ip, lockflags);
                        error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
                                                 XFS_DQ_PROJ,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
+                                                XFS_QMOPT_DQALLOC,
                                                 &pq);
                        if (error) {
                                ASSERT(error != -ENOENT);
index 47aea2e82c268f4bbf9c25c1c1c6f3821c11caa3..270246943a065fff8224b36b225e2572e213ef05 100644 (file)
@@ -464,6 +464,13 @@ xfs_reflink_allocate_cow(
        error = xfs_trans_commit(tp);
        if (error)
                return error;
+
+       /*
+        * Allocation succeeded but the requested range was not even partially
+        * satisfied?  Bail out!
+        */
+       if (nimaps == 0)
+               return -ENOSPC;
 convert:
        return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
                        &dfops);
@@ -599,10 +606,6 @@ xfs_reflink_cancel_cow_blocks(
                                        del.br_startblock, del.br_blockcount,
                                        NULL);
 
-                       /* Update quota accounting */
-                       xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
-                                       -(long)del.br_blockcount);
-
                        /* Roll the transaction */
                        xfs_defer_ijoin(&dfops, ip);
                        error = xfs_defer_finish(tpp, &dfops);
@@ -613,6 +616,13 @@ xfs_reflink_cancel_cow_blocks(
 
                        /* Remove the mapping from the CoW fork. */
                        xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+
+                       /* Remove the quota reservation */
+                       error = xfs_trans_reserve_quota_nblks(NULL, ip,
+                                       -(long)del.br_blockcount, 0,
+                                       XFS_QMOPT_RES_REGBLKS);
+                       if (error)
+                               break;
                } else {
                        /* Didn't do anything, push cursor back. */
                        xfs_iext_prev(ifp, &icur);
@@ -795,6 +805,10 @@ xfs_reflink_end_cow(
                if (error)
                        goto out_defer;
 
+               /* Charge this new data fork mapping to the on-disk quota. */
+               xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
+                               (long)del.br_blockcount);
+
                /* Remove the mapping from the CoW fork. */
                xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 
@@ -944,7 +958,7 @@ xfs_reflink_set_inode_flag(
        if (src->i_ino == dest->i_ino)
                xfs_ilock(src, XFS_ILOCK_EXCL);
        else
-               xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL);
+               xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL);
 
        if (!xfs_is_reflink_inode(src)) {
                trace_xfs_reflink_set_inode_flag(src);
@@ -1202,13 +1216,16 @@ xfs_reflink_remap_blocks(
 
        /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
        while (len) {
+               uint            lock_mode;
+
                trace_xfs_reflink_remap_blocks_loop(src, srcoff, len,
                                dest, destoff);
+
                /* Read extent from the source file */
                nimaps = 1;
-               xfs_ilock(src, XFS_ILOCK_EXCL);
+               lock_mode = xfs_ilock_data_map_shared(src);
                error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
-               xfs_iunlock(src, XFS_ILOCK_EXCL);
+               xfs_iunlock(src, lock_mode);
                if (error)
                        goto err;
                ASSERT(nimaps == 1);
@@ -1244,6 +1261,50 @@ xfs_reflink_remap_blocks(
        return error;
 }
 
+/*
+ * Grab the exclusive iolock for a data copy from src to dest, making
+ * sure to abide vfs locking order (lowest pointer value goes first) and
+ * breaking the pnfs layout leases on dest before proceeding.  The loop
+ * is needed because we cannot call the blocking break_layout() with the
+ * src iolock held, and therefore have to back out both locks.
+ */
+static int
+xfs_iolock_two_inodes_and_break_layout(
+       struct inode            *src,
+       struct inode            *dest)
+{
+       int                     error;
+
+retry:
+       if (src < dest) {
+               inode_lock_shared(src);
+               inode_lock_nested(dest, I_MUTEX_NONDIR2);
+       } else {
+               /* src >= dest */
+               inode_lock(dest);
+       }
+
+       error = break_layout(dest, false);
+       if (error == -EWOULDBLOCK) {
+               inode_unlock(dest);
+               if (src < dest)
+                       inode_unlock_shared(src);
+               error = break_layout(dest, true);
+               if (error)
+                       return error;
+               goto retry;
+       }
+       if (error) {
+               inode_unlock(dest);
+               if (src < dest)
+                       inode_unlock_shared(src);
+               return error;
+       }
+       if (src > dest)
+               inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
+       return 0;
+}
+
 /*
  * Link a range of blocks from one file to another.
  */
@@ -1274,11 +1335,14 @@ xfs_reflink_remap_range(
                return -EIO;
 
        /* Lock both files against IO */
-       lock_two_nondirectories(inode_in, inode_out);
+       ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
+       if (ret)
+               return ret;
        if (same_inode)
                xfs_ilock(src, XFS_MMAPLOCK_EXCL);
        else
-               xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
+               xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
+                               XFS_MMAPLOCK_EXCL);
 
        /* Check file eligibility and prepare for block sharing. */
        ret = -EINVAL;
@@ -1295,6 +1359,11 @@ xfs_reflink_remap_range(
        if (ret <= 0)
                goto out_unlock;
 
+       /* Attach dquots to dest inode before changing block map */
+       ret = xfs_qm_dqattach(dest, 0);
+       if (ret)
+               goto out_unlock;
+
        trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
        /*
@@ -1341,10 +1410,12 @@ xfs_reflink_remap_range(
                        is_dedupe);
 
 out_unlock:
-       xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
+       xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
+       if (!same_inode)
+               xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
+       inode_unlock(inode_out);
        if (!same_inode)
-               xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
-       unlock_two_nondirectories(inode_in, inode_out);
+               inode_unlock_shared(inode_in);
        if (ret)
                trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
        return ret;
index 3f30f846d7f29fee442c02738d124159249d1959..dfee3c9911552547adb6d981e36a57b5d06a847b 100644 (file)
@@ -139,6 +139,9 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp,
                          xfs_rtalloc_query_range_fn fn,
                          void *priv);
 bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
+int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
+                              xfs_rtblock_t start, xfs_extlen_t len,
+                              bool *is_free);
 #else
 # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb)    (ENOSYS)
 # define xfs_rtfree_extent(t,b,l)                       (ENOSYS)
@@ -148,6 +151,7 @@ bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
 # define xfs_rtalloc_query_all(t,f,p)                   (ENOSYS)
 # define xfs_rtbuf_get(m,t,b,i,p)                       (ENOSYS)
 # define xfs_verify_rtbno(m, r)                        (false)
+# define xfs_rtalloc_extent_is_free(m,t,s,l,i)          (ENOSYS)
 static inline int              /* error */
 xfs_rtmount_init(
        xfs_mount_t     *mp)    /* file system mount structure */
index 1dacccc367f81725a678ea3a6ed50528a731920d..f3e0001f999234bad791cd80b36e3f68e41281a2 100644 (file)
@@ -1153,6 +1153,14 @@ xfs_fs_statfs(
            ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
                              (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
                xfs_qm_statvfs(ip, statp);
+
+       if (XFS_IS_REALTIME_MOUNT(mp) &&
+           (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
+               statp->f_blocks = sbp->sb_rblocks;
+               statp->f_bavail = statp->f_bfree =
+                       sbp->sb_frextents * sbp->sb_rextsize;
+       }
+
        return 0;
 }
 
@@ -1660,7 +1668,7 @@ xfs_fs_fill_super(
                }
                if (xfs_sb_version_hasreflink(&mp->m_sb))
                        xfs_alert(mp,
-               "DAX and reflink have not been tested together!");
+               "DAX and reflink cannot be used together!");
        }
 
        if (mp->m_flags & XFS_MOUNT_DISCARD) {
@@ -1684,10 +1692,6 @@ xfs_fs_fill_super(
        "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
        }
 
-       if (xfs_sb_version_hasreflink(&mp->m_sb))
-               xfs_alert(mp,
-       "EXPERIMENTAL reflink feature enabled. Use at your own risk!");
-
        error = xfs_mountfs(mp);
        if (error)
                goto out_filestream_unmount;
index d718a10c22714b5551d44bb2f8e662ac93199078..945de08af7ba98a79c85b04080fda404dd750763 100644 (file)
@@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
                __entry->flags = ctx->flags;
        ),
        TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-                 "alist 0x%p size %u count %u firstu %u flags %d %s",
+                 "alist %p size %u count %u firstu %u flags %d %s",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->ino,
                   __entry->hashval,
@@ -119,7 +119,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
                __entry->refcount = refcount;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d agno %u refcount %d caller %ps",
+       TP_printk("dev %d:%d agno %u refcount %d caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __entry->refcount,
@@ -200,7 +200,7 @@ TRACE_EVENT(xfs_attr_list_node_descend,
                __entry->bt_before = be32_to_cpu(btree->before);
        ),
        TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-                 "alist 0x%p size %u count %u firstu %u flags %d %s "
+                 "alist %p size %u count %u firstu %u flags %d %s "
                  "node hashval %u, node before %u",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->ino,
@@ -251,8 +251,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
                __entry->bmap_state = state;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d "
-                 "offset %lld block %lld count %lld flag %d caller %ps",
+       TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d "
+                 "offset %lld block %lld count %lld flag %d caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -301,7 +301,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
-                 "lock %d flags %s caller %ps",
+                 "lock %d flags %s caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->nblks,
@@ -370,7 +370,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %ps",
+                 "lock %d flags %s caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->buffer_length,
@@ -390,7 +390,7 @@ DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
 DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
 
 TRACE_EVENT(xfs_buf_ioerror,
-       TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
+       TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip),
        TP_ARGS(bp, error, caller_ip),
        TP_STRUCT__entry(
                __field(dev_t, dev)
@@ -401,7 +401,7 @@ TRACE_EVENT(xfs_buf_ioerror,
                __field(int, pincount)
                __field(unsigned, lockval)
                __field(int, error)
-               __field(unsigned long, caller_ip)
+               __field(xfs_failaddr_t, caller_ip)
        ),
        TP_fast_assign(
                __entry->dev = bp->b_target->bt_dev;
@@ -415,7 +415,7 @@ TRACE_EVENT(xfs_buf_ioerror,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d error %d flags %s caller %ps",
+                 "lock %d error %d flags %s caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->buffer_length,
@@ -460,7 +460,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
        ),
        TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
                  "lock %d flags %s recur %d refcount %d bliflags %s "
-                 "lidesc 0x%p liflags %s",
+                 "lidesc %p liflags %s",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->buf_bno,
                  __entry->buf_len,
@@ -579,7 +579,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
                __entry->lock_flags = lock_flags;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
+       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -697,7 +697,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
                __entry->pincount = atomic_read(&ip->i_pincount);
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
+       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->count,
@@ -1028,7 +1028,7 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
                __entry->flags = lip->li_flags;
                __entry->lsn = lip->li_lsn;
        ),
-       TP_printk("dev %d:%d lip 0x%p lsn %d/%d type %s flags %s",
+       TP_printk("dev %d:%d lip %p lsn %d/%d type %s flags %s",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->lip,
                  CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn),
@@ -1049,7 +1049,7 @@ TRACE_EVENT(xfs_log_force,
                __entry->lsn = lsn;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d lsn 0x%llx caller %ps",
+       TP_printk("dev %d:%d lsn 0x%llx caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->lsn, (void *)__entry->caller_ip)
 )
@@ -1082,7 +1082,7 @@ DECLARE_EVENT_CLASS(xfs_ail_class,
                __entry->old_lsn = old_lsn;
                __entry->new_lsn = new_lsn;
        ),
-       TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s",
+       TP_printk("dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->lip,
                  CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
@@ -1403,7 +1403,7 @@ TRACE_EVENT(xfs_bunmap,
                __entry->flags = flags;
        ),
        TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
-                 "flags %s caller %ps",
+                 "flags %s caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->size,
@@ -1517,7 +1517,7 @@ TRACE_EVENT(xfs_agf,
        ),
        TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
                  "levels b %u c %u flfirst %u fllast %u flcount %u "
-                 "freeblks %u longest %u caller %ps",
+                 "freeblks %u longest %u caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
@@ -2014,7 +2014,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
                __entry->count = item->ri_cnt;
                __entry->total = item->ri_total;
        ),
-       TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item 0x%p, "
+       TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, "
                  "item type %s item region count/total %d/%d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->tid,
@@ -2486,7 +2486,7 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class,
                __entry->error = error;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d agno %u error %d caller %ps",
+       TP_printk("dev %d:%d agno %u error %d caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __entry->error,
@@ -2977,7 +2977,7 @@ DECLARE_EVENT_CLASS(xfs_inode_error_class,
                __entry->error = error;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino %llx error %d caller %ps",
+       TP_printk("dev %d:%d ino %llx error %d caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->error,
@@ -3313,6 +3313,32 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
 DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
 DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
 
+TRACE_EVENT(xfs_trans_resv_calc,
+       TP_PROTO(struct xfs_mount *mp, unsigned int type,
+                struct xfs_trans_res *res),
+       TP_ARGS(mp, type, res),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, type)
+               __field(uint, logres)
+               __field(int, logcount)
+               __field(int, logflags)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->type = type;
+               __entry->logres = res->tr_logres;
+               __entry->logcount = res->tr_logcount;
+               __entry->logflags = res->tr_logflags;
+       ),
+       TP_printk("dev %d:%d type %d logres %u logcount %d flags 0x%x",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->type,
+                 __entry->logres,
+                 __entry->logcount,
+                 __entry->logflags)
+);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
index a87f657f59c96dbf32241f30335b9b1b2a4f1538..86f92df32c428c857892c9b99a13f65f6f44e87b 100644 (file)
 kmem_zone_t    *xfs_trans_zone;
 kmem_zone_t    *xfs_log_item_desc_zone;
 
+#if defined(CONFIG_TRACEPOINTS)
+static void
+xfs_trans_trace_reservations(
+       struct xfs_mount        *mp)
+{
+       struct xfs_trans_res    resv;
+       struct xfs_trans_res    *res;
+       struct xfs_trans_res    *end_res;
+       int                     i;
+
+       res = (struct xfs_trans_res *)M_RES(mp);
+       end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
+       for (i = 0; res < end_res; i++, res++)
+               trace_xfs_trans_resv_calc(mp, i, res);
+       xfs_log_get_max_trans_res(mp, &resv);
+       trace_xfs_trans_resv_calc(mp, -1, &resv);
+}
+#else
+# define xfs_trans_trace_reservations(mp)
+#endif
+
 /*
  * Initialize the precomputed transaction reservation values
  * in the mount structure.
@@ -44,6 +65,7 @@ xfs_trans_init(
        struct xfs_mount        *mp)
 {
        xfs_trans_resv_calc(mp, M_RES(mp));
+       xfs_trans_trace_reservations(mp);
 }
 
 /*
index 815b53d20e262770c21d6cf8609511a50f851bae..9d542dfe00524cf23aa99fe0d22f5a2dba9353ab 100644 (file)
@@ -50,7 +50,7 @@ typedef struct xfs_log_item {
        uint                            li_type;        /* item type */
        uint                            li_flags;       /* misc flags */
        struct xfs_buf                  *li_buf;        /* real buffer pointer */
-       struct xfs_log_item             *li_bio_list;   /* buffer item list */
+       struct list_head                li_bio_list;    /* buffer item list */
        void                            (*li_cb)(struct xfs_buf *,
                                                 struct xfs_log_item *);
                                                        /* buffer item iodone */
index 3ba7a96a8abdb3c00f1f2c614a24c6063e2d89fc..653ce379d36bf4798d350932fcd857f5635f0976 100644 (file)
@@ -82,12 +82,12 @@ _xfs_trans_bjoin(
        ASSERT(bp->b_transp == NULL);
 
        /*
-        * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
+        * The xfs_buf_log_item pointer is stored in b_log_item.  If
         * it doesn't have one yet, then allocate one and initialize it.
         * The checks to see if one is there are in xfs_buf_item_init().
         */
        xfs_buf_item_init(bp, tp->t_mountp);
-       bip = bp->b_fspriv;
+       bip = bp->b_log_item;
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
        ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
@@ -118,7 +118,7 @@ xfs_trans_bjoin(
        struct xfs_buf          *bp)
 {
        _xfs_trans_bjoin(tp, bp, 0);
-       trace_xfs_trans_bjoin(bp->b_fspriv);
+       trace_xfs_trans_bjoin(bp->b_log_item);
 }
 
 /*
@@ -139,7 +139,7 @@ xfs_trans_get_buf_map(
        xfs_buf_flags_t         flags)
 {
        xfs_buf_t               *bp;
-       xfs_buf_log_item_t      *bip;
+       struct xfs_buf_log_item *bip;
 
        if (!tp)
                return xfs_buf_get_map(target, map, nmaps, flags);
@@ -159,7 +159,7 @@ xfs_trans_get_buf_map(
                }
 
                ASSERT(bp->b_transp == tp);
-               bip = bp->b_fspriv;
+               bip = bp->b_log_item;
                ASSERT(bip != NULL);
                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                bip->bli_recur++;
@@ -175,7 +175,7 @@ xfs_trans_get_buf_map(
        ASSERT(!bp->b_error);
 
        _xfs_trans_bjoin(tp, bp, 1);
-       trace_xfs_trans_get_buf(bp->b_fspriv);
+       trace_xfs_trans_get_buf(bp->b_log_item);
        return bp;
 }
 
@@ -188,12 +188,13 @@ xfs_trans_get_buf_map(
  * mount structure.
  */
 xfs_buf_t *
-xfs_trans_getsb(xfs_trans_t    *tp,
-               struct xfs_mount *mp,
-               int             flags)
+xfs_trans_getsb(
+       xfs_trans_t             *tp,
+       struct xfs_mount        *mp,
+       int                     flags)
 {
        xfs_buf_t               *bp;
-       xfs_buf_log_item_t      *bip;
+       struct xfs_buf_log_item *bip;
 
        /*
         * Default to just trying to lock the superblock buffer
@@ -210,7 +211,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
         */
        bp = mp->m_sb_bp;
        if (bp->b_transp == tp) {
-               bip = bp->b_fspriv;
+               bip = bp->b_log_item;
                ASSERT(bip != NULL);
                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                bip->bli_recur++;
@@ -223,7 +224,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
                return NULL;
 
        _xfs_trans_bjoin(tp, bp, 1);
-       trace_xfs_trans_getsb(bp->b_fspriv);
+       trace_xfs_trans_getsb(bp->b_log_item);
        return bp;
 }
 
@@ -266,7 +267,7 @@ xfs_trans_read_buf_map(
        if (bp) {
                ASSERT(xfs_buf_islocked(bp));
                ASSERT(bp->b_transp == tp);
-               ASSERT(bp->b_fspriv != NULL);
+               ASSERT(bp->b_log_item != NULL);
                ASSERT(!bp->b_error);
                ASSERT(bp->b_flags & XBF_DONE);
 
@@ -279,7 +280,7 @@ xfs_trans_read_buf_map(
                        return -EIO;
                }
 
-               bip = bp->b_fspriv;
+               bip = bp->b_log_item;
                bip->bli_recur++;
 
                ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -329,7 +330,7 @@ xfs_trans_read_buf_map(
 
        if (tp) {
                _xfs_trans_bjoin(tp, bp, 1);
-               trace_xfs_trans_read_buf(bp->b_fspriv);
+               trace_xfs_trans_read_buf(bp->b_log_item);
        }
        *bpp = bp;
        return 0;
@@ -352,10 +353,11 @@ xfs_trans_read_buf_map(
  * brelse() call.
  */
 void
-xfs_trans_brelse(xfs_trans_t   *tp,
-                xfs_buf_t      *bp)
+xfs_trans_brelse(
+       xfs_trans_t             *tp,
+       xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip;
+       struct xfs_buf_log_item *bip;
        int                     freed;
 
        /*
@@ -368,7 +370,7 @@ xfs_trans_brelse(xfs_trans_t        *tp,
        }
 
        ASSERT(bp->b_transp == tp);
-       bip = bp->b_fspriv;
+       bip = bp->b_log_item;
        ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
@@ -456,10 +458,11 @@ xfs_trans_brelse(xfs_trans_t      *tp,
  */
 /* ARGSUSED */
 void
-xfs_trans_bhold(xfs_trans_t    *tp,
-               xfs_buf_t       *bp)
+xfs_trans_bhold(
+       xfs_trans_t             *tp,
+       xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
@@ -476,10 +479,11 @@ xfs_trans_bhold(xfs_trans_t       *tp,
  * for this transaction.
  */
 void
-xfs_trans_bhold_release(xfs_trans_t    *tp,
-                       xfs_buf_t       *bp)
+xfs_trans_bhold_release(
+       xfs_trans_t             *tp,
+       xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
@@ -500,7 +504,7 @@ xfs_trans_dirty_buf(
        struct xfs_trans        *tp,
        struct xfs_buf          *bp)
 {
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
@@ -557,7 +561,7 @@ xfs_trans_log_buf(
        uint                    first,
        uint                    last)
 {
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(first <= last && last < BBTOB(bp->b_length));
        ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED));
@@ -600,10 +604,10 @@ xfs_trans_log_buf(
  */
 void
 xfs_trans_binval(
-       xfs_trans_t     *tp,
-       xfs_buf_t       *bp)
+       xfs_trans_t             *tp,
+       xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
        int                     i;
 
        ASSERT(bp->b_transp == tp);
@@ -655,10 +659,10 @@ xfs_trans_binval(
  */
 void
 xfs_trans_inode_buf(
-       xfs_trans_t     *tp,
-       xfs_buf_t       *bp)
+       xfs_trans_t             *tp,
+       xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
@@ -679,10 +683,10 @@ xfs_trans_inode_buf(
  */
 void
 xfs_trans_stale_inode_buf(
-       xfs_trans_t     *tp,
-       xfs_buf_t       *bp)
+       xfs_trans_t             *tp,
+       xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
@@ -704,10 +708,10 @@ xfs_trans_stale_inode_buf(
 /* ARGSUSED */
 void
 xfs_trans_inode_alloc_buf(
-       xfs_trans_t     *tp,
-       xfs_buf_t       *bp)
+       xfs_trans_t             *tp,
+       xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
@@ -729,7 +733,7 @@ xfs_trans_ordered_buf(
        struct xfs_trans        *tp,
        struct xfs_buf          *bp)
 {
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
@@ -759,7 +763,7 @@ xfs_trans_buf_set_type(
        struct xfs_buf          *bp,
        enum xfs_blft           type)
 {
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        if (!tp)
                return;
@@ -776,8 +780,8 @@ xfs_trans_buf_copy_type(
        struct xfs_buf          *dst_bp,
        struct xfs_buf          *src_bp)
 {
-       struct xfs_buf_log_item *sbip = src_bp->b_fspriv;
-       struct xfs_buf_log_item *dbip = dst_bp->b_fspriv;
+       struct xfs_buf_log_item *sbip = src_bp->b_log_item;
+       struct xfs_buf_log_item *dbip = dst_bp->b_log_item;
        enum xfs_blft           type;
 
        type = xfs_blft_from_flags(&sbip->__bli_format);
@@ -797,11 +801,11 @@ xfs_trans_buf_copy_type(
 /* ARGSUSED */
 void
 xfs_trans_dquot_buf(
-       xfs_trans_t     *tp,
-       xfs_buf_t       *bp,
-       uint            type)
+       xfs_trans_t             *tp,
+       xfs_buf_t               *bp,
+       uint                    type)
 {
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
        ASSERT(type == XFS_BLF_UDQUOT_BUF ||
               type == XFS_BLF_PDQUOT_BUF ||
index 8f6654c2171185e4eae7ad6504b9625b7c7a1e00..2a815560fda0e162c5c27da8249b9f3921328c21 100644 (file)
@@ -748,6 +748,11 @@ static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
        down_write_nested(&inode->i_rwsem, subclass);
 }
 
+static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass)
+{
+       down_read_nested(&inode->i_rwsem, subclass);
+}
+
 void lock_two_nondirectories(struct inode *, struct inode*);
 void unlock_two_nondirectories(struct inode *, struct inode*);
 
@@ -2980,6 +2985,7 @@ enum {
 };
 
 void dio_end_io(struct bio *bio);
+void dio_warn_stale_pagecache(struct file *filp);
 
 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
                             struct block_device *bdev, struct iov_iter *iter,