asedeno.scripts.mit.edu Git - linux.git/blobdiff - fs/xfs/scrub/ialloc.c
xfs: consolidate scrub dinode mapping code into a single function
[linux.git] / fs / xfs / scrub / ialloc.c
index 882dc56c5c21e5c0f8d02ea705669eced2132664..2c9dad2b61b1a921754ccb3f746287e4293a9215 100644 (file)
@@ -47,6 +47,12 @@ xchk_setup_ag_iallocbt(
 struct xchk_iallocbt {
        /* Number of inodes we see while scanning inobt. */
        unsigned long long      inodes;
+
+       /* Expected next startino, for big block filesystems. */
+       xfs_agino_t             next_startino;
+
+       /* Expected end of the current inode cluster. */
+       xfs_agino_t             next_cluster_ino;
 };
 
 /*
@@ -128,41 +134,57 @@ xchk_iallocbt_freecount(
        return hweight64(freemask);
 }
 
-/* Check a particular inode with ir_free. */
+/*
+ * Check that an inode's allocation status matches ir_free in the inobt
+ * record.  First we try querying the in-core inode state, and if the inode
+ * isn't loaded we examine the on-disk inode directly.
+ *
+ * Since there can be 1:M and M:1 mappings between inobt records and inode
+ * clusters, we pass in the inode location information as an inobt record;
+ * the index of an inode cluster within the inobt record (as well as the
+ * cluster buffer itself); and the index of the inode within the cluster.
+ *
+ * @irec is the inobt record.
+ * @irec_ino is the inode offset from the start of the record.
+ * @dip is the on-disk inode.
+ */
 STATIC int
-xchk_iallocbt_check_cluster_freemask(
+xchk_iallocbt_check_cluster_ifree(
        struct xchk_btree               *bs,
-       xfs_ino_t                       fsino,
-       xfs_agino_t                     chunkino,
-       xfs_agino_t                     clusterino,
        struct xfs_inobt_rec_incore     *irec,
-       struct xfs_buf                  *bp)
+       unsigned int                    irec_ino,
+       struct xfs_dinode               *dip)
 {
-       struct xfs_dinode               *dip;
        struct xfs_mount                *mp = bs->cur->bc_mp;
-       bool                            inode_is_free = false;
+       xfs_ino_t                       fsino;
+       xfs_agino_t                     agino;
+       bool                            irec_free;
+       bool                            ino_inuse;
        bool                            freemask_ok;
-       bool                            inuse;
-       int                             error = 0;
+       int                             error;
 
        if (xchk_should_terminate(bs->sc, &error))
                return error;
 
-       dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
+       /*
+        * Given an inobt record and the offset of an inode from the start of
+        * the record, compute which fs inode we're talking about.
+        */
+       agino = irec->ir_startino + irec_ino;
+       fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
+       irec_free = (irec->ir_free & XFS_INOBT_MASK(irec_ino));
+
        if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
-           (dip->di_version >= 3 &&
-            be64_to_cpu(dip->di_ino) != fsino + clusterino)) {
+           (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)) {
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
                goto out;
        }
 
-       if (irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino))
-               inode_is_free = true;
-       error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp,
-                       fsino + clusterino, &inuse);
+       error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, fsino,
+                       &ino_inuse);
        if (error == -ENODATA) {
                /* Not cached, just read the disk buffer */
-               freemask_ok = inode_is_free ^ !!(dip->di_mode);
+               freemask_ok = irec_free ^ !!(dip->di_mode);
                if (!bs->sc->try_harder && !freemask_ok)
                        return -EDEADLOCK;
        } else if (error < 0) {
@@ -174,7 +196,7 @@ xchk_iallocbt_check_cluster_freemask(
                goto out;
        } else {
                /* Inode is all there. */
-               freemask_ok = inode_is_free ^ inuse;
+               freemask_ok = irec_free ^ ino_inuse;
        }
        if (!freemask_ok)
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
@@ -182,86 +204,221 @@ xchk_iallocbt_check_cluster_freemask(
        return 0;
 }
 
-/* Make sure the free mask is consistent with what the inodes think. */
+/*
+ * Check that the holemask and freemask of a hypothetical inode cluster match
+ * what's actually on disk.  If sparse inodes are enabled, the cluster does
+ * not actually have to map to inodes if the corresponding holemask bit is set.
+ *
+ * @cluster_base is the first inode in the cluster within the @irec.
+ */
 STATIC int
-xchk_iallocbt_check_freemask(
+xchk_iallocbt_check_cluster(
        struct xchk_btree               *bs,
-       struct xfs_inobt_rec_incore     *irec)
+       struct xfs_inobt_rec_incore     *irec,
+       unsigned int                    cluster_base)
 {
        struct xfs_imap                 imap;
        struct xfs_mount                *mp = bs->cur->bc_mp;
        struct xfs_dinode               *dip;
-       struct xfs_buf                  *bp;
-       xfs_ino_t                       fsino;
-       xfs_agino_t                     nr_inodes;
-       xfs_agino_t                     agino;
-       xfs_agino_t                     chunkino;
-       xfs_agino_t                     clusterino;
+       struct xfs_buf                  *cluster_bp;
+       unsigned int                    nr_inodes;
+       xfs_agnumber_t                  agno = bs->cur->bc_private.a.agno;
        xfs_agblock_t                   agbno;
-       uint16_t                        holemask;
+       unsigned int                    cluster_index;
+       uint16_t                        cluster_mask = 0;
        uint16_t                        ir_holemask;
        int                             error = 0;
 
-       /* Make sure the freemask matches the inode records. */
-       nr_inodes = mp->m_inodes_per_cluster;
-
-       for (agino = irec->ir_startino;
-            agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
-            agino += mp->m_inodes_per_cluster) {
-               fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
-               chunkino = agino - irec->ir_startino;
-               agbno = XFS_AGINO_TO_AGBNO(mp, agino);
-
-               /* Compute the holemask mask for this cluster. */
-               for (clusterino = 0, holemask = 0; clusterino < nr_inodes;
-                    clusterino += XFS_INODES_PER_HOLEMASK_BIT)
-                       holemask |= XFS_INOBT_MASK((chunkino + clusterino) /
-                                       XFS_INODES_PER_HOLEMASK_BIT);
-
-               /* The whole cluster must be a hole or not a hole. */
-               ir_holemask = (irec->ir_holemask & holemask);
-               if (ir_holemask != holemask && ir_holemask != 0) {
+       nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
+                       mp->m_inodes_per_cluster);
+
+       /* Map this inode cluster */
+       agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);
+
+       /* Compute a bitmask for this cluster that can be used for holemask. */
+       for (cluster_index = 0;
+            cluster_index < nr_inodes;
+            cluster_index += XFS_INODES_PER_HOLEMASK_BIT)
+               cluster_mask |= XFS_INOBT_MASK((cluster_base + cluster_index) /
+                               XFS_INODES_PER_HOLEMASK_BIT);
+
+       /*
+        * Map the first inode of this cluster to a buffer and offset.
+        * Be careful about inobt records that don't align with the start of
+        * the inode buffer when block sizes are large enough to hold multiple
+        * inode chunks.  When this happens, cluster_base will be zero but
+        * ir_startino can be large enough to make im_boffset nonzero.
+        */
+       ir_holemask = (irec->ir_holemask & cluster_mask);
+       imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
+       imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+       imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino);
+
+       if (imap.im_boffset != 0 && cluster_base != 0) {
+               ASSERT(imap.im_boffset == 0 || cluster_base == 0);
+               xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+               return 0;
+       }
+
+       trace_xchk_iallocbt_check_cluster(mp, agno, irec->ir_startino,
+                       imap.im_blkno, imap.im_len, cluster_base, nr_inodes,
+                       cluster_mask, ir_holemask,
+                       XFS_INO_TO_OFFSET(mp, irec->ir_startino +
+                                         cluster_base));
+
+       /* The whole cluster must be a hole or not a hole. */
+       if (ir_holemask != cluster_mask && ir_holemask != 0) {
+               xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+               return 0;
+       }
+
+       /* If any part of this is a hole, skip it. */
+       if (ir_holemask) {
+               xchk_xref_is_not_owned_by(bs->sc, agbno,
+                               mp->m_blocks_per_cluster,
+                               &XFS_RMAP_OINFO_INODES);
+               return 0;
+       }
+
+       xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster,
+                       &XFS_RMAP_OINFO_INODES);
+
+       /* Grab the inode cluster buffer. */
+       error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp,
+                       0, 0);
+       if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error))
+               return error;
+
+       /* Check free status of each inode within this cluster. */
+       for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
+               struct xfs_dinode       *dip;
+
+               if (imap.im_boffset >= BBTOB(cluster_bp->b_length)) {
                        xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
-                       continue;
+                       break;
                }
 
-               /* If any part of this is a hole, skip it. */
-               if (ir_holemask) {
-                       xchk_xref_is_not_owned_by(bs->sc, agbno,
-                                       mp->m_blocks_per_cluster,
-                                       &XFS_RMAP_OINFO_INODES);
-                       continue;
+               dip = xfs_buf_offset(cluster_bp, imap.im_boffset);
+               error = xchk_iallocbt_check_cluster_ifree(bs, irec,
+                               cluster_base + cluster_index, dip);
+               if (error)
+                       break;
+               imap.im_boffset += mp->m_sb.sb_inodesize;
+       }
+
+       xfs_trans_brelse(bs->cur->bc_tp, cluster_bp);
+       return error;
+}
+
+/*
+ * For all the inode clusters that could map to this inobt record, make sure
+ * that the holemask makes sense and that the allocation status of each inode
+ * matches the freemask.
+ */
+STATIC int
+xchk_iallocbt_check_clusters(
+       struct xchk_btree               *bs,
+       struct xfs_inobt_rec_incore     *irec)
+{
+       unsigned int                    cluster_base;
+       int                             error = 0;
+
+       /*
+        * For the common case where this inobt record maps to multiple inode
+        * clusters this will call _check_cluster for each cluster.
+        *
+        * For the case that multiple inobt records map to a single cluster,
+        * this will call _check_cluster once.
+        */
+       for (cluster_base = 0;
+            cluster_base < XFS_INODES_PER_CHUNK;
+            cluster_base += bs->sc->mp->m_inodes_per_cluster) {
+               error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
+               if (error)
+                       break;
+       }
+
+       return error;
+}
+
+/*
+ * Make sure this inode btree record is aligned properly.  Because a fs block
+ * contains multiple inodes, we check that the inobt record is aligned to the
+ * correct inode, not just the correct block on disk.  This results in a finer
+ * grained corruption check.
+ */
+STATIC void
+xchk_iallocbt_rec_alignment(
+       struct xchk_btree               *bs,
+       struct xfs_inobt_rec_incore     *irec)
+{
+       struct xfs_mount                *mp = bs->sc->mp;
+       struct xchk_iallocbt            *iabt = bs->private;
+
+       /*
+        * finobt records have different positioning requirements than inobt
+        * records: each finobt record must have a corresponding inobt record.
+        * That is checked in the xref function, so for now we only catch the
+        * obvious case where the record isn't at all aligned properly.
+        *
+        * Note that if a fs block contains more than a single chunk of inodes,
+        * we will have finobt records only for those chunks containing free
+        * inodes, and therefore expect chunk alignment of finobt records.
+        * Otherwise, we expect that the finobt record is aligned to the
+        * cluster alignment as told by the superblock.
+        */
+       if (bs->cur->bc_btnum == XFS_BTNUM_FINO) {
+               unsigned int    imask;
+
+               imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
+                               mp->m_cluster_align_inodes) - 1;
+               if (irec->ir_startino & imask)
+                       xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+               return;
+       }
+
+       if (iabt->next_startino != NULLAGINO) {
+               /*
+                * We're midway through a cluster of inodes that is mapped by
+                * multiple inobt records.  Did we get the record for the next
+                * irec in the sequence?
+                */
+               if (irec->ir_startino != iabt->next_startino) {
+                       xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+                       return;
                }
 
-               xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster,
-                               &XFS_RMAP_OINFO_INODES);
+               iabt->next_startino += XFS_INODES_PER_CHUNK;
 
-               /* Grab the inode cluster buffer. */
-               imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
-                               agbno);
-               imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
-               imap.im_boffset = 0;
-
-               error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
-                               &dip, &bp, 0, 0);
-               if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0,
-                               &error))
-                       continue;
-
-               /* Which inodes are free? */
-               for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
-                       error = xchk_iallocbt_check_cluster_freemask(bs,
-                                       fsino, chunkino, clusterino, irec, bp);
-                       if (error) {
-                               xfs_trans_brelse(bs->cur->bc_tp, bp);
-                               return error;
-                       }
+               /* Are we done with the cluster? */
+               if (iabt->next_startino >= iabt->next_cluster_ino) {
+                       iabt->next_startino = NULLAGINO;
+                       iabt->next_cluster_ino = NULLAGINO;
                }
+               return;
+       }
+
+       /* inobt records must be aligned to cluster and inode alignment size. */
+       if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) {
+               xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+               return;
+       }
 
-               xfs_trans_brelse(bs->cur->bc_tp, bp);
+       if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) {
+               xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+               return;
        }
 
-       return error;
+       if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK)
+               return;
+
+       /*
+        * If this is the start of an inode cluster that can be mapped by
+        * multiple inobt records, the next inobt record must follow exactly
+        * after this one.
+        */
+       iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
+       iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster;
 }
 
 /* Scrub an inobt/finobt record. */
@@ -276,7 +433,6 @@ xchk_iallocbt_rec(
        uint64_t                        holes;
        xfs_agnumber_t                  agno = bs->cur->bc_private.a.agno;
        xfs_agino_t                     agino;
-       xfs_agblock_t                   agbno;
        xfs_extlen_t                    len;
        int                             holecount;
        int                             i;
@@ -303,11 +459,9 @@ xchk_iallocbt_rec(
                goto out;
        }
 
-       /* Make sure this record is aligned to cluster and inoalignmnt size. */
-       agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
-       if ((agbno & (mp->m_cluster_align - 1)) ||
-           (agbno & (mp->m_blocks_per_cluster - 1)))
-               xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+       xchk_iallocbt_rec_alignment(bs, &irec);
+       if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+               goto out;
 
        iabt->inodes += irec.ir_count;
 
@@ -320,7 +474,7 @@ xchk_iallocbt_rec(
 
                if (!xchk_iallocbt_chunk(bs, &irec, agino, len))
                        goto out;
-               goto check_freemask;
+               goto check_clusters;
        }
 
        /* Check each chunk of a sparse inode cluster. */
@@ -346,8 +500,8 @@ xchk_iallocbt_rec(
            holecount + irec.ir_count != XFS_INODES_PER_CHUNK)
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
-check_freemask:
-       error = xchk_iallocbt_check_freemask(bs, &irec);
+check_clusters:
+       error = xchk_iallocbt_check_clusters(bs, &irec);
        if (error)
                goto out;
 
@@ -429,6 +583,8 @@ xchk_iallocbt(
        struct xfs_btree_cur    *cur;
        struct xchk_iallocbt    iabt = {
                .inodes         = 0,
+               .next_startino  = NULLAGINO,
+               .next_cluster_ino = NULLAGINO,
        };
        int                     error;