1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_alloc.h"
19 #include "xfs_bmap_btree.h"
21 #include "xfs_rmap_btree.h"
22 #include "scrub/scrub.h"
23 #include "scrub/common.h"
24 #include "scrub/btree.h"
26 /*
 * Set up the scrub context for checking the block mappings of an inode.
 *
 * Steps visible here, in order:
 *  - look up the target inode with xchk_get_inode();
 *  - take XFS_IOLOCK_EXCL and XFS_MMAPLOCK_EXCL, recording both in
 *    sc->ilock_flags;
 *  - for a regular file scrubbed as XFS_SCRUB_TYPE_BMBTD, wait for
 *    in-flight direct I/O and write back the page cache;
 *  - allocate the scrub transaction with xchk_trans_alloc();
 *  - take XFS_ILOCK_EXCL and add it to sc->ilock_flags.
 *
 * NOTE(review): the error handling after each call is not visible in
 * this view; presumably any failure is returned to the caller -- confirm.
 */
28 xchk_setup_inode_bmap(
34 error = xchk_get_inode(sc, ip);
38 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
39 xfs_ilock(sc->ip, sc->ilock_flags);
42 * We don't want any ephemeral data fork updates sitting around
43 * while we inspect block mappings, so wait for directio to finish
44 * and flush dirty data if we have delalloc reservations.
46 if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
47 sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
48 inode_dio_wait(VFS_I(sc->ip));
49 error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
54 /* Got the inode, lock it and we're ready to go. */
55 error = xchk_trans_alloc(sc, 0);
58 sc->ilock_flags |= XFS_ILOCK_EXCL;
59 xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
62 /* scrub teardown will unlock and release the inode */
67 * Inode fork block mapping (BMBT) scrubber.
68 * More complex than the others because we have to scrub
69 * all the extents regardless of whether or not the fork
/*
 * Per-fork state carried through a bmap scrub.  Only lastoff is visible
 * in this view; the other members used elsewhere in this file (sc,
 * whichfork, is_rt, is_shared, was_loaded) are declared on lines that
 * are not shown here.
 */
73 struct xchk_bmap_info {
/*
 * File offset just past the most recently scrubbed extent; the per-extent
 * check compares the next br_startoff against it to detect out-of-order
 * records.
 */
75 xfs_fileoff_t lastoff;
82 /*
 * Look for a corresponding rmap for this irec.
 *
 * The lookup flags get XFS_RMAP_ATTR_FORK for attr fork mappings.  Shared
 * extents (info->is_shared) use xfs_rmap_lookup_le_range(), which is
 * handed *rmap directly; everything else uses xfs_rmap_lookup_le()
 * followed by xfs_rmap_get_rec().  The error from every rmap btree call
 * is passed through xchk_should_check_xref() together with the rmap
 * cursor.
 *
 * The caller in this file tests the return value for zero and does not
 * inspect *rmap in that case.
 *
 * NOTE(review): the return statements and the condition guarding the
 * final xchk_fblock_xref_set_corrupt() call are not visible in this view
 * (presumably it fires when no record was found) -- confirm.
 */
85 struct xchk_bmap_info *info,
86 struct xfs_bmbt_irec *irec,
89 struct xfs_rmap_irec *rmap)
92 unsigned int rflags = 0;
/* Attr fork mappings are looked up with the attr-fork rmap flag set. */
96 if (info->whichfork == XFS_ATTR_FORK)
97 rflags |= XFS_RMAP_ATTR_FORK;
100 * CoW staging extents are owned (on disk) by the refcountbt, so
101 * their rmaps do not have offsets.
103 if (info->whichfork == XFS_COW_FORK)
106 offset = irec->br_startoff;
109 * If the caller thinks this could be a shared bmbt extent (IOWs,
110 * any data fork extent of a reflink inode) then we have to use the
111 * range rmap lookup to make sure we get the correct owner/offset.
113 if (info->is_shared) {
114 error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
115 owner, offset, rflags, rmap, &has_rmap);
116 if (!xchk_should_check_xref(info->sc, &error,
117 &info->sc->sa.rmap_cur))
123 * Otherwise, use the (faster) regular lookup.
125 error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
126 offset, rflags, &has_rmap);
127 if (!xchk_should_check_xref(info->sc, &error,
128 &info->sc->sa.rmap_cur))
133 error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
134 if (!xchk_should_check_xref(info->sc, &error,
135 &info->sc->sa.rmap_cur))
140 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
145 /*
 * Make sure that we have rmapbt records for this extent.
 *
 * The function first tests for a missing rmap cursor or for
 * xchk_skip_xref() (presumably returning early; that statement is not
 * shown here).  The expected owner is XFS_RMAP_OWN_COW for the CoW fork
 * and the inode number otherwise.  Once xchk_bmap_get_rmap() has found a
 * record, the fork is flagged as cross-reference corrupt when:
 *  - the rmap does not physically cover the extent, i.e. it starts after
 *    agbno or ends before agbno + br_blockcount;
 *  - (non-CoW forks only) the rmap starts after br_startoff or ends
 *    before br_startoff + br_blockcount;
 *  - the rmap owner differs from the expected owner;
 *  - (owner other than XFS_RMAP_OWN_COW) the extent is unwritten but the
 *    rmap lacks XFS_RMAP_UNWRITTEN;
 *  - an attr fork mapping has an rmap without XFS_RMAP_ATTR_FORK;
 *  - the rmap carries XFS_RMAP_BMBT_BLOCK.
 *
 * The end positions are computed in unsigned long long.
 */
148 struct xchk_bmap_info *info,
149 struct xfs_bmbt_irec *irec,
152 struct xfs_rmap_irec rmap;
153 unsigned long long rmap_end;
156 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
159 if (info->whichfork == XFS_COW_FORK)
160 owner = XFS_RMAP_OWN_COW;
162 owner = info->sc->ip->i_ino;
164 /* Find the rmap record for this irec. */
165 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
168 /* Check the rmap. */
169 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
170 if (rmap.rm_startblock > agbno ||
171 agbno + irec->br_blockcount > rmap_end)
172 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
176 * Check the logical offsets if applicable. CoW staging extents
177 * don't track logical offsets since the mappings only exist in
180 if (info->whichfork != XFS_COW_FORK) {
181 rmap_end = (unsigned long long)rmap.rm_offset +
183 if (rmap.rm_offset > irec->br_startoff ||
184 irec->br_startoff + irec->br_blockcount > rmap_end)
185 xchk_fblock_xref_set_corrupt(info->sc,
186 info->whichfork, irec->br_startoff);
189 if (rmap.rm_owner != owner)
190 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
194 * Check for discrepancies between the unwritten flag in the irec and
195 * the rmap. Note that the (in-memory) CoW fork distinguishes between
196 * unwritten and written extents, but we don't track that in the rmap
197 * records because the blocks are owned (on-disk) by the refcountbt,
198 * which doesn't track unwritten state.
200 if (owner != XFS_RMAP_OWN_COW &&
201 irec->br_state == XFS_EXT_UNWRITTEN &&
202 !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
203 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
206 if (info->whichfork == XFS_ATTR_FORK &&
207 !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
208 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
210 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
211 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
215 /*
 * Cross-reference a single rtdev extent record.
 *
 * The only visible check hands the start block and length of the extent
 * to xchk_xref_is_used_rt_space().  The ip argument is not referenced by
 * the visible code.
 */
217 xchk_bmap_rt_iextent_xref(
218 struct xfs_inode *ip,
219 struct xchk_bmap_info *info,
220 struct xfs_bmbt_irec *irec)
222 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
223 irec->br_blockcount);
226 /*
 * Cross-reference a single datadev extent record.
 *
 * Splits br_startblock into an AG number and an AG block number, sets up
 * the per-AG scrub state with xchk_ag_init(), and then runs
 * xchk_xref_is_used_space(), xchk_xref_is_not_inode_chunk() and
 * xchk_bmap_xref_rmap() on the extent.  A switch on the fork type adds
 * the reflink-related checks (xchk_xref_is_not_shared() and
 * xchk_xref_is_cow_staging()).  The per-AG state is released with
 * xchk_ag_free() at the end.
 *
 * NOTE(review): the case labels of the switch and the statement guarded
 * by the xchk_fblock_process_error() test are not visible in this view --
 * confirm which fork gets which check.
 */
228 xchk_bmap_iextent_xref(
229 struct xfs_inode *ip,
230 struct xchk_bmap_info *info,
231 struct xfs_bmbt_irec *irec)
233 struct xfs_mount *mp = info->sc->mp;
239 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
240 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
241 len = irec->br_blockcount;
243 error = xchk_ag_init(info->sc, agno, &info->sc->sa);
244 if (!xchk_fblock_process_error(info->sc, info->whichfork,
245 irec->br_startoff, &error))
248 xchk_xref_is_used_space(info->sc, agbno, len);
249 xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
250 xchk_bmap_xref_rmap(info, irec, agbno);
251 switch (info->whichfork) {
253 if (xfs_is_reflink_inode(info->sc->ip))
257 xchk_xref_is_not_shared(info->sc, agbno,
258 irec->br_blockcount);
261 xchk_xref_is_cow_staging(info->sc, agbno,
262 irec->br_blockcount);
266 xchk_ag_free(info->sc, &info->sc->sa);
/*
 * Check the logical offsets of a directory or attr fork extent.
 *
 * The first test singles out everything that is neither a directory nor
 * an attr fork mapping (presumably returning early -- that statement is
 * not shown here).  Both the first offset (br_startoff) and the last
 * offset (br_startoff + br_blockcount - 1) are passed to
 * xfs_verify_dablk(); a failure of either flags the fork as corrupt.
 */
270 * Directories and attr forks should never have blocks that can't be addressed
274 xchk_bmap_dirattr_extent(
275 struct xfs_inode *ip,
276 struct xchk_bmap_info *info,
277 struct xfs_bmbt_irec *irec)
279 struct xfs_mount *mp = ip->i_mount;
282 if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
285 if (!xfs_verify_dablk(mp, irec->br_startoff))
286 xchk_fblock_set_corrupt(info->sc, info->whichfork,
289 off = irec->br_startoff + irec->br_blockcount - 1;
290 if (!xfs_verify_dablk(mp, off))
291 xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
294 /*
 * Scrub a single extent record.
 *
 * Checks made on the record, each flagging the fork corrupt on failure:
 *  - it must not start before info->lastoff (out-of-order extents);
 *  - directory/attr offset limits, via xchk_bmap_dirattr_extent();
 *  - the start block must be neither HOLESTARTBLOCK nor a null
 *    (delalloc) start block;
 *  - the length must not exceed MAXEXTLEN, and start block + length must
 *    not wrap around;
 *  - the first and last block must pass xfs_verify_rtbno(), or pass
 *    xfs_verify_fsbno() and lie in the same AG;
 *  - attr fork extents must not be unwritten.
 *
 * Cross-referencing (realtime or data device variant) follows, and
 * info->lastoff is then set to the end of this extent.
 *
 * NOTE(review): the conditions selecting the realtime vs. data device
 * branches, and the statement guarded by the XFS_SCRUB_OFLAG_CORRUPT
 * test, are on lines not visible here -- confirm.
 */
297 struct xfs_inode *ip,
298 struct xchk_bmap_info *info,
299 struct xfs_bmbt_irec *irec)
301 struct xfs_mount *mp = info->sc->mp;
306 * Check for out-of-order extents. This record could have come
307 * from the incore list, for which there is no ordering check.
309 if (irec->br_startoff < info->lastoff)
310 xchk_fblock_set_corrupt(info->sc, info->whichfork,
313 xchk_bmap_dirattr_extent(ip, info, irec);
315 /* There should never be a "hole" extent in either extent list. */
316 if (irec->br_startblock == HOLESTARTBLOCK)
317 xchk_fblock_set_corrupt(info->sc, info->whichfork,
321 * Check for delalloc extents. We never iterate the ones in the
322 * in-core extent scan, and we should never see these in the bmbt.
324 if (isnullstartblock(irec->br_startblock))
325 xchk_fblock_set_corrupt(info->sc, info->whichfork,
328 /* Make sure the extent points to a valid place. */
329 if (irec->br_blockcount > MAXEXTLEN)
330 xchk_fblock_set_corrupt(info->sc, info->whichfork,
332 if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
333 xchk_fblock_set_corrupt(info->sc, info->whichfork,
335 end = irec->br_startblock + irec->br_blockcount - 1;
337 (!xfs_verify_rtbno(mp, irec->br_startblock) ||
338 !xfs_verify_rtbno(mp, end)))
339 xchk_fblock_set_corrupt(info->sc, info->whichfork,
342 (!xfs_verify_fsbno(mp, irec->br_startblock) ||
343 !xfs_verify_fsbno(mp, end) ||
344 XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
345 XFS_FSB_TO_AGNO(mp, end)))
346 xchk_fblock_set_corrupt(info->sc, info->whichfork,
349 /* We don't allow unwritten extents on attr forks. */
350 if (irec->br_state == XFS_EXT_UNWRITTEN &&
351 info->whichfork == XFS_ATTR_FORK)
352 xchk_fblock_set_corrupt(info->sc, info->whichfork,
355 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
359 xchk_bmap_rt_iextent_xref(ip, info, irec);
361 xchk_bmap_iextent_xref(ip, info, irec);
363 info->lastoff = irec->br_startoff + irec->br_blockcount;
367 /*
 * Scrub a bmbt record.
 *
 * Used as the per-record callback handed to xchk_btree() in this file.
 *
 * On CRC-enabled filesystems, when the level-0 cursor points at the
 * first record of a block (bc_ptrs[0] == 1), bb_owner of the btree block
 * at each level from 0 to bc_nlevels - 2 is compared against the inode
 * number.  The on-disk record is then decoded with
 * xfs_bmbt_disk_get_all() and must match the incore extent found by
 * xfs_iext_lookup_extent() in start offset, start block, length and
 * state; a failed lookup or any mismatch flags the fork corrupt.
 *
 * NOTE(review): the statement guarded by the !info->was_loaded test is
 * not visible here; going by the comment above it, the incore comparison
 * is skipped in that case -- confirm.
 */
370 struct xchk_btree *bs,
371 union xfs_btree_rec *rec)
373 struct xfs_bmbt_irec irec;
374 struct xfs_bmbt_irec iext_irec;
375 struct xfs_iext_cursor icur;
376 struct xchk_bmap_info *info = bs->private;
377 struct xfs_inode *ip = bs->cur->bc_private.b.ip;
378 struct xfs_buf *bp = NULL;
379 struct xfs_btree_block *block;
380 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, info->whichfork);
385 * Check the owners of the btree blocks up to the level below
386 * the root since the verifiers don't do that.
388 if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
389 bs->cur->bc_ptrs[0] == 1) {
390 for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
391 block = xfs_btree_get_block(bs->cur, i, &bp);
392 owner = be64_to_cpu(block->bb_u.l.bb_owner);
393 if (owner != ip->i_ino)
394 xchk_fblock_set_corrupt(bs->sc,
400 * Check that the incore extent tree contains an extent that matches
401 * this one exactly. We validate those cached bmaps later, so we don't
402 * need to check them here. If the incore extent tree was just loaded
403 * from disk by the scrubber, we assume that its contents match what's
404 * on disk (we still hold the ILOCK) and skip the equivalence check.
406 if (!info->was_loaded)
409 xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
410 if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
412 irec.br_startoff != iext_irec.br_startoff ||
413 irec.br_startblock != iext_irec.br_startblock ||
414 irec.br_blockcount != iext_irec.br_blockcount ||
415 irec.br_state != iext_irec.br_state)
416 xchk_fblock_set_corrupt(bs->sc, info->whichfork,
421 /*
 * Scan the btree records.
 *
 * Records in info->was_loaded whether the fork already had XFS_IFEXTENTS
 * set; if it did not, xfs_iread_extents() is called first.  A bmbt cursor
 * is then created and walked by xchk_btree() with xchk_bmapbt_rec() as
 * the per-record callback and info as its private data, using the owner
 * info filled in by xfs_rmap_ino_bmbt_owner().  The cursor is deleted
 * with xfs_btree_del_cursor(), which is passed the walk result.
 *
 * NOTE(review): the statement guarded by the xchk_fblock_process_error()
 * test and the final return are not visible in this view -- confirm.
 */
424 struct xfs_scrub *sc,
426 struct xchk_bmap_info *info)
428 struct xfs_owner_info oinfo;
429 struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, whichfork);
430 struct xfs_mount *mp = sc->mp;
431 struct xfs_inode *ip = sc->ip;
432 struct xfs_btree_cur *cur;
435 /* Load the incore bmap cache if it's not loaded. */
436 info->was_loaded = ifp->if_flags & XFS_IFEXTENTS;
437 if (!info->was_loaded) {
438 error = xfs_iread_extents(sc->tp, ip, whichfork);
439 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
443 /* Check the btree structure. */
444 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
445 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
446 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
447 xfs_btree_del_cursor(cur, error);
/*
 * State handed to xchk_bmap_check_rmap() as its private pointer while
 * rmap records are being walked.
 * NOTE(review): the whichfork member used elsewhere in this file is
 * declared on a line that is not visible here.
 */
452 struct xchk_bmap_check_rmap_info {
/* Scrub context of the inode being checked. */
453 struct xfs_scrub *sc;
/* Incore extent cursor passed to the extent lookups in the callback. */
455 struct xfs_iext_cursor icur;
458 /*
 * Can we find bmaps that fit this rmap?
 *
 * Callback handed to xfs_rmap_query_all(); priv is a
 * struct xchk_bmap_check_rmap_info.  The record is first tested for
 * owner (must be this inode), fork (attr vs. non-attr must agree with
 * sbcri->whichfork) and the XFS_RMAP_BMBT_BLOCK flag.  The incore extent
 * list is then searched from rec->rm_offset, and for each mapping the
 * start offset, the start block (rm_startblock converted from an AG
 * block to a filesystem block) and the length are compared with what is
 * left of the rmap.  The rmap start block, offset and block count are
 * advanced by the length of each mapping until rm_blockcount reaches
 * zero.
 *
 * Note that *rec is modified in place while it is being consumed.
 *
 * NOTE(review): the statements guarded by the early tests, the loop
 * construct and the return values are not visible in this view --
 * confirm.
 */
460 xchk_bmap_check_rmap(
461 struct xfs_btree_cur *cur,
462 struct xfs_rmap_irec *rec,
465 struct xfs_bmbt_irec irec;
466 struct xchk_bmap_check_rmap_info *sbcri = priv;
467 struct xfs_ifork *ifp;
468 struct xfs_scrub *sc = sbcri->sc;
471 /* Is this even the right fork? */
472 if (rec->rm_owner != sc->ip->i_ino)
474 if ((sbcri->whichfork == XFS_ATTR_FORK) ^
475 !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
477 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
480 /* Now look up the bmbt record. */
481 ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork);
483 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
487 have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
488 &sbcri->icur, &irec);
490 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
493 * bmap extent record lengths are constrained to 2^21 blocks in length
494 * because of space constraints in the on-disk metadata structure.
495 * However, rmap extent record lengths are constrained only by AG
496 * length, so we have to loop through the bmbt to make sure that the
497 * entire rmap is covered by bmbt records.
500 if (irec.br_startoff != rec->rm_offset)
501 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
503 if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
504 cur->bc_private.a.agno, rec->rm_startblock))
505 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
507 if (irec.br_blockcount > rec->rm_blockcount)
508 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
510 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
512 rec->rm_startblock += irec.br_blockcount;
513 rec->rm_offset += irec.br_blockcount;
514 rec->rm_blockcount -= irec.br_blockcount;
515 if (rec->rm_blockcount == 0)
517 have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
519 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
524 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
529 /*
 * Make sure each rmap has a corresponding bmbt entry.
 *
 * Works on a single AG: reads the AGF with xfs_alloc_read_agf(), creates
 * an rmap btree cursor on it, and runs xchk_bmap_check_rmap() over every
 * record via xfs_rmap_query_all().  A result of -ECANCELED from the query
 * gets special handling (that statement is not visible here).  The cursor
 * is deleted and the AGF buffer released afterwards.
 *
 * NOTE(review): the error check after xfs_alloc_read_agf(), the
 * initialisation of the other sbcri members and the return are on lines
 * not shown in this view -- confirm.
 */
531 xchk_bmap_check_ag_rmaps(
532 struct xfs_scrub *sc,
536 struct xchk_bmap_check_rmap_info sbcri;
537 struct xfs_btree_cur *cur;
541 error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf);
545 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno);
552 sbcri.whichfork = whichfork;
553 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
554 if (error == -ECANCELED)
557 xfs_btree_del_cursor(cur, error);
559 xfs_trans_brelse(sc->tp, agf);
563 /*
 * Make sure each rmap has a corresponding bmbt entry.
 *
 * Runs xchk_bmap_check_ag_rmaps() once for every AG in the filesystem.
 * The tests up front single out filesystems without an rmap btree, the
 * CoW fork, scrubs that have already found corruption, and the data fork
 * of realtime inodes.  A fork that is not in btree format is also
 * singled out when size is zero or the fork has a nonzero extent count.
 * The per-AG loop tests XFS_SCRUB_OFLAG_CORRUPT after each AG.
 *
 * NOTE(review): the statements guarded by those tests (presumably early
 * returns), and the condition selecting between the two size
 * assignments, are not visible in this view -- confirm.
 */
565 xchk_bmap_check_rmaps(
566 struct xfs_scrub *sc,
573 if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) ||
574 whichfork == XFS_COW_FORK ||
575 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
578 /* Don't support realtime rmap checks yet. */
579 if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK)
583 * Only do this for complex maps that are in btree format, or for
584 * situations where we would seem to have a size but zero extents.
585 * The inode repair code can zap broken iforks, which means we have
586 * to flag this bmap as corrupt if there are rmaps that need to be
591 size = i_size_read(VFS_I(sc->ip));
594 size = XFS_IFORK_Q(sc->ip);
600 if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE &&
601 (size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0))
604 for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
605 error = xchk_bmap_check_ag_rmaps(sc, whichfork, agno);
608 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
/*
 * Scrub the block mappings of one fork of sc->ip.
 *
 * Fills in a struct xchk_bmap_info for the fork (is_rt, whichfork,
 * is_shared), applies fork-specific sanity checks, and then switches on
 * the fork format: UUID, DEV and LOCAL forks have no mappings to check,
 * an EXTENTS fork without XFS_IFEXTENTS is flagged corrupt, and a BTREE
 * fork is walked with xchk_bmap_btree() unless it is the CoW fork, which
 * is flagged corrupt instead.  Afterwards the incore extents are iterated
 * and handed to xchk_bmap_iextent(); an extent starting at or beyond the
 * offset reported by xfs_bmap_last_offset() is flagged corrupt.  Finally
 * xchk_bmap_check_rmaps() cross-checks the rmap records.
 *
 * NOTE(review): several guarded statements, case labels and returns are
 * not visible in this view -- confirm the exact control flow.
 */
616 * Scrub an inode fork's block mappings.
618 * First we scan every record in every btree block, if applicable.
619 * Then we unconditionally scan the incore extent cache.
623 struct xfs_scrub *sc,
626 struct xfs_bmbt_irec irec;
627 struct xchk_bmap_info info = { NULL };
628 struct xfs_mount *mp = sc->mp;
629 struct xfs_inode *ip = sc->ip;
630 struct xfs_ifork *ifp;
631 xfs_fileoff_t endoff;
632 struct xfs_iext_cursor icur;
635 ifp = XFS_IFORK_PTR(ip, whichfork);
637 info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
638 info.whichfork = whichfork;
639 info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
644 /* Non-existent CoW forks are ignorable. */
647 /* No CoW forks on non-reflink inodes/filesystems. */
648 if (!xfs_is_reflink_inode(ip)) {
649 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
656 if (!xfs_sb_version_hasattr(&mp->m_sb) &&
657 !xfs_sb_version_hasattr2(&mp->m_sb))
658 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
661 ASSERT(whichfork == XFS_DATA_FORK);
665 /* Check the fork values */
666 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
667 case XFS_DINODE_FMT_UUID:
668 case XFS_DINODE_FMT_DEV:
669 case XFS_DINODE_FMT_LOCAL:
670 /* No mappings to check. */
672 case XFS_DINODE_FMT_EXTENTS:
673 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
674 xchk_fblock_set_corrupt(sc, whichfork, 0);
678 case XFS_DINODE_FMT_BTREE:
679 if (whichfork == XFS_COW_FORK) {
680 xchk_fblock_set_corrupt(sc, whichfork, 0);
684 error = xchk_bmap_btree(sc, whichfork, &info);
689 xchk_fblock_set_corrupt(sc, whichfork, 0);
693 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
696 /* Find the offset of the last extent in the mapping. */
697 error = xfs_bmap_last_offset(ip, &endoff, whichfork);
698 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
701 /* Scrub extent records. */
703 ifp = XFS_IFORK_PTR(ip, whichfork);
704 for_each_xfs_iext(ifp, &icur, &irec) {
705 if (xchk_should_terminate(sc, &error) ||
706 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
708 if (isnullstartblock(irec.br_startblock))
710 if (irec.br_startoff >= endoff) {
711 xchk_fblock_set_corrupt(sc, whichfork,
715 error = xchk_bmap_iextent(ip, &info, &irec);
721 error = xchk_bmap_check_rmaps(sc, whichfork);
722 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
728 /*
 * Scrub the data fork of sc->ip: calls xchk_bmap() with XFS_DATA_FORK
 * and returns its result unchanged.
 */
731 struct xfs_scrub *sc)
733 return xchk_bmap(sc, XFS_DATA_FORK);
736 /*
 * Scrub the attr fork of sc->ip: calls xchk_bmap() with XFS_ATTR_FORK
 * and returns its result unchanged.
 */
739 struct xfs_scrub *sc)
741 return xchk_bmap(sc, XFS_ATTR_FORK);
744 /*
 * Scrub the CoW fork of sc->ip: calls xchk_bmap() with XFS_COW_FORK and
 * returns its result.  Inodes that are not reflink inodes are tested for
 * first.
 *
 * NOTE(review): the statement guarded by that test is not visible in
 * this view -- confirm what is returned for non-reflink inodes.
 */
747 struct xfs_scrub *sc)
749 if (!xfs_is_reflink_inode(sc->ip))
752 return xchk_bmap(sc, XFS_COW_FORK);