2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_inode.h"
30 #include "xfs_trans.h"
31 #include "xfs_inode_item.h"
32 #include "xfs_error.h"
33 #include "xfs_btree.h"
34 #include "xfs_alloc_btree.h"
35 #include "xfs_alloc.h"
36 #include "xfs_rmap_btree.h"
37 #include "xfs_ialloc.h"
38 #include "xfs_fsops.h"
39 #include "xfs_itable.h"
40 #include "xfs_trans_space.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_trace.h"
44 #include "xfs_filestream.h"
46 #include "xfs_ag_resv.h"
49 * File system operations
/*
 * Get an uncached, zeroed buffer for a new AG header and point it at the
 * caller-supplied disk address.  Used while growing the filesystem, when
 * the headers do not yet exist on disk and so must not be read.
 * NOTE(review): this listing is truncated — the parameter list, the
 * NULL-buffer check and the return statement are missing from view.
 */
52 static struct xfs_buf *
53 xfs_growfs_get_hdr_buf(
58 	const struct xfs_buf_ops *ops)
62 	bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
	/* start from all-zeroes so stale buffer contents never reach disk */
66 	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
	/* place the buffer at the header's daddr chosen by the caller */
68 	bp->b_maps[0].bm_bn = blkno;
/*
 * Context passed between xfs_growfs_data_private() and the per-header
 * init functions: identifies the AG being built, accumulates the free
 * space created, and describes the header currently being written.
 */
74 struct aghdr_init_data {
76 	xfs_agnumber_t		agno;		/* ag to init */
77 	xfs_extlen_t		agsize;		/* new AG size */
78 	struct list_head	buffer_list;	/* buffer writeback list */
79 	xfs_rfsblock_t		nfree;		/* cumulative new free space */
	/* per-header state, rewritten for each header in the AG */
82 	xfs_daddr_t		daddr;		/* header location */
83 	size_t			numblks;	/* size of header */
84 	xfs_btnum_t		type;		/* type of btree root block */
88  * Generic btree root block init function: stamps an empty (level 0,
    * zero records) root block of the btree type selected by id->type.
    * Used for the INO, FINO and REFC roots via the aghdr_data table.
94 	struct aghdr_init_data *id)
96 	xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0);
100  * Alloc btree root block init functions.  The by-block-number (BNO)
     * root starts with a single record covering all space in the new AG
     * beyond the preallocated header blocks.
104 	struct xfs_mount	*mp,
106 	struct aghdr_init_data	*id)
108 	struct xfs_alloc_rec	*arec;
110 	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0);
111 	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	/* free space begins right after the statically-placed AG metadata */
112 	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
113 	arec->ar_blockcount = cpu_to_be32(id->agsize -
114 					  be32_to_cpu(arec->ar_startblock));
/*
 * By-size (CNT) alloc btree root: identical single free-space record to
 * the BNO root, just keyed by extent length instead of block number.
 */
119 	struct xfs_mount	*mp,
121 	struct aghdr_init_data	*id)
123 	struct xfs_alloc_rec	*arec;
125 	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0);
126 	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
127 	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
128 	arec->ar_blockcount = cpu_to_be32(id->agsize -
129 					  be32_to_cpu(arec->ar_startblock));
133  * Reverse map root block init: records ownership of the statically
     * placed AG metadata (headers, freespace/inode/rmap btree roots) and,
     * if reflink is enabled, the refcount btree root.
137 	struct xfs_mount	*mp,
139 	struct aghdr_init_data	*id)
141 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
142 	struct xfs_rmap_rec	*rrec;
	/* 4 records cover the always-present metadata; reflink adds a 5th */
144 	xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0);
147 	 * mark the AG header regions as static metadata.  The BNO
148 	 * btree block is the first block after the headers, so
149 	 * its location defines the size of region the static
152 	 * Note: unlike mkfs, we never have to account for log
153 	 * space when growing the data regions
155 	rrec = XFS_RMAP_REC_ADDR(block, 1);
156 	rrec->rm_startblock = 0;
157 	rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
158 	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
161 	/* account freespace btree root blocks */
162 	rrec = XFS_RMAP_REC_ADDR(block, 2);
163 	rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
164 	rrec->rm_blockcount = cpu_to_be32(2);
165 	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
168 	/* account inode btree root blocks */
169 	rrec = XFS_RMAP_REC_ADDR(block, 3);
170 	rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
171 	rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
173 	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
176 	/* account for rmap btree root */
177 	rrec = XFS_RMAP_REC_ADDR(block, 4);
178 	rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
179 	rrec->rm_blockcount = cpu_to_be32(1);
180 	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
183 	/* account for refc btree root */
184 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
185 		rrec = XFS_RMAP_REC_ADDR(block, 5);
186 		rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
187 		rrec->rm_blockcount = cpu_to_be32(1);
188 		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
		/* bump numrecs from 4 to 5 to cover the refcount record */
190 		be16_add_cpu(&block->bb_numrecs, 1);
195  * Initialise new secondary superblocks with the pre-grow geometry, but mark
196  * them as "in progress" so we know they haven't yet been activated. This will
197  * get cleared when the update with the new geometry information is done after
198  * changes to the primary are committed. This isn't strictly necessary, but we
199  * get it for free with the delayed buffer write lists and it means we can tell
200  * if a grow operation didn't complete properly after the fact.
204 	struct xfs_mount	*mp,
206 	struct aghdr_init_data	*id)
208 	struct xfs_dsb		*dsb = XFS_BUF_TO_SBP(bp);
	/* stamp the current in-core geometry, then flag it as not-yet-live */
210 	xfs_sb_to_disk(dsb, &mp->m_sb);
211 	dsb->sb_inprogress = 1;
/*
 * Initialise the AGF (free space header) for a new AG: magic/version/
 * sequence, AG length, the freespace (and optional rmap/refcount) btree
 * root locations and levels, and the initial free space counters.
 * NOTE(review): truncated view — the freelist last/count setup between
 * agf_flfirst and agf_flcount is partially missing.
 */
216 	struct xfs_mount	*mp,
218 	struct aghdr_init_data	*id)
220 	struct xfs_agf		*agf = XFS_BUF_TO_AGF(bp);
221 	xfs_extlen_t		tmpsize;
223 	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
224 	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
225 	agf->agf_seqno = cpu_to_be32(id->agno);
226 	agf->agf_length = cpu_to_be32(id->agsize);
	/* both freespace btrees start as single-level roots */
227 	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
228 	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
229 	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
230 	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
231 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
232 		agf->agf_roots[XFS_BTNUM_RMAPi] =
233 					cpu_to_be32(XFS_RMAP_BLOCK(mp));
234 		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
235 		agf->agf_rmap_blocks = cpu_to_be32(1);
	/* freelist starts empty */
238 	agf->agf_flfirst = cpu_to_be32(1);
240 	agf->agf_flcount = 0;
	/* everything past the preallocated metadata is one free extent */
241 	tmpsize = id->agsize - mp->m_ag_prealloc_blocks;
242 	agf->agf_freeblks = cpu_to_be32(tmpsize);
243 	agf->agf_longest = cpu_to_be32(tmpsize);
244 	if (xfs_sb_version_hascrc(&mp->m_sb))
245 		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
246 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
247 		agf->agf_refcount_root = cpu_to_be32(
249 		agf->agf_refcount_level = cpu_to_be32(1);
250 		agf->agf_refcount_blocks = cpu_to_be32(1);
/*
 * Initialise the AGFL (free list) for a new AG: identify it on CRC
 * filesystems and mark every bucket empty (NULLAGBLOCK).
 */
256 	struct xfs_mount	*mp,
258 	struct aghdr_init_data	*id)
260 	struct xfs_agfl		*agfl = XFS_BUF_TO_AGFL(bp);
	/* only v5 (CRC) filesystems carry magic/seqno/uuid in the AGFL */
264 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
265 		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
266 		agfl->agfl_seqno = cpu_to_be32(id->agno);
267 		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
270 	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
271 	for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
272 		agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
/*
 * Initialise the AGI (inode header) for a new AG: empty inode btree
 * root(s), zero inode counts, and all unlinked-list buckets empty.
 */
277 	struct xfs_mount	*mp,
279 	struct aghdr_init_data	*id)
281 	struct xfs_agi		*agi = XFS_BUF_TO_AGI(bp);
284 	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
285 	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
286 	agi->agi_seqno = cpu_to_be32(id->agno);
287 	agi->agi_length = cpu_to_be32(id->agsize);
289 	agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
290 	agi->agi_level = cpu_to_be32(1);
	/* new AG holds no inodes yet */
291 	agi->agi_freecount = 0;
292 	agi->agi_newino = cpu_to_be32(NULLAGINO);
293 	agi->agi_dirino = cpu_to_be32(NULLAGINO);
294 	if (xfs_sb_version_hascrc(&mp->m_sb))
295 		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
296 	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
297 		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
298 		agi->agi_free_level = cpu_to_be32(1);
	/* no unlinked (orphaned) inodes in a brand-new AG */
300 	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
301 		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
/* Signature shared by all the per-header init callbacks above. */
304 typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp,
305 				  struct aghdr_init_data *id);
/*
 * Build one AG header: get a zeroed uncached buffer at id->daddr, run the
 * type-specific init callback, and queue the buffer for delayed write on
 * id->buffer_list.  NOTE(review): truncated view — the buffer-allocation
 * failure check, the work() invocation and the return are missing.
 */
307 xfs_growfs_init_aghdr(
308 	struct xfs_mount	*mp,
309 	struct aghdr_init_data	*id,
310 	aghdr_init_work_f	work,
311 	const struct xfs_buf_ops *ops)
316 	bp = xfs_growfs_get_hdr_buf(mp, id->daddr, id->numblks, 0, ops);
322 	xfs_buf_delwri_queue(bp, &id->buffer_list);
/*
 * Table entry describing one AG header to create during grow: where it
 * lives, how it is verified, and which init callback fills it in.
 * NOTE(review): truncated view — the daddr/numblks/type/need_init fields
 * used by xfs_grow_ag_headers() are not visible here.
 */
327 struct xfs_aghdr_grow_data {
330 	const struct xfs_buf_ops *ops;
331 	aghdr_init_work_f	work;
337  * Write new AG headers to disk. Non-transactional, but written
338  * synchronously so they are completed prior to the growfs transaction
     * being logged.  Walks a table describing every header the new AG
     * needs (SB, AGF, AGFL, AGI, then each btree root), skipping entries
     * whose feature bit is not enabled (need_init).
343 	struct xfs_mount	*mp,
344 	struct aghdr_init_data	*id)
347 	struct xfs_aghdr_grow_data aghdr_data[] = {
349 		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR),
350 		.numblks = XFS_FSS_TO_BB(mp, 1),
351 		.ops = &xfs_sb_buf_ops,
352 		.work = &xfs_sbblock_init,
356 		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)),
357 		.numblks = XFS_FSS_TO_BB(mp, 1),
358 		.ops = &xfs_agf_buf_ops,
359 		.work = &xfs_agfblock_init,
363 		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)),
364 		.numblks = XFS_FSS_TO_BB(mp, 1),
365 		.ops = &xfs_agfl_buf_ops,
366 		.work = &xfs_agflblock_init,
370 		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)),
371 		.numblks = XFS_FSS_TO_BB(mp, 1),
372 		.ops = &xfs_agi_buf_ops,
373 		.work = &xfs_agiblock_init,
376 	{ /* BNO root block */
377 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)),
378 		.numblks = BTOBB(mp->m_sb.sb_blocksize),
379 		.ops = &xfs_allocbt_buf_ops,
380 		.work = &xfs_bnoroot_init,
383 	{ /* CNT root block */
384 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)),
385 		.numblks = BTOBB(mp->m_sb.sb_blocksize),
386 		.ops = &xfs_allocbt_buf_ops,
387 		.work = &xfs_cntroot_init,
390 	{ /* INO root block */
391 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)),
392 		.numblks = BTOBB(mp->m_sb.sb_blocksize),
393 		.ops = &xfs_inobt_buf_ops,
394 		.work = &xfs_btroot_init,
395 		.type = XFS_BTNUM_INO,
398 	{ /* FINO root block */
399 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)),
400 		.numblks = BTOBB(mp->m_sb.sb_blocksize),
401 		.ops = &xfs_inobt_buf_ops,
402 		.work = &xfs_btroot_init,
403 		.type = XFS_BTNUM_FINO,
404 		.need_init =  xfs_sb_version_hasfinobt(&mp->m_sb)
406 	{ /* RMAP root block */
407 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
408 		.numblks = BTOBB(mp->m_sb.sb_blocksize),
409 		.ops = &xfs_rmapbt_buf_ops,
410 		.work = &xfs_rmaproot_init,
411 		.need_init = xfs_sb_version_hasrmapbt(&mp->m_sb)
413 	{ /* REFC root block */
414 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
415 		.numblks = BTOBB(mp->m_sb.sb_blocksize),
416 		.ops = &xfs_refcountbt_buf_ops,
417 		.work = &xfs_btroot_init,
418 		.type = XFS_BTNUM_REFC,
419 		.need_init = xfs_sb_version_hasreflink(&mp->m_sb)
421 	{ /* NULL terminating block */
422 		.daddr = XFS_BUF_DADDR_NULL,
425 	struct xfs_aghdr_grow_data *dp;
428 	/* Account for AG free space in new AG */
429 	id->nfree += id->agsize - mp->m_ag_prealloc_blocks;
	/* sentinel daddr (XFS_BUF_DADDR_NULL) terminates the table walk */
430 	for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) {
		/* hand the per-header location/size to the init machinery */
434 		id->daddr = dp->daddr;
435 		id->numblks = dp->numblks;
437 		error = xfs_growfs_init_aghdr(mp, id, dp->work, dp->ops);
/*
 * Grow the data section of the filesystem to in->newblocks blocks.
 * Validates the new size, probes that the last sector is readable,
 * computes the new AG count, writes headers for any new AGs outside the
 * transaction, then transactionally extends the old last AG (if it grew)
 * and updates the superblock counters.
 * NOTE(review): truncated listing — several declarations, error checks
 * and the function's label/return tail are missing from view.
 */
445 xfs_growfs_data_private(
446 	xfs_mount_t		*mp,		/* mount point for filesystem */
447 	xfs_growfs_data_t	*in)		/* growfs data input struct */
453 	xfs_agnumber_t		nagcount;
454 	xfs_agnumber_t		nagimax = 0;
455 	xfs_rfsblock_t		nb, nb_mod;
457 	xfs_agnumber_t		oagcount;
459 	LIST_HEAD		(buffer_list);
460 	struct aghdr_init_data	id = {};
	/* shrinking is not supported here */
463 	if (nb < mp->m_sb.sb_dblocks)
465 	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
	/* probe the last sector of the grown device to prove it exists */
467 	error = xfs_buf_read_uncached(mp->m_ddev_targp,
468 				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
469 				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
474 	new = nb;	/* use new as a temporary here */
475 	nb_mod = do_div(new, mp->m_sb.sb_agblocks);
476 	nagcount = new + (nb_mod != 0);
	/* a runt last AG below the minimum size is trimmed off */
477 	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
479 		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
480 		if (nb < mp->m_sb.sb_dblocks)
483 	new = nb - mp->m_sb.sb_dblocks;
484 	oagcount = mp->m_sb.sb_agcount;
486 	/* allocate the new per-ag structures */
487 	if (nagcount > oagcount) {
488 		error = xfs_initialize_perag(mp, nagcount, &nagimax);
493 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
494 			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
499 	 * Write new AG headers to disk. Non-transactional, but need to be
500 	 * written and completed prior to the growfs transaction being logged.
501 	 * To do this, we use a delayed write buffer list and wait for
502 	 * submission and IO completion of the list as a whole. This allows the
503 	 * IO subsystem to merge all the AG headers in a single AG into a single
504 	 * IO and hide most of the latency of the IO from us.
506 	 * This also means that if we get an error whilst building the buffer
507 	 * list to write, we can cancel the entire list without having written
510 	INIT_LIST_HEAD(&id.buffer_list);
	/* walk new AGs from highest to lowest; 'new' tracks blocks left */
511 	for (id.agno = nagcount - 1;
513 	     id.agno--, new -= id.agsize) {
515 		if (id.agno == nagcount - 1)
517 				 (id.agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
519 			id.agsize = mp->m_sb.sb_agblocks;
521 		error = xfs_grow_ag_headers(mp, &id);
523 			xfs_buf_delwri_cancel(&id.buffer_list);
524 			goto out_trans_cancel;
	/* headers must hit the disk before the transaction commits */
527 	error = xfs_buf_delwri_submit(&id.buffer_list);
529 		goto out_trans_cancel;
531 	xfs_trans_agblocks_delta(tp, id.nfree);
534 	 * There are new blocks in the old last a.g.
537 		struct xfs_owner_info	oinfo;
540 		 * Change the agi length.
542 		error = xfs_ialloc_read_agi(mp, tp, id.agno, &bp);
544 			goto out_trans_cancel;
547 		agi = XFS_BUF_TO_AGI(bp);
548 		be32_add_cpu(&agi->agi_length, new);
549 		ASSERT(nagcount == oagcount ||
550 		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
551 		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
556 		error = xfs_alloc_read_agf(mp, tp, id.agno, 0, &bp);
558 			goto out_trans_cancel;
561 		agf = XFS_BUF_TO_AGF(bp);
562 		be32_add_cpu(&agf->agf_length, new);
563 		ASSERT(be32_to_cpu(agf->agf_length) ==
564 		       be32_to_cpu(agi->agi_length));
566 		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
569 		 * Free the new space.
571 		 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
572 		 * this doesn't actually exist in the rmap btree.
574 		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
575 		error = xfs_rmap_free(tp, bp, id.agno,
576 				      be32_to_cpu(agf->agf_length) - new,
579 			goto out_trans_cancel;
580 		error = xfs_free_extent(tp,
581 				XFS_AGB_TO_FSB(mp, id.agno,
582 					be32_to_cpu(agf->agf_length) - new),
583 				new, &oinfo, XFS_AG_RESV_NONE);
585 			goto out_trans_cancel;
589 	 * Update changed superblock fields transactionally. These are not
590 	 * seen by the rest of the world until the transaction commit applies
591 	 * them atomically to the superblock.
593 	if (nagcount > oagcount)
594 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
595 	if (nb > mp->m_sb.sb_dblocks)
596 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS,
597 				 nb - mp->m_sb.sb_dblocks);
599 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree);
	/* grow must be durable on disk before we report success */
600 	xfs_trans_set_sync(tp);
601 	error = xfs_trans_commit(tp);
605 	/* New allocation groups fully initialized, so update mount struct */
607 		mp->m_maxagi = nagimax;
608 	xfs_set_low_space_thresholds(mp);
609 	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
612 	 * If we expanded the last AG, free the per-AG reservation
613 	 * so we can reinitialize it with the new size.
616 		struct xfs_perag	*pag;
618 		pag = xfs_perag_get(mp, id.agno);
619 		error = xfs_ag_resv_free(pag);
626 	 * Reserve AG metadata blocks. ENOSPC here does not mean there was a
627 	 * growfs failure, just that there still isn't space for new user data
628 	 * after the grow has been run.
630 	error = xfs_fs_reserve_ag_blocks(mp);
631 	if (error == -ENOSPC)
636 	xfs_trans_cancel(tp);
/*
 * Grow (or move) the log section of the filesystem.  Currently only
 * validates the request and succeeds for the no-op case; actually
 * resizing or relocating the log is unimplemented (see comment below).
 */
641 xfs_growfs_log_private(
642 	xfs_mount_t		*mp,	/* mount point for filesystem */
643 	xfs_growfs_log_t	*in)	/* growfs log input struct */
	/* reject logs below the absolute and blocksize-relative minimums */
648 	if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
	/* same size and same internal/external placement: nothing to do */
650 	if (nb == mp->m_sb.sb_logblocks &&
651 	    in->isint == (mp->m_sb.sb_logstart != 0))
654 	 * Moving the log is hard, need new interfaces to sync
655 	 * the log first, hold off all activity while moving it.
656 	 * Can have shorter or longer log in the same space,
657 	 * or transform internal to external log or vice versa.
/*
 * Update the maximum-inode-percentage (imaxpct) superblock field in a
 * small synchronous transaction.  The delta is applied relative to the
 * current sb_imax_pct so xfs_trans_mod_sb() can log it incrementally.
 */
664 	struct xfs_mount	*mp,
667 	struct xfs_trans	*tp;
674 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
675 			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
679 	dpct = imaxpct - mp->m_sb.sb_imax_pct;
680 	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
	/* make the change durable before returning to the caller */
681 	xfs_trans_set_sync(tp);
682 	return xfs_trans_commit(tp);
686  * After a grow operation, we need to update all the secondary superblocks
687  * to match the new state of the primary. Because we are completely overwriting
688  * all the existing fields in the secondary superblock buffers, there is no need
689  * to read them in from disk. Just get a new buffer, stamp it and write it.
691  * The sb buffers need to be cached here so that we serialise against scrub
692  * scanning secondary superblocks, but we don't want to keep it in memory once
693  * it is written so we mark it as a one-shot buffer.
696 xfs_growfs_update_superblocks(
697 	struct xfs_mount	*mp)
702 	LIST_HEAD		(buffer_list);
704 	/* update secondary superblocks. */
	/* AG 0 holds the primary superblock, so start at AG 1 */
705 	for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
708 		bp = xfs_buf_get(mp->m_ddev_targp,
709 				 XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
710 				 XFS_FSS_TO_BB(mp, 1), 0);
712 		 * If we get an error reading or writing alternate superblocks,
713 		 * continue.  xfs_repair chooses the "best" superblock based
714 		 * on most matches; if we break early, we'll leave more
715 		 * superblocks un-updated than updated, and xfs_repair may
716 		 * pick them over the properly-updated primary.
720 	"error allocating secondary superblock for ag %d",
723 			saved_error = -ENOMEM;
727 		bp->b_ops = &xfs_sb_buf_ops;
729 		xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
		/* stamp the (already updated) in-core geometry into this copy */
730 		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
731 		xfs_buf_delwri_queue(bp, &buffer_list);
734 		/* don't hold too many buffers at once */
738 		error = xfs_buf_delwri_submit(&buffer_list);
741 	"write error %d updating a secondary superblock near ag %d",
	/* flush any buffers still queued after the loop ends */
748 	error = xfs_buf_delwri_submit(&buffer_list);
751 	"write error %d updating a secondary superblock near ag %d",
	/* report the first error seen, preferring an allocation failure */
755 	return saved_error ? saved_error : error;
759  * protected versions of growfs function acquire and release locks on the mount
760  * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG,
     * Requires CAP_SYS_ADMIN; serialised against concurrent grows via
     * m_growlock (trylock, so a concurrent grow fails rather than blocks).
765 	struct xfs_mount	*mp,
766 	struct xfs_growfs_data	*in)
770 	if (!capable(CAP_SYS_ADMIN))
772 	if (!mutex_trylock(&mp->m_growlock))
775 	/* update imaxpct separately to the physical grow of the filesystem */
776 	if (in->imaxpct != mp->m_sb.sb_imax_pct) {
777 		error = xfs_growfs_imaxpct(mp, in->imaxpct);
782 	if (in->newblocks != mp->m_sb.sb_dblocks) {
783 		error = xfs_growfs_data_private(mp, in);
788 	/* Post growfs calculations needed to reflect new state in operations */
789 	if (mp->m_sb.sb_imax_pct) {
790 		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
		/* NOTE(review): truncated — the /100 division step before
		 * converting to an inode count is missing from this view */
792 		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
796 	/* Update secondary superblocks now the physical grow has completed */
797 	error = xfs_growfs_update_superblocks(mp);
801 	 * Increment the generation unconditionally, the error could be from
802 	 * updating the secondary superblocks, in which case the new size
806 	mutex_unlock(&mp->m_growlock);
/*
 * Protected wrapper for growing the log: capability check plus
 * m_growlock serialisation around xfs_growfs_log_private().
 */
813 	xfs_growfs_log_t	*in)
817 	if (!capable(CAP_SYS_ADMIN))
819 	if (!mutex_trylock(&mp->m_growlock))
821 	error = xfs_growfs_log_private(mp, in);
822 	mutex_unlock(&mp->m_growlock);
827  * exported through ioctl XFS_IOC_FSCOUNTS
     * Fill *cnt with approximate filesystem counters.  The per-cpu reads
     * are lockless (and thus racy but cheap); only the realtime extent
     * count is read under m_sb_lock.
833 	xfs_fsop_counts_t	*cnt)
835 	cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
836 	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
	/* hide the blocks set aside for allocator headroom from userspace */
837 	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
838 						mp->m_alloc_set_aside;
840 	spin_lock(&mp->m_sb_lock);
841 	cnt->freertx = mp->m_sb.sb_frextents;
842 	spin_unlock(&mp->m_sb_lock);
847  * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS
849  * xfs_reserve_blocks is called to set m_resblks
850  * in the in-core mount table. The number of unused reserved blocks
851  * is kept in m_resblks_avail.
853  * Reserve the requested number of blocks if available. Otherwise return
854  * as many as possible to satisfy the request. The actual number
855  * reserved are returned in outval.
857  * A null inval pointer indicates that only the current reserved blocks
858  * available should be returned; no settings are changed.
865 	xfs_fsop_resblks_t		*outval)
867 	int64_t				lcounter, delta;
868 	int64_t				fdblks_delta = 0;
873 	/* If inval is null, report current values and return */
874 	if (inval == (uint64_t *)NULL) {
877 		outval->resblks = mp->m_resblks;
878 		outval->resblks_avail = mp->m_resblks_avail;
885 	 * With per-cpu counters, this becomes an interesting problem. we need
886 	 * to work out if we are freeing or allocating blocks first, then we can
887 	 * do the modification as necessary.
889 	 * We do this under the m_sb_lock so that if we are near ENOSPC, we will
890 	 * hold out any changes while we work out what to do. This means that
891 	 * the amount of free space can change while we do this, so we need to
892 	 * retry if we end up trying to reserve more space than is available.
894 	spin_lock(&mp->m_sb_lock);
897 	 * If our previous reservation was larger than the current value,
898 	 * then move any unused blocks back to the free pool. Modify the resblks
899 	 * counters directly since we shouldn't have any problems unreserving
902 	if (mp->m_resblks > request) {
903 		lcounter = mp->m_resblks_avail - request;
904 		if (lcounter  > 0) {		/* release unused blocks */
905 			fdblks_delta = lcounter;
906 			mp->m_resblks_avail -= lcounter;
908 		mp->m_resblks = request;
		/* drop the lock: xfs_mod_fdblocks must not nest under it */
910 			spin_unlock(&mp->m_sb_lock);
911 			error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
912 			spin_lock(&mp->m_sb_lock);
919 	 * If the request is larger than the current reservation, reserve the
920 	 * blocks before we update the reserve counters. Sample m_fdblocks and
921 	 * perform a partial reservation if the request exceeds free space.
	/* precise (not fast-path) sum: we are making a policy decision */
925 		free = percpu_counter_sum(&mp->m_fdblocks) -
926 						mp->m_alloc_set_aside;
930 		delta = request - mp->m_resblks;
931 		lcounter = free - delta;
933 			/* We can't satisfy the request, just get what we can */
936 			fdblks_delta = delta;
939 		 * We'll either succeed in getting space from the free block
940 		 * count or we'll get an ENOSPC. If we get a ENOSPC, it means
941 		 * things changed while we were calculating fdblks_delta and so
942 		 * we should try again to see if there is anything left to
945 		 * Don't set the reserved flag here - we don't want to reserve
946 		 * the extra reserve blocks from the reserve.....
948 		spin_unlock(&mp->m_sb_lock);
949 		error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
950 		spin_lock(&mp->m_sb_lock);
951 	} while (error == -ENOSPC);
954 	 * Update the reserve counters if blocks have been successfully
957 	if (!error && fdblks_delta) {
958 		mp->m_resblks += fdblks_delta;
959 		mp->m_resblks_avail += fdblks_delta;
964 		outval->resblks = mp->m_resblks;
965 		outval->resblks_avail = mp->m_resblks_avail;
968 	spin_unlock(&mp->m_sb_lock);
/*
 * NOTE(review): fragment of xfs_fs_goingdown() — the function header and
 * switch statement opener are missing from this truncated view.  Handles
 * the XFS_IOC_GOINGDOWN ioctl modes: DEFAULT freezes the block device to
 * flush dirty data before shutting down, LOGFLUSH shuts down with the
 * log flushed, NOLOGFLUSH also marks the log as having IO errors.
 */
978 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
979 		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
		/* freeze may legitimately fail; only shut down if it worked */
981 		if (sb && !IS_ERR(sb)) {
982 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
983 			thaw_bdev(sb->s_bdev, sb);
988 	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
989 		xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
991 	case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
992 		xfs_force_shutdown(mp,
993 				SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
1003  * Force a shutdown of the filesystem instantly while keeping the filesystem
1004  * consistent. We don't do an unmount here; just shutdown the shop, make sure
1005  * that absolutely nothing persistent happens to this filesystem after this
      * point.  Logs who asked for the shutdown (unless it is an unmount-time
      * shutdown) and emits a reason-specific alert.
1009 xfs_do_force_shutdown(
1017 	logerror = flags & SHUTDOWN_LOG_IO_ERROR;
1019 	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
1021 "%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT,
1022 			__func__, flags, lnnum, fname, __return_address);
1025 	 * No need to duplicate efforts.
	/* already shut down; only proceed again to record a new log error */
1027 	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
1031 	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
1032 	 * queue up anybody new on the log reservations, and wakes up
1033 	 * everybody who's sleeping on log reservations to tell them
1036 	if (xfs_log_force_umount(mp, logerror))
1039 	if (flags & SHUTDOWN_CORRUPT_INCORE) {
1040 		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
1041 "Corruption of in-memory data detected.  Shutting down filesystem");
1042 		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
1044 	} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
1046 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
1047 "Log I/O Error Detected. Shutting down filesystem");
1048 		} else if (flags & SHUTDOWN_DEVICE_REQ) {
1049 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
1050 "All device paths lost. Shutting down filesystem");
1051 		} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
1052 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
1053 "I/O Error Detected. Shutting down filesystem");
1056 	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
1058 "Please umount the filesystem and rectify the problem(s)");
1063  * Reserve free space for per-AG metadata.
      * Walks every AG and initialises its reservation; records the first
      * failure but keeps going so all AGs are attempted.  Any error other
      * than ENOSPC indicates corruption and forces a shutdown.
1066 xfs_fs_reserve_ag_blocks(
1067 	struct xfs_mount	*mp)
1069 	xfs_agnumber_t		agno;
1070 	struct xfs_perag	*pag;
1074 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
1075 		pag = xfs_perag_get(mp, agno);
1076 		err2 = xfs_ag_resv_init(pag);
	/* ENOSPC just means no headroom; anything else is fatal */
1082 	if (error && error != -ENOSPC) {
1084 	"Error %d reserving per-AG metadata reserve pool.", error);
1085 		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1092 * Free space reserved for per-AG metadata.
1095 xfs_fs_unreserve_ag_blocks(
1096 struct xfs_mount *mp)
1098 xfs_agnumber_t agno;
1099 struct xfs_perag *pag;
1103 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
1104 pag = xfs_perag_get(mp, agno);
1105 err2 = xfs_ag_resv_free(pag);
1113 "Error %d freeing per-AG metadata reserve pool.", error);