fs/xfs/xfs_super.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "xfs.h"
   8 #include "xfs_shared.h"
   9 #include "xfs_format.h"
  10 #include "xfs_log_format.h"
  11 #include "xfs_trans_resv.h"
  12 #include "xfs_sb.h"
  13 #include "xfs_mount.h"
  14 #include "xfs_inode.h"
  15 #include "xfs_btree.h"
  16 #include "xfs_bmap.h"
  17 #include "xfs_alloc.h"
  18 #include "xfs_fsops.h"
  19 #include "xfs_trans.h"
  20 #include "xfs_buf_item.h"
  21 #include "xfs_log.h"
  22 #include "xfs_log_priv.h"
  23 #include "xfs_dir2.h"
  24 #include "xfs_extfree_item.h"
  25 #include "xfs_mru_cache.h"
  26 #include "xfs_inode_item.h"
  27 #include "xfs_icache.h"
  28 #include "xfs_trace.h"
  29 #include "xfs_icreate_item.h"
  30 #include "xfs_filestream.h"
  31 #include "xfs_quota.h"
  32 #include "xfs_sysfs.h"
  33 #include "xfs_ondisk.h"
  34 #include "xfs_rmap_item.h"
  35 #include "xfs_refcount_item.h"
  36 #include "xfs_bmap_item.h"
  37 #include "xfs_reflink.h"
  38
  39 #include <linux/magic.h>
  40 #include <linux/fs_context.h>
  41 #include <linux/fs_parser.h>
  42
  43 static const struct super_operations xfs_super_operations;
  44
  45 static struct kset *xfs_kset;           /* top-level xfs sysfs dir */
  46 #ifdef DEBUG
  47 static struct xfs_kobj xfs_dbg_kobj;    /* global debug sysfs attrs */
  48 #endif
  49
  50 /*
  51  * Table driven mount option parser.
  52  */
  53 enum {
  54         Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
  55         Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
  56         Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
  57         Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
  58         Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
  59         Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
  60         Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
  61         Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
  62         Opt_discard, Opt_nodiscard, Opt_dax,
  63 };
  64
  65 static const struct fs_parameter_spec xfs_param_specs[] = {
  66         fsparam_u32("logbufs",          Opt_logbufs),
  67         fsparam_string("logbsize",      Opt_logbsize),
  68         fsparam_string("logdev",        Opt_logdev),
  69         fsparam_string("rtdev",         Opt_rtdev),
  70         fsparam_flag("wsync",           Opt_wsync),
  71         fsparam_flag("noalign",         Opt_noalign),
  72         fsparam_flag("swalloc",         Opt_swalloc),
  73         fsparam_u32("sunit",            Opt_sunit),
  74         fsparam_u32("swidth",           Opt_swidth),
  75         fsparam_flag("nouuid",          Opt_nouuid),
  76         fsparam_flag("grpid",           Opt_grpid),
  77         fsparam_flag("nogrpid",         Opt_nogrpid),
  78         fsparam_flag("bsdgroups",       Opt_bsdgroups),
  79         fsparam_flag("sysvgroups",      Opt_sysvgroups),
  80         fsparam_string("allocsize",     Opt_allocsize),
  81         fsparam_flag("norecovery",      Opt_norecovery),
  82         fsparam_flag("inode64",         Opt_inode64),
  83         fsparam_flag("inode32",         Opt_inode32),
  84         fsparam_flag("ikeep",           Opt_ikeep),
  85         fsparam_flag("noikeep",         Opt_noikeep),
  86         fsparam_flag("largeio",         Opt_largeio),
  87         fsparam_flag("nolargeio",       Opt_nolargeio),
  88         fsparam_flag("attr2",           Opt_attr2),
  89         fsparam_flag("noattr2",         Opt_noattr2),
  90         fsparam_flag("filestreams",     Opt_filestreams),
  91         fsparam_flag("quota",           Opt_quota),
  92         fsparam_flag("noquota",         Opt_noquota),
  93         fsparam_flag("usrquota",        Opt_usrquota),
  94         fsparam_flag("grpquota",        Opt_grpquota),
  95         fsparam_flag("prjquota",        Opt_prjquota),
  96         fsparam_flag("uquota",          Opt_uquota),
  97         fsparam_flag("gquota",          Opt_gquota),
  98         fsparam_flag("pquota",          Opt_pquota),
  99         fsparam_flag("uqnoenforce",     Opt_uqnoenforce),
 100         fsparam_flag("gqnoenforce",     Opt_gqnoenforce),
 101         fsparam_flag("pqnoenforce",     Opt_pqnoenforce),
 102         fsparam_flag("qnoenforce",      Opt_qnoenforce),
 103         fsparam_flag("discard",         Opt_discard),
 104         fsparam_flag("nodiscard",       Opt_nodiscard),
 105         fsparam_flag("dax",             Opt_dax),
 106         {}
 107 };
 108
 109 static const struct fs_parameter_description xfs_fs_parameters = {
 110         .name           = "xfs",
 111         .specs          = xfs_param_specs,
 112 };
 113
 114 struct proc_xfs_info {
 115         uint64_t        flag;
 116         char            *str;
 117 };
 118
 119 static int
 120 xfs_fs_show_options(
 121         struct seq_file         *m,
 122         struct dentry           *root)
 123 {
 124         static struct proc_xfs_info xfs_info_set[] = {
 125                 /* the few simple ones we can get from the mount struct */
 126                 { XFS_MOUNT_IKEEP,              ",ikeep" },
 127                 { XFS_MOUNT_WSYNC,              ",wsync" },
 128                 { XFS_MOUNT_NOALIGN,            ",noalign" },
 129                 { XFS_MOUNT_SWALLOC,            ",swalloc" },
 130                 { XFS_MOUNT_NOUUID,             ",nouuid" },
 131                 { XFS_MOUNT_NORECOVERY,         ",norecovery" },
 132                 { XFS_MOUNT_ATTR2,              ",attr2" },
 133                 { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
 134                 { XFS_MOUNT_GRPID,              ",grpid" },
 135                 { XFS_MOUNT_DISCARD,            ",discard" },
 136                 { XFS_MOUNT_LARGEIO,            ",largeio" },
 137                 { XFS_MOUNT_DAX,                ",dax" },
 138                 { 0, NULL }
 139         };
 140         struct xfs_mount        *mp = XFS_M(root->d_sb);
 141         struct proc_xfs_info    *xfs_infop;
 142
 143         for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
 144                 if (mp->m_flags & xfs_infop->flag)
 145                         seq_puts(m, xfs_infop->str);
 146         }
 147
 148         seq_printf(m, ",inode%d",
 149                 (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64);
 150
 151         if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
 152                 seq_printf(m, ",allocsize=%dk",
 153                            (1 << mp->m_allocsize_log) >> 10);
 154
 155         if (mp->m_logbufs > 0)
 156                 seq_printf(m, ",logbufs=%d", mp->m_logbufs);
 157         if (mp->m_logbsize > 0)
 158                 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
 159
 160         if (mp->m_logname)
 161                 seq_show_option(m, "logdev", mp->m_logname);
 162         if (mp->m_rtname)
 163                 seq_show_option(m, "rtdev", mp->m_rtname);
 164
 165         if (mp->m_dalign > 0)
 166                 seq_printf(m, ",sunit=%d",
 167                                 (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
 168         if (mp->m_swidth > 0)
 169                 seq_printf(m, ",swidth=%d",
 170                                 (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 171
 172         if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
 173                 seq_puts(m, ",usrquota");
 174         else if (mp->m_qflags & XFS_UQUOTA_ACCT)
 175                 seq_puts(m, ",uqnoenforce");
 176
 177         if (mp->m_qflags & XFS_PQUOTA_ACCT) {
 178                 if (mp->m_qflags & XFS_PQUOTA_ENFD)
 179                         seq_puts(m, ",prjquota");
 180                 else
 181                         seq_puts(m, ",pqnoenforce");
 182         }
 183         if (mp->m_qflags & XFS_GQUOTA_ACCT) {
 184                 if (mp->m_qflags & XFS_GQUOTA_ENFD)
 185                         seq_puts(m, ",grpquota");
 186                 else
 187                         seq_puts(m, ",gqnoenforce");
 188         }
 189
 190         if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
 191                 seq_puts(m, ",noquota");
 192
 193         return 0;
 194 }
 195
 196 static uint64_t
 197 xfs_max_file_offset(
 198         unsigned int            blockshift)
 199 {
 200         unsigned int            pagefactor = 1;
 201         unsigned int            bitshift = BITS_PER_LONG - 1;
 202
 203         /* Figure out maximum filesize, on Linux this can depend on
 204          * the filesystem blocksize (on 32 bit platforms).
 205          * __block_write_begin does this in an [unsigned] long long...
 206          *      page->index << (PAGE_SHIFT - bbits)
 207          * So, for page sized blocks (4K on 32 bit platforms),
 208          * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
 209          *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
 210          * but for smaller blocksizes it is less (bbits = log2 bsize).
 211          */
 212
 213 #if BITS_PER_LONG == 32
 214         ASSERT(sizeof(sector_t) == 8);
 215         pagefactor = PAGE_SIZE;
 216         bitshift = BITS_PER_LONG;
 217 #endif
 218
 219         return (((uint64_t)pagefactor) << bitshift) - 1;
 220 }
 221
 222 /*
 223  * Set parameters for inode allocation heuristics, taking into account
 224  * filesystem size and inode32/inode64 mount options; i.e. specifically
 225  * whether or not XFS_MOUNT_SMALL_INUMS is set.
 226  *
 227  * Inode allocation patterns are altered only if inode32 is requested
 228  * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
 229  * If altered, XFS_MOUNT_32BITINODES is set as well.
 230  *
 231  * An agcount independent of that in the mount structure is provided
 232  * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 233  * to the potentially higher ag count.
 234  *
 235  * Returns the maximum AG index which may contain inodes.
 236  */
 237 xfs_agnumber_t
 238 xfs_set_inode_alloc(
 239         struct xfs_mount *mp,
 240         xfs_agnumber_t  agcount)
 241 {
 242         xfs_agnumber_t  index;
 243         xfs_agnumber_t  maxagi = 0;
 244         xfs_sb_t        *sbp = &mp->m_sb;
 245         xfs_agnumber_t  max_metadata;
 246         xfs_agino_t     agino;
 247         xfs_ino_t       ino;
 248
 249         /*
 250          * Calculate how much should be reserved for inodes to meet
 251          * the max inode percentage.  Used only for inode32.
 252          */
 253         if (M_IGEO(mp)->maxicount) {
 254                 uint64_t        icount;
 255
 256                 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
 257                 do_div(icount, 100);
 258                 icount += sbp->sb_agblocks - 1;
 259                 do_div(icount, sbp->sb_agblocks);
 260                 max_metadata = icount;
 261         } else {
 262                 max_metadata = agcount;
 263         }
 264
 265         /* Get the last possible inode in the filesystem */
 266         agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
 267         ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
 268
 269         /*
 270          * If user asked for no more than 32-bit inodes, and the fs is
 271          * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
 272          * the allocator to accommodate the request.
 273          */
 274         if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
 275                 mp->m_flags |= XFS_MOUNT_32BITINODES;
 276         else
 277                 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
 278
 279         for (index = 0; index < agcount; index++) {
 280                 struct xfs_perag        *pag;
 281
 282                 ino = XFS_AGINO_TO_INO(mp, index, agino);
 283
 284                 pag = xfs_perag_get(mp, index);
 285
 286                 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
 287                         if (ino > XFS_MAXINUMBER_32) {
 288                                 pag->pagi_inodeok = 0;
 289                                 pag->pagf_metadata = 0;
 290                         } else {
 291                                 pag->pagi_inodeok = 1;
 292                                 maxagi++;
 293                                 if (index < max_metadata)
 294                                         pag->pagf_metadata = 1;
 295                                 else
 296                                         pag->pagf_metadata = 0;
 297                         }
 298                 } else {
 299                         pag->pagi_inodeok = 1;
 300                         pag->pagf_metadata = 0;
 301                 }
 302
 303                 xfs_perag_put(pag);
 304         }
 305
 306         return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
 307 }
 308
 309 STATIC int
 310 xfs_blkdev_get(
 311         xfs_mount_t             *mp,
 312         const char              *name,
 313         struct block_device     **bdevp)
 314 {
 315         int                     error = 0;
 316
 317         *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
 318                                     mp);
 319         if (IS_ERR(*bdevp)) {
 320                 error = PTR_ERR(*bdevp);
 321                 xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
 322         }
 323
 324         return error;
 325 }
 326
 327 STATIC void
 328 xfs_blkdev_put(
 329         struct block_device     *bdev)
 330 {
 331         if (bdev)
 332                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 333 }
 334
 335 void
 336 xfs_blkdev_issue_flush(
 337         xfs_buftarg_t           *buftarg)
 338 {
 339         blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
 340 }
 341
 342 STATIC void
 343 xfs_close_devices(
 344         struct xfs_mount        *mp)
 345 {
 346         struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
 347
 348         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 349                 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
 350                 struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
 351
 352                 xfs_free_buftarg(mp->m_logdev_targp);
 353                 xfs_blkdev_put(logdev);
 354                 fs_put_dax(dax_logdev);
 355         }
 356         if (mp->m_rtdev_targp) {
 357                 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
 358                 struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
 359
 360                 xfs_free_buftarg(mp->m_rtdev_targp);
 361                 xfs_blkdev_put(rtdev);
 362                 fs_put_dax(dax_rtdev);
 363         }
 364         xfs_free_buftarg(mp->m_ddev_targp);
 365         fs_put_dax(dax_ddev);
 366 }
 367
 368 /*
 369  * The file system configurations are:
 370  *      (1) device (partition) with data and internal log
 371  *      (2) logical volume with data and log subvolumes.
 372  *      (3) logical volume with data, log, and realtime subvolumes.
 373  *
 374  * We only have to handle opening the log and realtime volumes here if
 375  * they are present.  The data subvolume has already been opened by
 376  * get_sb_bdev() and is stored in sb->s_bdev.
 377  */
 378 STATIC int
 379 xfs_open_devices(
 380         struct xfs_mount        *mp)
 381 {
 382         struct block_device     *ddev = mp->m_super->s_bdev;
 383         struct dax_device       *dax_ddev = fs_dax_get_by_bdev(ddev);
 384         struct dax_device       *dax_logdev = NULL, *dax_rtdev = NULL;
 385         struct block_device     *logdev = NULL, *rtdev = NULL;
 386         int                     error;
 387
 388         /*
 389          * Open real time and log devices - order is important.
 390          */
 391         if (mp->m_logname) {
 392                 error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
 393                 if (error)
 394                         goto out;
 395                 dax_logdev = fs_dax_get_by_bdev(logdev);
 396         }
 397
 398         if (mp->m_rtname) {
 399                 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
 400                 if (error)
 401                         goto out_close_logdev;
 402
 403                 if (rtdev == ddev || rtdev == logdev) {
 404                         xfs_warn(mp,
 405         "Cannot mount filesystem with identical rtdev and ddev/logdev.");
 406                         error = -EINVAL;
 407                         goto out_close_rtdev;
 408                 }
 409                 dax_rtdev = fs_dax_get_by_bdev(rtdev);
 410         }
 411
 412         /*
 413          * Setup xfs_mount buffer target pointers
 414          */
 415         error = -ENOMEM;
 416         mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
 417         if (!mp->m_ddev_targp)
 418                 goto out_close_rtdev;
 419
 420         if (rtdev) {
 421                 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
 422                 if (!mp->m_rtdev_targp)
 423                         goto out_free_ddev_targ;
 424         }
 425
 426         if (logdev && logdev != ddev) {
 427                 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
 428                 if (!mp->m_logdev_targp)
 429                         goto out_free_rtdev_targ;
 430         } else {
 431                 mp->m_logdev_targp = mp->m_ddev_targp;
 432         }
 433
 434         return 0;
 435
 436  out_free_rtdev_targ:
 437         if (mp->m_rtdev_targp)
 438                 xfs_free_buftarg(mp->m_rtdev_targp);
 439  out_free_ddev_targ:
 440         xfs_free_buftarg(mp->m_ddev_targp);
 441  out_close_rtdev:
 442         xfs_blkdev_put(rtdev);
 443         fs_put_dax(dax_rtdev);
 444  out_close_logdev:
 445         if (logdev && logdev != ddev) {
 446                 xfs_blkdev_put(logdev);
 447                 fs_put_dax(dax_logdev);
 448         }
 449  out:
 450         fs_put_dax(dax_ddev);
 451         return error;
 452 }
 453
 454 /*
 455  * Setup xfs_mount buffer target pointers based on superblock
 456  */
 457 STATIC int
 458 xfs_setup_devices(
 459         struct xfs_mount        *mp)
 460 {
 461         int                     error;
 462
 463         error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
 464         if (error)
 465                 return error;
 466
 467         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 468                 unsigned int    log_sector_size = BBSIZE;
 469
 470                 if (xfs_sb_version_hassector(&mp->m_sb))
 471                         log_sector_size = mp->m_sb.sb_logsectsize;
 472                 error = xfs_setsize_buftarg(mp->m_logdev_targp,
 473                                             log_sector_size);
 474                 if (error)
 475                         return error;
 476         }
 477         if (mp->m_rtdev_targp) {
 478                 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
 479                                             mp->m_sb.sb_sectsize);
 480                 if (error)
 481                         return error;
 482         }
 483
 484         return 0;
 485 }
 486
 487 STATIC int
 488 xfs_init_mount_workqueues(
 489         struct xfs_mount        *mp)
 490 {
 491         mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
 492                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_super->s_id);
 493         if (!mp->m_buf_workqueue)
 494                 goto out;
 495
 496         mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
 497                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
 498         if (!mp->m_unwritten_workqueue)
 499                 goto out_destroy_buf;
 500
 501         mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
 502                         WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND,
 503                         0, mp->m_super->s_id);
 504         if (!mp->m_cil_workqueue)
 505                 goto out_destroy_unwritten;
 506
 507         mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
 508                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
 509         if (!mp->m_reclaim_workqueue)
 510                 goto out_destroy_cil;
 511
 512         mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
 513                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
 514         if (!mp->m_eofblocks_workqueue)
 515                 goto out_destroy_reclaim;
 516
 517         mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
 518                                                mp->m_super->s_id);
 519         if (!mp->m_sync_workqueue)
 520                 goto out_destroy_eofb;
 521
 522         return 0;
 523
 524 out_destroy_eofb:
 525         destroy_workqueue(mp->m_eofblocks_workqueue);
 526 out_destroy_reclaim:
 527         destroy_workqueue(mp->m_reclaim_workqueue);
 528 out_destroy_cil:
 529         destroy_workqueue(mp->m_cil_workqueue);
 530 out_destroy_unwritten:
 531         destroy_workqueue(mp->m_unwritten_workqueue);
 532 out_destroy_buf:
 533         destroy_workqueue(mp->m_buf_workqueue);
 534 out:
 535         return -ENOMEM;
 536 }
 537
 538 STATIC void
 539 xfs_destroy_mount_workqueues(
 540         struct xfs_mount        *mp)
 541 {
 542         destroy_workqueue(mp->m_sync_workqueue);
 543         destroy_workqueue(mp->m_eofblocks_workqueue);
 544         destroy_workqueue(mp->m_reclaim_workqueue);
 545         destroy_workqueue(mp->m_cil_workqueue);
 546         destroy_workqueue(mp->m_unwritten_workqueue);
 547         destroy_workqueue(mp->m_buf_workqueue);
 548 }
 549
 550 /*
 551  * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 552  * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 553  * for IO to complete so that we effectively throttle multiple callers to the
 554  * rate at which IO is completing.
 555  */
 556 void
 557 xfs_flush_inodes(
 558         struct xfs_mount        *mp)
 559 {
 560         struct super_block      *sb = mp->m_super;
 561
 562         if (down_read_trylock(&sb->s_umount)) {
 563                 sync_inodes_sb(sb);
 564                 up_read(&sb->s_umount);
 565         }
 566 }
 567
 568 /* Catch misguided souls that try to use this interface on XFS */
 569 STATIC struct inode *
 570 xfs_fs_alloc_inode(
 571         struct super_block      *sb)
 572 {
 573         BUG();
 574         return NULL;
 575 }
 576
 577 #ifdef DEBUG
 578 static void
 579 xfs_check_delalloc(
 580         struct xfs_inode        *ip,
 581         int                     whichfork)
 582 {
 583         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 584         struct xfs_bmbt_irec    got;
 585         struct xfs_iext_cursor  icur;
 586
 587         if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
 588                 return;
 589         do {
 590                 if (isnullstartblock(got.br_startblock)) {
 591                         xfs_warn(ip->i_mount,
 592         "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
 593                                 ip->i_ino,
 594                                 whichfork == XFS_DATA_FORK ? "data" : "cow",
 595                                 got.br_startoff, got.br_blockcount);
 596                 }
 597         } while (xfs_iext_next_extent(ifp, &icur, &got));
 598 }
 599 #else
 600 #define xfs_check_delalloc(ip, whichfork)       do { } while (0)
 601 #endif
 602
 603 /*
 604  * Now that the generic code is guaranteed not to be accessing
 605  * the linux inode, we can inactivate and reclaim the inode.
 606  */
 607 STATIC void
 608 xfs_fs_destroy_inode(
 609         struct inode            *inode)
 610 {
 611         struct xfs_inode        *ip = XFS_I(inode);
 612
 613         trace_xfs_destroy_inode(ip);
 614
 615         ASSERT(!rwsem_is_locked(&inode->i_rwsem));
 616         XFS_STATS_INC(ip->i_mount, vn_rele);
 617         XFS_STATS_INC(ip->i_mount, vn_remove);
 618
 619         xfs_inactive(ip);
 620
 621         if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
 622                 xfs_check_delalloc(ip, XFS_DATA_FORK);
 623                 xfs_check_delalloc(ip, XFS_COW_FORK);
 624                 ASSERT(0);
 625         }
 626
 627         XFS_STATS_INC(ip->i_mount, vn_reclaim);
 628
 629         /*
 630          * We should never get here with one of the reclaim flags already set.
 631          */
 632         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 633         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
 634
 635         /*
 636          * We always use background reclaim here because even if the
 637          * inode is clean, it still may be under IO and hence we have
 638          * to take the flush lock. The background reclaim path handles
 639          * this more efficiently than we can here, so simply let background
 640          * reclaim tear down all inodes.
 641          */
 642         xfs_inode_set_reclaim_tag(ip);
 643 }
 644
 645 static void
 646 xfs_fs_dirty_inode(
 647         struct inode                    *inode,
 648         int                             flag)
 649 {
 650         struct xfs_inode                *ip = XFS_I(inode);
 651         struct xfs_mount                *mp = ip->i_mount;
 652         struct xfs_trans                *tp;
 653
 654         if (!(inode->i_sb->s_flags & SB_LAZYTIME))
 655                 return;
 656         if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
 657                 return;
 658
 659         if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
 660                 return;
 661         xfs_ilock(ip, XFS_ILOCK_EXCL);
 662         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 663         xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
 664         xfs_trans_commit(tp);
 665 }
 666
 667 /*
 668  * Slab object creation initialisation for the XFS inode.
 669  * This covers only the idempotent fields in the XFS inode;
 670  * all other fields need to be initialised on allocation
 671  * from the slab. This avoids the need to repeatedly initialise
 672  * fields in the xfs inode that left in the initialise state
 673  * when freeing the inode.
 674  */
 675 STATIC void
 676 xfs_fs_inode_init_once(
 677         void                    *inode)
 678 {
 679         struct xfs_inode        *ip = inode;
 680
 681         memset(ip, 0, sizeof(struct xfs_inode));
 682
 683         /* vfs inode */
 684         inode_init_once(VFS_I(ip));
 685
 686         /* xfs inode */
 687         atomic_set(&ip->i_pincount, 0);
 688         spin_lock_init(&ip->i_flags_lock);
 689
 690         mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
 691                      "xfsino", ip->i_ino);
 692         mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
 693                      "xfsino", ip->i_ino);
 694 }
 695
 696 /*
 697  * We do an unlocked check for XFS_IDONTCACHE here because we are already
 698  * serialised against cache hits here via the inode->i_lock and igrab() in
 699  * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 700  * racing with us, and it avoids needing to grab a spinlock here for every inode
 701  * we drop the final reference on.
 702  */
 703 STATIC int
 704 xfs_fs_drop_inode(
 705         struct inode            *inode)
 706 {
 707         struct xfs_inode        *ip = XFS_I(inode);
 708
 709         /*
 710          * If this unlinked inode is in the middle of recovery, don't
 711          * drop the inode just yet; log recovery will take care of
 712          * that.  See the comment for this inode flag.
 713          */
 714         if (ip->i_flags & XFS_IRECOVERY) {
 715                 ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
 716                 return 0;
 717         }
 718
 719         return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
 720 }
 721
 722 static void
 723 xfs_mount_free(
 724         struct xfs_mount        *mp)
 725 {
 726         kfree(mp->m_rtname);
 727         kfree(mp->m_logname);
 728         kmem_free(mp);
 729 }
 730
 731 STATIC int
 732 xfs_fs_sync_fs(
 733         struct super_block      *sb,
 734         int                     wait)
 735 {
 736         struct xfs_mount        *mp = XFS_M(sb);
 737
 738         /*
 739          * Doing anything during the async pass would be counterproductive.
 740          */
 741         if (!wait)
 742                 return 0;
 743
 744         xfs_log_force(mp, XFS_LOG_SYNC);
 745         if (laptop_mode) {
 746                 /*
 747                  * The disk must be active because we're syncing.
 748                  * We schedule log work now (now that the disk is
 749                  * active) instead of later (when it might not be).
 750                  */
 751                 flush_delayed_work(&mp->m_log->l_work);
 752         }
 753
 754         return 0;
 755 }
 756
 757 STATIC int
 758 xfs_fs_statfs(
 759         struct dentry           *dentry,
 760         struct kstatfs          *statp)
 761 {
 762         struct xfs_mount        *mp = XFS_M(dentry->d_sb);
 763         xfs_sb_t                *sbp = &mp->m_sb;
 764         struct xfs_inode        *ip = XFS_I(d_inode(dentry));
 765         uint64_t                fakeinos, id;
 766         uint64_t                icount;
 767         uint64_t                ifree;
 768         uint64_t                fdblocks;
 769         xfs_extlen_t            lsize;
 770         int64_t                 ffree;
 771
 772         statp->f_type = XFS_SUPER_MAGIC;
 773         statp->f_namelen = MAXNAMELEN - 1;
 774
 775         id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
 776         statp->f_fsid.val[0] = (u32)id;
 777         statp->f_fsid.val[1] = (u32)(id >> 32);
 778
 779         icount = percpu_counter_sum(&mp->m_icount);
 780         ifree = percpu_counter_sum(&mp->m_ifree);
 781         fdblocks = percpu_counter_sum(&mp->m_fdblocks);
 782
 783         spin_lock(&mp->m_sb_lock);
 784         statp->f_bsize = sbp->sb_blocksize;
 785         lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
 786         statp->f_blocks = sbp->sb_dblocks - lsize;
 787         spin_unlock(&mp->m_sb_lock);
 788
 789         statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
 790         statp->f_bavail = statp->f_bfree;
 791
 792         fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
 793         statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
 794         if (M_IGEO(mp)->maxicount)
 795                 statp->f_files = min_t(typeof(statp->f_files),
 796                                         statp->f_files,
 797                                         M_IGEO(mp)->maxicount);
 798
 799         /* If sb_icount overshot maxicount, report actual allocation */
 800         statp->f_files = max_t(typeof(statp->f_files),
 801                                         statp->f_files,
 802                                         sbp->sb_icount);
 803
 804         /* make sure statp->f_ffree does not underflow */
 805         ffree = statp->f_files - (icount - ifree);
 806         statp->f_ffree = max_t(int64_t, ffree, 0);
 807
 808
 809         if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
 810             ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
 811                               (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
 812                 xfs_qm_statvfs(ip, statp);
 813
 814         if (XFS_IS_REALTIME_MOUNT(mp) &&
 815             (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
 816                 statp->f_blocks = sbp->sb_rblocks;
 817                 statp->f_bavail = statp->f_bfree =
 818                         sbp->sb_frextents * sbp->sb_rextsize;
 819         }
 820
 821         return 0;
 822 }
 823
 824 STATIC void
 825 xfs_save_resvblks(struct xfs_mount *mp)
 826 {
 827         uint64_t resblks = 0;
 828
 829         mp->m_resblks_save = mp->m_resblks;
 830         xfs_reserve_blocks(mp, &resblks, NULL);
 831 }
 832
 833 STATIC void
 834 xfs_restore_resvblks(struct xfs_mount *mp)
 835 {
 836         uint64_t resblks;
 837
 838         if (mp->m_resblks_save) {
 839                 resblks = mp->m_resblks_save;
 840                 mp->m_resblks_save = 0;
 841         } else
 842                 resblks = xfs_default_resblks(mp);
 843
 844         xfs_reserve_blocks(mp, &resblks, NULL);
 845 }
 846
 847 /*
 848  * Trigger writeback of all the dirty metadata in the file system.
 849  *
 850  * This ensures that the metadata is written to their location on disk rather
 851  * than just existing in transactions in the log. This means after a quiesce
 852  * there is no log replay required to write the inodes to disk - this is the
 853  * primary difference between a sync and a quiesce.
 854  *
 855  * Note: xfs_log_quiesce() stops background log work - the callers must ensure
 856  * it is started again when appropriate.
 857  */
 858 void
 859 xfs_quiesce_attr(
 860         struct xfs_mount        *mp)
 861 {
 862         int     error = 0;
 863
 864         /* wait for all modifications to complete */
 865         while (atomic_read(&mp->m_active_trans) > 0)
 866                 delay(100);
 867
 868         /* force the log to unpin objects from the now complete transactions */
 869         xfs_log_force(mp, XFS_LOG_SYNC);
 870
 871         /* reclaim inodes to do any IO before the freeze completes */
 872         xfs_reclaim_inodes(mp, 0);
 873         xfs_reclaim_inodes(mp, SYNC_WAIT);
 874
 875         /* Push the superblock and write an unmount record */
 876         error = xfs_log_sbcount(mp);
 877         if (error)
 878                 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
 879                                 "Frozen image may not be consistent.");
 880         /*
 881          * Just warn here till VFS can correctly support
 882          * read-only remount without racing.
 883          */
 884         WARN_ON(atomic_read(&mp->m_active_trans) != 0);
 885
 886         xfs_log_quiesce(mp);
 887 }
 888
 889 /*
 890  * Second stage of a freeze. The data is already frozen so we only
 891  * need to take care of the metadata. Once that's done sync the superblock
 892  * to the log to dirty it in case of a crash while frozen. This ensures that we
 893  * will recover the unlinked inode lists on the next mount.
 894  */
 895 STATIC int
 896 xfs_fs_freeze(
 897         struct super_block      *sb)
 898 {
 899         struct xfs_mount        *mp = XFS_M(sb);
 900
 901         xfs_stop_block_reaping(mp);
 902         xfs_save_resvblks(mp);
 903         xfs_quiesce_attr(mp);
 904         return xfs_sync_sb(mp, true);
 905 }
 906
 907 STATIC int
 908 xfs_fs_unfreeze(
 909         struct super_block      *sb)
 910 {
 911         struct xfs_mount        *mp = XFS_M(sb);
 912
 913         xfs_restore_resvblks(mp);
 914         xfs_log_work_queue(mp);
 915         xfs_start_block_reaping(mp);
 916         return 0;
 917 }
 918
 919 /*
 920  * This function fills in xfs_mount_t fields based on mount args.
 921  * Note: the superblock _has_ now been read in.
 922  */
 923 STATIC int
 924 xfs_finish_flags(
 925         struct xfs_mount        *mp)
 926 {
 927         int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
 928
 929         /* Fail a mount where the logbuf is smaller than the log stripe */
 930         if (xfs_sb_version_haslogv2(&mp->m_sb)) {
 931                 if (mp->m_logbsize <= 0 &&
 932                     mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
 933                         mp->m_logbsize = mp->m_sb.sb_logsunit;
 934                 } else if (mp->m_logbsize > 0 &&
 935                            mp->m_logbsize < mp->m_sb.sb_logsunit) {
 936                         xfs_warn(mp,
 937                 "logbuf size must be greater than or equal to log stripe size");
 938                         return -EINVAL;
 939                 }
 940         } else {
 941                 /* Fail a mount if the logbuf is larger than 32K */
 942                 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
 943                         xfs_warn(mp,
 944                 "logbuf size for version 1 logs must be 16K or 32K");
 945                         return -EINVAL;
 946                 }
 947         }
 948
 949         /*
 950          * V5 filesystems always use attr2 format for attributes.
 951          */
 952         if (xfs_sb_version_hascrc(&mp->m_sb) &&
 953             (mp->m_flags & XFS_MOUNT_NOATTR2)) {
 954                 xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
 955                              "attr2 is always enabled for V5 filesystems.");
 956                 return -EINVAL;
 957         }
 958
 959         /*
 960          * mkfs'ed attr2 will turn on attr2 mount unless explicitly
 961          * told by noattr2 to turn it off
 962          */
 963         if (xfs_sb_version_hasattr2(&mp->m_sb) &&
 964             !(mp->m_flags & XFS_MOUNT_NOATTR2))
 965                 mp->m_flags |= XFS_MOUNT_ATTR2;
 966
 967         /*
 968          * prohibit r/w mounts of read-only filesystems
 969          */
 970         if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
 971                 xfs_warn(mp,
 972                         "cannot mount a read-only filesystem as read-write");
 973                 return -EROFS;
 974         }
 975
 976         if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
 977             (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
 978             !xfs_sb_version_has_pquotino(&mp->m_sb)) {
 979                 xfs_warn(mp,
 980                   "Super block does not support project and group quota together");
 981                 return -EINVAL;
 982         }
 983
 984         return 0;
 985 }
 986
 987 static int
 988 xfs_init_percpu_counters(
 989         struct xfs_mount        *mp)
 990 {
 991         int             error;
 992
 993         error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
 994         if (error)
 995                 return -ENOMEM;
 996
 997         error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
 998         if (error)
 999                 goto free_icount;
1000
1001         error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1002         if (error)
1003                 goto free_ifree;
1004
1005         error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
1006         if (error)
1007                 goto free_fdblocks;
1008
1009         return 0;
1010
1011 free_fdblocks:
1012         percpu_counter_destroy(&mp->m_fdblocks);
1013 free_ifree:
1014         percpu_counter_destroy(&mp->m_ifree);
1015 free_icount:
1016         percpu_counter_destroy(&mp->m_icount);
1017         return -ENOMEM;
1018 }
1019
1020 void
1021 xfs_reinit_percpu_counters(
1022         struct xfs_mount        *mp)
1023 {
1024         percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1025         percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1026         percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1027 }
1028
1029 static void
1030 xfs_destroy_percpu_counters(
1031         struct xfs_mount        *mp)
1032 {
1033         percpu_counter_destroy(&mp->m_icount);
1034         percpu_counter_destroy(&mp->m_ifree);
1035         percpu_counter_destroy(&mp->m_fdblocks);
1036         ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1037                percpu_counter_sum(&mp->m_delalloc_blks) == 0);
1038         percpu_counter_destroy(&mp->m_delalloc_blks);
1039 }
1040
1041 static void
1042 xfs_fs_put_super(
1043         struct super_block      *sb)
1044 {
1045         struct xfs_mount        *mp = XFS_M(sb);
1046
1047         /* if ->fill_super failed, we have no mount to tear down */
1048         if (!sb->s_fs_info)
1049                 return;
1050
1051         xfs_notice(mp, "Unmounting Filesystem");
1052         xfs_filestream_unmount(mp);
1053         xfs_unmountfs(mp);
1054
1055         xfs_freesb(mp);
1056         free_percpu(mp->m_stats.xs_stats);
1057         xfs_destroy_percpu_counters(mp);
1058         xfs_destroy_mount_workqueues(mp);
1059         xfs_close_devices(mp);
1060
1061         sb->s_fs_info = NULL;
1062         xfs_mount_free(mp);
1063 }
1064
1065 static long
1066 xfs_fs_nr_cached_objects(
1067         struct super_block      *sb,
1068         struct shrink_control   *sc)
1069 {
1070         /* Paranoia: catch incorrect calls during mount setup or teardown */
1071         if (WARN_ON_ONCE(!sb->s_fs_info))
1072                 return 0;
1073         return xfs_reclaim_inodes_count(XFS_M(sb));
1074 }
1075
1076 static long
1077 xfs_fs_free_cached_objects(
1078         struct super_block      *sb,
1079         struct shrink_control   *sc)
1080 {
1081         return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1082 }
1083
1084 static const struct super_operations xfs_super_operations = {
1085         .alloc_inode            = xfs_fs_alloc_inode,
1086         .destroy_inode          = xfs_fs_destroy_inode,
1087         .dirty_inode            = xfs_fs_dirty_inode,
1088         .drop_inode             = xfs_fs_drop_inode,
1089         .put_super              = xfs_fs_put_super,
1090         .sync_fs                = xfs_fs_sync_fs,
1091         .freeze_fs              = xfs_fs_freeze,
1092         .unfreeze_fs            = xfs_fs_unfreeze,
1093         .statfs                 = xfs_fs_statfs,
1094         .show_options           = xfs_fs_show_options,
1095         .nr_cached_objects      = xfs_fs_nr_cached_objects,
1096         .free_cached_objects    = xfs_fs_free_cached_objects,
1097 };
1098
1099 static int
1100 suffix_kstrtoint(
1101         const char      *s,
1102         unsigned int    base,
1103         int             *res)
1104 {
1105         int             last, shift_left_factor = 0, _res;
1106         char            *value;
1107         int             ret = 0;
1108
1109         value = kstrdup(s, GFP_KERNEL);
1110         if (!value)
1111                 return -ENOMEM;
1112
1113         last = strlen(value) - 1;
1114         if (value[last] == 'K' || value[last] == 'k') {
1115                 shift_left_factor = 10;
1116                 value[last] = '\0';
1117         }
1118         if (value[last] == 'M' || value[last] == 'm') {
1119                 shift_left_factor = 20;
1120                 value[last] = '\0';
1121         }
1122         if (value[last] == 'G' || value[last] == 'g') {
1123                 shift_left_factor = 30;
1124                 value[last] = '\0';
1125         }
1126
1127         if (kstrtoint(value, base, &_res))
1128                 ret = -EINVAL;
1129         kfree(value);
1130         *res = _res << shift_left_factor;
1131         return ret;
1132 }
1133
1134 /*
1135  * Set mount state from a mount option.
1136  *
1137  * NOTE: mp->m_super is NULL here!
1138  */
1139 static int
1140 xfs_fc_parse_param(
1141         struct fs_context       *fc,
1142         struct fs_parameter     *param)
1143 {
1144         struct xfs_mount        *mp = fc->s_fs_info;
1145         struct fs_parse_result  result;
1146         int                     size = 0;
1147         int                     opt;
1148
1149         opt = fs_parse(fc, &xfs_fs_parameters, param, &result);
1150         if (opt < 0)
1151                 return opt;
1152
1153         switch (opt) {
1154         case Opt_logbufs:
1155                 mp->m_logbufs = result.uint_32;
1156                 return 0;
1157         case Opt_logbsize:
1158                 if (suffix_kstrtoint(param->string, 10, &mp->m_logbsize))
1159                         return -EINVAL;
1160                 return 0;
1161         case Opt_logdev:
1162                 kfree(mp->m_logname);
1163                 mp->m_logname = kstrdup(param->string, GFP_KERNEL);
1164                 if (!mp->m_logname)
1165                         return -ENOMEM;
1166                 return 0;
1167         case Opt_rtdev:
1168                 kfree(mp->m_rtname);
1169                 mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
1170                 if (!mp->m_rtname)
1171                         return -ENOMEM;
1172                 return 0;
1173         case Opt_allocsize:
1174                 if (suffix_kstrtoint(param->string, 10, &size))
1175                         return -EINVAL;
1176                 mp->m_allocsize_log = ffs(size) - 1;
1177                 mp->m_flags |= XFS_MOUNT_ALLOCSIZE;
1178                 return 0;
1179         case Opt_grpid:
1180         case Opt_bsdgroups:
1181                 mp->m_flags |= XFS_MOUNT_GRPID;
1182                 return 0;
1183         case Opt_nogrpid:
1184         case Opt_sysvgroups:
1185                 mp->m_flags &= ~XFS_MOUNT_GRPID;
1186                 return 0;
1187         case Opt_wsync:
1188                 mp->m_flags |= XFS_MOUNT_WSYNC;
1189                 return 0;
1190         case Opt_norecovery:
1191                 mp->m_flags |= XFS_MOUNT_NORECOVERY;
1192                 return 0;
1193         case Opt_noalign:
1194                 mp->m_flags |= XFS_MOUNT_NOALIGN;
1195                 return 0;
1196         case Opt_swalloc:
1197                 mp->m_flags |= XFS_MOUNT_SWALLOC;
1198                 return 0;
1199         case Opt_sunit:
1200                 mp->m_dalign = result.uint_32;
1201                 return 0;
1202         case Opt_swidth:
1203                 mp->m_swidth = result.uint_32;
1204                 return 0;
1205         case Opt_inode32:
1206                 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
1207                 return 0;
1208         case Opt_inode64:
1209                 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
1210                 return 0;
1211         case Opt_nouuid:
1212                 mp->m_flags |= XFS_MOUNT_NOUUID;
1213                 return 0;
1214         case Opt_ikeep:
1215                 mp->m_flags |= XFS_MOUNT_IKEEP;
1216                 return 0;
1217         case Opt_noikeep:
1218                 mp->m_flags &= ~XFS_MOUNT_IKEEP;
1219                 return 0;
1220         case Opt_largeio:
1221                 mp->m_flags |= XFS_MOUNT_LARGEIO;
1222                 return 0;
1223         case Opt_nolargeio:
1224                 mp->m_flags &= ~XFS_MOUNT_LARGEIO;
1225                 return 0;
1226         case Opt_attr2:
1227                 mp->m_flags |= XFS_MOUNT_ATTR2;
1228                 return 0;
1229         case Opt_noattr2:
1230                 mp->m_flags &= ~XFS_MOUNT_ATTR2;
1231                 mp->m_flags |= XFS_MOUNT_NOATTR2;
1232                 return 0;
1233         case Opt_filestreams:
1234                 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
1235                 return 0;
1236         case Opt_noquota:
1237                 mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
1238                 mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
1239                 mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
1240                 return 0;
1241         case Opt_quota:
1242         case Opt_uquota:
1243         case Opt_usrquota:
1244                 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
1245                                  XFS_UQUOTA_ENFD);
1246                 return 0;
1247         case Opt_qnoenforce:
1248         case Opt_uqnoenforce:
1249                 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
1250                 mp->m_qflags &= ~XFS_UQUOTA_ENFD;
1251                 return 0;
1252         case Opt_pquota:
1253         case Opt_prjquota:
1254                 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
1255                                  XFS_PQUOTA_ENFD);
1256                 return 0;
1257         case Opt_pqnoenforce:
1258                 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
1259                 mp->m_qflags &= ~XFS_PQUOTA_ENFD;
1260                 return 0;
1261         case Opt_gquota:
1262         case Opt_grpquota:
1263                 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
1264                                  XFS_GQUOTA_ENFD);
1265                 return 0;
1266         case Opt_gqnoenforce:
1267                 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
1268                 mp->m_qflags &= ~XFS_GQUOTA_ENFD;
1269                 return 0;
1270         case Opt_discard:
1271                 mp->m_flags |= XFS_MOUNT_DISCARD;
1272                 return 0;
1273         case Opt_nodiscard:
1274                 mp->m_flags &= ~XFS_MOUNT_DISCARD;
1275                 return 0;
1276 #ifdef CONFIG_FS_DAX
1277         case Opt_dax:
1278                 mp->m_flags |= XFS_MOUNT_DAX;
1279                 return 0;
1280 #endif
1281         default:
1282                 xfs_warn(mp, "unknown mount option [%s].", param->key);
1283                 return -EINVAL;
1284         }
1285
1286         return 0;
1287 }
1288
1289 static int
1290 xfs_fc_validate_params(
1291         struct xfs_mount        *mp)
1292 {
1293         /*
1294          * no recovery flag requires a read-only mount
1295          */
1296         if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
1297             !(mp->m_flags & XFS_MOUNT_RDONLY)) {
1298                 xfs_warn(mp, "no-recovery mounts must be read-only.");
1299                 return -EINVAL;
1300         }
1301
1302         if ((mp->m_flags & XFS_MOUNT_NOALIGN) &&
1303             (mp->m_dalign || mp->m_swidth)) {
1304                 xfs_warn(mp,
1305         "sunit and swidth options incompatible with the noalign option");
1306                 return -EINVAL;
1307         }
1308
1309         if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
1310                 xfs_warn(mp, "quota support not available in this kernel.");
1311                 return -EINVAL;
1312         }
1313
1314         if ((mp->m_dalign && !mp->m_swidth) ||
1315             (!mp->m_dalign && mp->m_swidth)) {
1316                 xfs_warn(mp, "sunit and swidth must be specified together");
1317                 return -EINVAL;
1318         }
1319
1320         if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
1321                 xfs_warn(mp,
1322         "stripe width (%d) must be a multiple of the stripe unit (%d)",
1323                         mp->m_swidth, mp->m_dalign);
1324                 return -EINVAL;
1325         }
1326
1327         if (mp->m_logbufs != -1 &&
1328             mp->m_logbufs != 0 &&
1329             (mp->m_logbufs < XLOG_MIN_ICLOGS ||
1330              mp->m_logbufs > XLOG_MAX_ICLOGS)) {
1331                 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
1332                         mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1333                 return -EINVAL;
1334         }
1335
1336         if (mp->m_logbsize != -1 &&
1337             mp->m_logbsize !=  0 &&
1338             (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
1339              mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
1340              !is_power_of_2(mp->m_logbsize))) {
1341                 xfs_warn(mp,
1342                         "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1343                         mp->m_logbsize);
1344                 return -EINVAL;
1345         }
1346
1347         if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
1348             (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
1349              mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
1350                 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
1351                         mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
1352                 return -EINVAL;
1353         }
1354
1355         return 0;
1356 }
1357
1358 static int
1359 xfs_fc_fill_super(
1360         struct super_block      *sb,
1361         struct fs_context       *fc)
1362 {
1363         struct xfs_mount        *mp = sb->s_fs_info;
1364         struct inode            *root;
1365         int                     flags = 0, error;
1366
1367         mp->m_super = sb;
1368
1369         error = xfs_fc_validate_params(mp);
1370         if (error)
1371                 goto out_free_names;
1372
1373         sb_min_blocksize(sb, BBSIZE);
1374         sb->s_xattr = xfs_xattr_handlers;
1375         sb->s_export_op = &xfs_export_operations;
1376 #ifdef CONFIG_XFS_QUOTA
1377         sb->s_qcop = &xfs_quotactl_operations;
1378         sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
1379 #endif
1380         sb->s_op = &xfs_super_operations;
1381
1382         /*
1383          * Delay mount work if the debug hook is set. This is debug
1384          * instrumention to coordinate simulation of xfs mount failures with
1385          * VFS superblock operations
1386          */
1387         if (xfs_globals.mount_delay) {
1388                 xfs_notice(mp, "Delaying mount for %d seconds.",
1389                         xfs_globals.mount_delay);
1390                 msleep(xfs_globals.mount_delay * 1000);
1391         }
1392
1393         if (fc->sb_flags & SB_SILENT)
1394                 flags |= XFS_MFSI_QUIET;
1395
1396         error = xfs_open_devices(mp);
1397         if (error)
1398                 goto out_free_names;
1399
1400         error = xfs_init_mount_workqueues(mp);
1401         if (error)
1402                 goto out_close_devices;
1403
1404         error = xfs_init_percpu_counters(mp);
1405         if (error)
1406                 goto out_destroy_workqueues;
1407
1408         /* Allocate stats memory before we do operations that might use it */
1409         mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1410         if (!mp->m_stats.xs_stats) {
1411                 error = -ENOMEM;
1412                 goto out_destroy_counters;
1413         }
1414
1415         error = xfs_readsb(mp, flags);
1416         if (error)
1417                 goto out_free_stats;
1418
1419         error = xfs_finish_flags(mp);
1420         if (error)
1421                 goto out_free_sb;
1422
1423         error = xfs_setup_devices(mp);
1424         if (error)
1425                 goto out_free_sb;
1426
1427         error = xfs_filestream_mount(mp);
1428         if (error)
1429                 goto out_free_sb;
1430
1431         /*
1432          * we must configure the block size in the superblock before we run the
1433          * full mount process as the mount process can lookup and cache inodes.
1434          */
1435         sb->s_magic = XFS_SUPER_MAGIC;
1436         sb->s_blocksize = mp->m_sb.sb_blocksize;
1437         sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1438         sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1439         sb->s_max_links = XFS_MAXLINK;
1440         sb->s_time_gran = 1;
1441         sb->s_time_min = S32_MIN;
1442         sb->s_time_max = S32_MAX;
1443         sb->s_iflags |= SB_I_CGROUPWB;
1444
1445         set_posix_acl_flag(sb);
1446
1447         /* version 5 superblocks support inode version counters. */
1448         if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
1449                 sb->s_flags |= SB_I_VERSION;
1450
1451         if (mp->m_flags & XFS_MOUNT_DAX) {
1452                 bool rtdev_is_dax = false, datadev_is_dax;
1453
1454                 xfs_warn(mp,
1455                 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1456
1457                 datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
1458                         sb->s_blocksize);
1459                 if (mp->m_rtdev_targp)
1460                         rtdev_is_dax = bdev_dax_supported(
1461                                 mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
1462                 if (!rtdev_is_dax && !datadev_is_dax) {
1463                         xfs_alert(mp,
1464                         "DAX unsupported by block device. Turning off DAX.");
1465                         mp->m_flags &= ~XFS_MOUNT_DAX;
1466                 }
1467                 if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1468                         xfs_alert(mp,
1469                 "DAX and reflink cannot be used together!");
1470                         error = -EINVAL;
1471                         goto out_filestream_unmount;
1472                 }
1473         }
1474
1475         if (mp->m_flags & XFS_MOUNT_DISCARD) {
1476                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
1477
1478                 if (!blk_queue_discard(q)) {
1479                         xfs_warn(mp, "mounting with \"discard\" option, but "
1480                                         "the device does not support discard");
1481                         mp->m_flags &= ~XFS_MOUNT_DISCARD;
1482                 }
1483         }
1484
1485         if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1486                 if (mp->m_sb.sb_rblocks) {
1487                         xfs_alert(mp,
1488         "reflink not compatible with realtime device!");
1489                         error = -EINVAL;
1490                         goto out_filestream_unmount;
1491                 }
1492
1493                 if (xfs_globals.always_cow) {
1494                         xfs_info(mp, "using DEBUG-only always_cow mode.");
1495                         mp->m_always_cow = true;
1496                 }
1497         }
1498
1499         if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
1500                 xfs_alert(mp,
1501         "reverse mapping btree not compatible with realtime device!");
1502                 error = -EINVAL;
1503                 goto out_filestream_unmount;
1504         }
1505
1506         error = xfs_mountfs(mp);
1507         if (error)
1508                 goto out_filestream_unmount;
1509
1510         root = igrab(VFS_I(mp->m_rootip));
1511         if (!root) {
1512                 error = -ENOENT;
1513                 goto out_unmount;
1514         }
1515         sb->s_root = d_make_root(root);
1516         if (!sb->s_root) {
1517                 error = -ENOMEM;
1518                 goto out_unmount;
1519         }
1520
1521         return 0;
1522
1523  out_filestream_unmount:
1524         xfs_filestream_unmount(mp);
1525  out_free_sb:
1526         xfs_freesb(mp);
1527  out_free_stats:
1528         free_percpu(mp->m_stats.xs_stats);
1529  out_destroy_counters:
1530         xfs_destroy_percpu_counters(mp);
1531  out_destroy_workqueues:
1532         xfs_destroy_mount_workqueues(mp);
1533  out_close_devices:
1534         xfs_close_devices(mp);
1535  out_free_names:
1536         sb->s_fs_info = NULL;
1537         xfs_mount_free(mp);
1538         return error;
1539
1540  out_unmount:
1541         xfs_filestream_unmount(mp);
1542         xfs_unmountfs(mp);
1543         goto out_free_sb;
1544 }
1545
1546 static int
1547 xfs_fc_get_tree(
1548         struct fs_context       *fc)
1549 {
1550         return get_tree_bdev(fc, xfs_fc_fill_super);
1551 }
1552
1553 static int
1554 xfs_remount_rw(
1555         struct xfs_mount        *mp)
1556 {
1557         struct xfs_sb           *sbp = &mp->m_sb;
1558         int error;
1559
1560         if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
1561                 xfs_warn(mp,
1562                         "ro->rw transition prohibited on norecovery mount");
1563                 return -EINVAL;
1564         }
1565
1566         if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
1567             xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1568                 xfs_warn(mp,
1569         "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1570                         (sbp->sb_features_ro_compat &
1571                                 XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1572                 return -EINVAL;
1573         }
1574
1575         mp->m_flags &= ~XFS_MOUNT_RDONLY;
1576
1577         /*
1578          * If this is the first remount to writeable state we might have some
1579          * superblock changes to update.
1580          */
1581         if (mp->m_update_sb) {
1582                 error = xfs_sync_sb(mp, false);
1583                 if (error) {
1584                         xfs_warn(mp, "failed to write sb changes");
1585                         return error;
1586                 }
1587                 mp->m_update_sb = false;
1588         }
1589
1590         /*
1591          * Fill out the reserve pool if it is empty. Use the stashed value if
1592          * it is non-zero, otherwise go with the default.
1593          */
1594         xfs_restore_resvblks(mp);
1595         xfs_log_work_queue(mp);
1596
1597         /* Recover any CoW blocks that never got remapped. */
1598         error = xfs_reflink_recover_cow(mp);
1599         if (error) {
1600                 xfs_err(mp,
1601                         "Error %d recovering leftover CoW allocations.", error);
1602                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1603                 return error;
1604         }
1605         xfs_start_block_reaping(mp);
1606
1607         /* Create the per-AG metadata reservation pool .*/
1608         error = xfs_fs_reserve_ag_blocks(mp);
1609         if (error && error != -ENOSPC)
1610                 return error;
1611
1612         return 0;
1613 }
1614
1615 static int
1616 xfs_remount_ro(
1617         struct xfs_mount        *mp)
1618 {
1619         int error;
1620
1621         /*
1622          * Cancel background eofb scanning so it cannot race with the final
1623          * log force+buftarg wait and deadlock the remount.
1624          */
1625         xfs_stop_block_reaping(mp);
1626
1627         /* Get rid of any leftover CoW reservations... */
1628         error = xfs_icache_free_cowblocks(mp, NULL);
1629         if (error) {
1630                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1631                 return error;
1632         }
1633
1634         /* Free the per-AG metadata reservation pool. */
1635         error = xfs_fs_unreserve_ag_blocks(mp);
1636         if (error) {
1637                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1638                 return error;
1639         }
1640
1641         /*
1642          * Before we sync the metadata, we need to free up the reserve block
1643          * pool so that the used block count in the superblock on disk is
1644          * correct at the end of the remount. Stash the current* reserve pool
1645          * size so that if we get remounted rw, we can return it to the same
1646          * size.
1647          */
1648         xfs_save_resvblks(mp);
1649
1650         xfs_quiesce_attr(mp);
1651         mp->m_flags |= XFS_MOUNT_RDONLY;
1652
1653         return 0;
1654 }
1655
1656 /*
1657  * Logically we would return an error here to prevent users from believing
1658  * they might have changed mount options using remount which can't be changed.
1659  *
1660  * But unfortunately mount(8) adds all options from mtab and fstab to the mount
1661  * arguments in some cases so we can't blindly reject options, but have to
1662  * check for each specified option if it actually differs from the currently
1663  * set option and only reject it if that's the case.
1664  *
1665  * Until that is implemented we return success for every remount request, and
1666  * silently ignore all options that we can't actually change.
1667  */
1668 static int
1669 xfs_fc_reconfigure(
1670         struct fs_context *fc)
1671 {
1672         struct xfs_mount        *mp = XFS_M(fc->root->d_sb);
1673         struct xfs_mount        *new_mp = fc->s_fs_info;
1674         xfs_sb_t                *sbp = &mp->m_sb;
1675         int                     flags = fc->sb_flags;
1676         int                     error;
1677
1678         error = xfs_fc_validate_params(new_mp);
1679         if (error)
1680                 return error;
1681
1682         sync_filesystem(mp->m_super);
1683
1684         /* inode32 -> inode64 */
1685         if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
1686             !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
1687                 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
1688                 mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
1689         }
1690
1691         /* inode64 -> inode32 */
1692         if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
1693             (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
1694                 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
1695                 mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
1696         }
1697
1698         /* ro -> rw */
1699         if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) {
1700                 error = xfs_remount_rw(mp);
1701                 if (error)
1702                         return error;
1703         }
1704
1705         /* rw -> ro */
1706         if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) {
1707                 error = xfs_remount_ro(mp);
1708                 if (error)
1709                         return error;
1710         }
1711
1712         return 0;
1713 }
1714
1715 static void xfs_fc_free(
1716         struct fs_context       *fc)
1717 {
1718         struct xfs_mount        *mp = fc->s_fs_info;
1719
1720         /*
1721          * mp is stored in the fs_context when it is initialized.
1722          * mp is transferred to the superblock on a successful mount,
1723          * but if an error occurs before the transfer we have to free
1724          * it here.
1725          */
1726         if (mp)
1727                 xfs_mount_free(mp);
1728 }
1729
1730 static const struct fs_context_operations xfs_context_ops = {
1731         .parse_param = xfs_fc_parse_param,
1732         .get_tree    = xfs_fc_get_tree,
1733         .reconfigure = xfs_fc_reconfigure,
1734         .free        = xfs_fc_free,
1735 };
1736
1737 static int xfs_init_fs_context(
1738         struct fs_context       *fc)
1739 {
1740         struct xfs_mount        *mp;
1741
1742         mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
1743         if (!mp)
1744                 return -ENOMEM;
1745
1746         spin_lock_init(&mp->m_sb_lock);
1747         spin_lock_init(&mp->m_agirotor_lock);
1748         INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1749         spin_lock_init(&mp->m_perag_lock);
1750         mutex_init(&mp->m_growlock);
1751         atomic_set(&mp->m_active_trans, 0);
1752         INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1753         INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
1754         INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
1755         mp->m_kobj.kobject.kset = xfs_kset;
1756         /*
1757          * We don't create the finobt per-ag space reservation until after log
1758          * recovery, so we must set this to true so that an ifree transaction
1759          * started during log recovery will not depend on space reservations
1760          * for finobt expansion.
1761          */
1762         mp->m_finobt_nores = true;
1763
1764         /*
1765          * These can be overridden by the mount option parsing.
1766          */
1767         mp->m_logbufs = -1;
1768         mp->m_logbsize = -1;
1769         mp->m_allocsize_log = 16; /* 64k */
1770
1771         /*
1772          * Copy binary VFS mount flags we are interested in.
1773          */
1774         if (fc->sb_flags & SB_RDONLY)
1775                 mp->m_flags |= XFS_MOUNT_RDONLY;
1776         if (fc->sb_flags & SB_DIRSYNC)
1777                 mp->m_flags |= XFS_MOUNT_DIRSYNC;
1778         if (fc->sb_flags & SB_SYNCHRONOUS)
1779                 mp->m_flags |= XFS_MOUNT_WSYNC;
1780
1781         fc->s_fs_info = mp;
1782         fc->ops = &xfs_context_ops;
1783
1784         return 0;
1785 }
1786
1787 static struct file_system_type xfs_fs_type = {
1788         .owner                  = THIS_MODULE,
1789         .name                   = "xfs",
1790         .init_fs_context        = xfs_init_fs_context,
1791         .parameters             = &xfs_fs_parameters,
1792         .kill_sb                = kill_block_super,
1793         .fs_flags               = FS_REQUIRES_DEV,
1794 };
1795 MODULE_ALIAS_FS("xfs");
1796
1797 STATIC int __init
1798 xfs_init_zones(void)
1799 {
1800         xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket",
1801                                                 sizeof(struct xlog_ticket),
1802                                                 0, 0, NULL);
1803         if (!xfs_log_ticket_zone)
1804                 goto out;
1805
1806         xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item",
1807                                         sizeof(struct xfs_extent_free_item),
1808                                         0, 0, NULL);
1809         if (!xfs_bmap_free_item_zone)
1810                 goto out_destroy_log_ticket_zone;
1811
1812         xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur",
1813                                                sizeof(struct xfs_btree_cur),
1814                                                0, 0, NULL);
1815         if (!xfs_btree_cur_zone)
1816                 goto out_destroy_bmap_free_item_zone;
1817
1818         xfs_da_state_zone = kmem_cache_create("xfs_da_state",
1819                                               sizeof(struct xfs_da_state),
1820                                               0, 0, NULL);
1821         if (!xfs_da_state_zone)
1822                 goto out_destroy_btree_cur_zone;
1823
1824         xfs_ifork_zone = kmem_cache_create("xfs_ifork",
1825                                            sizeof(struct xfs_ifork),
1826                                            0, 0, NULL);
1827         if (!xfs_ifork_zone)
1828                 goto out_destroy_da_state_zone;
1829
1830         xfs_trans_zone = kmem_cache_create("xf_trans",
1831                                            sizeof(struct xfs_trans),
1832                                            0, 0, NULL);
1833         if (!xfs_trans_zone)
1834                 goto out_destroy_ifork_zone;
1835
1836
1837         /*
1838          * The size of the zone allocated buf log item is the maximum
1839          * size possible under XFS.  This wastes a little bit of memory,
1840          * but it is much faster.
1841          */
1842         xfs_buf_item_zone = kmem_cache_create("xfs_buf_item",
1843                                               sizeof(struct xfs_buf_log_item),
1844                                               0, 0, NULL);
1845         if (!xfs_buf_item_zone)
1846                 goto out_destroy_trans_zone;
1847
1848         xfs_efd_zone = kmem_cache_create("xfs_efd_item",
1849                                         (sizeof(struct xfs_efd_log_item) +
1850                                         (XFS_EFD_MAX_FAST_EXTENTS - 1) *
1851                                         sizeof(struct xfs_extent)),
1852                                         0, 0, NULL);
1853         if (!xfs_efd_zone)
1854                 goto out_destroy_buf_item_zone;
1855
1856         xfs_efi_zone = kmem_cache_create("xfs_efi_item",
1857                                          (sizeof(struct xfs_efi_log_item) +
1858                                          (XFS_EFI_MAX_FAST_EXTENTS - 1) *
1859                                          sizeof(struct xfs_extent)),
1860                                          0, 0, NULL);
1861         if (!xfs_efi_zone)
1862                 goto out_destroy_efd_zone;
1863
1864         xfs_inode_zone = kmem_cache_create("xfs_inode",
1865                                            sizeof(struct xfs_inode), 0,
1866                                            (SLAB_HWCACHE_ALIGN |
1867                                             SLAB_RECLAIM_ACCOUNT |
1868                                             SLAB_MEM_SPREAD | SLAB_ACCOUNT),
1869                                            xfs_fs_inode_init_once);
1870         if (!xfs_inode_zone)
1871                 goto out_destroy_efi_zone;
1872
1873         xfs_ili_zone = kmem_cache_create("xfs_ili",
1874                                          sizeof(struct xfs_inode_log_item), 0,
1875                                          SLAB_MEM_SPREAD, NULL);
1876         if (!xfs_ili_zone)
1877                 goto out_destroy_inode_zone;
1878
1879         xfs_icreate_zone = kmem_cache_create("xfs_icr",
1880                                              sizeof(struct xfs_icreate_item),
1881                                              0, 0, NULL);
1882         if (!xfs_icreate_zone)
1883                 goto out_destroy_ili_zone;
1884
1885         xfs_rud_zone = kmem_cache_create("xfs_rud_item",
1886                                          sizeof(struct xfs_rud_log_item),
1887                                          0, 0, NULL);
1888         if (!xfs_rud_zone)
1889                 goto out_destroy_icreate_zone;
1890
1891         xfs_rui_zone = kmem_cache_create("xfs_rui_item",
1892                         xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
1893                         0, 0, NULL);
1894         if (!xfs_rui_zone)
1895                 goto out_destroy_rud_zone;
1896
1897         xfs_cud_zone = kmem_cache_create("xfs_cud_item",
1898                                          sizeof(struct xfs_cud_log_item),
1899                                          0, 0, NULL);
1900         if (!xfs_cud_zone)
1901                 goto out_destroy_rui_zone;
1902
1903         xfs_cui_zone = kmem_cache_create("xfs_cui_item",
1904                         xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
1905                         0, 0, NULL);
1906         if (!xfs_cui_zone)
1907                 goto out_destroy_cud_zone;
1908
1909         xfs_bud_zone = kmem_cache_create("xfs_bud_item",
1910                                          sizeof(struct xfs_bud_log_item),
1911                                          0, 0, NULL);
1912         if (!xfs_bud_zone)
1913                 goto out_destroy_cui_zone;
1914
1915         xfs_bui_zone = kmem_cache_create("xfs_bui_item",
1916                         xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
1917                         0, 0, NULL);
1918         if (!xfs_bui_zone)
1919                 goto out_destroy_bud_zone;
1920
1921         return 0;
1922
1923  out_destroy_bud_zone:
1924         kmem_cache_destroy(xfs_bud_zone);
1925  out_destroy_cui_zone:
1926         kmem_cache_destroy(xfs_cui_zone);
1927  out_destroy_cud_zone:
1928         kmem_cache_destroy(xfs_cud_zone);
1929  out_destroy_rui_zone:
1930         kmem_cache_destroy(xfs_rui_zone);
1931  out_destroy_rud_zone:
1932         kmem_cache_destroy(xfs_rud_zone);
1933  out_destroy_icreate_zone:
1934         kmem_cache_destroy(xfs_icreate_zone);
1935  out_destroy_ili_zone:
1936         kmem_cache_destroy(xfs_ili_zone);
1937  out_destroy_inode_zone:
1938         kmem_cache_destroy(xfs_inode_zone);
1939  out_destroy_efi_zone:
1940         kmem_cache_destroy(xfs_efi_zone);
1941  out_destroy_efd_zone:
1942         kmem_cache_destroy(xfs_efd_zone);
1943  out_destroy_buf_item_zone:
1944         kmem_cache_destroy(xfs_buf_item_zone);
1945  out_destroy_trans_zone:
1946         kmem_cache_destroy(xfs_trans_zone);
1947  out_destroy_ifork_zone:
1948         kmem_cache_destroy(xfs_ifork_zone);
1949  out_destroy_da_state_zone:
1950         kmem_cache_destroy(xfs_da_state_zone);
1951  out_destroy_btree_cur_zone:
1952         kmem_cache_destroy(xfs_btree_cur_zone);
1953  out_destroy_bmap_free_item_zone:
1954         kmem_cache_destroy(xfs_bmap_free_item_zone);
1955  out_destroy_log_ticket_zone:
1956         kmem_cache_destroy(xfs_log_ticket_zone);
1957  out:
1958         return -ENOMEM;
1959 }
1960
1961 STATIC void
1962 xfs_destroy_zones(void)
1963 {
1964         /*
1965          * Make sure all delayed rcu free are flushed before we
1966          * destroy caches.
1967          */
1968         rcu_barrier();
1969         kmem_cache_destroy(xfs_bui_zone);
1970         kmem_cache_destroy(xfs_bud_zone);
1971         kmem_cache_destroy(xfs_cui_zone);
1972         kmem_cache_destroy(xfs_cud_zone);
1973         kmem_cache_destroy(xfs_rui_zone);
1974         kmem_cache_destroy(xfs_rud_zone);
1975         kmem_cache_destroy(xfs_icreate_zone);
1976         kmem_cache_destroy(xfs_ili_zone);
1977         kmem_cache_destroy(xfs_inode_zone);
1978         kmem_cache_destroy(xfs_efi_zone);
1979         kmem_cache_destroy(xfs_efd_zone);
1980         kmem_cache_destroy(xfs_buf_item_zone);
1981         kmem_cache_destroy(xfs_trans_zone);
1982         kmem_cache_destroy(xfs_ifork_zone);
1983         kmem_cache_destroy(xfs_da_state_zone);
1984         kmem_cache_destroy(xfs_btree_cur_zone);
1985         kmem_cache_destroy(xfs_bmap_free_item_zone);
1986         kmem_cache_destroy(xfs_log_ticket_zone);
1987 }
1988
1989 STATIC int __init
1990 xfs_init_workqueues(void)
1991 {
1992         /*
1993          * The allocation workqueue can be used in memory reclaim situations
1994          * (writepage path), and parallelism is only limited by the number of
1995          * AGs in all the filesystems mounted. Hence use the default large
1996          * max_active value for this workqueue.
1997          */
1998         xfs_alloc_wq = alloc_workqueue("xfsalloc",
1999                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
2000         if (!xfs_alloc_wq)
2001                 return -ENOMEM;
2002
2003         xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
2004         if (!xfs_discard_wq)
2005                 goto out_free_alloc_wq;
2006
2007         return 0;
2008 out_free_alloc_wq:
2009         destroy_workqueue(xfs_alloc_wq);
2010         return -ENOMEM;
2011 }
2012
2013 STATIC void
2014 xfs_destroy_workqueues(void)
2015 {
2016         destroy_workqueue(xfs_discard_wq);
2017         destroy_workqueue(xfs_alloc_wq);
2018 }
2019
2020 STATIC int __init
2021 init_xfs_fs(void)
2022 {
2023         int                     error;
2024
2025         xfs_check_ondisk_structs();
2026
2027         printk(KERN_INFO XFS_VERSION_STRING " with "
2028                          XFS_BUILD_OPTIONS " enabled\n");
2029
2030         xfs_dir_startup();
2031
2032         error = xfs_init_zones();
2033         if (error)
2034                 goto out;
2035
2036         error = xfs_init_workqueues();
2037         if (error)
2038                 goto out_destroy_zones;
2039
2040         error = xfs_mru_cache_init();
2041         if (error)
2042                 goto out_destroy_wq;
2043
2044         error = xfs_buf_init();
2045         if (error)
2046                 goto out_mru_cache_uninit;
2047
2048         error = xfs_init_procfs();
2049         if (error)
2050                 goto out_buf_terminate;
2051
2052         error = xfs_sysctl_register();
2053         if (error)
2054                 goto out_cleanup_procfs;
2055
2056         xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2057         if (!xfs_kset) {
2058                 error = -ENOMEM;
2059                 goto out_sysctl_unregister;
2060         }
2061
2062         xfsstats.xs_kobj.kobject.kset = xfs_kset;
2063
2064         xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2065         if (!xfsstats.xs_stats) {
2066                 error = -ENOMEM;
2067                 goto out_kset_unregister;
2068         }
2069
2070         error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2071                                "stats");
2072         if (error)
2073                 goto out_free_stats;
2074
2075 #ifdef DEBUG
2076         xfs_dbg_kobj.kobject.kset = xfs_kset;
2077         error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2078         if (error)
2079                 goto out_remove_stats_kobj;
2080 #endif
2081
2082         error = xfs_qm_init();
2083         if (error)
2084                 goto out_remove_dbg_kobj;
2085
2086         error = register_filesystem(&xfs_fs_type);
2087         if (error)
2088                 goto out_qm_exit;
2089         return 0;
2090
2091  out_qm_exit:
2092         xfs_qm_exit();
2093  out_remove_dbg_kobj:
2094 #ifdef DEBUG
2095         xfs_sysfs_del(&xfs_dbg_kobj);
2096  out_remove_stats_kobj:
2097 #endif
2098         xfs_sysfs_del(&xfsstats.xs_kobj);
2099  out_free_stats:
2100         free_percpu(xfsstats.xs_stats);
2101  out_kset_unregister:
2102         kset_unregister(xfs_kset);
2103  out_sysctl_unregister:
2104         xfs_sysctl_unregister();
2105  out_cleanup_procfs:
2106         xfs_cleanup_procfs();
2107  out_buf_terminate:
2108         xfs_buf_terminate();
2109  out_mru_cache_uninit:
2110         xfs_mru_cache_uninit();
2111  out_destroy_wq:
2112         xfs_destroy_workqueues();
2113  out_destroy_zones:
2114         xfs_destroy_zones();
2115  out:
2116         return error;
2117 }
2118
2119 STATIC void __exit
2120 exit_xfs_fs(void)
2121 {
2122         xfs_qm_exit();
2123         unregister_filesystem(&xfs_fs_type);
2124 #ifdef DEBUG
2125         xfs_sysfs_del(&xfs_dbg_kobj);
2126 #endif
2127         xfs_sysfs_del(&xfsstats.xs_kobj);
2128         free_percpu(xfsstats.xs_stats);
2129         kset_unregister(xfs_kset);
2130         xfs_sysctl_unregister();
2131         xfs_cleanup_procfs();
2132         xfs_buf_terminate();
2133         xfs_mru_cache_uninit();
2134         xfs_destroy_workqueues();
2135         xfs_destroy_zones();
2136         xfs_uuid_table_free();
2137 }
2138
2139 module_init(init_xfs_fs);
2140 module_exit(exit_xfs_fs);
2141
2142 MODULE_AUTHOR("Silicon Graphics, Inc.");
2143 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2144 MODULE_LICENSE("GPL");