]> asedeno.scripts.mit.edu Git - linux.git/blob - fs/xfs/xfs_super.c
d9ae27ddf253bba6763a10fa8fde72425c5d6df1
[linux.git] / fs / xfs / xfs_super.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_sb.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_btree.h"
16 #include "xfs_bmap.h"
17 #include "xfs_alloc.h"
18 #include "xfs_fsops.h"
19 #include "xfs_trans.h"
20 #include "xfs_buf_item.h"
21 #include "xfs_log.h"
22 #include "xfs_log_priv.h"
23 #include "xfs_dir2.h"
24 #include "xfs_extfree_item.h"
25 #include "xfs_mru_cache.h"
26 #include "xfs_inode_item.h"
27 #include "xfs_icache.h"
28 #include "xfs_trace.h"
29 #include "xfs_icreate_item.h"
30 #include "xfs_filestream.h"
31 #include "xfs_quota.h"
32 #include "xfs_sysfs.h"
33 #include "xfs_ondisk.h"
34 #include "xfs_rmap_item.h"
35 #include "xfs_refcount_item.h"
36 #include "xfs_bmap_item.h"
37 #include "xfs_reflink.h"
38
39 #include <linux/magic.h>
40 #include <linux/fs_context.h>
41 #include <linux/fs_parser.h>
42
/*
 * Forward declaration of the superblock operations table; its initialised
 * definition is not visible in this part of the file.
 */
static const struct super_operations xfs_super_operations;

static struct kset *xfs_kset;           /* top-level xfs sysfs dir */
#ifdef DEBUG
/* Only present in DEBUG builds. */
static struct xfs_kobj xfs_dbg_kobj;    /* global debug sysfs attrs */
#endif
49
/*
 * Table driven mount option parser.
 *
 * One token per recognised mount option; xfs_param_specs[] maps the option
 * name to these tokens.  The enumerators keep their implicit values
 * (0 .. 39), so the order below must not change.
 */
enum {
        /* log and device selection */
        Opt_logbufs,
        Opt_logbsize,
        Opt_logdev,
        Opt_rtdev,

        /* write/allocation behaviour and stripe geometry */
        Opt_wsync,
        Opt_noalign,
        Opt_swalloc,
        Opt_sunit,
        Opt_swidth,
        Opt_nouuid,

        /* group id inheritance */
        Opt_grpid,
        Opt_nogrpid,
        Opt_bsdgroups,
        Opt_sysvgroups,

        /* miscellaneous */
        Opt_allocsize,
        Opt_norecovery,
        Opt_inode64,
        Opt_inode32,
        Opt_ikeep,
        Opt_noikeep,
        Opt_largeio,
        Opt_nolargeio,
        Opt_attr2,
        Opt_noattr2,
        Opt_filestreams,

        /* quota */
        Opt_quota,
        Opt_noquota,
        Opt_usrquota,
        Opt_grpquota,
        Opt_prjquota,
        Opt_uquota,
        Opt_gquota,
        Opt_pquota,
        Opt_uqnoenforce,
        Opt_gqnoenforce,
        Opt_pqnoenforce,
        Opt_qnoenforce,

        /* discard and DAX */
        Opt_discard,
        Opt_nodiscard,
        Opt_dax,
};
64
/*
 * Mount option specification table: one entry per option name, giving the
 * value kind (u32, string or plain flag) and the Opt_* token it maps to.
 * The table is terminated by an empty entry.
 */
static const struct fs_parameter_spec xfs_param_specs[] = {
        fsparam_u32("logbufs",          Opt_logbufs),
        fsparam_string("logbsize",      Opt_logbsize),
        fsparam_string("logdev",        Opt_logdev),
        fsparam_string("rtdev",         Opt_rtdev),
        fsparam_flag("wsync",           Opt_wsync),
        fsparam_flag("noalign",         Opt_noalign),
        fsparam_flag("swalloc",         Opt_swalloc),
        fsparam_u32("sunit",            Opt_sunit),
        fsparam_u32("swidth",           Opt_swidth),
        fsparam_flag("nouuid",          Opt_nouuid),
        fsparam_flag("grpid",           Opt_grpid),
        fsparam_flag("nogrpid",         Opt_nogrpid),
        fsparam_flag("bsdgroups",       Opt_bsdgroups),
        fsparam_flag("sysvgroups",      Opt_sysvgroups),
        fsparam_string("allocsize",     Opt_allocsize),
        fsparam_flag("norecovery",      Opt_norecovery),
        fsparam_flag("inode64",         Opt_inode64),
        fsparam_flag("inode32",         Opt_inode32),
        fsparam_flag("ikeep",           Opt_ikeep),
        fsparam_flag("noikeep",         Opt_noikeep),
        fsparam_flag("largeio",         Opt_largeio),
        fsparam_flag("nolargeio",       Opt_nolargeio),
        fsparam_flag("attr2",           Opt_attr2),
        fsparam_flag("noattr2",         Opt_noattr2),
        fsparam_flag("filestreams",     Opt_filestreams),
        fsparam_flag("quota",           Opt_quota),
        fsparam_flag("noquota",         Opt_noquota),
        fsparam_flag("usrquota",        Opt_usrquota),
        fsparam_flag("grpquota",        Opt_grpquota),
        fsparam_flag("prjquota",        Opt_prjquota),
        fsparam_flag("uquota",          Opt_uquota),
        fsparam_flag("gquota",          Opt_gquota),
        fsparam_flag("pquota",          Opt_pquota),
        fsparam_flag("uqnoenforce",     Opt_uqnoenforce),
        fsparam_flag("gqnoenforce",     Opt_gqnoenforce),
        fsparam_flag("pqnoenforce",     Opt_pqnoenforce),
        fsparam_flag("qnoenforce",      Opt_qnoenforce),
        fsparam_flag("discard",         Opt_discard),
        fsparam_flag("nodiscard",       Opt_nodiscard),
        fsparam_flag("dax",             Opt_dax),
        {}      /* terminator */
};
108
/* Parameter description for "xfs", tying the name to xfs_param_specs[]. */
static const struct fs_parameter_description xfs_fs_parameters = {
        .name           = "xfs",
        .specs          = xfs_param_specs,
};
113
/*
 * Associates a mount flag bit with the option text printed for it by
 * xfs_fs_show_options().
 */
struct proc_xfs_info {
        uint64_t        flag;   /* flag bit tested against mp->m_flags */
        const char      *str;   /* option text, including the leading comma */
};
118
119 static int
120 xfs_fs_show_options(
121         struct seq_file         *m,
122         struct dentry           *root)
123 {
124         static struct proc_xfs_info xfs_info_set[] = {
125                 /* the few simple ones we can get from the mount struct */
126                 { XFS_MOUNT_IKEEP,              ",ikeep" },
127                 { XFS_MOUNT_WSYNC,              ",wsync" },
128                 { XFS_MOUNT_NOALIGN,            ",noalign" },
129                 { XFS_MOUNT_SWALLOC,            ",swalloc" },
130                 { XFS_MOUNT_NOUUID,             ",nouuid" },
131                 { XFS_MOUNT_NORECOVERY,         ",norecovery" },
132                 { XFS_MOUNT_ATTR2,              ",attr2" },
133                 { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
134                 { XFS_MOUNT_GRPID,              ",grpid" },
135                 { XFS_MOUNT_DISCARD,            ",discard" },
136                 { XFS_MOUNT_LARGEIO,            ",largeio" },
137                 { XFS_MOUNT_DAX,                ",dax" },
138                 { 0, NULL }
139         };
140         struct xfs_mount        *mp = XFS_M(root->d_sb);
141         struct proc_xfs_info    *xfs_infop;
142
143         for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
144                 if (mp->m_flags & xfs_infop->flag)
145                         seq_puts(m, xfs_infop->str);
146         }
147
148         seq_printf(m, ",inode%d",
149                 (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64);
150
151         if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
152                 seq_printf(m, ",allocsize=%dk",
153                            (1 << mp->m_allocsize_log) >> 10);
154
155         if (mp->m_logbufs > 0)
156                 seq_printf(m, ",logbufs=%d", mp->m_logbufs);
157         if (mp->m_logbsize > 0)
158                 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
159
160         if (mp->m_logname)
161                 seq_show_option(m, "logdev", mp->m_logname);
162         if (mp->m_rtname)
163                 seq_show_option(m, "rtdev", mp->m_rtname);
164
165         if (mp->m_dalign > 0)
166                 seq_printf(m, ",sunit=%d",
167                                 (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
168         if (mp->m_swidth > 0)
169                 seq_printf(m, ",swidth=%d",
170                                 (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
171
172         if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
173                 seq_puts(m, ",usrquota");
174         else if (mp->m_qflags & XFS_UQUOTA_ACCT)
175                 seq_puts(m, ",uqnoenforce");
176
177         if (mp->m_qflags & XFS_PQUOTA_ACCT) {
178                 if (mp->m_qflags & XFS_PQUOTA_ENFD)
179                         seq_puts(m, ",prjquota");
180                 else
181                         seq_puts(m, ",pqnoenforce");
182         }
183         if (mp->m_qflags & XFS_GQUOTA_ACCT) {
184                 if (mp->m_qflags & XFS_GQUOTA_ENFD)
185                         seq_puts(m, ",grpquota");
186                 else
187                         seq_puts(m, ",gqnoenforce");
188         }
189
190         if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
191                 seq_puts(m, ",noquota");
192
193         return 0;
194 }
195
196 static uint64_t
197 xfs_max_file_offset(
198         unsigned int            blockshift)
199 {
200         unsigned int            pagefactor = 1;
201         unsigned int            bitshift = BITS_PER_LONG - 1;
202
203         /* Figure out maximum filesize, on Linux this can depend on
204          * the filesystem blocksize (on 32 bit platforms).
205          * __block_write_begin does this in an [unsigned] long long...
206          *      page->index << (PAGE_SHIFT - bbits)
207          * So, for page sized blocks (4K on 32 bit platforms),
208          * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
209          *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
210          * but for smaller blocksizes it is less (bbits = log2 bsize).
211          */
212
213 #if BITS_PER_LONG == 32
214         ASSERT(sizeof(sector_t) == 8);
215         pagefactor = PAGE_SIZE;
216         bitshift = BITS_PER_LONG;
217 #endif
218
219         return (((uint64_t)pagefactor) << bitshift) - 1;
220 }
221
222 /*
223  * Set parameters for inode allocation heuristics, taking into account
224  * filesystem size and inode32/inode64 mount options; i.e. specifically
225  * whether or not XFS_MOUNT_SMALL_INUMS is set.
226  *
227  * Inode allocation patterns are altered only if inode32 is requested
228  * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
229  * If altered, XFS_MOUNT_32BITINODES is set as well.
230  *
231  * An agcount independent of that in the mount structure is provided
232  * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
233  * to the potentially higher ag count.
234  *
235  * Returns the maximum AG index which may contain inodes.
236  */
237 xfs_agnumber_t
238 xfs_set_inode_alloc(
239         struct xfs_mount *mp,
240         xfs_agnumber_t  agcount)
241 {
242         xfs_agnumber_t  index;
243         xfs_agnumber_t  maxagi = 0;
244         xfs_sb_t        *sbp = &mp->m_sb;
245         xfs_agnumber_t  max_metadata;
246         xfs_agino_t     agino;
247         xfs_ino_t       ino;
248
249         /*
250          * Calculate how much should be reserved for inodes to meet
251          * the max inode percentage.  Used only for inode32.
252          */
253         if (M_IGEO(mp)->maxicount) {
254                 uint64_t        icount;
255
256                 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
257                 do_div(icount, 100);
258                 icount += sbp->sb_agblocks - 1;
259                 do_div(icount, sbp->sb_agblocks);
260                 max_metadata = icount;
261         } else {
262                 max_metadata = agcount;
263         }
264
265         /* Get the last possible inode in the filesystem */
266         agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
267         ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
268
269         /*
270          * If user asked for no more than 32-bit inodes, and the fs is
271          * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
272          * the allocator to accommodate the request.
273          */
274         if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
275                 mp->m_flags |= XFS_MOUNT_32BITINODES;
276         else
277                 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
278
279         for (index = 0; index < agcount; index++) {
280                 struct xfs_perag        *pag;
281
282                 ino = XFS_AGINO_TO_INO(mp, index, agino);
283
284                 pag = xfs_perag_get(mp, index);
285
286                 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
287                         if (ino > XFS_MAXINUMBER_32) {
288                                 pag->pagi_inodeok = 0;
289                                 pag->pagf_metadata = 0;
290                         } else {
291                                 pag->pagi_inodeok = 1;
292                                 maxagi++;
293                                 if (index < max_metadata)
294                                         pag->pagf_metadata = 1;
295                                 else
296                                         pag->pagf_metadata = 0;
297                         }
298                 } else {
299                         pag->pagi_inodeok = 1;
300                         pag->pagf_metadata = 0;
301                 }
302
303                 xfs_perag_put(pag);
304         }
305
306         return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
307 }
308
309 STATIC int
310 xfs_blkdev_get(
311         xfs_mount_t             *mp,
312         const char              *name,
313         struct block_device     **bdevp)
314 {
315         int                     error = 0;
316
317         *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
318                                     mp);
319         if (IS_ERR(*bdevp)) {
320                 error = PTR_ERR(*bdevp);
321                 xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
322         }
323
324         return error;
325 }
326
327 STATIC void
328 xfs_blkdev_put(
329         struct block_device     *bdev)
330 {
331         if (bdev)
332                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
333 }
334
335 void
336 xfs_blkdev_issue_flush(
337         xfs_buftarg_t           *buftarg)
338 {
339         blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
340 }
341
342 STATIC void
343 xfs_close_devices(
344         struct xfs_mount        *mp)
345 {
346         struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
347
348         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
349                 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
350                 struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
351
352                 xfs_free_buftarg(mp->m_logdev_targp);
353                 xfs_blkdev_put(logdev);
354                 fs_put_dax(dax_logdev);
355         }
356         if (mp->m_rtdev_targp) {
357                 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
358                 struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
359
360                 xfs_free_buftarg(mp->m_rtdev_targp);
361                 xfs_blkdev_put(rtdev);
362                 fs_put_dax(dax_rtdev);
363         }
364         xfs_free_buftarg(mp->m_ddev_targp);
365         fs_put_dax(dax_ddev);
366 }
367
368 /*
369  * The file system configurations are:
370  *      (1) device (partition) with data and internal log
371  *      (2) logical volume with data and log subvolumes.
372  *      (3) logical volume with data, log, and realtime subvolumes.
373  *
374  * We only have to handle opening the log and realtime volumes here if
375  * they are present.  The data subvolume has already been opened by
376  * get_sb_bdev() and is stored in sb->s_bdev.
377  */
378 STATIC int
379 xfs_open_devices(
380         struct xfs_mount        *mp)
381 {
382         struct block_device     *ddev = mp->m_super->s_bdev;
383         struct dax_device       *dax_ddev = fs_dax_get_by_bdev(ddev);
384         struct dax_device       *dax_logdev = NULL, *dax_rtdev = NULL;
385         struct block_device     *logdev = NULL, *rtdev = NULL;
386         int                     error;
387
388         /*
389          * Open real time and log devices - order is important.
390          */
391         if (mp->m_logname) {
392                 error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
393                 if (error)
394                         goto out;
395                 dax_logdev = fs_dax_get_by_bdev(logdev);
396         }
397
398         if (mp->m_rtname) {
399                 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
400                 if (error)
401                         goto out_close_logdev;
402
403                 if (rtdev == ddev || rtdev == logdev) {
404                         xfs_warn(mp,
405         "Cannot mount filesystem with identical rtdev and ddev/logdev.");
406                         error = -EINVAL;
407                         goto out_close_rtdev;
408                 }
409                 dax_rtdev = fs_dax_get_by_bdev(rtdev);
410         }
411
412         /*
413          * Setup xfs_mount buffer target pointers
414          */
415         error = -ENOMEM;
416         mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
417         if (!mp->m_ddev_targp)
418                 goto out_close_rtdev;
419
420         if (rtdev) {
421                 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
422                 if (!mp->m_rtdev_targp)
423                         goto out_free_ddev_targ;
424         }
425
426         if (logdev && logdev != ddev) {
427                 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
428                 if (!mp->m_logdev_targp)
429                         goto out_free_rtdev_targ;
430         } else {
431                 mp->m_logdev_targp = mp->m_ddev_targp;
432         }
433
434         return 0;
435
436  out_free_rtdev_targ:
437         if (mp->m_rtdev_targp)
438                 xfs_free_buftarg(mp->m_rtdev_targp);
439  out_free_ddev_targ:
440         xfs_free_buftarg(mp->m_ddev_targp);
441  out_close_rtdev:
442         xfs_blkdev_put(rtdev);
443         fs_put_dax(dax_rtdev);
444  out_close_logdev:
445         if (logdev && logdev != ddev) {
446                 xfs_blkdev_put(logdev);
447                 fs_put_dax(dax_logdev);
448         }
449  out:
450         fs_put_dax(dax_ddev);
451         return error;
452 }
453
454 /*
455  * Setup xfs_mount buffer target pointers based on superblock
456  */
457 STATIC int
458 xfs_setup_devices(
459         struct xfs_mount        *mp)
460 {
461         int                     error;
462
463         error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
464         if (error)
465                 return error;
466
467         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
468                 unsigned int    log_sector_size = BBSIZE;
469
470                 if (xfs_sb_version_hassector(&mp->m_sb))
471                         log_sector_size = mp->m_sb.sb_logsectsize;
472                 error = xfs_setsize_buftarg(mp->m_logdev_targp,
473                                             log_sector_size);
474                 if (error)
475                         return error;
476         }
477         if (mp->m_rtdev_targp) {
478                 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
479                                             mp->m_sb.sb_sectsize);
480                 if (error)
481                         return error;
482         }
483
484         return 0;
485 }
486
487 STATIC int
488 xfs_init_mount_workqueues(
489         struct xfs_mount        *mp)
490 {
491         mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
492                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_super->s_id);
493         if (!mp->m_buf_workqueue)
494                 goto out;
495
496         mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
497                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
498         if (!mp->m_unwritten_workqueue)
499                 goto out_destroy_buf;
500
501         mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
502                         WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND,
503                         0, mp->m_super->s_id);
504         if (!mp->m_cil_workqueue)
505                 goto out_destroy_unwritten;
506
507         mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
508                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
509         if (!mp->m_reclaim_workqueue)
510                 goto out_destroy_cil;
511
512         mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
513                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
514         if (!mp->m_eofblocks_workqueue)
515                 goto out_destroy_reclaim;
516
517         mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
518                                                mp->m_super->s_id);
519         if (!mp->m_sync_workqueue)
520                 goto out_destroy_eofb;
521
522         return 0;
523
524 out_destroy_eofb:
525         destroy_workqueue(mp->m_eofblocks_workqueue);
526 out_destroy_reclaim:
527         destroy_workqueue(mp->m_reclaim_workqueue);
528 out_destroy_cil:
529         destroy_workqueue(mp->m_cil_workqueue);
530 out_destroy_unwritten:
531         destroy_workqueue(mp->m_unwritten_workqueue);
532 out_destroy_buf:
533         destroy_workqueue(mp->m_buf_workqueue);
534 out:
535         return -ENOMEM;
536 }
537
/*
 * Destroy the per-mount workqueues, in the reverse of the order in which
 * xfs_init_mount_workqueues() allocates them.
 */
STATIC void
xfs_destroy_mount_workqueues(
        struct xfs_mount        *mp)
{
        destroy_workqueue(mp->m_sync_workqueue);
        destroy_workqueue(mp->m_eofblocks_workqueue);
        destroy_workqueue(mp->m_reclaim_workqueue);
        destroy_workqueue(mp->m_cil_workqueue);
        destroy_workqueue(mp->m_unwritten_workqueue);
        destroy_workqueue(mp->m_buf_workqueue);
}
549
550 /*
551  * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
552  * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
553  * for IO to complete so that we effectively throttle multiple callers to the
554  * rate at which IO is completing.
555  */
556 void
557 xfs_flush_inodes(
558         struct xfs_mount        *mp)
559 {
560         struct super_block      *sb = mp->m_super;
561
562         if (down_read_trylock(&sb->s_umount)) {
563                 sync_inodes_sb(sb);
564                 up_read(&sb->s_umount);
565         }
566 }
567
/*
 * Catch misguided souls that try to use this interface on XFS.
 *
 * Calling this is treated as a bug: it hits BUG() unconditionally.  The
 * return statement only satisfies the function's return type.
 */
STATIC struct inode *
xfs_fs_alloc_inode(
        struct super_block      *sb)
{
        BUG();
        return NULL;
}
576
#ifdef DEBUG
/*
 * Debug helper: walk every extent of the @whichfork fork of @ip from file
 * offset 0 and warn about each extent whose start block is a "null start
 * block", i.e. a delalloc extent.  Does nothing if the fork does not exist
 * or has no extents.
 */
static void
xfs_check_delalloc(
        struct xfs_inode        *ip,
        int                     whichfork)
{
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
        struct xfs_bmbt_irec    irec;
        struct xfs_iext_cursor  cur;
        bool                    more;

        if (!ifp)
                return;

        for (more = xfs_iext_lookup_extent(ip, ifp, 0, &cur, &irec);
             more;
             more = xfs_iext_next_extent(ifp, &cur, &irec)) {
                if (!isnullstartblock(irec.br_startblock))
                        continue;

                xfs_warn(ip->i_mount,
        "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
                        ip->i_ino,
                        whichfork == XFS_DATA_FORK ? "data" : "cow",
                        irec.br_startoff, irec.br_blockcount);
        }
}
#else
/* Non-DEBUG builds: the check compiles away to nothing. */
#define xfs_check_delalloc(ip, whichfork)       do { } while (0)
#endif
602
603 /*
604  * Now that the generic code is guaranteed not to be accessing
605  * the linux inode, we can inactivate and reclaim the inode.
606  */
607 STATIC void
608 xfs_fs_destroy_inode(
609         struct inode            *inode)
610 {
611         struct xfs_inode        *ip = XFS_I(inode);
612
613         trace_xfs_destroy_inode(ip);
614
615         ASSERT(!rwsem_is_locked(&inode->i_rwsem));
616         XFS_STATS_INC(ip->i_mount, vn_rele);
617         XFS_STATS_INC(ip->i_mount, vn_remove);
618
619         xfs_inactive(ip);
620
621         if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
622                 xfs_check_delalloc(ip, XFS_DATA_FORK);
623                 xfs_check_delalloc(ip, XFS_COW_FORK);
624                 ASSERT(0);
625         }
626
627         XFS_STATS_INC(ip->i_mount, vn_reclaim);
628
629         /*
630          * We should never get here with one of the reclaim flags already set.
631          */
632         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
633         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
634
635         /*
636          * We always use background reclaim here because even if the
637          * inode is clean, it still may be under IO and hence we have
638          * to take the flush lock. The background reclaim path handles
639          * this more efficiently than we can here, so simply let background
640          * reclaim tear down all inodes.
641          */
642         xfs_inode_set_reclaim_tag(ip);
643 }
644
645 static void
646 xfs_fs_dirty_inode(
647         struct inode                    *inode,
648         int                             flag)
649 {
650         struct xfs_inode                *ip = XFS_I(inode);
651         struct xfs_mount                *mp = ip->i_mount;
652         struct xfs_trans                *tp;
653
654         if (!(inode->i_sb->s_flags & SB_LAZYTIME))
655                 return;
656         if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
657                 return;
658
659         if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
660                 return;
661         xfs_ilock(ip, XFS_ILOCK_EXCL);
662         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
663         xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
664         xfs_trans_commit(tp);
665 }
666
667 /*
668  * Slab object creation initialisation for the XFS inode.
669  * This covers only the idempotent fields in the XFS inode;
670  * all other fields need to be initialised on allocation
671  * from the slab. This avoids the need to repeatedly initialise
672  * fields in the xfs inode that left in the initialise state
673  * when freeing the inode.
674  */
675 STATIC void
676 xfs_fs_inode_init_once(
677         void                    *inode)
678 {
679         struct xfs_inode        *ip = inode;
680
681         memset(ip, 0, sizeof(struct xfs_inode));
682
683         /* vfs inode */
684         inode_init_once(VFS_I(ip));
685
686         /* xfs inode */
687         atomic_set(&ip->i_pincount, 0);
688         spin_lock_init(&ip->i_flags_lock);
689
690         mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
691                      "xfsino", ip->i_ino);
692         mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
693                      "xfsino", ip->i_ino);
694 }
695
696 /*
697  * We do an unlocked check for XFS_IDONTCACHE here because we are already
698  * serialised against cache hits here via the inode->i_lock and igrab() in
699  * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
700  * racing with us, and it avoids needing to grab a spinlock here for every inode
701  * we drop the final reference on.
702  */
703 STATIC int
704 xfs_fs_drop_inode(
705         struct inode            *inode)
706 {
707         struct xfs_inode        *ip = XFS_I(inode);
708
709         /*
710          * If this unlinked inode is in the middle of recovery, don't
711          * drop the inode just yet; log recovery will take care of
712          * that.  See the comment for this inode flag.
713          */
714         if (ip->i_flags & XFS_IRECOVERY) {
715                 ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
716                 return 0;
717         }
718
719         return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
720 }
721
/*
 * Free the mount structure @mp together with the realtime and log device
 * name strings it holds.
 */
static void
xfs_mount_free(
        struct xfs_mount        *mp)
{
        kfree(mp->m_rtname);
        kfree(mp->m_logname);
        kmem_free(mp);
}
730
731 STATIC int
732 xfs_fs_sync_fs(
733         struct super_block      *sb,
734         int                     wait)
735 {
736         struct xfs_mount        *mp = XFS_M(sb);
737
738         /*
739          * Doing anything during the async pass would be counterproductive.
740          */
741         if (!wait)
742                 return 0;
743
744         xfs_log_force(mp, XFS_LOG_SYNC);
745         if (laptop_mode) {
746                 /*
747                  * The disk must be active because we're syncing.
748                  * We schedule log work now (now that the disk is
749                  * active) instead of later (when it might not be).
750                  */
751                 flush_delayed_work(&mp->m_log->l_work);
752         }
753
754         return 0;
755 }
756
757 STATIC int
758 xfs_fs_statfs(
759         struct dentry           *dentry,
760         struct kstatfs          *statp)
761 {
762         struct xfs_mount        *mp = XFS_M(dentry->d_sb);
763         xfs_sb_t                *sbp = &mp->m_sb;
764         struct xfs_inode        *ip = XFS_I(d_inode(dentry));
765         uint64_t                fakeinos, id;
766         uint64_t                icount;
767         uint64_t                ifree;
768         uint64_t                fdblocks;
769         xfs_extlen_t            lsize;
770         int64_t                 ffree;
771
772         statp->f_type = XFS_SUPER_MAGIC;
773         statp->f_namelen = MAXNAMELEN - 1;
774
775         id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
776         statp->f_fsid.val[0] = (u32)id;
777         statp->f_fsid.val[1] = (u32)(id >> 32);
778
779         icount = percpu_counter_sum(&mp->m_icount);
780         ifree = percpu_counter_sum(&mp->m_ifree);
781         fdblocks = percpu_counter_sum(&mp->m_fdblocks);
782
783         spin_lock(&mp->m_sb_lock);
784         statp->f_bsize = sbp->sb_blocksize;
785         lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
786         statp->f_blocks = sbp->sb_dblocks - lsize;
787         spin_unlock(&mp->m_sb_lock);
788
789         statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
790         statp->f_bavail = statp->f_bfree;
791
792         fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
793         statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
794         if (M_IGEO(mp)->maxicount)
795                 statp->f_files = min_t(typeof(statp->f_files),
796                                         statp->f_files,
797                                         M_IGEO(mp)->maxicount);
798
799         /* If sb_icount overshot maxicount, report actual allocation */
800         statp->f_files = max_t(typeof(statp->f_files),
801                                         statp->f_files,
802                                         sbp->sb_icount);
803
804         /* make sure statp->f_ffree does not underflow */
805         ffree = statp->f_files - (icount - ifree);
806         statp->f_ffree = max_t(int64_t, ffree, 0);
807
808
809         if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
810             ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
811                               (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
812                 xfs_qm_statvfs(ip, statp);
813
814         if (XFS_IS_REALTIME_MOUNT(mp) &&
815             (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
816                 statp->f_blocks = sbp->sb_rblocks;
817                 statp->f_bavail = statp->f_bfree =
818                         sbp->sb_frextents * sbp->sb_rextsize;
819         }
820
821         return 0;
822 }
823
824 STATIC void
825 xfs_save_resvblks(struct xfs_mount *mp)
826 {
827         uint64_t resblks = 0;
828
829         mp->m_resblks_save = mp->m_resblks;
830         xfs_reserve_blocks(mp, &resblks, NULL);
831 }
832
833 STATIC void
834 xfs_restore_resvblks(struct xfs_mount *mp)
835 {
836         uint64_t resblks;
837
838         if (mp->m_resblks_save) {
839                 resblks = mp->m_resblks_save;
840                 mp->m_resblks_save = 0;
841         } else
842                 resblks = xfs_default_resblks(mp);
843
844         xfs_reserve_blocks(mp, &resblks, NULL);
845 }
846
847 /*
848  * Trigger writeback of all the dirty metadata in the file system.
849  *
850  * This ensures that the metadata is written to their location on disk rather
851  * than just existing in transactions in the log. This means after a quiesce
852  * there is no log replay required to write the inodes to disk - this is the
853  * primary difference between a sync and a quiesce.
854  *
855  * Note: xfs_log_quiesce() stops background log work - the callers must ensure
856  * it is started again when appropriate.
857  */
858 void
859 xfs_quiesce_attr(
860         struct xfs_mount        *mp)
861 {
862         int     error = 0;
863
864         /* wait for all modifications to complete */
865         while (atomic_read(&mp->m_active_trans) > 0)
866                 delay(100);
867
868         /* force the log to unpin objects from the now complete transactions */
869         xfs_log_force(mp, XFS_LOG_SYNC);
870
871         /* reclaim inodes to do any IO before the freeze completes */
872         xfs_reclaim_inodes(mp, 0);
873         xfs_reclaim_inodes(mp, SYNC_WAIT);
874
875         /* Push the superblock and write an unmount record */
876         error = xfs_log_sbcount(mp);
877         if (error)
878                 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
879                                 "Frozen image may not be consistent.");
880         /*
881          * Just warn here till VFS can correctly support
882          * read-only remount without racing.
883          */
884         WARN_ON(atomic_read(&mp->m_active_trans) != 0);
885
886         xfs_log_quiesce(mp);
887 }
888
889 /*
890  * Second stage of a freeze. The data is already frozen so we only
891  * need to take care of the metadata. Once that's done sync the superblock
892  * to the log to dirty it in case of a crash while frozen. This ensures that we
893  * will recover the unlinked inode lists on the next mount.
894  */
895 STATIC int
896 xfs_fs_freeze(
897         struct super_block      *sb)
898 {
899         struct xfs_mount        *mp = XFS_M(sb);
900
901         xfs_stop_block_reaping(mp);
902         xfs_save_resvblks(mp);
903         xfs_quiesce_attr(mp);
904         return xfs_sync_sb(mp, true);
905 }
906
907 STATIC int
908 xfs_fs_unfreeze(
909         struct super_block      *sb)
910 {
911         struct xfs_mount        *mp = XFS_M(sb);
912
913         xfs_restore_resvblks(mp);
914         xfs_log_work_queue(mp);
915         xfs_start_block_reaping(mp);
916         return 0;
917 }
918
919 /*
920  * This function fills in xfs_mount_t fields based on mount args.
921  * Note: the superblock _has_ now been read in.
922  */
923 STATIC int
924 xfs_finish_flags(
925         struct xfs_mount        *mp)
926 {
927         int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
928
929         /* Fail a mount where the logbuf is smaller than the log stripe */
930         if (xfs_sb_version_haslogv2(&mp->m_sb)) {
931                 if (mp->m_logbsize <= 0 &&
932                     mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
933                         mp->m_logbsize = mp->m_sb.sb_logsunit;
934                 } else if (mp->m_logbsize > 0 &&
935                            mp->m_logbsize < mp->m_sb.sb_logsunit) {
936                         xfs_warn(mp,
937                 "logbuf size must be greater than or equal to log stripe size");
938                         return -EINVAL;
939                 }
940         } else {
941                 /* Fail a mount if the logbuf is larger than 32K */
942                 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
943                         xfs_warn(mp,
944                 "logbuf size for version 1 logs must be 16K or 32K");
945                         return -EINVAL;
946                 }
947         }
948
949         /*
950          * V5 filesystems always use attr2 format for attributes.
951          */
952         if (xfs_sb_version_hascrc(&mp->m_sb) &&
953             (mp->m_flags & XFS_MOUNT_NOATTR2)) {
954                 xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
955                              "attr2 is always enabled for V5 filesystems.");
956                 return -EINVAL;
957         }
958
959         /*
960          * mkfs'ed attr2 will turn on attr2 mount unless explicitly
961          * told by noattr2 to turn it off
962          */
963         if (xfs_sb_version_hasattr2(&mp->m_sb) &&
964             !(mp->m_flags & XFS_MOUNT_NOATTR2))
965                 mp->m_flags |= XFS_MOUNT_ATTR2;
966
967         /*
968          * prohibit r/w mounts of read-only filesystems
969          */
970         if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
971                 xfs_warn(mp,
972                         "cannot mount a read-only filesystem as read-write");
973                 return -EROFS;
974         }
975
976         if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
977             (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
978             !xfs_sb_version_has_pquotino(&mp->m_sb)) {
979                 xfs_warn(mp,
980                   "Super block does not support project and group quota together");
981                 return -EINVAL;
982         }
983
984         return 0;
985 }
986
987 static int
988 xfs_init_percpu_counters(
989         struct xfs_mount        *mp)
990 {
991         int             error;
992
993         error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
994         if (error)
995                 return -ENOMEM;
996
997         error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
998         if (error)
999                 goto free_icount;
1000
1001         error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1002         if (error)
1003                 goto free_ifree;
1004
1005         error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
1006         if (error)
1007                 goto free_fdblocks;
1008
1009         return 0;
1010
1011 free_fdblocks:
1012         percpu_counter_destroy(&mp->m_fdblocks);
1013 free_ifree:
1014         percpu_counter_destroy(&mp->m_ifree);
1015 free_icount:
1016         percpu_counter_destroy(&mp->m_icount);
1017         return -ENOMEM;
1018 }
1019
1020 void
1021 xfs_reinit_percpu_counters(
1022         struct xfs_mount        *mp)
1023 {
1024         percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1025         percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1026         percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1027 }
1028
1029 static void
1030 xfs_destroy_percpu_counters(
1031         struct xfs_mount        *mp)
1032 {
1033         percpu_counter_destroy(&mp->m_icount);
1034         percpu_counter_destroy(&mp->m_ifree);
1035         percpu_counter_destroy(&mp->m_fdblocks);
1036         ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1037                percpu_counter_sum(&mp->m_delalloc_blks) == 0);
1038         percpu_counter_destroy(&mp->m_delalloc_blks);
1039 }
1040
1041 static void
1042 xfs_fs_put_super(
1043         struct super_block      *sb)
1044 {
1045         struct xfs_mount        *mp = XFS_M(sb);
1046
1047         /* if ->fill_super failed, we have no mount to tear down */
1048         if (!sb->s_fs_info)
1049                 return;
1050
1051         xfs_notice(mp, "Unmounting Filesystem");
1052         xfs_filestream_unmount(mp);
1053         xfs_unmountfs(mp);
1054
1055         xfs_freesb(mp);
1056         free_percpu(mp->m_stats.xs_stats);
1057         xfs_destroy_percpu_counters(mp);
1058         xfs_destroy_mount_workqueues(mp);
1059         xfs_close_devices(mp);
1060
1061         sb->s_fs_info = NULL;
1062         xfs_mount_free(mp);
1063 }
1064
1065 static long
1066 xfs_fs_nr_cached_objects(
1067         struct super_block      *sb,
1068         struct shrink_control   *sc)
1069 {
1070         /* Paranoia: catch incorrect calls during mount setup or teardown */
1071         if (WARN_ON_ONCE(!sb->s_fs_info))
1072                 return 0;
1073         return xfs_reclaim_inodes_count(XFS_M(sb));
1074 }
1075
1076 static long
1077 xfs_fs_free_cached_objects(
1078         struct super_block      *sb,
1079         struct shrink_control   *sc)
1080 {
1081         return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1082 }
1083
1084 static const struct super_operations xfs_super_operations = {
1085         .alloc_inode            = xfs_fs_alloc_inode,
1086         .destroy_inode          = xfs_fs_destroy_inode,
1087         .dirty_inode            = xfs_fs_dirty_inode,
1088         .drop_inode             = xfs_fs_drop_inode,
1089         .put_super              = xfs_fs_put_super,
1090         .sync_fs                = xfs_fs_sync_fs,
1091         .freeze_fs              = xfs_fs_freeze,
1092         .unfreeze_fs            = xfs_fs_unfreeze,
1093         .statfs                 = xfs_fs_statfs,
1094         .show_options           = xfs_fs_show_options,
1095         .nr_cached_objects      = xfs_fs_nr_cached_objects,
1096         .free_cached_objects    = xfs_fs_free_cached_objects,
1097 };
1098
1099 static int
1100 suffix_kstrtoint(
1101         const char      *s,
1102         unsigned int    base,
1103         int             *res)
1104 {
1105         int             last, shift_left_factor = 0, _res;
1106         char            *value;
1107         int             ret = 0;
1108
1109         value = kstrdup(s, GFP_KERNEL);
1110         if (!value)
1111                 return -ENOMEM;
1112
1113         last = strlen(value) - 1;
1114         if (value[last] == 'K' || value[last] == 'k') {
1115                 shift_left_factor = 10;
1116                 value[last] = '\0';
1117         }
1118         if (value[last] == 'M' || value[last] == 'm') {
1119                 shift_left_factor = 20;
1120                 value[last] = '\0';
1121         }
1122         if (value[last] == 'G' || value[last] == 'g') {
1123                 shift_left_factor = 30;
1124                 value[last] = '\0';
1125         }
1126
1127         if (kstrtoint(value, base, &_res))
1128                 ret = -EINVAL;
1129         kfree(value);
1130         *res = _res << shift_left_factor;
1131         return ret;
1132 }
1133
1134 /*
1135  * Set mount state from a mount option.
1136  *
1137  * NOTE: mp->m_super is NULL here!
1138  */
1139 static int
1140 xfs_fc_parse_param(
1141         struct fs_context       *fc,
1142         struct fs_parameter     *param)
1143 {
1144         struct xfs_mount        *mp = fc->s_fs_info;
1145         struct fs_parse_result  result;
1146         int                     size = 0;
1147         int                     opt;
1148
1149         opt = fs_parse(fc, &xfs_fs_parameters, param, &result);
1150         if (opt < 0)
1151                 return opt;
1152
1153         switch (opt) {
1154         case Opt_logbufs:
1155                 mp->m_logbufs = result.uint_32;
1156                 return 0;
1157         case Opt_logbsize:
1158                 if (suffix_kstrtoint(param->string, 10, &mp->m_logbsize))
1159                         return -EINVAL;
1160                 return 0;
1161         case Opt_logdev:
1162                 kfree(mp->m_logname);
1163                 mp->m_logname = kstrdup(param->string, GFP_KERNEL);
1164                 if (!mp->m_logname)
1165                         return -ENOMEM;
1166                 return 0;
1167         case Opt_rtdev:
1168                 kfree(mp->m_rtname);
1169                 mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
1170                 if (!mp->m_rtname)
1171                         return -ENOMEM;
1172                 return 0;
1173         case Opt_allocsize:
1174                 if (suffix_kstrtoint(param->string, 10, &size))
1175                         return -EINVAL;
1176                 mp->m_allocsize_log = ffs(size) - 1;
1177                 mp->m_flags |= XFS_MOUNT_ALLOCSIZE;
1178                 return 0;
1179         case Opt_grpid:
1180         case Opt_bsdgroups:
1181                 mp->m_flags |= XFS_MOUNT_GRPID;
1182                 return 0;
1183         case Opt_nogrpid:
1184         case Opt_sysvgroups:
1185                 mp->m_flags &= ~XFS_MOUNT_GRPID;
1186                 return 0;
1187         case Opt_wsync:
1188                 mp->m_flags |= XFS_MOUNT_WSYNC;
1189                 return 0;
1190         case Opt_norecovery:
1191                 mp->m_flags |= XFS_MOUNT_NORECOVERY;
1192                 return 0;
1193         case Opt_noalign:
1194                 mp->m_flags |= XFS_MOUNT_NOALIGN;
1195                 return 0;
1196         case Opt_swalloc:
1197                 mp->m_flags |= XFS_MOUNT_SWALLOC;
1198                 return 0;
1199         case Opt_sunit:
1200                 mp->m_dalign = result.uint_32;
1201                 return 0;
1202         case Opt_swidth:
1203                 mp->m_swidth = result.uint_32;
1204                 return 0;
1205         case Opt_inode32:
1206                 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
1207                 return 0;
1208         case Opt_inode64:
1209                 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
1210                 return 0;
1211         case Opt_nouuid:
1212                 mp->m_flags |= XFS_MOUNT_NOUUID;
1213                 return 0;
1214         case Opt_ikeep:
1215                 mp->m_flags |= XFS_MOUNT_IKEEP;
1216                 return 0;
1217         case Opt_noikeep:
1218                 mp->m_flags &= ~XFS_MOUNT_IKEEP;
1219                 return 0;
1220         case Opt_largeio:
1221                 mp->m_flags |= XFS_MOUNT_LARGEIO;
1222                 return 0;
1223         case Opt_nolargeio:
1224                 mp->m_flags &= ~XFS_MOUNT_LARGEIO;
1225                 return 0;
1226         case Opt_attr2:
1227                 mp->m_flags |= XFS_MOUNT_ATTR2;
1228                 return 0;
1229         case Opt_noattr2:
1230                 mp->m_flags &= ~XFS_MOUNT_ATTR2;
1231                 mp->m_flags |= XFS_MOUNT_NOATTR2;
1232                 return 0;
1233         case Opt_filestreams:
1234                 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
1235                 return 0;
1236         case Opt_noquota:
1237                 mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
1238                 mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
1239                 mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
1240                 return 0;
1241         case Opt_quota:
1242         case Opt_uquota:
1243         case Opt_usrquota:
1244                 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
1245                                  XFS_UQUOTA_ENFD);
1246                 return 0;
1247         case Opt_qnoenforce:
1248         case Opt_uqnoenforce:
1249                 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
1250                 mp->m_qflags &= ~XFS_UQUOTA_ENFD;
1251                 return 0;
1252         case Opt_pquota:
1253         case Opt_prjquota:
1254                 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
1255                                  XFS_PQUOTA_ENFD);
1256                 return 0;
1257         case Opt_pqnoenforce:
1258                 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
1259                 mp->m_qflags &= ~XFS_PQUOTA_ENFD;
1260                 return 0;
1261         case Opt_gquota:
1262         case Opt_grpquota:
1263                 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
1264                                  XFS_GQUOTA_ENFD);
1265                 return 0;
1266         case Opt_gqnoenforce:
1267                 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
1268                 mp->m_qflags &= ~XFS_GQUOTA_ENFD;
1269                 return 0;
1270         case Opt_discard:
1271                 mp->m_flags |= XFS_MOUNT_DISCARD;
1272                 return 0;
1273         case Opt_nodiscard:
1274                 mp->m_flags &= ~XFS_MOUNT_DISCARD;
1275                 return 0;
1276 #ifdef CONFIG_FS_DAX
1277         case Opt_dax:
1278                 mp->m_flags |= XFS_MOUNT_DAX;
1279                 return 0;
1280 #endif
1281         default:
1282                 xfs_warn(mp, "unknown mount option [%s].", param->key);
1283                 return -EINVAL;
1284         }
1285
1286         return 0;
1287 }
1288
1289 static int
1290 xfs_fc_validate_params(
1291         struct xfs_mount        *mp)
1292 {
1293         /*
1294          * no recovery flag requires a read-only mount
1295          */
1296         if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
1297             !(mp->m_flags & XFS_MOUNT_RDONLY)) {
1298                 xfs_warn(mp, "no-recovery mounts must be read-only.");
1299                 return -EINVAL;
1300         }
1301
1302         if ((mp->m_flags & XFS_MOUNT_NOALIGN) &&
1303             (mp->m_dalign || mp->m_swidth)) {
1304                 xfs_warn(mp,
1305         "sunit and swidth options incompatible with the noalign option");
1306                 return -EINVAL;
1307         }
1308
1309         if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
1310                 xfs_warn(mp, "quota support not available in this kernel.");
1311                 return -EINVAL;
1312         }
1313
1314         if ((mp->m_dalign && !mp->m_swidth) ||
1315             (!mp->m_dalign && mp->m_swidth)) {
1316                 xfs_warn(mp, "sunit and swidth must be specified together");
1317                 return -EINVAL;
1318         }
1319
1320         if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
1321                 xfs_warn(mp,
1322         "stripe width (%d) must be a multiple of the stripe unit (%d)",
1323                         mp->m_swidth, mp->m_dalign);
1324                 return -EINVAL;
1325         }
1326
1327         if (mp->m_logbufs != -1 &&
1328             mp->m_logbufs != 0 &&
1329             (mp->m_logbufs < XLOG_MIN_ICLOGS ||
1330              mp->m_logbufs > XLOG_MAX_ICLOGS)) {
1331                 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
1332                         mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1333                 return -EINVAL;
1334         }
1335
1336         if (mp->m_logbsize != -1 &&
1337             mp->m_logbsize !=  0 &&
1338             (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
1339              mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
1340              !is_power_of_2(mp->m_logbsize))) {
1341                 xfs_warn(mp,
1342                         "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1343                         mp->m_logbsize);
1344                 return -EINVAL;
1345         }
1346
1347         if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
1348             (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
1349              mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
1350                 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
1351                         mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
1352                 return -EINVAL;
1353         }
1354
1355         return 0;
1356 }
1357
1358 static int
1359 xfs_fc_fill_super(
1360         struct super_block      *sb,
1361         struct fs_context       *fc)
1362 {
1363         struct xfs_mount        *mp = sb->s_fs_info;
1364         struct inode            *root;
1365         int                     flags = 0, error;
1366
1367         mp->m_super = sb;
1368
1369         error = xfs_fc_validate_params(mp);
1370         if (error)
1371                 goto out_free_names;
1372
1373         sb_min_blocksize(sb, BBSIZE);
1374         sb->s_xattr = xfs_xattr_handlers;
1375         sb->s_export_op = &xfs_export_operations;
1376 #ifdef CONFIG_XFS_QUOTA
1377         sb->s_qcop = &xfs_quotactl_operations;
1378         sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
1379 #endif
1380         sb->s_op = &xfs_super_operations;
1381
1382         /*
1383          * Delay mount work if the debug hook is set. This is debug
1384          * instrumention to coordinate simulation of xfs mount failures with
1385          * VFS superblock operations
1386          */
1387         if (xfs_globals.mount_delay) {
1388                 xfs_notice(mp, "Delaying mount for %d seconds.",
1389                         xfs_globals.mount_delay);
1390                 msleep(xfs_globals.mount_delay * 1000);
1391         }
1392
1393         if (fc->sb_flags & SB_SILENT)
1394                 flags |= XFS_MFSI_QUIET;
1395
1396         error = xfs_open_devices(mp);
1397         if (error)
1398                 goto out_free_names;
1399
1400         error = xfs_init_mount_workqueues(mp);
1401         if (error)
1402                 goto out_close_devices;
1403
1404         error = xfs_init_percpu_counters(mp);
1405         if (error)
1406                 goto out_destroy_workqueues;
1407
1408         /* Allocate stats memory before we do operations that might use it */
1409         mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1410         if (!mp->m_stats.xs_stats) {
1411                 error = -ENOMEM;
1412                 goto out_destroy_counters;
1413         }
1414
1415         error = xfs_readsb(mp, flags);
1416         if (error)
1417                 goto out_free_stats;
1418
1419         error = xfs_finish_flags(mp);
1420         if (error)
1421                 goto out_free_sb;
1422
1423         error = xfs_setup_devices(mp);
1424         if (error)
1425                 goto out_free_sb;
1426
1427         error = xfs_filestream_mount(mp);
1428         if (error)
1429                 goto out_free_sb;
1430
1431         /*
1432          * we must configure the block size in the superblock before we run the
1433          * full mount process as the mount process can lookup and cache inodes.
1434          */
1435         sb->s_magic = XFS_SUPER_MAGIC;
1436         sb->s_blocksize = mp->m_sb.sb_blocksize;
1437         sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1438         sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1439         sb->s_max_links = XFS_MAXLINK;
1440         sb->s_time_gran = 1;
1441         sb->s_time_min = S32_MIN;
1442         sb->s_time_max = S32_MAX;
1443         sb->s_iflags |= SB_I_CGROUPWB;
1444
1445         set_posix_acl_flag(sb);
1446
1447         /* version 5 superblocks support inode version counters. */
1448         if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
1449                 sb->s_flags |= SB_I_VERSION;
1450
1451         if (mp->m_flags & XFS_MOUNT_DAX) {
1452                 bool rtdev_is_dax = false, datadev_is_dax;
1453
1454                 xfs_warn(mp,
1455                 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1456
1457                 datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
1458                         sb->s_blocksize);
1459                 if (mp->m_rtdev_targp)
1460                         rtdev_is_dax = bdev_dax_supported(
1461                                 mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
1462                 if (!rtdev_is_dax && !datadev_is_dax) {
1463                         xfs_alert(mp,
1464                         "DAX unsupported by block device. Turning off DAX.");
1465                         mp->m_flags &= ~XFS_MOUNT_DAX;
1466                 }
1467                 if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1468                         xfs_alert(mp,
1469                 "DAX and reflink cannot be used together!");
1470                         error = -EINVAL;
1471                         goto out_filestream_unmount;
1472                 }
1473         }
1474
1475         if (mp->m_flags & XFS_MOUNT_DISCARD) {
1476                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
1477
1478                 if (!blk_queue_discard(q)) {
1479                         xfs_warn(mp, "mounting with \"discard\" option, but "
1480                                         "the device does not support discard");
1481                         mp->m_flags &= ~XFS_MOUNT_DISCARD;
1482                 }
1483         }
1484
1485         if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1486                 if (mp->m_sb.sb_rblocks) {
1487                         xfs_alert(mp,
1488         "reflink not compatible with realtime device!");
1489                         error = -EINVAL;
1490                         goto out_filestream_unmount;
1491                 }
1492
1493                 if (xfs_globals.always_cow) {
1494                         xfs_info(mp, "using DEBUG-only always_cow mode.");
1495                         mp->m_always_cow = true;
1496                 }
1497         }
1498
1499         if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
1500                 xfs_alert(mp,
1501         "reverse mapping btree not compatible with realtime device!");
1502                 error = -EINVAL;
1503                 goto out_filestream_unmount;
1504         }
1505
1506         error = xfs_mountfs(mp);
1507         if (error)
1508                 goto out_filestream_unmount;
1509
1510         root = igrab(VFS_I(mp->m_rootip));
1511         if (!root) {
1512                 error = -ENOENT;
1513                 goto out_unmount;
1514         }
1515         sb->s_root = d_make_root(root);
1516         if (!sb->s_root) {
1517                 error = -ENOMEM;
1518                 goto out_unmount;
1519         }
1520
1521         return 0;
1522
1523  out_filestream_unmount:
1524         xfs_filestream_unmount(mp);
1525  out_free_sb:
1526         xfs_freesb(mp);
1527  out_free_stats:
1528         free_percpu(mp->m_stats.xs_stats);
1529  out_destroy_counters:
1530         xfs_destroy_percpu_counters(mp);
1531  out_destroy_workqueues:
1532         xfs_destroy_mount_workqueues(mp);
1533  out_close_devices:
1534         xfs_close_devices(mp);
1535  out_free_names:
1536         sb->s_fs_info = NULL;
1537         xfs_mount_free(mp);
1538         return error;
1539
1540  out_unmount:
1541         xfs_filestream_unmount(mp);
1542         xfs_unmountfs(mp);
1543         goto out_free_sb;
1544 }
1545
1546 static int
1547 xfs_fc_get_tree(
1548         struct fs_context       *fc)
1549 {
1550         return get_tree_bdev(fc, xfs_fc_fill_super);
1551 }
1552
1553 static int
1554 xfs_remount_rw(
1555         struct xfs_mount        *mp)
1556 {
1557         struct xfs_sb           *sbp = &mp->m_sb;
1558         int error;
1559
1560         if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
1561                 xfs_warn(mp,
1562                         "ro->rw transition prohibited on norecovery mount");
1563                 return -EINVAL;
1564         }
1565
1566         if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
1567             xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1568                 xfs_warn(mp,
1569         "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1570                         (sbp->sb_features_ro_compat &
1571                                 XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1572                 return -EINVAL;
1573         }
1574
1575         mp->m_flags &= ~XFS_MOUNT_RDONLY;
1576
1577         /*
1578          * If this is the first remount to writeable state we might have some
1579          * superblock changes to update.
1580          */
1581         if (mp->m_update_sb) {
1582                 error = xfs_sync_sb(mp, false);
1583                 if (error) {
1584                         xfs_warn(mp, "failed to write sb changes");
1585                         return error;
1586                 }
1587                 mp->m_update_sb = false;
1588         }
1589
1590         /*
1591          * Fill out the reserve pool if it is empty. Use the stashed value if
1592          * it is non-zero, otherwise go with the default.
1593          */
1594         xfs_restore_resvblks(mp);
1595         xfs_log_work_queue(mp);
1596
1597         /* Recover any CoW blocks that never got remapped. */
1598         error = xfs_reflink_recover_cow(mp);
1599         if (error) {
1600                 xfs_err(mp,
1601                         "Error %d recovering leftover CoW allocations.", error);
1602                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1603                 return error;
1604         }
1605         xfs_start_block_reaping(mp);
1606
1607         /* Create the per-AG metadata reservation pool .*/
1608         error = xfs_fs_reserve_ag_blocks(mp);
1609         if (error && error != -ENOSPC)
1610                 return error;
1611
1612         return 0;
1613 }
1614
1615 static int
1616 xfs_remount_ro(
1617         struct xfs_mount        *mp)
1618 {
1619         int error;
1620
1621         /*
1622          * Cancel background eofb scanning so it cannot race with the final
1623          * log force+buftarg wait and deadlock the remount.
1624          */
1625         xfs_stop_block_reaping(mp);
1626
1627         /* Get rid of any leftover CoW reservations... */
1628         error = xfs_icache_free_cowblocks(mp, NULL);
1629         if (error) {
1630                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1631                 return error;
1632         }
1633
1634         /* Free the per-AG metadata reservation pool. */
1635         error = xfs_fs_unreserve_ag_blocks(mp);
1636         if (error) {
1637                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1638                 return error;
1639         }
1640
1641         /*
1642          * Before we sync the metadata, we need to free up the reserve block
1643          * pool so that the used block count in the superblock on disk is
1644          * correct at the end of the remount. Stash the current* reserve pool
1645          * size so that if we get remounted rw, we can return it to the same
1646          * size.
1647          */
1648         xfs_save_resvblks(mp);
1649
1650         xfs_quiesce_attr(mp);
1651         mp->m_flags |= XFS_MOUNT_RDONLY;
1652
1653         return 0;
1654 }
1655
1656 /*
1657  * Logically we would return an error here to prevent users from believing
1658  * they might have changed mount options using remount which can't be changed.
1659  *
1660  * But unfortunately mount(8) adds all options from mtab and fstab to the mount
1661  * arguments in some cases so we can't blindly reject options, but have to
1662  * check for each specified option if it actually differs from the currently
1663  * set option and only reject it if that's the case.
1664  *
1665  * Until that is implemented we return success for every remount request, and
1666  * silently ignore all options that we can't actually change.
1667  */
1668 static int
1669 xfs_fc_reconfigure(
1670         struct fs_context *fc)
1671 {
1672         struct xfs_mount        *mp = XFS_M(fc->root->d_sb);
1673         struct xfs_mount        *new_mp = fc->s_fs_info;
1674         xfs_sb_t                *sbp = &mp->m_sb;
1675         int                     flags = fc->sb_flags;
1676         int                     error;
1677
1678         error = xfs_fc_validate_params(new_mp);
1679         if (error)
1680                 return error;
1681
1682         sync_filesystem(mp->m_super);
1683
1684         /* inode32 -> inode64 */
1685         if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
1686             !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
1687                 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
1688                 mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
1689         }
1690
1691         /* inode64 -> inode32 */
1692         if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
1693             (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
1694                 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
1695                 mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
1696         }
1697
1698         /* ro -> rw */
1699         if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) {
1700                 error = xfs_remount_rw(mp);
1701                 if (error)
1702                         return error;
1703         }
1704
1705         /* rw -> ro */
1706         if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) {
1707                 error = xfs_remount_ro(mp);
1708                 if (error)
1709                         return error;
1710         }
1711
1712         return 0;
1713 }
1714
1715 static void xfs_fc_free(
1716         struct fs_context       *fc)
1717 {
1718         struct xfs_mount        *mp = fc->s_fs_info;
1719
1720         /*
1721          * mp is stored in the fs_context when it is initialized.
1722          * mp is transferred to the superblock on a successful mount,
1723          * but if an error occurs before the transfer we have to free
1724          * it here.
1725          */
1726         if (mp)
1727                 xfs_mount_free(mp);
1728 }
1729
1730 static const struct fs_context_operations xfs_context_ops = {
1731         .parse_param = xfs_fc_parse_param,
1732         .get_tree    = xfs_fc_get_tree,
1733         .reconfigure = xfs_fc_reconfigure,
1734         .free        = xfs_fc_free,
1735 };
1736
1737 static int xfs_init_fs_context(
1738         struct fs_context       *fc)
1739 {
1740         struct xfs_mount        *mp;
1741
1742         mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
1743         if (!mp)
1744                 return -ENOMEM;
1745
1746         spin_lock_init(&mp->m_sb_lock);
1747         spin_lock_init(&mp->m_agirotor_lock);
1748         INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1749         spin_lock_init(&mp->m_perag_lock);
1750         mutex_init(&mp->m_growlock);
1751         atomic_set(&mp->m_active_trans, 0);
1752         INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1753         INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
1754         INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
1755         mp->m_kobj.kobject.kset = xfs_kset;
1756         /*
1757          * We don't create the finobt per-ag space reservation until after log
1758          * recovery, so we must set this to true so that an ifree transaction
1759          * started during log recovery will not depend on space reservations
1760          * for finobt expansion.
1761          */
1762         mp->m_finobt_nores = true;
1763
1764         /*
1765          * These can be overridden by the mount option parsing.
1766          */
1767         mp->m_logbufs = -1;
1768         mp->m_logbsize = -1;
1769         mp->m_allocsize_log = 16; /* 64k */
1770
1771         /*
1772          * Copy binary VFS mount flags we are interested in.
1773          */
1774         if (fc->sb_flags & SB_RDONLY)
1775                 mp->m_flags |= XFS_MOUNT_RDONLY;
1776         if (fc->sb_flags & SB_DIRSYNC)
1777                 mp->m_flags |= XFS_MOUNT_DIRSYNC;
1778         if (fc->sb_flags & SB_SYNCHRONOUS)
1779                 mp->m_flags |= XFS_MOUNT_WSYNC;
1780
1781         fc->s_fs_info = mp;
1782         fc->ops = &xfs_context_ops;
1783
1784         return 0;
1785 }
1786
1787 static struct file_system_type xfs_fs_type = {
1788         .owner                  = THIS_MODULE,
1789         .name                   = "xfs",
1790         .init_fs_context        = xfs_init_fs_context,
1791         .parameters             = &xfs_fs_parameters,
1792         .kill_sb                = kill_block_super,
1793         .fs_flags               = FS_REQUIRES_DEV,
1794 };
1795 MODULE_ALIAS_FS("xfs");
1796
1797 STATIC int __init
1798 xfs_init_zones(void)
1799 {
1800         xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket",
1801                                                 sizeof(struct xlog_ticket),
1802                                                 0, 0, NULL);
1803         if (!xfs_log_ticket_zone)
1804                 goto out;
1805
1806         xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item",
1807                                         sizeof(struct xfs_extent_free_item),
1808                                         0, 0, NULL);
1809         if (!xfs_bmap_free_item_zone)
1810                 goto out_destroy_log_ticket_zone;
1811
1812         xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur",
1813                                                sizeof(struct xfs_btree_cur),
1814                                                0, 0, NULL);
1815         if (!xfs_btree_cur_zone)
1816                 goto out_destroy_bmap_free_item_zone;
1817
1818         xfs_da_state_zone = kmem_cache_create("xfs_da_state",
1819                                               sizeof(struct xfs_da_state),
1820                                               0, 0, NULL);
1821         if (!xfs_da_state_zone)
1822                 goto out_destroy_btree_cur_zone;
1823
1824         xfs_ifork_zone = kmem_cache_create("xfs_ifork",
1825                                            sizeof(struct xfs_ifork),
1826                                            0, 0, NULL);
1827         if (!xfs_ifork_zone)
1828                 goto out_destroy_da_state_zone;
1829
1830         xfs_trans_zone = kmem_cache_create("xf_trans",
1831                                            sizeof(struct xfs_trans),
1832                                            0, 0, NULL);
1833         if (!xfs_trans_zone)
1834                 goto out_destroy_ifork_zone;
1835
1836
1837         /*
1838          * The size of the zone allocated buf log item is the maximum
1839          * size possible under XFS.  This wastes a little bit of memory,
1840          * but it is much faster.
1841          */
1842         xfs_buf_item_zone = kmem_cache_create("xfs_buf_item",
1843                                               sizeof(struct xfs_buf_log_item),
1844                                               0, 0, NULL);
1845         if (!xfs_buf_item_zone)
1846                 goto out_destroy_trans_zone;
1847
1848         xfs_efd_zone = kmem_cache_create("xfs_efd_item",
1849                                         (sizeof(struct xfs_efd_log_item) +
1850                                         (XFS_EFD_MAX_FAST_EXTENTS - 1) *
1851                                         sizeof(struct xfs_extent)),
1852                                         0, 0, NULL);
1853         if (!xfs_efd_zone)
1854                 goto out_destroy_buf_item_zone;
1855
1856         xfs_efi_zone = kmem_cache_create("xfs_efi_item",
1857                                          (sizeof(struct xfs_efi_log_item) +
1858                                          (XFS_EFI_MAX_FAST_EXTENTS - 1) *
1859                                          sizeof(struct xfs_extent)),
1860                                          0, 0, NULL);
1861         if (!xfs_efi_zone)
1862                 goto out_destroy_efd_zone;
1863
1864         xfs_inode_zone = kmem_cache_create("xfs_inode",
1865                                            sizeof(struct xfs_inode), 0,
1866                                            (SLAB_HWCACHE_ALIGN |
1867                                             SLAB_RECLAIM_ACCOUNT |
1868                                             SLAB_MEM_SPREAD | SLAB_ACCOUNT),
1869                                            xfs_fs_inode_init_once);
1870         if (!xfs_inode_zone)
1871                 goto out_destroy_efi_zone;
1872
1873         xfs_ili_zone = kmem_cache_create("xfs_ili",
1874                                          sizeof(struct xfs_inode_log_item), 0,
1875                                          SLAB_MEM_SPREAD, NULL);
1876         if (!xfs_ili_zone)
1877                 goto out_destroy_inode_zone;
1878
1879         xfs_icreate_zone = kmem_cache_create("xfs_icr",
1880                                              sizeof(struct xfs_icreate_item),
1881                                              0, 0, NULL);
1882         if (!xfs_icreate_zone)
1883                 goto out_destroy_ili_zone;
1884
1885         xfs_rud_zone = kmem_cache_create("xfs_rud_item",
1886                                          sizeof(struct xfs_rud_log_item),
1887                                          0, 0, NULL);
1888         if (!xfs_rud_zone)
1889                 goto out_destroy_icreate_zone;
1890
1891         xfs_rui_zone = kmem_cache_create("xfs_rui_item",
1892                         xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
1893                         0, 0, NULL);
1894         if (!xfs_rui_zone)
1895                 goto out_destroy_rud_zone;
1896
1897         xfs_cud_zone = kmem_cache_create("xfs_cud_item",
1898                                          sizeof(struct xfs_cud_log_item),
1899                                          0, 0, NULL);
1900         if (!xfs_cud_zone)
1901                 goto out_destroy_rui_zone;
1902
1903         xfs_cui_zone = kmem_cache_create("xfs_cui_item",
1904                         xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
1905                         0, 0, NULL);
1906         if (!xfs_cui_zone)
1907                 goto out_destroy_cud_zone;
1908
1909         xfs_bud_zone = kmem_cache_create("xfs_bud_item",
1910                                          sizeof(struct xfs_bud_log_item),
1911                                          0, 0, NULL);
1912         if (!xfs_bud_zone)
1913                 goto out_destroy_cui_zone;
1914
1915         xfs_bui_zone = kmem_cache_create("xfs_bui_item",
1916                         xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
1917                         0, 0, NULL);
1918         if (!xfs_bui_zone)
1919                 goto out_destroy_bud_zone;
1920
1921         return 0;
1922
1923  out_destroy_bud_zone:
1924         kmem_cache_destroy(xfs_bud_zone);
1925  out_destroy_cui_zone:
1926         kmem_cache_destroy(xfs_cui_zone);
1927  out_destroy_cud_zone:
1928         kmem_cache_destroy(xfs_cud_zone);
1929  out_destroy_rui_zone:
1930         kmem_cache_destroy(xfs_rui_zone);
1931  out_destroy_rud_zone:
1932         kmem_cache_destroy(xfs_rud_zone);
1933  out_destroy_icreate_zone:
1934         kmem_cache_destroy(xfs_icreate_zone);
1935  out_destroy_ili_zone:
1936         kmem_cache_destroy(xfs_ili_zone);
1937  out_destroy_inode_zone:
1938         kmem_cache_destroy(xfs_inode_zone);
1939  out_destroy_efi_zone:
1940         kmem_cache_destroy(xfs_efi_zone);
1941  out_destroy_efd_zone:
1942         kmem_cache_destroy(xfs_efd_zone);
1943  out_destroy_buf_item_zone:
1944         kmem_cache_destroy(xfs_buf_item_zone);
1945  out_destroy_trans_zone:
1946         kmem_cache_destroy(xfs_trans_zone);
1947  out_destroy_ifork_zone:
1948         kmem_cache_destroy(xfs_ifork_zone);
1949  out_destroy_da_state_zone:
1950         kmem_cache_destroy(xfs_da_state_zone);
1951  out_destroy_btree_cur_zone:
1952         kmem_cache_destroy(xfs_btree_cur_zone);
1953  out_destroy_bmap_free_item_zone:
1954         kmem_cache_destroy(xfs_bmap_free_item_zone);
1955  out_destroy_log_ticket_zone:
1956         kmem_cache_destroy(xfs_log_ticket_zone);
1957  out:
1958         return -ENOMEM;
1959 }
1960
1961 STATIC void
1962 xfs_destroy_zones(void)
1963 {
1964         /*
1965          * Make sure all delayed rcu free are flushed before we
1966          * destroy caches.
1967          */
1968         rcu_barrier();
1969         kmem_cache_destroy(xfs_bui_zone);
1970         kmem_cache_destroy(xfs_bud_zone);
1971         kmem_cache_destroy(xfs_cui_zone);
1972         kmem_cache_destroy(xfs_cud_zone);
1973         kmem_cache_destroy(xfs_rui_zone);
1974         kmem_cache_destroy(xfs_rud_zone);
1975         kmem_cache_destroy(xfs_icreate_zone);
1976         kmem_cache_destroy(xfs_ili_zone);
1977         kmem_cache_destroy(xfs_inode_zone);
1978         kmem_cache_destroy(xfs_efi_zone);
1979         kmem_cache_destroy(xfs_efd_zone);
1980         kmem_cache_destroy(xfs_buf_item_zone);
1981         kmem_cache_destroy(xfs_trans_zone);
1982         kmem_cache_destroy(xfs_ifork_zone);
1983         kmem_cache_destroy(xfs_da_state_zone);
1984         kmem_cache_destroy(xfs_btree_cur_zone);
1985         kmem_cache_destroy(xfs_bmap_free_item_zone);
1986         kmem_cache_destroy(xfs_log_ticket_zone);
1987 }
1988
1989 STATIC int __init
1990 xfs_init_workqueues(void)
1991 {
1992         /*
1993          * The allocation workqueue can be used in memory reclaim situations
1994          * (writepage path), and parallelism is only limited by the number of
1995          * AGs in all the filesystems mounted. Hence use the default large
1996          * max_active value for this workqueue.
1997          */
1998         xfs_alloc_wq = alloc_workqueue("xfsalloc",
1999                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
2000         if (!xfs_alloc_wq)
2001                 return -ENOMEM;
2002
2003         xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
2004         if (!xfs_discard_wq)
2005                 goto out_free_alloc_wq;
2006
2007         return 0;
2008 out_free_alloc_wq:
2009         destroy_workqueue(xfs_alloc_wq);
2010         return -ENOMEM;
2011 }
2012
/*
 * Destroy the global workqueues created by xfs_init_workqueues(), in the
 * reverse order of their creation.
 */
STATIC void
xfs_destroy_workqueues(void)
{
	destroy_workqueue(xfs_discard_wq);
	destroy_workqueue(xfs_alloc_wq);
}
2019
2020 STATIC int __init
2021 init_xfs_fs(void)
2022 {
2023         int                     error;
2024
2025         xfs_check_ondisk_structs();
2026
2027         printk(KERN_INFO XFS_VERSION_STRING " with "
2028                          XFS_BUILD_OPTIONS " enabled\n");
2029
2030         xfs_dir_startup();
2031
2032         error = xfs_init_zones();
2033         if (error)
2034                 goto out;
2035
2036         error = xfs_init_workqueues();
2037         if (error)
2038                 goto out_destroy_zones;
2039
2040         error = xfs_mru_cache_init();
2041         if (error)
2042                 goto out_destroy_wq;
2043
2044         error = xfs_buf_init();
2045         if (error)
2046                 goto out_mru_cache_uninit;
2047
2048         error = xfs_init_procfs();
2049         if (error)
2050                 goto out_buf_terminate;
2051
2052         error = xfs_sysctl_register();
2053         if (error)
2054                 goto out_cleanup_procfs;
2055
2056         xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2057         if (!xfs_kset) {
2058                 error = -ENOMEM;
2059                 goto out_sysctl_unregister;
2060         }
2061
2062         xfsstats.xs_kobj.kobject.kset = xfs_kset;
2063
2064         xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2065         if (!xfsstats.xs_stats) {
2066                 error = -ENOMEM;
2067                 goto out_kset_unregister;
2068         }
2069
2070         error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2071                                "stats");
2072         if (error)
2073                 goto out_free_stats;
2074
2075 #ifdef DEBUG
2076         xfs_dbg_kobj.kobject.kset = xfs_kset;
2077         error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2078         if (error)
2079                 goto out_remove_stats_kobj;
2080 #endif
2081
2082         error = xfs_qm_init();
2083         if (error)
2084                 goto out_remove_dbg_kobj;
2085
2086         error = register_filesystem(&xfs_fs_type);
2087         if (error)
2088                 goto out_qm_exit;
2089         return 0;
2090
2091  out_qm_exit:
2092         xfs_qm_exit();
2093  out_remove_dbg_kobj:
2094 #ifdef DEBUG
2095         xfs_sysfs_del(&xfs_dbg_kobj);
2096  out_remove_stats_kobj:
2097 #endif
2098         xfs_sysfs_del(&xfsstats.xs_kobj);
2099  out_free_stats:
2100         free_percpu(xfsstats.xs_stats);
2101  out_kset_unregister:
2102         kset_unregister(xfs_kset);
2103  out_sysctl_unregister:
2104         xfs_sysctl_unregister();
2105  out_cleanup_procfs:
2106         xfs_cleanup_procfs();
2107  out_buf_terminate:
2108         xfs_buf_terminate();
2109  out_mru_cache_uninit:
2110         xfs_mru_cache_uninit();
2111  out_destroy_wq:
2112         xfs_destroy_workqueues();
2113  out_destroy_zones:
2114         xfs_destroy_zones();
2115  out:
2116         return error;
2117 }
2118
/*
 * Module exit point: tear down everything init_xfs_fs() set up, then free
 * the UUID table.
 */
STATIC void __exit
exit_xfs_fs(void)
{
	xfs_qm_exit();
	unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
	/* The debug sysfs object only exists in DEBUG builds. */
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	/* Remove the stats sysfs object before freeing the per-cpu data. */
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_buf_terminate();
	xfs_mru_cache_uninit();
	xfs_destroy_workqueues();
	xfs_destroy_zones();
	/* The UUID table is not set up by init_xfs_fs(); it is freed last. */
	xfs_uuid_table_free();
}
2138
/* Module load/unload entry points. */
module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

/* Module metadata. */
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");