]> asedeno.scripts.mit.edu Git - linux.git/blob - fs/xfs/libxfs/xfs_bmap.c
60cfa90163b86e04f68c8b5e5f1f27d56db7f682
[linux.git] / fs / xfs / libxfs / xfs_bmap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_sb.h"
26 #include "xfs_mount.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_dir2.h"
30 #include "xfs_inode.h"
31 #include "xfs_btree.h"
32 #include "xfs_trans.h"
33 #include "xfs_inode_item.h"
34 #include "xfs_extfree_item.h"
35 #include "xfs_alloc.h"
36 #include "xfs_bmap.h"
37 #include "xfs_bmap_util.h"
38 #include "xfs_bmap_btree.h"
39 #include "xfs_rtalloc.h"
40 #include "xfs_error.h"
41 #include "xfs_quota.h"
42 #include "xfs_trans_space.h"
43 #include "xfs_buf_item.h"
44 #include "xfs_trace.h"
45 #include "xfs_symlink.h"
46 #include "xfs_attr_leaf.h"
47 #include "xfs_filestream.h"
48
49
50 kmem_zone_t             *xfs_bmap_free_item_zone;
51
52 /*
53  * Miscellaneous helper functions
54  */
55
56 /*
57  * Compute and fill in the value of the maximum depth of a bmap btree
58  * in this filesystem.  Done once, during mount.
59  */
60 void
61 xfs_bmap_compute_maxlevels(
62         xfs_mount_t     *mp,            /* file system mount structure */
63         int             whichfork)      /* data or attr fork */
64 {
65         int             level;          /* btree level */
66         uint            maxblocks;      /* max blocks at this level */
67         uint            maxleafents;    /* max leaf entries possible */
68         int             maxrootrecs;    /* max records in root block */
69         int             minleafrecs;    /* min records in leaf block */
70         int             minnoderecs;    /* min records in node block */
71         int             sz;             /* root block size */
72
73         /*
74          * The maximum number of extents in a file, hence the maximum
75          * number of leaf entries, is controlled by the type of di_nextents
76          * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
77          * (a signed 16-bit number, xfs_aextnum_t).
78          *
79          * Note that we can no longer assume that if we are in ATTR1 that
80          * the fork offset of all the inodes will be
81          * (xfs_default_attroffset(ip) >> 3) because we could have mounted
82          * with ATTR2 and then mounted back with ATTR1, keeping the
83          * di_forkoff's fixed but probably at various positions. Therefore,
84          * for both ATTR1 and ATTR2 we have to assume the worst case scenario
85          * of a minimum size available.
86          */
87         if (whichfork == XFS_DATA_FORK) {
88                 maxleafents = MAXEXTNUM;
89                 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
90         } else {
91                 maxleafents = MAXAEXTNUM;
92                 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
93         }
94         maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
95         minleafrecs = mp->m_bmap_dmnr[0];
96         minnoderecs = mp->m_bmap_dmnr[1];
97         maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
98         for (level = 1; maxblocks > 1; level++) {
99                 if (maxblocks <= maxrootrecs)
100                         maxblocks = 1;
101                 else
102                         maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
103         }
104         mp->m_bm_maxlevels[whichfork] = level;
105 }
106
107 STATIC int                              /* error */
108 xfs_bmbt_lookup_eq(
109         struct xfs_btree_cur    *cur,
110         xfs_fileoff_t           off,
111         xfs_fsblock_t           bno,
112         xfs_filblks_t           len,
113         int                     *stat)  /* success/failure */
114 {
115         cur->bc_rec.b.br_startoff = off;
116         cur->bc_rec.b.br_startblock = bno;
117         cur->bc_rec.b.br_blockcount = len;
118         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
119 }
120
121 STATIC int                              /* error */
122 xfs_bmbt_lookup_ge(
123         struct xfs_btree_cur    *cur,
124         xfs_fileoff_t           off,
125         xfs_fsblock_t           bno,
126         xfs_filblks_t           len,
127         int                     *stat)  /* success/failure */
128 {
129         cur->bc_rec.b.br_startoff = off;
130         cur->bc_rec.b.br_startblock = bno;
131         cur->bc_rec.b.br_blockcount = len;
132         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
133 }
134
135 /*
136  * Check if the inode needs to be converted to btree format.
137  */
138 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
139 {
140         return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
141                 XFS_IFORK_NEXTENTS(ip, whichfork) >
142                         XFS_IFORK_MAXEXT(ip, whichfork);
143 }
144
145 /*
146  * Check if the inode should be converted to extent format.
147  */
148 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
149 {
150         return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
151                 XFS_IFORK_NEXTENTS(ip, whichfork) <=
152                         XFS_IFORK_MAXEXT(ip, whichfork);
153 }
154
155 /*
156  * Update the record referred to by cur to the value given
157  * by [off, bno, len, state].
158  * This either works (return 0) or gets an EFSCORRUPTED error.
159  */
160 STATIC int
161 xfs_bmbt_update(
162         struct xfs_btree_cur    *cur,
163         xfs_fileoff_t           off,
164         xfs_fsblock_t           bno,
165         xfs_filblks_t           len,
166         xfs_exntst_t            state)
167 {
168         union xfs_btree_rec     rec;
169
170         xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
171         return xfs_btree_update(cur, &rec);
172 }
173
/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 *
 * Returns the total number of bmap btree blocks (all levels) needed to
 * map a single extent of "len" blocks, assuming minimal fanout at each
 * level.
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
        xfs_inode_t     *ip,            /* incore inode pointer */
        xfs_filblks_t   len)            /* delayed extent length */
{
        int             level;          /* btree level number */
        int             maxrecs;        /* maximum record count at this level */
        xfs_mount_t     *mp;            /* mount structure */
        xfs_filblks_t   rval;           /* return value */

        mp = ip->i_mount;
        maxrecs = mp->m_bmap_dmxr[0];   /* leaf-level fanout first */
        for (level = 0, rval = 0;
             level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
             level++) {
                /* Blocks needed at this level: ceil(len / maxrecs). */
                len += maxrecs - 1;
                do_div(len, maxrecs);   /* do_div() divides len in place */
                rval += len;
                /*
                 * Once a level collapses to a single block, every level
                 * above it needs exactly one block, so shortcut out.
                 */
                if (len == 1)
                        return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
                                level - 1;
                if (level == 0)
                        maxrecs = mp->m_bmap_dmxr[1];   /* node fanout above */
        }
        return rval;
}
204
205 /*
206  * Calculate the default attribute fork offset for newly created inodes.
207  */
208 uint
209 xfs_default_attroffset(
210         struct xfs_inode        *ip)
211 {
212         struct xfs_mount        *mp = ip->i_mount;
213         uint                    offset;
214
215         if (mp->m_sb.sb_inodesize == 256) {
216                 offset = XFS_LITINO(mp, ip->i_d.di_version) -
217                                 XFS_BMDR_SPACE_CALC(MINABTPTRS);
218         } else {
219                 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
220         }
221
222         ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
223         return offset;
224 }
225
226 /*
227  * Helper routine to reset inode di_forkoff field when switching
228  * attribute fork from local to extent format - we reset it where
229  * possible to make space available for inline data fork extents.
230  */
231 STATIC void
232 xfs_bmap_forkoff_reset(
233         xfs_inode_t     *ip,
234         int             whichfork)
235 {
236         if (whichfork == XFS_ATTR_FORK &&
237             ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
238             ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
239             ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
240                 uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
241
242                 if (dfl_forkoff > ip->i_d.di_forkoff)
243                         ip->i_d.di_forkoff = dfl_forkoff;
244         }
245 }
246
247 /*
248  * Debug/sanity checking code
249  */
250
251 STATIC int
252 xfs_bmap_sanity_check(
253         struct xfs_mount        *mp,
254         struct xfs_buf          *bp,
255         int                     level)
256 {
257         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
258
259         if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
260             block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
261                 return 0;
262
263         if (be16_to_cpu(block->bb_level) != level ||
264             be16_to_cpu(block->bb_numrecs) == 0 ||
265             be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
266                 return 0;
267
268         return 1;
269 }
270
271 #ifdef DEBUG
272 STATIC struct xfs_buf *
273 xfs_bmap_get_bp(
274         struct xfs_btree_cur    *cur,
275         xfs_fsblock_t           bno)
276 {
277         struct xfs_log_item_desc *lidp;
278         int                     i;
279
280         if (!cur)
281                 return NULL;
282
283         for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
284                 if (!cur->bc_bufs[i])
285                         break;
286                 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
287                         return cur->bc_bufs[i];
288         }
289
290         /* Chase down all the log items to see if the bp is there */
291         list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
292                 struct xfs_buf_log_item *bip;
293                 bip = (struct xfs_buf_log_item *)lidp->lid_item;
294                 if (bip->bli_item.li_type == XFS_LI_BUF &&
295                     XFS_BUF_ADDR(bip->bli_buf) == bno)
296                         return bip->bli_buf;
297         }
298
299         return NULL;
300 }
301
/*
 * Sanity-check a single interior bmap btree block: keys must be in
 * strictly increasing order and no two child block pointers may be
 * duplicates.  Panics on failure (DEBUG-only code).
 *
 * block: btree block to check (must be an interior node, level > 0)
 * mp:    file system mount structure
 * root:  non-zero if this is the root block held in the inode fork
 * sz:    byte size of the root block; used only when root is set
 */
STATIC void
xfs_check_block(
        struct xfs_btree_block  *block,
        xfs_mount_t             *mp,
        int                     root,
        short                   sz)
{
        int                     i, j, dmxr;
        __be64                  *pp, *thispa;   /* pointer to block address */
        xfs_bmbt_key_t          *prevp, *keyp;

        ASSERT(be16_to_cpu(block->bb_level) > 0);

        prevp = NULL;
        for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
                dmxr = mp->m_bmap_dmxr[0];
                keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

                /* Keys must strictly increase from one entry to the next. */
                if (prevp) {
                        ASSERT(be64_to_cpu(prevp->br_startoff) <
                               be64_to_cpu(keyp->br_startoff));
                }
                prevp = keyp;

                /*
                 * Compare the block numbers to see if there are dups.
                 */
                /* Root blocks and regular nodes lay out pointers differently. */
                if (root)
                        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
                else
                        pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

                /* O(n^2) scan of the remaining pointers for a duplicate. */
                for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
                        if (root)
                                thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
                        else
                                thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
                        if (*thispa == *pp) {
                                xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
                                        __func__, j, i,
                                        (unsigned long long)be64_to_cpu(*thispa));
                                panic("%s: ptrs are equal in node\n",
                                        __func__);
                        }
                }
        }
}
349
/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves.
 *
 * Walks down the leftmost spine of the bmap btree for "whichfork",
 * sanity-checking each interior block, then walks the leaf level left
 * to right verifying that every extent starts at or after the end of
 * the previous one (including across leaf boundaries).  Panics if the
 * tree is corrupt (DEBUG-only code).
 */

STATIC void
xfs_bmap_check_leaf_extents(
        xfs_btree_cur_t         *cur,   /* btree cursor or null */
        xfs_inode_t             *ip,            /* incore inode pointer */
        int                     whichfork)      /* data or attr fork */
{
        struct xfs_btree_block  *block; /* current btree block */
        xfs_fsblock_t           bno;    /* block # of "block" */
        xfs_buf_t               *bp;    /* buffer for "block" */
        int                     error;  /* error return value */
        xfs_extnum_t            i=0, j; /* index into the extents list */
        xfs_ifork_t             *ifp;   /* fork structure */
        int                     level;  /* btree level, for checking */
        xfs_mount_t             *mp;    /* file system mount structure */
        __be64                  *pp;    /* pointer to block address */
        xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
        xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
        xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
        int                     bp_release = 0; /* set when we read bp ourselves */

        /* Nothing to check unless the fork is in btree format. */
        if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
                return;
        }

        bno = NULLFSBLOCK;
        mp = ip->i_mount;
        ifp = XFS_IFORK_PTR(ip, whichfork);
        block = ifp->if_broot;
        /*
         * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
         */
        level = be16_to_cpu(block->bb_level);
        ASSERT(level > 0);
        xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
        bno = be64_to_cpu(*pp);

        ASSERT(bno != NULLFSBLOCK);
        ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
        ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

        /*
         * Go down the tree until leaf level is reached, following the first
         * pointer (leftmost) at each level.
         */
        while (level-- > 0) {
                /* See if buf is in cur first */
                bp_release = 0;
                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
                if (!bp) {
                        /* Not held anywhere; read it and release it later. */
                        bp_release = 1;
                        error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
                                                XFS_BMAP_BTREE_REF,
                                                &xfs_bmbt_buf_ops);
                        if (error)
                                goto error_norelse;
                }
                block = XFS_BUF_TO_BLOCK(bp);
                XFS_WANT_CORRUPTED_GOTO(mp,
                        xfs_bmap_sanity_check(mp, bp, level),
                        error0);
                if (level == 0)
                        break;

                /*
                 * Check this block for basic sanity (increasing keys and
                 * no duplicate blocks).
                 */

                xfs_check_block(block, mp, 0, 0);
                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
                bno = be64_to_cpu(*pp);
                XFS_WANT_CORRUPTED_GOTO(mp,
                                        XFS_FSB_SANITY_CHECK(mp, bno), error0);
                if (bp_release) {
                        bp_release = 0;
                        xfs_trans_brelse(NULL, bp);
                }
        }

        /*
         * Here with bp and block set to the leftmost leaf node in the tree.
         */
        i = 0;

        /*
         * Loop over all leaf nodes checking that all extents are in the right order.
         */
        for (;;) {
                xfs_fsblock_t   nextbno;
                xfs_extnum_t    num_recs;


                num_recs = xfs_btree_get_numrecs(block);

                /*
                 * Read-ahead the next leaf block, if any.
                 */

                nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

                /*
                 * Check all the extents to make sure they are OK.
                 * If we had a previous block, the last entry should
                 * conform with the first entry in this one.
                 */

                ep = XFS_BMBT_REC_ADDR(mp, block, 1);
                if (i) {
                        /* Cross-leaf check against the previous block's tail. */
                        ASSERT(xfs_bmbt_disk_get_startoff(&last) +
                               xfs_bmbt_disk_get_blockcount(&last) <=
                               xfs_bmbt_disk_get_startoff(ep));
                }
                for (j = 1; j < num_recs; j++) {
                        /* Each extent must end at or before the next begins. */
                        nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
                        ASSERT(xfs_bmbt_disk_get_startoff(ep) +
                               xfs_bmbt_disk_get_blockcount(ep) <=
                               xfs_bmbt_disk_get_startoff(nextp));
                        ep = nextp;
                }

                /* Remember the tail extent for the cross-leaf check above. */
                last = *ep;
                i += num_recs;
                if (bp_release) {
                        bp_release = 0;
                        xfs_trans_brelse(NULL, bp);
                }
                bno = nextbno;
                /*
                 * If we've reached the end, stop.
                 */
                if (bno == NULLFSBLOCK)
                        break;

                bp_release = 0;
                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
                if (!bp) {
                        bp_release = 1;
                        error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
                                                XFS_BMAP_BTREE_REF,
                                                &xfs_bmbt_buf_ops);
                        if (error)
                                goto error_norelse;
                }
                block = XFS_BUF_TO_BLOCK(bp);
        }
        if (bp_release) {
                bp_release = 0;
                xfs_trans_brelse(NULL, bp);
        }
        return;

error0:
        /* Corruption found after bp was read: release it before panicking. */
        xfs_warn(mp, "%s: at error0", __func__);
        if (bp_release)
                xfs_trans_brelse(NULL, bp);
error_norelse:
        xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
                __func__, i);
        panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
        return;
}
517
518 /*
519  * Add bmap trace insert entries for all the contents of the extent records.
520  */
521 void
522 xfs_bmap_trace_exlist(
523         xfs_inode_t     *ip,            /* incore inode pointer */
524         xfs_extnum_t    cnt,            /* count of entries in the list */
525         int             whichfork,      /* data or attr fork */
526         unsigned long   caller_ip)
527 {
528         xfs_extnum_t    idx;            /* extent record index */
529         xfs_ifork_t     *ifp;           /* inode fork pointer */
530         int             state = 0;
531
532         if (whichfork == XFS_ATTR_FORK)
533                 state |= BMAP_ATTRFORK;
534
535         ifp = XFS_IFORK_PTR(ip, whichfork);
536         ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
537         for (idx = 0; idx < cnt; idx++)
538                 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
539 }
540
541 /*
542  * Validate that the bmbt_irecs being returned from bmapi are valid
543  * given the caller's original parameters.  Specifically check the
544  * ranges of the returned irecs to ensure that they only extend beyond
545  * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
546  */
547 STATIC void
548 xfs_bmap_validate_ret(
549         xfs_fileoff_t           bno,
550         xfs_filblks_t           len,
551         int                     flags,
552         xfs_bmbt_irec_t         *mval,
553         int                     nmap,
554         int                     ret_nmap)
555 {
556         int                     i;              /* index to map values */
557
558         ASSERT(ret_nmap <= nmap);
559
560         for (i = 0; i < ret_nmap; i++) {
561                 ASSERT(mval[i].br_blockcount > 0);
562                 if (!(flags & XFS_BMAPI_ENTIRE)) {
563                         ASSERT(mval[i].br_startoff >= bno);
564                         ASSERT(mval[i].br_blockcount <= len);
565                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
566                                bno + len);
567                 } else {
568                         ASSERT(mval[i].br_startoff < bno + len);
569                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
570                                bno);
571                 }
572                 ASSERT(i == 0 ||
573                        mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
574                        mval[i].br_startoff);
575                 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
576                        mval[i].br_startblock != HOLESTARTBLOCK);
577                 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
578                        mval[i].br_state == XFS_EXT_UNWRITTEN);
579         }
580 }
581
582 #else
583 #define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
584 #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
585 #endif /* DEBUG */
586
587 /*
588  * bmap free list manipulation functions
589  */
590
591 /*
592  * Add the extent to the list of extents to be free at transaction end.
593  * The list is maintained sorted (by block number).
594  */
595 void
596 xfs_bmap_add_free(
597         xfs_fsblock_t           bno,            /* fs block number of extent */
598         xfs_filblks_t           len,            /* length of extent */
599         xfs_bmap_free_t         *flist,         /* list of extents */
600         xfs_mount_t             *mp)            /* mount point structure */
601 {
602         xfs_bmap_free_item_t    *cur;           /* current (next) element */
603         xfs_bmap_free_item_t    *new;           /* new element */
604         xfs_bmap_free_item_t    *prev;          /* previous element */
605 #ifdef DEBUG
606         xfs_agnumber_t          agno;
607         xfs_agblock_t           agbno;
608
609         ASSERT(bno != NULLFSBLOCK);
610         ASSERT(len > 0);
611         ASSERT(len <= MAXEXTLEN);
612         ASSERT(!isnullstartblock(bno));
613         agno = XFS_FSB_TO_AGNO(mp, bno);
614         agbno = XFS_FSB_TO_AGBNO(mp, bno);
615         ASSERT(agno < mp->m_sb.sb_agcount);
616         ASSERT(agbno < mp->m_sb.sb_agblocks);
617         ASSERT(len < mp->m_sb.sb_agblocks);
618         ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
619 #endif
620         ASSERT(xfs_bmap_free_item_zone != NULL);
621         new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
622         new->xbfi_startblock = bno;
623         new->xbfi_blockcount = (xfs_extlen_t)len;
624         for (prev = NULL, cur = flist->xbf_first;
625              cur != NULL;
626              prev = cur, cur = cur->xbfi_next) {
627                 if (cur->xbfi_startblock >= bno)
628                         break;
629         }
630         if (prev)
631                 prev->xbfi_next = new;
632         else
633                 flist->xbf_first = new;
634         new->xbfi_next = cur;
635         flist->xbf_count++;
636 }
637
638 /*
639  * Remove the entry "free" from the free item list.  Prev points to the
640  * previous entry, unless "free" is the head of the list.
641  */
642 void
643 xfs_bmap_del_free(
644         xfs_bmap_free_t         *flist, /* free item list header */
645         xfs_bmap_free_item_t    *prev,  /* previous item on list, if any */
646         xfs_bmap_free_item_t    *free)  /* list item to be freed */
647 {
648         if (prev)
649                 prev->xbfi_next = free->xbfi_next;
650         else
651                 flist->xbf_first = free->xbfi_next;
652         flist->xbf_count--;
653         kmem_zone_free(xfs_bmap_free_item_zone, free);
654 }
655
656 /*
657  * Free up any items left in the list.
658  */
659 void
660 xfs_bmap_cancel(
661         xfs_bmap_free_t         *flist) /* list of bmap_free_items */
662 {
663         xfs_bmap_free_item_t    *free;  /* free list item */
664         xfs_bmap_free_item_t    *next;
665
666         if (flist->xbf_count == 0)
667                 return;
668         ASSERT(flist->xbf_first != NULL);
669         for (free = flist->xbf_first; free; free = next) {
670                 next = free->xbfi_next;
671                 xfs_bmap_del_free(flist, NULL, free);
672         }
673         ASSERT(flist->xbf_count == 0);
674 }
675
676 /*
677  * Inode fork format manipulation functions
678  */
679
/*
 * Transform a btree format file with only one leaf node, where the
 * extents list will fit in the inode, into an extents format file.
 * Since the file extents are already in-core, all we have to do is
 * give up the space for the btree root and pitch the leaf block.
 *
 * Returns 0 on success or a negative error.  On success *logflagsp is
 * set to the inode log flags the caller must apply; it is zeroed first
 * so error returns leave it clean.
 */
STATIC int                              /* error */
xfs_bmap_btree_to_extents(
        xfs_trans_t             *tp,    /* transaction pointer */
        xfs_inode_t             *ip,    /* incore inode pointer */
        xfs_btree_cur_t         *cur,   /* btree cursor */
        int                     *logflagsp, /* inode logging flags */
        int                     whichfork)  /* data or attr fork */
{
        /* REFERENCED */
        struct xfs_btree_block  *cblock;/* child btree block */
        xfs_fsblock_t           cbno;   /* child block number */
        xfs_buf_t               *cbp;   /* child block's buffer */
        int                     error;  /* error return value */
        xfs_ifork_t             *ifp;   /* inode fork data */
        xfs_mount_t             *mp;    /* mount point structure */
        __be64                  *pp;    /* ptr to block address */
        struct xfs_btree_block  *rblock;/* root btree block */

        mp = ip->i_mount;
        ifp = XFS_IFORK_PTR(ip, whichfork);
        ASSERT(ifp->if_flags & XFS_IFEXTENTS);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
        rblock = ifp->if_broot;
        /* Conversion is only legal for a 2-level tree with a single leaf. */
        ASSERT(be16_to_cpu(rblock->bb_level) == 1);
        ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
        ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
        /* Locate the one child (leaf) block below the root. */
        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
        cbno = be64_to_cpu(*pp);
        *logflagsp = 0;
#ifdef DEBUG
        if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
                return error;
#endif
        error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
                                &xfs_bmbt_buf_ops);
        if (error)
                return error;
        cblock = XFS_BUF_TO_BLOCK(cbp);
        if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
                return error;
        /* Give the leaf block back and account for the shrinkage. */
        xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
        ip->i_d.di_nblocks--;
        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
        /* The leaf's buffer must not be written back; invalidate it. */
        xfs_trans_binval(tp, cbp);
        if (cur->bc_bufs[0] == cbp)
                cur->bc_bufs[0] = NULL;
        /* Drop the root from the inode fork and flip the fork format. */
        xfs_iroot_realloc(ip, -1, whichfork);
        ASSERT(ifp->if_broot == NULL);
        ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
        *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
        return 0;
}
739
740 /*
741  * Convert an extents-format file into a btree-format file.
742  * The new file will have a root block (in the inode) and a single child block.
743  */
744 STATIC int                                      /* error */
745 xfs_bmap_extents_to_btree(
746         xfs_trans_t             *tp,            /* transaction pointer */
747         xfs_inode_t             *ip,            /* incore inode pointer */
748         xfs_fsblock_t           *firstblock,    /* first-block-allocated */
749         xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
750         xfs_btree_cur_t         **curp,         /* cursor returned to caller */
751         int                     wasdel,         /* converting a delayed alloc */
752         int                     *logflagsp,     /* inode logging flags */
753         int                     whichfork)      /* data or attr fork */
754 {
755         struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
756         xfs_buf_t               *abp;           /* buffer for ablock */
757         xfs_alloc_arg_t         args;           /* allocation arguments */
758         xfs_bmbt_rec_t          *arp;           /* child record pointer */
759         struct xfs_btree_block  *block;         /* btree root block */
760         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
761         xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
762         int                     error;          /* error return value */
763         xfs_extnum_t            i, cnt;         /* extent record index */
764         xfs_ifork_t             *ifp;           /* inode fork pointer */
765         xfs_bmbt_key_t          *kp;            /* root block key pointer */
766         xfs_mount_t             *mp;            /* mount structure */
767         xfs_extnum_t            nextents;       /* number of file extents */
768         xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
769
770         mp = ip->i_mount;
771         ifp = XFS_IFORK_PTR(ip, whichfork);
772         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
773
774         /*
775          * Make space in the inode incore.
776          */
777         xfs_iroot_realloc(ip, 1, whichfork);
778         ifp->if_flags |= XFS_IFBROOT;
779
780         /*
781          * Fill in the root.
782          */
783         block = ifp->if_broot;
784         if (xfs_sb_version_hascrc(&mp->m_sb))
785                 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
786                                  XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
787                                  XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
788         else
789                 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
790                                  XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
791                                  XFS_BTREE_LONG_PTRS);
792
793         /*
794          * Need a cursor.  Can't allocate until bb_level is filled in.
795          */
796         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
797         cur->bc_private.b.firstblock = *firstblock;
798         cur->bc_private.b.flist = flist;
799         cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
800         /*
801          * Convert to a btree with two levels, one record in root.
802          */
803         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
804         memset(&args, 0, sizeof(args));
805         args.tp = tp;
806         args.mp = mp;
807         args.firstblock = *firstblock;
808         if (*firstblock == NULLFSBLOCK) {
809                 args.type = XFS_ALLOCTYPE_START_BNO;
810                 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
811         } else if (flist->xbf_low) {
812                 args.type = XFS_ALLOCTYPE_START_BNO;
813                 args.fsbno = *firstblock;
814         } else {
815                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
816                 args.fsbno = *firstblock;
817         }
818         args.minlen = args.maxlen = args.prod = 1;
819         args.wasdel = wasdel;
820         *logflagsp = 0;
821         if ((error = xfs_alloc_vextent(&args))) {
822                 xfs_iroot_realloc(ip, -1, whichfork);
823                 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
824                 return error;
825         }
826         /*
827          * Allocation can't fail, the space was reserved.
828          */
829         ASSERT(args.fsbno != NULLFSBLOCK);
830         ASSERT(*firstblock == NULLFSBLOCK ||
831                args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
832                (flist->xbf_low &&
833                 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
834         *firstblock = cur->bc_private.b.firstblock = args.fsbno;
835         cur->bc_private.b.allocated++;
836         ip->i_d.di_nblocks++;
837         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
838         abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
839         /*
840          * Fill in the child block.
841          */
842         abp->b_ops = &xfs_bmbt_buf_ops;
843         ablock = XFS_BUF_TO_BLOCK(abp);
844         if (xfs_sb_version_hascrc(&mp->m_sb))
845                 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
846                                 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
847                                 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
848         else
849                 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
850                                 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
851                                 XFS_BTREE_LONG_PTRS);
852
853         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
854         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
855         for (cnt = i = 0; i < nextents; i++) {
856                 ep = xfs_iext_get_ext(ifp, i);
857                 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
858                         arp->l0 = cpu_to_be64(ep->l0);
859                         arp->l1 = cpu_to_be64(ep->l1);
860                         arp++; cnt++;
861                 }
862         }
863         ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
864         xfs_btree_set_numrecs(ablock, cnt);
865
866         /*
867          * Fill in the root key and pointer.
868          */
869         kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
870         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
871         kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
872         pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
873                                                 be16_to_cpu(block->bb_level)));
874         *pp = cpu_to_be64(args.fsbno);
875
876         /*
877          * Do all this logging at the end so that
878          * the root is at the right level.
879          */
880         xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
881         xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
882         ASSERT(*curp == NULL);
883         *curp = cur;
884         *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
885         return 0;
886 }
887
888 /*
889  * Convert a local file to an extents file.
890  * This code is out of bounds for data forks of regular files,
891  * since the file data needs to get logged so things will stay consistent.
892  * (The bmap-level manipulations are ok, though).
893  */
/*
 * Finish converting an empty local-format fork to extents format: clear
 * the inline state and mark the fork as a (zero-length) extent fork.
 * Caller must already have logged/accounted for any freed inline space.
 */
void
xfs_bmap_local_to_extents_empty(
	struct xfs_inode	*ip,
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);

	/* Only valid for a local-format fork that holds no data. */
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_bytes == 0);
	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);

	/* Reset the fork offset now that the inline data is gone. */
	xfs_bmap_forkoff_reset(ip, whichfork);
	/* Flip the incore flags and the fork format over to extents. */
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
}
910
911
/*
 * Convert an inline (local format) fork to extents format.  A single
 * block is allocated, the caller-supplied init_fn copies the inline data
 * into it, and that block becomes the fork's one extent.
 */
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,	/* XFS_DATA_FORK or XFS_ATTR_FORK */
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	/* No inline data at all: just flip the fork format, no allocation. */
	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	/* The fork must be purely inline at this point. */
	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
								XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	args.firstblock = *firstblock;
	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (*firstblock == NULLFSBLOCK) {
		/* First allocation in this transaction: aim near the inode. */
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		/* Stay close to the transaction's earlier allocation. */
		args.fsbno = *firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	*firstblock = args.fsbno;
	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

	/*
	 * Initialise the block and copy the data
	 *
	 * Note: init_fn must set the buffer log item type correctly!
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size and log everything */
	xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(ip, whichfork);
	flags |= XFS_ILOG_CORE;

	/* Insert the new block as the fork's single incore extent record. */
	xfs_iext_add(ifp, 0, 1);
	ep = xfs_iext_get_ext(ifp, 0);
	xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
	trace_xfs_bmap_post_update(ip, 0,
			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
			_THIS_IP_);
	/* One extent, one block: update inode counters and quota. */
	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
	ip->i_d.di_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}
1006
1007 /*
1008  * Called from xfs_bmap_add_attrfork to handle btree format files.
1009  */
STATIC int					/* error */
xfs_bmap_add_attrfork_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* btree cursor */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* file system mount struct */
	int			stat;		/* newroot status */

	mp = ip->i_mount;
	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
		/* The btree root still fits beside the new attr fork. */
		*flags |= XFS_ILOG_DBROOT;
	else {
		/*
		 * The incore root is now too big: push it down a level by
		 * allocating a new root block (xfs_btree_new_iroot).
		 */
		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
		cur->bc_private.b.flist = flist;
		cur->bc_private.b.firstblock = *firstblock;
		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
			goto error0;
		/* must be at least one entry */
		XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
			goto error0;
		if (stat == 0) {
			/* No block could be allocated for the new root. */
			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
			return -ENOSPC;
		}
		/* Propagate the allocation back to the caller. */
		*firstblock = cur->bc_private.b.firstblock;
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	}
	return 0;
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
1049
1050 /*
1051  * Called from xfs_bmap_add_attrfork to handle extents format files.
1052  */
1053 STATIC int                                      /* error */
1054 xfs_bmap_add_attrfork_extents(
1055         xfs_trans_t             *tp,            /* transaction pointer */
1056         xfs_inode_t             *ip,            /* incore inode pointer */
1057         xfs_fsblock_t           *firstblock,    /* first block allocated */
1058         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
1059         int                     *flags)         /* inode logging flags */
1060 {
1061         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
1062         int                     error;          /* error return value */
1063
1064         if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
1065                 return 0;
1066         cur = NULL;
1067         error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
1068                 flags, XFS_DATA_FORK);
1069         if (cur) {
1070                 cur->bc_private.b.allocated = 0;
1071                 xfs_btree_del_cursor(cur,
1072                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1073         }
1074         return error;
1075 }
1076
1077 /*
1078  * Called from xfs_bmap_add_attrfork to handle local format files. Each
1079  * different data fork content type needs a different callout to do the
1080  * conversion. Some are basic and only require special block initialisation
 * callouts for the data formatting, others (directories) are so specialised they
1082  * handle everything themselves.
1083  *
1084  * XXX (dgc): investigate whether directory conversion can use the generic
1085  * formatting callout. It should be possible - it's just a very complex
1086  * formatter.
1087  */
STATIC int					/* error */
xfs_bmap_add_attrfork_local(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_da_args_t		dargs;		/* args for dir/attr code */

	/* Inline data still fits beside the new attr fork: nothing to do. */
	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
		return 0;

	/* Directories convert through the dir2 shortform-to-block code. */
	if (S_ISDIR(ip->i_d.di_mode)) {
		memset(&dargs, 0, sizeof(dargs));
		dargs.geo = ip->i_mount->m_dir_geo;
		dargs.dp = ip;
		dargs.firstblock = firstblock;
		dargs.flist = flist;
		dargs.total = dargs.geo->fsbcount;
		dargs.whichfork = XFS_DATA_FORK;
		dargs.trans = tp;
		return xfs_dir2_sf_to_block(&dargs);
	}

	/* Symlinks use the generic converter with a symlink initialiser. */
	if (S_ISLNK(ip->i_d.di_mode))
		return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
						 flags, XFS_DATA_FORK,
						 xfs_symlink_local_to_remote);

	/* should only be called for types that support local format data */
	ASSERT(0);
	return -EFSCORRUPTED;
}
1122
1123 /*
1124  * Convert inode from non-attributed to attributed.
1125  * Must not be in a transaction, ip must not be locked.
1126  */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	xfs_bmap_free_t		flist;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			committed;	/* xaction was committed */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */
	int			cancel_flags = 0;

	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	/* Set up and reserve space for the add-attrfork transaction. */
	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
	blks = XFS_ADDAFORK_SPACE_RES(mp);
	if (rsvd)
		tp->t_flags |= XFS_TRANS_RESERVE;
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;
	cancel_flags |= XFS_TRANS_ABORT;
	/* Re-check under the lock: someone may have added the fork already. */
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}
	ASSERT(ip->i_d.di_anextents == 0);

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* Pick the attr fork offset based on the data fork's format. */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_UUID:
		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			/* Non-default offset implies the attr2 format. */
			version = 2;
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
		goto trans_cancel;
	}

	/* Create the incore attr fork, initially in extents format. */
	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_bmap_init(&flist, &firstblock);
	/* The data fork may need converting to make room for the attr fork. */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&flist, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto bmap_cancel;
	/* Set the attr/attr2 superblock feature bits if not already set. */
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	/* Commit any deferred block frees, then the transaction itself. */
	error = xfs_bmap_finish(&tp, &flist, &committed);
	if (error)
		goto bmap_cancel;
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

bmap_cancel:
	xfs_bmap_cancel(&flist);
trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
1259
1260 /*
1261  * Internal and external extent tree search functions.
1262  */
1263
1264 /*
1265  * Read in the extents to if_extents.
1266  * All inode fields are set up by caller, we just traverse the btree
1267  * and copy the records in. If the file system cannot contain unwritten
1268  * extents, the records are checked for no "state" flags.
1269  */
int					/* error */
xfs_bmap_read_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
	xfs_extnum_t		i, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	/* REFERENCED */
	xfs_extnum_t		room;	/* number of entries there's room for */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Attr forks (and older data forks) carry no extent-state flag. */
	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
					XFS_EXTFMT_INODE(ip);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
		XFS_WANT_CORRUPTED_GOTO(mp,
			xfs_bmap_sanity_check(mp, bp, level), error0);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
			XFS_FSB_SANITY_CHECK(mp, bno), error0);
		/* Done with this interior block; drop it before descending. */
		xfs_trans_brelse(tp, bp);
	}
	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	i = 0;
	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;
		xfs_extnum_t	start;

		num_recs = xfs_btree_get_numrecs(block);
		/* More records than the incore fork has room for: corrupt. */
		if (unlikely(i + num_recs > room)) {
			ASSERT(i + num_recs <= room);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
				XFS_ERRLEVEL_LOW, ip->i_mount, block);
			goto error0;
		}
		XFS_WANT_CORRUPTED_GOTO(mp,
			xfs_bmap_sanity_check(mp, bp, 0),
			error0);
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		start = i;
		for (j = 0; j < num_recs; j++, i++, frp++) {
			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
			trp->l0 = be64_to_cpu(frp->l0);
			trp->l1 = be64_to_cpu(frp->l1);
		}
		if (exntf == XFS_EXTFMT_NOSTATE) {
			/*
			 * Check all attribute bmap btree records and
			 * any "older" data bmap btree records for a
			 * set bit in the "extent flag" position.
			 */
			if (unlikely(xfs_check_nostate_extents(ifp,
					start, num_recs))) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				goto error0;
			}
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	/* We should have filled the incore fork exactly. */
	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
	return 0;
error0:
	xfs_trans_brelse(tp, bp);
	return -EFSCORRUPTED;
}
1404
1405
1406 /*
1407  * Search the extent records for the entry containing block bno.
1408  * If bno lies in a hole, point to the next entry.  If bno lies
1409  * past eof, *eofp will be set, and *prevp will contain the last
1410  * entry (null if none).  Else, *lastxp will be set to the index
1411  * of the found entry; *gotp will contain the entry.
1412  */
1413 STATIC xfs_bmbt_rec_host_t *            /* pointer to found extent entry */
1414 xfs_bmap_search_multi_extents(
1415         xfs_ifork_t     *ifp,           /* inode fork pointer */
1416         xfs_fileoff_t   bno,            /* block number searched for */
1417         int             *eofp,          /* out: end of file found */
1418         xfs_extnum_t    *lastxp,        /* out: last extent index */
1419         xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
1420         xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
1421 {
1422         xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
1423         xfs_extnum_t    lastx;          /* last extent index */
1424
1425         /*
1426          * Initialize the extent entry structure to catch access to
1427          * uninitialized br_startblock field.
1428          */
1429         gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
1430         gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
1431         gotp->br_state = XFS_EXT_INVALID;
1432         gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
1433         prevp->br_startoff = NULLFILEOFF;
1434
1435         ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
1436         if (lastx > 0) {
1437                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
1438         }
1439         if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1440                 xfs_bmbt_get_all(ep, gotp);
1441                 *eofp = 0;
1442         } else {
1443                 if (lastx > 0) {
1444                         *gotp = *prevp;
1445                 }
1446                 *eofp = 1;
1447                 ep = NULL;
1448         }
1449         *lastxp = lastx;
1450         return ep;
1451 }
1452
/*
 * Search the extents list for the inode, for the extent containing bno.
 * If bno lies in a hole, point to the next entry.  If bno lies past eof,
 * *eofp will be set, and *prevp will contain the last entry (null if none).
 * Else, *lastxp will be set to the index of the found
 * entry; *gotp will contain the entry.
 *
 * Returns a pointer to the in-core extent record, or NULL if bno is past
 * EOF or if the found mapping looks corrupt (see the block-zero check
 * below).
 */
STATIC xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
xfs_bmap_search_extents(
	xfs_inode_t     *ip,            /* incore inode pointer */
	xfs_fileoff_t   bno,            /* block number searched for */
	int             fork,           /* data or attr fork */
	int             *eofp,          /* out: end of file found */
	xfs_extnum_t    *lastxp,        /* out: last extent index */
	xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
	xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
{
	xfs_ifork_t     *ifp;           /* inode fork pointer */
	xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */

	XFS_STATS_INC(xs_look_exlist);
	ifp = XFS_IFORK_PTR(ip, fork);

	ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);

	/*
	 * A valid mapping (*lastxp != NULLEXTNUM) whose start block is zero
	 * is treated as corruption and reported, presumably because block 0
	 * holds filesystem metadata on the data device -- TODO confirm.
	 * The realtime data fork is exempt, as its block numbers are relative
	 * to the separate realtime device.  On detection, fake an EOF result
	 * so callers do not use the bogus mapping.
	 */
	if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
		     !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
		xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
				"Access to block zero in inode %llu "
				"start_block: %llx start_off: %llx "
				"blkcnt: %llx extent-state: %x lastx: %x",
			(unsigned long long)ip->i_ino,
			(unsigned long long)gotp->br_startblock,
			(unsigned long long)gotp->br_startoff,
			(unsigned long long)gotp->br_blockcount,
			gotp->br_state, *lastxp);
		*lastxp = NULLEXTNUM;
		*eofp = 1;
		return NULL;
	}
	return ep;
}
1495
1496 /*
1497  * Returns the file-relative block number of the first unused block(s)
1498  * in the file with at least "len" logically contiguous blocks free.
1499  * This is the lowest-address hole if the file has holes, else the first block
1500  * past the end of file.
1501  * Return 0 if the file is currently local (in-inode).
1502  */
1503 int                                             /* error */
1504 xfs_bmap_first_unused(
1505         xfs_trans_t     *tp,                    /* transaction pointer */
1506         xfs_inode_t     *ip,                    /* incore inode */
1507         xfs_extlen_t    len,                    /* size of hole to find */
1508         xfs_fileoff_t   *first_unused,          /* unused block */
1509         int             whichfork)              /* data or attr fork */
1510 {
1511         int             error;                  /* error return value */
1512         int             idx;                    /* extent record index */
1513         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1514         xfs_fileoff_t   lastaddr;               /* last block number seen */
1515         xfs_fileoff_t   lowest;                 /* lowest useful block */
1516         xfs_fileoff_t   max;                    /* starting useful block */
1517         xfs_fileoff_t   off;                    /* offset for this block */
1518         xfs_extnum_t    nextents;               /* number of extent entries */
1519
1520         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1521                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1522                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1523         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1524                 *first_unused = 0;
1525                 return 0;
1526         }
1527         ifp = XFS_IFORK_PTR(ip, whichfork);
1528         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1529             (error = xfs_iread_extents(tp, ip, whichfork)))
1530                 return error;
1531         lowest = *first_unused;
1532         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1533         for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1534                 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
1535                 off = xfs_bmbt_get_startoff(ep);
1536                 /*
1537                  * See if the hole before this extent will work.
1538                  */
1539                 if (off >= lowest + len && off - max >= len) {
1540                         *first_unused = max;
1541                         return 0;
1542                 }
1543                 lastaddr = off + xfs_bmbt_get_blockcount(ep);
1544                 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1545         }
1546         *first_unused = max;
1547         return 0;
1548 }
1549
/*
 * Returns the file-relative block number of the last block - 1 before
 * last_block (input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int                                             /* error */
xfs_bmap_last_before(
	xfs_trans_t	*tp,			/* transaction pointer */
	xfs_inode_t	*ip,			/* incore inode */
	xfs_fileoff_t	*last_block,		/* in: search limit, out: result */
	int		whichfork)		/* data or attr fork */
{
	xfs_fileoff_t	bno;			/* input file offset */
	int		eof;			/* hit end of file */
	xfs_bmbt_rec_host_t *ep;		/* pointer to last extent */
	int		error;			/* error return value */
	xfs_bmbt_irec_t	got;			/* current extent value */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_extnum_t	lastx;			/* last extent used */
	xfs_bmbt_irec_t	prev;			/* previous extent value */

	/* Only btree, extents and local fork formats are valid here. */
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
	       return -EIO;
	/* Local files keep their data in the inode: no extent records. */
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*last_block = 0;
		return 0;
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Pull the extent list into memory if it isn't there already. */
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	bno = *last_block - 1;
	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
		&prev);
	/*
	 * If bno is past EOF or lies in a hole before the extent found,
	 * back up to the end of the previous extent, or to zero if there
	 * is no previous extent.  Note the short-circuit: when eof is set,
	 * ep may be NULL and is deliberately not dereferenced.
	 */
	if (eof || xfs_bmbt_get_startoff(ep) > bno) {
		if (prev.br_startoff == NULLFILEOFF)
			*last_block = 0;
		else
			*last_block = prev.br_startoff + prev.br_blockcount;
	}
	/*
	 * Otherwise *last_block is already the right answer.
	 */
	return 0;
}
1598
1599 int
1600 xfs_bmap_last_extent(
1601         struct xfs_trans        *tp,
1602         struct xfs_inode        *ip,
1603         int                     whichfork,
1604         struct xfs_bmbt_irec    *rec,
1605         int                     *is_empty)
1606 {
1607         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1608         int                     error;
1609         int                     nextents;
1610
1611         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1612                 error = xfs_iread_extents(tp, ip, whichfork);
1613                 if (error)
1614                         return error;
1615         }
1616
1617         nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1618         if (nextents == 0) {
1619                 *is_empty = 1;
1620                 return 0;
1621         }
1622
1623         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
1624         *is_empty = 0;
1625         return 0;
1626 }
1627
1628 /*
1629  * Check the last inode extent to determine whether this allocation will result
1630  * in blocks being allocated at the end of the file. When we allocate new data
1631  * blocks at the end of the file which do not start at the previous data block,
1632  * we will try to align the new blocks at stripe unit boundaries.
1633  *
1634  * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1635  * at, or past the EOF.
1636  */
1637 STATIC int
1638 xfs_bmap_isaeof(
1639         struct xfs_bmalloca     *bma,
1640         int                     whichfork)
1641 {
1642         struct xfs_bmbt_irec    rec;
1643         int                     is_empty;
1644         int                     error;
1645
1646         bma->aeof = 0;
1647         error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1648                                      &is_empty);
1649         if (error)
1650                 return error;
1651
1652         if (is_empty) {
1653                 bma->aeof = 1;
1654                 return 0;
1655         }
1656
1657         /*
1658          * Check if we are allocation or past the last extent, or at least into
1659          * the last delayed allocated extent.
1660          */
1661         bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1662                 (bma->offset >= rec.br_startoff &&
1663                  isnullstartblock(rec.br_startblock));
1664         return 0;
1665 }
1666
1667 /*
1668  * Returns the file-relative block number of the first block past eof in
1669  * the file.  This is not based on i_size, it is based on the extent records.
1670  * Returns 0 for local files, as they do not have extent records.
1671  */
1672 int
1673 xfs_bmap_last_offset(
1674         struct xfs_inode        *ip,
1675         xfs_fileoff_t           *last_block,
1676         int                     whichfork)
1677 {
1678         struct xfs_bmbt_irec    rec;
1679         int                     is_empty;
1680         int                     error;
1681
1682         *last_block = 0;
1683
1684         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1685                 return 0;
1686
1687         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1688             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1689                return -EIO;
1690
1691         error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1692         if (error || is_empty)
1693                 return error;
1694
1695         *last_block = rec.br_startoff + rec.br_blockcount;
1696         return 0;
1697 }
1698
1699 /*
1700  * Returns whether the selected fork of the inode has exactly one
1701  * block or not.  For the data fork we check this matches di_size,
1702  * implying the file's range is 0..bsize-1.
1703  */
1704 int                                     /* 1=>1 block, 0=>otherwise */
1705 xfs_bmap_one_block(
1706         xfs_inode_t     *ip,            /* incore inode */
1707         int             whichfork)      /* data or attr fork */
1708 {
1709         xfs_bmbt_rec_host_t *ep;        /* ptr to fork's extent */
1710         xfs_ifork_t     *ifp;           /* inode fork pointer */
1711         int             rval;           /* return value */
1712         xfs_bmbt_irec_t s;              /* internal version of extent */
1713
1714 #ifndef DEBUG
1715         if (whichfork == XFS_DATA_FORK)
1716                 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1717 #endif  /* !DEBUG */
1718         if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
1719                 return 0;
1720         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1721                 return 0;
1722         ifp = XFS_IFORK_PTR(ip, whichfork);
1723         ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1724         ep = xfs_iext_get_ext(ifp, 0);
1725         xfs_bmbt_get_all(ep, &s);
1726         rval = s.br_startoff == 0 && s.br_blockcount == 1;
1727         if (rval && whichfork == XFS_DATA_FORK)
1728                 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1729         return rval;
1730 }
1731
1732 /*
1733  * Extent tree manipulation functions used during allocation.
1734  */
1735
1736 /*
1737  * Convert a delayed allocation to a real allocation.
1738  */
1739 STATIC int                              /* error */
1740 xfs_bmap_add_extent_delay_real(
1741         struct xfs_bmalloca     *bma)
1742 {
1743         struct xfs_bmbt_irec    *new = &bma->got;
1744         int                     diff;   /* temp value */
1745         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
1746         int                     error;  /* error return value */
1747         int                     i;      /* temp state */
1748         xfs_ifork_t             *ifp;   /* inode fork pointer */
1749         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1750         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1751                                         /* left is 0, right is 1, prev is 2 */
1752         int                     rval=0; /* return value (logging flags) */
1753         int                     state = 0;/* state bits, accessed thru macros */
1754         xfs_filblks_t           da_new; /* new count del alloc blocks used */
1755         xfs_filblks_t           da_old; /* old count del alloc blocks used */
1756         xfs_filblks_t           temp=0; /* value for da_new calculations */
1757         xfs_filblks_t           temp2=0;/* value for da_new calculations */
1758         int                     tmp_rval;       /* partial logging flags */
1759         struct xfs_mount        *mp;
1760
1761         mp  = bma->tp ? bma->tp->t_mountp : NULL;
1762         ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
1763
1764         ASSERT(bma->idx >= 0);
1765         ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
1766         ASSERT(!isnullstartblock(new->br_startblock));
1767         ASSERT(!bma->cur ||
1768                (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
1769
1770         XFS_STATS_INC(xs_add_exlist);
1771
1772 #define LEFT            r[0]
1773 #define RIGHT           r[1]
1774 #define PREV            r[2]
1775
1776         /*
1777          * Set up a bunch of variables to make the tests simpler.
1778          */
1779         ep = xfs_iext_get_ext(ifp, bma->idx);
1780         xfs_bmbt_get_all(ep, &PREV);
1781         new_endoff = new->br_startoff + new->br_blockcount;
1782         ASSERT(PREV.br_startoff <= new->br_startoff);
1783         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1784
1785         da_old = startblockval(PREV.br_startblock);
1786         da_new = 0;
1787
1788         /*
1789          * Set flags determining what part of the previous delayed allocation
1790          * extent is being replaced by a real allocation.
1791          */
1792         if (PREV.br_startoff == new->br_startoff)
1793                 state |= BMAP_LEFT_FILLING;
1794         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1795                 state |= BMAP_RIGHT_FILLING;
1796
1797         /*
1798          * Check and set flags if this segment has a left neighbor.
1799          * Don't set contiguous if the combined extent would be too large.
1800          */
1801         if (bma->idx > 0) {
1802                 state |= BMAP_LEFT_VALID;
1803                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
1804
1805                 if (isnullstartblock(LEFT.br_startblock))
1806                         state |= BMAP_LEFT_DELAY;
1807         }
1808
1809         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1810             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1811             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1812             LEFT.br_state == new->br_state &&
1813             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1814                 state |= BMAP_LEFT_CONTIG;
1815
1816         /*
1817          * Check and set flags if this segment has a right neighbor.
1818          * Don't set contiguous if the combined extent would be too large.
1819          * Also check for all-three-contiguous being too large.
1820          */
1821         if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1822                 state |= BMAP_RIGHT_VALID;
1823                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
1824
1825                 if (isnullstartblock(RIGHT.br_startblock))
1826                         state |= BMAP_RIGHT_DELAY;
1827         }
1828
1829         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1830             new_endoff == RIGHT.br_startoff &&
1831             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1832             new->br_state == RIGHT.br_state &&
1833             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1834             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1835                        BMAP_RIGHT_FILLING)) !=
1836                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1837                        BMAP_RIGHT_FILLING) ||
1838              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1839                         <= MAXEXTLEN))
1840                 state |= BMAP_RIGHT_CONTIG;
1841
1842         error = 0;
1843         /*
1844          * Switch out based on the FILLING and CONTIG state bits.
1845          */
1846         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1847                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1848         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1849              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1850                 /*
1851                  * Filling in all of a previously delayed allocation extent.
1852                  * The left and right neighbors are both contiguous with new.
1853                  */
1854                 bma->idx--;
1855                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1856                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1857                         LEFT.br_blockcount + PREV.br_blockcount +
1858                         RIGHT.br_blockcount);
1859                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1860
1861                 xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
1862                 bma->ip->i_d.di_nextents--;
1863                 if (bma->cur == NULL)
1864                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1865                 else {
1866                         rval = XFS_ILOG_CORE;
1867                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1868                                         RIGHT.br_startblock,
1869                                         RIGHT.br_blockcount, &i);
1870                         if (error)
1871                                 goto done;
1872                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1873                         error = xfs_btree_delete(bma->cur, &i);
1874                         if (error)
1875                                 goto done;
1876                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1877                         error = xfs_btree_decrement(bma->cur, 0, &i);
1878                         if (error)
1879                                 goto done;
1880                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1881                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1882                                         LEFT.br_startblock,
1883                                         LEFT.br_blockcount +
1884                                         PREV.br_blockcount +
1885                                         RIGHT.br_blockcount, LEFT.br_state);
1886                         if (error)
1887                                 goto done;
1888                 }
1889                 break;
1890
1891         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1892                 /*
1893                  * Filling in all of a previously delayed allocation extent.
1894                  * The left neighbor is contiguous, the right is not.
1895                  */
1896                 bma->idx--;
1897
1898                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1899                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1900                         LEFT.br_blockcount + PREV.br_blockcount);
1901                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1902
1903                 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1904                 if (bma->cur == NULL)
1905                         rval = XFS_ILOG_DEXT;
1906                 else {
1907                         rval = 0;
1908                         error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1909                                         LEFT.br_startblock, LEFT.br_blockcount,
1910                                         &i);
1911                         if (error)
1912                                 goto done;
1913                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1914                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1915                                         LEFT.br_startblock,
1916                                         LEFT.br_blockcount +
1917                                         PREV.br_blockcount, LEFT.br_state);
1918                         if (error)
1919                                 goto done;
1920                 }
1921                 break;
1922
1923         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1924                 /*
1925                  * Filling in all of a previously delayed allocation extent.
1926                  * The right neighbor is contiguous, the left is not.
1927                  */
1928                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1929                 xfs_bmbt_set_startblock(ep, new->br_startblock);
1930                 xfs_bmbt_set_blockcount(ep,
1931                         PREV.br_blockcount + RIGHT.br_blockcount);
1932                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1933
1934                 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1935                 if (bma->cur == NULL)
1936                         rval = XFS_ILOG_DEXT;
1937                 else {
1938                         rval = 0;
1939                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1940                                         RIGHT.br_startblock,
1941                                         RIGHT.br_blockcount, &i);
1942                         if (error)
1943                                 goto done;
1944                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1945                         error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
1946                                         new->br_startblock,
1947                                         PREV.br_blockcount +
1948                                         RIGHT.br_blockcount, PREV.br_state);
1949                         if (error)
1950                                 goto done;
1951                 }
1952                 break;
1953
1954         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1955                 /*
1956                  * Filling in all of a previously delayed allocation extent.
1957                  * Neither the left nor right neighbors are contiguous with
1958                  * the new one.
1959                  */
1960                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1961                 xfs_bmbt_set_startblock(ep, new->br_startblock);
1962                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1963
1964                 bma->ip->i_d.di_nextents++;
1965                 if (bma->cur == NULL)
1966                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1967                 else {
1968                         rval = XFS_ILOG_CORE;
1969                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
1970                                         new->br_startblock, new->br_blockcount,
1971                                         &i);
1972                         if (error)
1973                                 goto done;
1974                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1975                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1976                         error = xfs_btree_insert(bma->cur, &i);
1977                         if (error)
1978                                 goto done;
1979                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1980                 }
1981                 break;
1982
1983         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1984                 /*
1985                  * Filling in the first part of a previous delayed allocation.
1986                  * The left neighbor is contiguous.
1987                  */
1988                 trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1989                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
1990                         LEFT.br_blockcount + new->br_blockcount);
1991                 xfs_bmbt_set_startoff(ep,
1992                         PREV.br_startoff + new->br_blockcount);
1993                 trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1994
1995                 temp = PREV.br_blockcount - new->br_blockcount;
1996                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1997                 xfs_bmbt_set_blockcount(ep, temp);
1998                 if (bma->cur == NULL)
1999                         rval = XFS_ILOG_DEXT;
2000                 else {
2001                         rval = 0;
2002                         error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
2003                                         LEFT.br_startblock, LEFT.br_blockcount,
2004                                         &i);
2005                         if (error)
2006                                 goto done;
2007                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2008                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
2009                                         LEFT.br_startblock,
2010                                         LEFT.br_blockcount +
2011                                         new->br_blockcount,
2012                                         LEFT.br_state);
2013                         if (error)
2014                                 goto done;
2015                 }
2016                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2017                         startblockval(PREV.br_startblock));
2018                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2019                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2020
2021                 bma->idx--;
2022                 break;
2023
2024         case BMAP_LEFT_FILLING:
2025                 /*
2026                  * Filling in the first part of a previous delayed allocation.
2027                  * The left neighbor is not contiguous.
2028                  */
2029                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2030                 xfs_bmbt_set_startoff(ep, new_endoff);
2031                 temp = PREV.br_blockcount - new->br_blockcount;
2032                 xfs_bmbt_set_blockcount(ep, temp);
2033                 xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
2034                 bma->ip->i_d.di_nextents++;
2035                 if (bma->cur == NULL)
2036                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2037                 else {
2038                         rval = XFS_ILOG_CORE;
2039                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2040                                         new->br_startblock, new->br_blockcount,
2041                                         &i);
2042                         if (error)
2043                                 goto done;
2044                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2045                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2046                         error = xfs_btree_insert(bma->cur, &i);
2047                         if (error)
2048                                 goto done;
2049                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2050                 }
2051
2052                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
2053                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2054                                         bma->firstblock, bma->flist,
2055                                         &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
2056                         rval |= tmp_rval;
2057                         if (error)
2058                                 goto done;
2059                 }
2060                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2061                         startblockval(PREV.br_startblock) -
2062                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2063                 ep = xfs_iext_get_ext(ifp, bma->idx + 1);
2064                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2065                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2066                 break;
2067
2068         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2069                 /*
2070                  * Filling in the last part of a previous delayed allocation.
2071                  * The right neighbor is contiguous with the new allocation.
2072                  */
2073                 temp = PREV.br_blockcount - new->br_blockcount;
2074                 trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2075                 xfs_bmbt_set_blockcount(ep, temp);
2076                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
2077                         new->br_startoff, new->br_startblock,
2078                         new->br_blockcount + RIGHT.br_blockcount,
2079                         RIGHT.br_state);
2080                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2081                 if (bma->cur == NULL)
2082                         rval = XFS_ILOG_DEXT;
2083                 else {
2084                         rval = 0;
2085                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
2086                                         RIGHT.br_startblock,
2087                                         RIGHT.br_blockcount, &i);
2088                         if (error)
2089                                 goto done;
2090                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2091                         error = xfs_bmbt_update(bma->cur, new->br_startoff,
2092                                         new->br_startblock,
2093                                         new->br_blockcount +
2094                                         RIGHT.br_blockcount,
2095                                         RIGHT.br_state);
2096                         if (error)
2097                                 goto done;
2098                 }
2099
2100                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2101                         startblockval(PREV.br_startblock));
2102                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2103                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2104                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2105
2106                 bma->idx++;
2107                 break;
2108
2109         case BMAP_RIGHT_FILLING:
2110                 /*
2111                  * Filling in the last part of a previous delayed allocation.
2112                  * The right neighbor is not contiguous.
2113                  */
2114                 temp = PREV.br_blockcount - new->br_blockcount;
2115                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2116                 xfs_bmbt_set_blockcount(ep, temp);
2117                 xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
2118                 bma->ip->i_d.di_nextents++;
2119                 if (bma->cur == NULL)
2120                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2121                 else {
2122                         rval = XFS_ILOG_CORE;
2123                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2124                                         new->br_startblock, new->br_blockcount,
2125                                         &i);
2126                         if (error)
2127                                 goto done;
2128                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2129                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2130                         error = xfs_btree_insert(bma->cur, &i);
2131                         if (error)
2132                                 goto done;
2133                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2134                 }
2135
2136                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
2137                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2138                                 bma->firstblock, bma->flist, &bma->cur, 1,
2139                                 &tmp_rval, XFS_DATA_FORK);
2140                         rval |= tmp_rval;
2141                         if (error)
2142                                 goto done;
2143                 }
2144                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2145                         startblockval(PREV.br_startblock) -
2146                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2147                 ep = xfs_iext_get_ext(ifp, bma->idx);
2148                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2149                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2150
2151                 bma->idx++;
2152                 break;
2153
2154         case 0:
2155                 /*
2156                  * Filling in the middle part of a previous delayed allocation.
2157                  * Contiguity is impossible here.
2158                  * This case is avoided almost all the time.
2159                  *
2160                  * We start with a delayed allocation:
2161                  *
2162                  * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
2163                  *  PREV @ idx
2164                  *
2165                  * and we are allocating:
2166                  *                     +rrrrrrrrrrrrrrrrr+
2167                  *                            new
2168                  *
2169                  * and we set it up for insertion as:
2170                  * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
2171                  *                            new
2172                  *  PREV @ idx          LEFT              RIGHT
2173                  *                      inserted at idx + 1
2174                  */
2175                 temp = new->br_startoff - PREV.br_startoff;
2176                 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
2177                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
2178                 xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
2179                 LEFT = *new;
2180                 RIGHT.br_state = PREV.br_state;
2181                 RIGHT.br_startblock = nullstartblock(
2182                                 (int)xfs_bmap_worst_indlen(bma->ip, temp2));
2183                 RIGHT.br_startoff = new_endoff;
2184                 RIGHT.br_blockcount = temp2;
2185                 /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
2186                 xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
2187                 bma->ip->i_d.di_nextents++;
2188                 if (bma->cur == NULL)
2189                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2190                 else {
2191                         rval = XFS_ILOG_CORE;
2192                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2193                                         new->br_startblock, new->br_blockcount,
2194                                         &i);
2195                         if (error)
2196                                 goto done;
2197                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2198                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2199                         error = xfs_btree_insert(bma->cur, &i);
2200                         if (error)
2201                                 goto done;
2202                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2203                 }
2204
2205                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
2206                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2207                                         bma->firstblock, bma->flist, &bma->cur,
2208                                         1, &tmp_rval, XFS_DATA_FORK);
2209                         rval |= tmp_rval;
2210                         if (error)
2211                                 goto done;
2212                 }
2213                 temp = xfs_bmap_worst_indlen(bma->ip, temp);
2214                 temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
2215                 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
2216                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2217                 if (diff > 0) {
2218                         error = xfs_icsb_modify_counters(bma->ip->i_mount,
2219                                         XFS_SBS_FDBLOCKS,
2220                                         -((int64_t)diff), 0);
2221                         ASSERT(!error);
2222                         if (error)
2223                                 goto done;
2224                 }
2225
2226                 ep = xfs_iext_get_ext(ifp, bma->idx);
2227                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2228                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2229                 trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2230                 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
2231                         nullstartblock((int)temp2));
2232                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2233
2234                 bma->idx++;
2235                 da_new = temp + temp2;
2236                 break;
2237
2238         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2239         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2240         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2241         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2242         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2243         case BMAP_LEFT_CONTIG:
2244         case BMAP_RIGHT_CONTIG:
2245                 /*
2246                  * These cases are all impossible.
2247                  */
2248                 ASSERT(0);
2249         }
2250
2251         /* convert to a btree if necessary */
2252         if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
2253                 int     tmp_logflags;   /* partial log flag return val */
2254
2255                 ASSERT(bma->cur == NULL);
2256                 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2257                                 bma->firstblock, bma->flist, &bma->cur,
2258                                 da_old > 0, &tmp_logflags, XFS_DATA_FORK);
2259                 bma->logflags |= tmp_logflags;
2260                 if (error)
2261                         goto done;
2262         }
2263
2264         /* adjust for changes in reserved delayed indirect blocks */
2265         if (da_old || da_new) {
2266                 temp = da_new;
2267                 if (bma->cur)
2268                         temp += bma->cur->bc_private.b.allocated;
2269                 ASSERT(temp <= da_old);
2270                 if (temp < da_old)
2271                         xfs_icsb_modify_counters(bma->ip->i_mount,
2272                                         XFS_SBS_FDBLOCKS,
2273                                         (int64_t)(da_old - temp), 0);
2274         }
2275
2276         /* clear out the allocated field, done with it now in any case. */
2277         if (bma->cur)
2278                 bma->cur->bc_private.b.allocated = 0;
2279
2280         xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
2281 done:
2282         bma->logflags |= rval;
2283         return error;
2284 #undef  LEFT
2285 #undef  RIGHT
2286 #undef  PREV
2287 }
2288
2289 /*
2290  * Convert an unwritten allocation to a real allocation or vice versa.
2291  */
2292 STATIC int                              /* error */
2293 xfs_bmap_add_extent_unwritten_real(
2294         struct xfs_trans        *tp,
2295         xfs_inode_t             *ip,    /* incore inode pointer */
2296         xfs_extnum_t            *idx,   /* extent number to update/insert */
2297         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
2298         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
2299         xfs_fsblock_t           *first, /* pointer to firstblock variable */
2300         xfs_bmap_free_t         *flist, /* list of extents to be freed */
2301         int                     *logflagsp) /* inode logging flags */
2302 {
2303         xfs_btree_cur_t         *cur;   /* btree cursor */
2304         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
2305         int                     error;  /* error return value */
2306         int                     i;      /* temp state */
2307         xfs_ifork_t             *ifp;   /* inode fork pointer */
2308         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2309         xfs_exntst_t            newext; /* new extent state */
2310         xfs_exntst_t            oldext; /* old extent state */
2311         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2312                                         /* left is 0, right is 1, prev is 2 */
2313         int                     rval=0; /* return value (logging flags) */
2314         int                     state = 0;/* state bits, accessed thru macros */
2315         struct xfs_mount        *mp = tp->t_mountp;
2316
2317         *logflagsp = 0;
2318
2319         cur = *curp;
2320         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
2321
2322         ASSERT(*idx >= 0);
2323         ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2324         ASSERT(!isnullstartblock(new->br_startblock));
2325
2326         XFS_STATS_INC(xs_add_exlist);
2327
2328 #define LEFT            r[0]
2329 #define RIGHT           r[1]
2330 #define PREV            r[2]
2331
2332         /*
2333          * Set up a bunch of variables to make the tests simpler.
2334          */
2335         error = 0;
2336         ep = xfs_iext_get_ext(ifp, *idx);
2337         xfs_bmbt_get_all(ep, &PREV);
2338         newext = new->br_state;
2339         oldext = (newext == XFS_EXT_UNWRITTEN) ?
2340                 XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
2341         ASSERT(PREV.br_state == oldext);
2342         new_endoff = new->br_startoff + new->br_blockcount;
2343         ASSERT(PREV.br_startoff <= new->br_startoff);
2344         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2345
2346         /*
2347          * Set flags determining what part of the previous oldext allocation
2348          * extent is being replaced by a newext allocation.
2349          */
2350         if (PREV.br_startoff == new->br_startoff)
2351                 state |= BMAP_LEFT_FILLING;
2352         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2353                 state |= BMAP_RIGHT_FILLING;
2354
2355         /*
2356          * Check and set flags if this segment has a left neighbor.
2357          * Don't set contiguous if the combined extent would be too large.
2358          */
2359         if (*idx > 0) {
2360                 state |= BMAP_LEFT_VALID;
2361                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
2362
2363                 if (isnullstartblock(LEFT.br_startblock))
2364                         state |= BMAP_LEFT_DELAY;
2365         }
2366
2367         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2368             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2369             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2370             LEFT.br_state == newext &&
2371             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2372                 state |= BMAP_LEFT_CONTIG;
2373
2374         /*
2375          * Check and set flags if this segment has a right neighbor.
2376          * Don't set contiguous if the combined extent would be too large.
2377          * Also check for all-three-contiguous being too large.
2378          */
2379         if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
2380                 state |= BMAP_RIGHT_VALID;
2381                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
2382                 if (isnullstartblock(RIGHT.br_startblock))
2383                         state |= BMAP_RIGHT_DELAY;
2384         }
2385
2386         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2387             new_endoff == RIGHT.br_startoff &&
2388             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2389             newext == RIGHT.br_state &&
2390             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2391             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2392                        BMAP_RIGHT_FILLING)) !=
2393                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2394                        BMAP_RIGHT_FILLING) ||
2395              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2396                         <= MAXEXTLEN))
2397                 state |= BMAP_RIGHT_CONTIG;
2398
2399         /*
2400          * Switch out based on the FILLING and CONTIG state bits.
2401          */
2402         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2403                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2404         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2405              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2406                 /*
2407                  * Setting all of a previous oldext extent to newext.
2408                  * The left and right neighbors are both contiguous with new.
2409                  */
2410                 --*idx;
2411
2412                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2413                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2414                         LEFT.br_blockcount + PREV.br_blockcount +
2415                         RIGHT.br_blockcount);
2416                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2417
2418                 xfs_iext_remove(ip, *idx + 1, 2, state);
2419                 ip->i_d.di_nextents -= 2;
2420                 if (cur == NULL)
2421                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2422                 else {
2423                         rval = XFS_ILOG_CORE;
2424                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2425                                         RIGHT.br_startblock,
2426                                         RIGHT.br_blockcount, &i)))
2427                                 goto done;
2428                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2429                         if ((error = xfs_btree_delete(cur, &i)))
2430                                 goto done;
2431                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2432                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2433                                 goto done;
2434                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2435                         if ((error = xfs_btree_delete(cur, &i)))
2436                                 goto done;
2437                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2438                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2439                                 goto done;
2440                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2441                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2442                                 LEFT.br_startblock,
2443                                 LEFT.br_blockcount + PREV.br_blockcount +
2444                                 RIGHT.br_blockcount, LEFT.br_state)))
2445                                 goto done;
2446                 }
2447                 break;
2448
2449         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2450                 /*
2451                  * Setting all of a previous oldext extent to newext.
2452                  * The left neighbor is contiguous, the right is not.
2453                  */
2454                 --*idx;
2455
2456                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2457                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2458                         LEFT.br_blockcount + PREV.br_blockcount);
2459                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2460
2461                 xfs_iext_remove(ip, *idx + 1, 1, state);
2462                 ip->i_d.di_nextents--;
2463                 if (cur == NULL)
2464                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2465                 else {
2466                         rval = XFS_ILOG_CORE;
2467                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2468                                         PREV.br_startblock, PREV.br_blockcount,
2469                                         &i)))
2470                                 goto done;
2471                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2472                         if ((error = xfs_btree_delete(cur, &i)))
2473                                 goto done;
2474                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2475                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2476                                 goto done;
2477                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2478                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2479                                 LEFT.br_startblock,
2480                                 LEFT.br_blockcount + PREV.br_blockcount,
2481                                 LEFT.br_state)))
2482                                 goto done;
2483                 }
2484                 break;
2485
2486         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2487                 /*
2488                  * Setting all of a previous oldext extent to newext.
2489                  * The right neighbor is contiguous, the left is not.
2490                  */
2491                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2492                 xfs_bmbt_set_blockcount(ep,
2493                         PREV.br_blockcount + RIGHT.br_blockcount);
2494                 xfs_bmbt_set_state(ep, newext);
2495                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2496                 xfs_iext_remove(ip, *idx + 1, 1, state);
2497                 ip->i_d.di_nextents--;
2498                 if (cur == NULL)
2499                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2500                 else {
2501                         rval = XFS_ILOG_CORE;
2502                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2503                                         RIGHT.br_startblock,
2504                                         RIGHT.br_blockcount, &i)))
2505                                 goto done;
2506                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2507                         if ((error = xfs_btree_delete(cur, &i)))
2508                                 goto done;
2509                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2510                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2511                                 goto done;
2512                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2513                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2514                                 new->br_startblock,
2515                                 new->br_blockcount + RIGHT.br_blockcount,
2516                                 newext)))
2517                                 goto done;
2518                 }
2519                 break;
2520
2521         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2522                 /*
2523                  * Setting all of a previous oldext extent to newext.
2524                  * Neither the left nor right neighbors are contiguous with
2525                  * the new one.
2526                  */
2527                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2528                 xfs_bmbt_set_state(ep, newext);
2529                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2530
2531                 if (cur == NULL)
2532                         rval = XFS_ILOG_DEXT;
2533                 else {
2534                         rval = 0;
2535                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2536                                         new->br_startblock, new->br_blockcount,
2537                                         &i)))
2538                                 goto done;
2539                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2540                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2541                                 new->br_startblock, new->br_blockcount,
2542                                 newext)))
2543                                 goto done;
2544                 }
2545                 break;
2546
2547         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2548                 /*
2549                  * Setting the first part of a previous oldext extent to newext.
2550                  * The left neighbor is contiguous.
2551                  */
2552                 trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
2553                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
2554                         LEFT.br_blockcount + new->br_blockcount);
2555                 xfs_bmbt_set_startoff(ep,
2556                         PREV.br_startoff + new->br_blockcount);
2557                 trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
2558
2559                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2560                 xfs_bmbt_set_startblock(ep,
2561                         new->br_startblock + new->br_blockcount);
2562                 xfs_bmbt_set_blockcount(ep,
2563                         PREV.br_blockcount - new->br_blockcount);
2564                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2565
2566                 --*idx;
2567
2568                 if (cur == NULL)
2569                         rval = XFS_ILOG_DEXT;
2570                 else {
2571                         rval = 0;
2572                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2573                                         PREV.br_startblock, PREV.br_blockcount,
2574                                         &i)))
2575                                 goto done;
2576                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2577                         if ((error = xfs_bmbt_update(cur,
2578                                 PREV.br_startoff + new->br_blockcount,
2579                                 PREV.br_startblock + new->br_blockcount,
2580                                 PREV.br_blockcount - new->br_blockcount,
2581                                 oldext)))
2582                                 goto done;
2583                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2584                                 goto done;
2585                         error = xfs_bmbt_update(cur, LEFT.br_startoff,
2586                                 LEFT.br_startblock,
2587                                 LEFT.br_blockcount + new->br_blockcount,
2588                                 LEFT.br_state);
2589                         if (error)
2590                                 goto done;
2591                 }
2592                 break;
2593
2594         case BMAP_LEFT_FILLING:
2595                 /*
2596                  * Setting the first part of a previous oldext extent to newext.
2597                  * The left neighbor is not contiguous.
2598                  */
2599                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2600                 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
2601                 xfs_bmbt_set_startoff(ep, new_endoff);
2602                 xfs_bmbt_set_blockcount(ep,
2603                         PREV.br_blockcount - new->br_blockcount);
2604                 xfs_bmbt_set_startblock(ep,
2605                         new->br_startblock + new->br_blockcount);
2606                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2607
2608                 xfs_iext_insert(ip, *idx, 1, new, state);
2609                 ip->i_d.di_nextents++;
2610                 if (cur == NULL)
2611                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2612                 else {
2613                         rval = XFS_ILOG_CORE;
2614                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2615                                         PREV.br_startblock, PREV.br_blockcount,
2616                                         &i)))
2617                                 goto done;
2618                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2619                         if ((error = xfs_bmbt_update(cur,
2620                                 PREV.br_startoff + new->br_blockcount,
2621                                 PREV.br_startblock + new->br_blockcount,
2622                                 PREV.br_blockcount - new->br_blockcount,
2623                                 oldext)))
2624                                 goto done;
2625                         cur->bc_rec.b = *new;
2626                         if ((error = xfs_btree_insert(cur, &i)))
2627                                 goto done;
2628                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2629                 }
2630                 break;
2631
2632         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2633                 /*
2634                  * Setting the last part of a previous oldext extent to newext.
2635                  * The right neighbor is contiguous with the new allocation.
2636                  */
2637                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2638                 xfs_bmbt_set_blockcount(ep,
2639                         PREV.br_blockcount - new->br_blockcount);
2640                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2641
2642                 ++*idx;
2643
2644                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2645                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
2646                         new->br_startoff, new->br_startblock,
2647                         new->br_blockcount + RIGHT.br_blockcount, newext);
2648                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2649
2650                 if (cur == NULL)
2651                         rval = XFS_ILOG_DEXT;
2652                 else {
2653                         rval = 0;
2654                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2655                                         PREV.br_startblock,
2656                                         PREV.br_blockcount, &i)))
2657                                 goto done;
2658                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2659                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2660                                 PREV.br_startblock,
2661                                 PREV.br_blockcount - new->br_blockcount,
2662                                 oldext)))
2663                                 goto done;
2664                         if ((error = xfs_btree_increment(cur, 0, &i)))
2665                                 goto done;
2666                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2667                                 new->br_startblock,
2668                                 new->br_blockcount + RIGHT.br_blockcount,
2669                                 newext)))
2670                                 goto done;
2671                 }
2672                 break;
2673
2674         case BMAP_RIGHT_FILLING:
2675                 /*
2676                  * Setting the last part of a previous oldext extent to newext.
2677                  * The right neighbor is not contiguous.
2678                  */
2679                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2680                 xfs_bmbt_set_blockcount(ep,
2681                         PREV.br_blockcount - new->br_blockcount);
2682                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2683
2684                 ++*idx;
2685                 xfs_iext_insert(ip, *idx, 1, new, state);
2686
2687                 ip->i_d.di_nextents++;
2688                 if (cur == NULL)
2689                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2690                 else {
2691                         rval = XFS_ILOG_CORE;
2692                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2693                                         PREV.br_startblock, PREV.br_blockcount,
2694                                         &i)))
2695                                 goto done;
2696                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2697                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2698                                 PREV.br_startblock,
2699                                 PREV.br_blockcount - new->br_blockcount,
2700                                 oldext)))
2701                                 goto done;
2702                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2703                                         new->br_startblock, new->br_blockcount,
2704                                         &i)))
2705                                 goto done;
2706                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2707                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
2708                         if ((error = xfs_btree_insert(cur, &i)))
2709                                 goto done;
2710                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2711                 }
2712                 break;
2713
2714         case 0:
2715                 /*
2716                  * Setting the middle part of a previous oldext extent to
2717                  * newext.  Contiguity is impossible here.
2718                  * One extent becomes three extents.
2719                  */
2720                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2721                 xfs_bmbt_set_blockcount(ep,
2722                         new->br_startoff - PREV.br_startoff);
2723                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2724
2725                 r[0] = *new;
2726                 r[1].br_startoff = new_endoff;
2727                 r[1].br_blockcount =
2728                         PREV.br_startoff + PREV.br_blockcount - new_endoff;
2729                 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2730                 r[1].br_state = oldext;
2731
2732                 ++*idx;
2733                 xfs_iext_insert(ip, *idx, 2, &r[0], state);
2734
2735                 ip->i_d.di_nextents += 2;
2736                 if (cur == NULL)
2737                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2738                 else {
2739                         rval = XFS_ILOG_CORE;
2740                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2741                                         PREV.br_startblock, PREV.br_blockcount,
2742                                         &i)))
2743                                 goto done;
2744                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2745                         /* new right extent - oldext */
2746                         if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
2747                                 r[1].br_startblock, r[1].br_blockcount,
2748                                 r[1].br_state)))
2749                                 goto done;
2750                         /* new left extent - oldext */
2751                         cur->bc_rec.b = PREV;
2752                         cur->bc_rec.b.br_blockcount =
2753                                 new->br_startoff - PREV.br_startoff;
2754                         if ((error = xfs_btree_insert(cur, &i)))
2755                                 goto done;
2756                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2757                         /*
2758                          * Reset the cursor to the position of the new extent
2759                          * we are about to insert as we can't trust it after
2760                          * the previous insert.
2761                          */
2762                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2763                                         new->br_startblock, new->br_blockcount,
2764                                         &i)))
2765                                 goto done;
2766                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2767                         /* new middle extent - newext */
2768                         cur->bc_rec.b.br_state = new->br_state;
2769                         if ((error = xfs_btree_insert(cur, &i)))
2770                                 goto done;
2771                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2772                 }
2773                 break;
2774
2775         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2776         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2777         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2778         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2779         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2780         case BMAP_LEFT_CONTIG:
2781         case BMAP_RIGHT_CONTIG:
2782                 /*
2783                  * These cases are all impossible.
2784                  */
2785                 ASSERT(0);
2786         }
2787
2788         /* convert to a btree if necessary */
2789         if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
2790                 int     tmp_logflags;   /* partial log flag return val */
2791
2792                 ASSERT(cur == NULL);
2793                 error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
2794                                 0, &tmp_logflags, XFS_DATA_FORK);
2795                 *logflagsp |= tmp_logflags;
2796                 if (error)
2797                         goto done;
2798         }
2799
2800         /* clear out the allocated field, done with it now in any case. */
2801         if (cur) {
2802                 cur->bc_private.b.allocated = 0;
2803                 *curp = cur;
2804         }
2805
2806         xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
2807 done:
2808         *logflagsp |= rval;
2809         return error;
2810 #undef  LEFT
2811 #undef  RIGHT
2812 #undef  PREV
2813 }
2814
/*
 * Convert a hole to a delayed allocation.
 *
 * Insert the new delalloc extent @new at index @idx in the data fork of
 * @ip, merging it with any adjacent delalloc neighbours where the combined
 * length stays within MAXEXTLEN.  The worst-case indirect block reservation
 * is recomputed for the merged extent and any surplus is returned to the
 * in-core free block counter.
 *
 * This is a purely in-core operation: no transaction and no bmap btree is
 * involved, hence no error return.
 */
STATIC void
xfs_bmap_add_extent_hole_delay(
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_extnum_t		*idx,	/* extent number to update/insert */
	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
{
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
	xfs_filblks_t		newlen=0;	/* new indirect size */
	xfs_filblks_t		oldlen=0;	/* old indirect size */
	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
	int			state;	/* state bits, accessed thru macros */
	xfs_filblks_t		temp=0;	/* temp for indirect calculations */

	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	state = 0;
	/* the incoming extent must itself be a delayed allocation */
	ASSERT(isnullstartblock(new->br_startblock));

	/*
	 * Check and set flags if this segment has a left neighbor
	 */
	if (*idx > 0) {
		state |= BMAP_LEFT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);

		if (isnullstartblock(left.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	/*
	 * Check and set flags if the current (right) segment exists.
	 * If it doesn't exist, we're converting the hole at end-of-file.
	 */
	if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
		state |= BMAP_RIGHT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);

		if (isnullstartblock(right.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	/*
	 * Set contiguity flags on the left and right neighbors.
	 * Don't let extents get too large, even if the pieces are contiguous.
	 * Only delalloc neighbours (BMAP_*_DELAY) are merge candidates here.
	 */
	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
	    left.br_startoff + left.br_blockcount == new->br_startoff &&
	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
	    new->br_startoff + new->br_blockcount == right.br_startoff &&
	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
	    (!(state & BMAP_LEFT_CONTIG) ||
	     (left.br_blockcount + new->br_blockcount +
	      right.br_blockcount <= MAXEXTLEN)))
		state |= BMAP_RIGHT_CONTIG;

	/*
	 * Switch out based on the contiguity flags.
	 */
	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with delayed allocations
		 * on the left and on the right.
		 * Merge all three into a single extent record.
		 */
		--*idx;
		temp = left.br_blockcount + new->br_blockcount +
			right.br_blockcount;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
		/*
		 * Sum the three individual worst-case indlen reservations,
		 * then recompute a single reservation for the merged extent.
		 */
		oldlen = startblockval(left.br_startblock) +
			startblockval(new->br_startblock) +
			startblockval(right.br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
			nullstartblock((int)newlen));
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		/* the right neighbour record is now redundant */
		xfs_iext_remove(ip, *idx + 1, 1, state);
		break;

	case BMAP_LEFT_CONTIG:
		/*
		 * New allocation is contiguous with a delayed allocation
		 * on the left.
		 * Merge the new allocation with the left neighbor.
		 */
		--*idx;
		temp = left.br_blockcount + new->br_blockcount;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
		oldlen = startblockval(left.br_startblock) +
			startblockval(new->br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
			nullstartblock((int)newlen));
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		break;

	case BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with a delayed allocation
		 * on the right.
		 * Merge the new allocation with the right neighbor.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		temp = new->br_blockcount + right.br_blockcount;
		oldlen = startblockval(new->br_startblock) +
			startblockval(right.br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
			new->br_startoff,
			nullstartblock((int)newlen), temp, right.br_state);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		break;

	case 0:
		/*
		 * New allocation is not contiguous with another
		 * delayed allocation.
		 * Insert a new entry.
		 */
		oldlen = newlen = 0;
		xfs_iext_insert(ip, *idx, 1, new, state);
		break;
	}
	if (oldlen != newlen) {
		/*
		 * Merging can only shrink the total worst-case indirect
		 * reservation; return the surplus to the free block count.
		 */
		ASSERT(oldlen > newlen);
		xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
			(int64_t)(oldlen - newlen), 0);
		/*
		 * Nothing to do for disk quota accounting here.
		 */
	}
}
2958
/*
 * Convert a hole to a real allocation.
 *
 * Insert the real extent bma->got at index bma->idx of @whichfork of
 * bma->ip, merging it with contiguous real neighbours where the combined
 * length stays within MAXEXTLEN.  If a bmap btree cursor (bma->cur) exists,
 * the same change is applied to the on-disk btree; if the extent count now
 * exceeds the inline extent limit the fork is converted to btree format.
 * Inode logging flags are accumulated into bma->logflags.
 *
 * Returns 0 on success or a negative errno on btree/corruption failure.
 */
STATIC int				/* error */
xfs_bmap_add_extent_hole_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
	int			rval=0;	/* return value (logging flags) */
	int			state;	/* state bits, accessed thru macros */
	struct xfs_mount	*mp;

	/* NOTE: mp is NULL when no transaction is attached to bma */
	mp = bma->tp ? bma->tp->t_mountp : NULL;
	ifp = XFS_IFORK_PTR(bma->ip, whichfork);

	ASSERT(bma->idx >= 0);
	ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
	/* must be a real extent; delalloc goes via the hole_delay path */
	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur ||
	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

	XFS_STATS_INC(xs_add_exlist);

	state = 0;
	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 */
	if (bma->idx > 0) {
		state |= BMAP_LEFT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
		if (isnullstartblock(left.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	/*
	 * Check and set flags if this segment has a current value.
	 * Not true if we're inserting into the "hole" at eof.
	 */
	if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
		state |= BMAP_RIGHT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
		if (isnullstartblock(right.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	/*
	 * We're inserting a real allocation between "left" and "right".
	 * Set the contiguity flags.  Don't let extents get too large.
	 * A neighbour is mergeable only if it is real (not delalloc),
	 * adjacent in both file offset and disk block, and in the same
	 * written/unwritten state.
	 */
	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    left.br_startoff + left.br_blockcount == new->br_startoff &&
	    left.br_startblock + left.br_blockcount == new->br_startblock &&
	    left.br_state == new->br_state &&
	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new->br_startoff + new->br_blockcount == right.br_startoff &&
	    new->br_startblock + new->br_blockcount == right.br_startblock &&
	    new->br_state == right.br_state &&
	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
	    (!(state & BMAP_LEFT_CONTIG) ||
	     left.br_blockcount + new->br_blockcount +
	     right.br_blockcount <= MAXEXTLEN))
		state |= BMAP_RIGHT_CONTIG;

	error = 0;
	/*
	 * Select which case we're in here, and implement it.
	 */
	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with real allocations on the
		 * left and on the right.
		 * Merge all three into a single extent record.
		 */
		--bma->idx;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
			left.br_blockcount + new->br_blockcount +
			right.br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);

		/* three records collapsed into one */
		XFS_IFORK_NEXT_SET(bma->ip, whichfork,
			XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
		if (bma->cur == NULL) {
			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
		} else {
			rval = XFS_ILOG_CORE;
			/*
			 * Mirror the merge in the btree: delete the right
			 * record, then widen the left record to cover all
			 * three pieces.
			 */
			error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
					right.br_startblock, right.br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, left.br_startoff,
					left.br_startblock,
					left.br_blockcount +
						new->br_blockcount +
						right.br_blockcount,
					left.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_CONTIG:
		/*
		 * New allocation is contiguous with a real allocation
		 * on the left.
		 * Merge the new allocation with the left neighbor.
		 */
		--bma->idx;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
			left.br_blockcount + new->br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		if (bma->cur == NULL) {
			/* extent count unchanged: no core log flag needed */
			rval = xfs_ilog_fext(whichfork);
		} else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
					left.br_startblock, left.br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, left.br_startoff,
					left.br_startblock,
					left.br_blockcount +
						new->br_blockcount,
					left.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with a real allocation
		 * on the right.
		 * Merge the new allocation with the right neighbor.
		 */
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
			new->br_startoff, new->br_startblock,
			new->br_blockcount + right.br_blockcount,
			right.br_state);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		if (bma->cur == NULL) {
			rval = xfs_ilog_fext(whichfork);
		} else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur,
					right.br_startoff,
					right.br_startblock,
					right.br_blockcount, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, new->br_startoff,
					new->br_startblock,
					new->br_blockcount +
						right.br_blockcount,
					right.br_state);
			if (error)
				goto done;
		}
		break;

	case 0:
		/*
		 * New allocation is not contiguous with another
		 * real allocation.
		 * Insert a new entry.
		 */
		xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
		XFS_IFORK_NEXT_SET(bma->ip, whichfork,
			XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
		if (bma->cur == NULL) {
			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
		} else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur,
					new->br_startoff,
					new->br_startblock,
					new->br_blockcount, &i);
			if (error)
				goto done;
			/* the record must NOT already exist before insert */
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			bma->cur->bc_rec.b.br_state = new->br_state;
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;
	}

	/* convert to a btree if necessary */
	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
		int	tmp_logflags;	/* partial log flag return val */

		ASSERT(bma->cur == NULL);
		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				bma->firstblock, bma->flist, &bma->cur,
				0, &tmp_logflags, whichfork);
		/* record partial progress even if the conversion failed */
		bma->logflags |= tmp_logflags;
		if (error)
			goto done;
	}

	/* clear out the allocated field, done with it now in any case. */
	if (bma->cur)
		bma->cur->bc_private.b.allocated = 0;

	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
	bma->logflags |= rval;
	return error;
}
3202
3203 /*
3204  * Functions used in the extent read, allocate and remove paths
3205  */
3206
3207 /*
3208  * Adjust the size of the new extent based on di_extsize and rt extsize.
3209  */
3210 int
3211 xfs_bmap_extsize_align(
3212         xfs_mount_t     *mp,
3213         xfs_bmbt_irec_t *gotp,          /* next extent pointer */
3214         xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
3215         xfs_extlen_t    extsz,          /* align to this extent size */
3216         int             rt,             /* is this a realtime inode? */
3217         int             eof,            /* is extent at end-of-file? */
3218         int             delay,          /* creating delalloc extent? */
3219         int             convert,        /* overwriting unwritten extent? */
3220         xfs_fileoff_t   *offp,          /* in/out: aligned offset */
3221         xfs_extlen_t    *lenp)          /* in/out: aligned length */
3222 {
3223         xfs_fileoff_t   orig_off;       /* original offset */
3224         xfs_extlen_t    orig_alen;      /* original length */
3225         xfs_fileoff_t   orig_end;       /* original off+len */
3226         xfs_fileoff_t   nexto;          /* next file offset */
3227         xfs_fileoff_t   prevo;          /* previous file offset */
3228         xfs_fileoff_t   align_off;      /* temp for offset */
3229         xfs_extlen_t    align_alen;     /* temp for length */
3230         xfs_extlen_t    temp;           /* temp for calculations */
3231
3232         if (convert)
3233                 return 0;
3234
3235         orig_off = align_off = *offp;
3236         orig_alen = align_alen = *lenp;
3237         orig_end = orig_off + orig_alen;
3238
3239         /*
3240          * If this request overlaps an existing extent, then don't
3241          * attempt to perform any additional alignment.
3242          */
3243         if (!delay && !eof &&
3244             (orig_off >= gotp->br_startoff) &&
3245             (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
3246                 return 0;
3247         }
3248
3249         /*
3250          * If the file offset is unaligned vs. the extent size
3251          * we need to align it.  This will be possible unless
3252          * the file was previously written with a kernel that didn't
3253          * perform this alignment, or if a truncate shot us in the
3254          * foot.
3255          */
3256         temp = do_mod(orig_off, extsz);
3257         if (temp) {
3258                 align_alen += temp;
3259                 align_off -= temp;
3260         }
3261         /*
3262          * Same adjustment for the end of the requested area.
3263          */
3264         if ((temp = (align_alen % extsz))) {
3265                 align_alen += extsz - temp;
3266         }
3267         /*
3268          * If the previous block overlaps with this proposed allocation
3269          * then move the start forward without adjusting the length.
3270          */
3271         if (prevp->br_startoff != NULLFILEOFF) {
3272                 if (prevp->br_startblock == HOLESTARTBLOCK)
3273                         prevo = prevp->br_startoff;
3274                 else
3275                         prevo = prevp->br_startoff + prevp->br_blockcount;
3276         } else
3277                 prevo = 0;
3278         if (align_off != orig_off && align_off < prevo)
3279                 align_off = prevo;
3280         /*
3281          * If the next block overlaps with this proposed allocation
3282          * then move the start back without adjusting the length,
3283          * but not before offset 0.
3284          * This may of course make the start overlap previous block,
3285          * and if we hit the offset 0 limit then the next block
3286          * can still overlap too.
3287          */
3288         if (!eof && gotp->br_startoff != NULLFILEOFF) {
3289                 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3290                     (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3291                         nexto = gotp->br_startoff + gotp->br_blockcount;
3292                 else
3293                         nexto = gotp->br_startoff;
3294         } else
3295                 nexto = NULLFILEOFF;
3296         if (!eof &&
3297             align_off + align_alen != orig_end &&
3298             align_off + align_alen > nexto)
3299                 align_off = nexto > align_alen ? nexto - align_alen : 0;
3300         /*
3301          * If we're now overlapping the next or previous extent that
3302          * means we can't fit an extsz piece in this hole.  Just move
3303          * the start forward to the first valid spot and set
3304          * the length so we hit the end.
3305          */
3306         if (align_off != orig_off && align_off < prevo)
3307                 align_off = prevo;
3308         if (align_off + align_alen != orig_end &&
3309             align_off + align_alen > nexto &&
3310             nexto != NULLFILEOFF) {
3311                 ASSERT(nexto > prevo);
3312                 align_alen = nexto - align_off;
3313         }
3314
3315         /*
3316          * If realtime, and the result isn't a multiple of the realtime
3317          * extent size we need to remove blocks until it is.
3318          */
3319         if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3320                 /*
3321                  * We're not covering the original request, or
3322                  * we won't be able to once we fix the length.
3323                  */
3324                 if (orig_off < align_off ||
3325                     orig_end > align_off + align_alen ||
3326                     align_alen - temp < orig_alen)
3327                         return -EINVAL;
3328                 /*
3329                  * Try to fix it by moving the start up.
3330                  */
3331                 if (align_off + temp <= orig_off) {
3332                         align_alen -= temp;
3333                         align_off += temp;
3334                 }
3335                 /*
3336                  * Try to fix it by moving the end in.
3337                  */
3338                 else if (align_off + align_alen - temp >= orig_end)
3339                         align_alen -= temp;
3340                 /*
3341                  * Set the start to the minimum then trim the length.
3342                  */
3343                 else {
3344                         align_alen -= orig_off - align_off;
3345                         align_off = orig_off;
3346                         align_alen -= align_alen % mp->m_sb.sb_rextsize;
3347                 }
3348                 /*
3349                  * Result doesn't cover the request, fail it.
3350                  */
3351                 if (orig_off < align_off || orig_end > align_off + align_alen)
3352                         return -EINVAL;
3353         } else {
3354                 ASSERT(orig_off >= align_off);
3355                 ASSERT(orig_end <= align_off + align_alen);
3356         }
3357
3358 #ifdef DEBUG
3359         if (!eof && gotp->br_startoff != NULLFILEOFF)
3360                 ASSERT(align_off + align_alen <= gotp->br_startoff);
3361         if (prevp->br_startoff != NULLFILEOFF)
3362                 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3363 #endif
3364
3365         *lenp = align_alen;
3366         *offp = align_off;
3367         return 0;
3368 }
3369
3370 #define XFS_ALLOC_GAP_UNITS     4
3371
/*
 * Pick a requested starting block (ap->blkno) that is adjacent to this
 * file's existing extents.  If we are allocating at EOF we try to extend
 * the previous extent; otherwise we compare candidate start blocks
 * derived from the left (prev) and right (got) neighbours and keep the
 * one with the smaller gap.  For non-realtime files the candidate must
 * stay in the AG pinned by ap->firstblock (when it is set); for realtime
 * files it must stay inside the realtime device.
 */
void
xfs_bmap_adjacent(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
	xfs_mount_t	*mp;		/* mount point structure */
	int		nullfb;		/* true if ap->firstblock isn't set */
	int		rt;		/* true if inode is realtime */

/*
 * ISVALID(x, y): block x is usable as an allocation target relative to
 * block y.  Realtime: x just has to be inside the rt device.  Otherwise
 * x must be in the same (valid) AG as y and within the AG's block range.
 */
#define ISVALID(x,y)	\
	(rt ? \
		(x) < mp->m_sb.sb_rblocks : \
		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)

	mp = ap->ip->i_mount;
	nullfb = *ap->firstblock == NULLFSBLOCK;
	/* only user data allocations go to the realtime device */
	rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
	/*
	 * If allocating at eof, and there's a previous real block,
	 * try to use its last block as our starting point.
	 */
	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
	    !isnullstartblock(ap->prev.br_startblock) &&
	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
		    ap->prev.br_startblock)) {
		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
		/*
		 * Adjust for the gap between prevp and us.
		 */
		adjust = ap->offset -
			(ap->prev.br_startoff + ap->prev.br_blockcount);
		if (adjust &&
		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
			ap->blkno += adjust;
	}
	/*
	 * If not at eof, then compare the two neighbor blocks.
	 * Figure out whether either one gives us a good starting point,
	 * and pick the better one.
	 */
	else if (!ap->eof) {
		xfs_fsblock_t	gotbno;		/* right side block number */
		xfs_fsblock_t	gotdiff=0;	/* right side difference */
		xfs_fsblock_t	prevbno;	/* left side block number */
		xfs_fsblock_t	prevdiff=0;	/* left side difference */

		/*
		 * If there's a previous (left) block, select a requested
		 * start block based on it.
		 */
		if (ap->prev.br_startoff != NULLFILEOFF &&
		    !isnullstartblock(ap->prev.br_startblock) &&
		    (prevbno = ap->prev.br_startblock +
			       ap->prev.br_blockcount) &&
		    ISVALID(prevbno, ap->prev.br_startblock)) {
			/*
			 * Calculate gap to end of previous block.
			 */
			adjust = prevdiff = ap->offset -
				(ap->prev.br_startoff +
				 ap->prev.br_blockcount);
			/*
			 * Figure the startblock based on the previous block's
			 * end and the gap size.
			 * Heuristic!
			 * If the gap is large relative to the piece we're
			 * allocating, or using it gives us an invalid block
			 * number, then just use the end of the previous block.
			 */
			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
			    ISVALID(prevbno + prevdiff,
				    ap->prev.br_startblock))
				prevbno += adjust;
			else
				prevdiff += adjust;
			/*
			 * If the firstblock forbids it, can't use it,
			 * must use default.
			 */
			if (!rt && !nullfb &&
			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
				prevbno = NULLFSBLOCK;
		}
		/*
		 * No previous block or can't follow it, just default.
		 */
		else
			prevbno = NULLFSBLOCK;
		/*
		 * If there's a following (right) block, select a requested
		 * start block based on it.
		 */
		if (!isnullstartblock(ap->got.br_startblock)) {
			/*
			 * Calculate gap to start of next block.
			 */
			adjust = gotdiff = ap->got.br_startoff - ap->offset;
			/*
			 * Figure the startblock based on the next block's
			 * start and the gap size.
			 */
			gotbno = ap->got.br_startblock;
			/*
			 * Heuristic!
			 * If the gap is large relative to the piece we're
			 * allocating, or using it gives us an invalid block
			 * number, then just use the start of the next block
			 * offset by our length.
			 */
			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
			    ISVALID(gotbno - gotdiff, gotbno))
				gotbno -= adjust;
			else if (ISVALID(gotbno - ap->length, gotbno)) {
				gotbno -= ap->length;
				gotdiff += adjust - ap->length;
			} else
				gotdiff += adjust;
			/*
			 * If the firstblock forbids it, can't use it,
			 * must use default.
			 */
			if (!rt && !nullfb &&
			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
				gotbno = NULLFSBLOCK;
		}
		/*
		 * No next block, just default.
		 */
		else
			gotbno = NULLFSBLOCK;
		/*
		 * If both valid, pick the better one, else the only good
		 * one, else ap->blkno is already set (to 0 or the inode block).
		 */
		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
		else if (prevbno != NULLFSBLOCK)
			ap->blkno = prevbno;
		else if (gotbno != NULLFSBLOCK)
			ap->blkno = gotbno;
	}
#undef ISVALID
}
3519
3520 static int
3521 xfs_bmap_longest_free_extent(
3522         struct xfs_trans        *tp,
3523         xfs_agnumber_t          ag,
3524         xfs_extlen_t            *blen,
3525         int                     *notinit)
3526 {
3527         struct xfs_mount        *mp = tp->t_mountp;
3528         struct xfs_perag        *pag;
3529         xfs_extlen_t            longest;
3530         int                     error = 0;
3531
3532         pag = xfs_perag_get(mp, ag);
3533         if (!pag->pagf_init) {
3534                 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3535                 if (error)
3536                         goto out;
3537
3538                 if (!pag->pagf_init) {
3539                         *notinit = 1;
3540                         goto out;
3541                 }
3542         }
3543
3544         longest = xfs_alloc_longest_free_extent(mp, pag);
3545         if (*blen < longest)
3546                 *blen = longest;
3547
3548 out:
3549         xfs_perag_put(pag);
3550         return error;
3551 }
3552
3553 static void
3554 xfs_bmap_select_minlen(
3555         struct xfs_bmalloca     *ap,
3556         struct xfs_alloc_arg    *args,
3557         xfs_extlen_t            *blen,
3558         int                     notinit)
3559 {
3560         if (notinit || *blen < ap->minlen) {
3561                 /*
3562                  * Since we did a BUF_TRYLOCK above, it is possible that
3563                  * there is space for this request.
3564                  */
3565                 args->minlen = ap->minlen;
3566         } else if (*blen < args->maxlen) {
3567                 /*
3568                  * If the best seen length is less than the request length,
3569                  * use the best as the minimum.
3570                  */
3571                 args->minlen = *blen;
3572         } else {
3573                 /*
3574                  * Otherwise we've seen an extent as big as maxlen, use that
3575                  * as the minimum.
3576                  */
3577                 args->minlen = args->maxlen;
3578         }
3579 }
3580
3581 STATIC int
3582 xfs_bmap_btalloc_nullfb(
3583         struct xfs_bmalloca     *ap,
3584         struct xfs_alloc_arg    *args,
3585         xfs_extlen_t            *blen)
3586 {
3587         struct xfs_mount        *mp = ap->ip->i_mount;
3588         xfs_agnumber_t          ag, startag;
3589         int                     notinit = 0;
3590         int                     error;
3591
3592         args->type = XFS_ALLOCTYPE_START_BNO;
3593         args->total = ap->total;
3594
3595         startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3596         if (startag == NULLAGNUMBER)
3597                 startag = ag = 0;
3598
3599         while (*blen < args->maxlen) {
3600                 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3601                                                      &notinit);
3602                 if (error)
3603                         return error;
3604
3605                 if (++ag == mp->m_sb.sb_agcount)
3606                         ag = 0;
3607                 if (ag == startag)
3608                         break;
3609         }
3610
3611         xfs_bmap_select_minlen(ap, args, blen, notinit);
3612         return 0;
3613 }
3614
3615 STATIC int
3616 xfs_bmap_btalloc_filestreams(
3617         struct xfs_bmalloca     *ap,
3618         struct xfs_alloc_arg    *args,
3619         xfs_extlen_t            *blen)
3620 {
3621         struct xfs_mount        *mp = ap->ip->i_mount;
3622         xfs_agnumber_t          ag;
3623         int                     notinit = 0;
3624         int                     error;
3625
3626         args->type = XFS_ALLOCTYPE_NEAR_BNO;
3627         args->total = ap->total;
3628
3629         ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3630         if (ag == NULLAGNUMBER)
3631                 ag = 0;
3632
3633         error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3634         if (error)
3635                 return error;
3636
3637         if (*blen < args->maxlen) {
3638                 error = xfs_filestream_new_ag(ap, &ag);
3639                 if (error)
3640                         return error;
3641
3642                 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3643                                                      &notinit);
3644                 if (error)
3645                         return error;
3646
3647         }
3648
3649         xfs_bmap_select_minlen(ap, args, blen, notinit);
3650
3651         /*
3652          * Set the failure fallback case to look in the selected AG as stream
3653          * may have moved.
3654          */
3655         ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3656         return 0;
3657 }
3658
/*
 * Allocate an extent for a regular (non-realtime) file via the btree
 * allocator.  Picks a target block (honoring any ap->firstblock AG
 * restriction and adjacency to existing extents), applies extent size
 * hints and stripe alignment, then calls xfs_alloc_vextent - retrying
 * with progressively relaxed constraints (no exact bno, no alignment,
 * smaller minlen, any AG) until the allocation succeeds or truly fails.
 * On success updates ap->blkno/ap->length, inode block counts and quota.
 */
STATIC int
xfs_bmap_btalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_mount_t	*mp;		/* mount point structure */
	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
	xfs_extlen_t	align;		/* minimum allocation alignment */
	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
	xfs_agnumber_t	ag;
	xfs_alloc_arg_t	args;
	xfs_extlen_t	blen;
	xfs_extlen_t	nextminlen = 0;
	int		nullfb;		/* true if ap->firstblock isn't set */
	int		isaligned;
	int		tryagain;
	int		error;
	int		stripe_align;

	ASSERT(ap->length);

	mp = ap->ip->i_mount;

	/* stripe alignment for allocation is determined by mount parameters */
	stripe_align = 0;
	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
		stripe_align = mp->m_swidth;
	else if (mp->m_dalign)
		stripe_align = mp->m_dalign;

	/* extent size hints apply only to user data allocations */
	align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
	if (unlikely(align)) {
		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
						align, 0, ap->eof, 0, ap->conv,
						&ap->offset, &ap->length);
		ASSERT(!error);
		ASSERT(ap->length);
	}


	nullfb = *ap->firstblock == NULLFSBLOCK;
	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
	if (nullfb) {
		/*
		 * No prior allocation in this transaction: start from the
		 * filestream AG for filestream data, else near the inode.
		 */
		if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
			ag = xfs_filestream_lookup_ag(ap->ip);
			ag = (ag != NULLAGNUMBER) ? ag : 0;
			ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
		} else {
			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
		}
	} else
		ap->blkno = *ap->firstblock;

	/* bias ap->blkno towards this file's neighbouring extents */
	xfs_bmap_adjacent(ap);

	/*
	 * If allowed, use ap->blkno; otherwise must use firstblock since
	 * it's in the right allocation group.
	 */
	if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
		;
	else
		ap->blkno = *ap->firstblock;
	/*
	 * Normal allocation, done through xfs_alloc_vextent.
	 */
	tryagain = isaligned = 0;
	memset(&args, 0, sizeof(args));
	args.tp = ap->tp;
	args.mp = mp;
	args.fsbno = ap->blkno;

	/* Trim the allocation back to the maximum an AG can fit. */
	args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
	args.firstblock = *ap->firstblock;
	blen = 0;
	if (nullfb) {
		/*
		 * Search for an allocation group with a single extent large
		 * enough for the request.  If one isn't found, then adjust
		 * the minimum allocation size to the largest space found.
		 */
		if (ap->userdata && xfs_inode_is_filestream(ap->ip))
			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
		else
			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
		if (error)
			return error;
	} else if (ap->flist->xbf_low) {
		/* low-space mode: take the bare minimum wherever we can */
		if (xfs_inode_is_filestream(ap->ip))
			args.type = XFS_ALLOCTYPE_FIRST_AG;
		else
			args.type = XFS_ALLOCTYPE_START_BNO;
		args.total = args.minlen = ap->minlen;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.total = ap->total;
		args.minlen = ap->minlen;
	}
	/* apply extent size hints if obtained earlier */
	if (unlikely(align)) {
		args.prod = align;
		if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
			args.mod = (xfs_extlen_t)(args.prod - args.mod);
	} else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
		args.prod = 1;
		args.mod = 0;
	} else {
		/* align allocations to page-cache granules on small blocks */
		args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
		if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
			args.mod = (xfs_extlen_t)(args.prod - args.mod);
	}
	/*
	 * If we are not low on available data blocks, and the
	 * underlying logical volume manager is a stripe, and
	 * the file offset is zero then try to allocate data
	 * blocks on stripe unit boundary.
	 * NOTE: ap->aeof is only set if the allocation length
	 * is >= the stripe unit and the allocation offset is
	 * at the end of file.
	 */
	if (!ap->flist->xbf_low && ap->aeof) {
		if (!ap->offset) {
			args.alignment = stripe_align;
			atype = args.type;
			isaligned = 1;
			/*
			 * Adjust for alignment
			 */
			if (blen > args.alignment && blen <= args.maxlen)
				args.minlen = blen - args.alignment;
			args.minalignslop = 0;
		} else {
			/*
			 * First try an exact bno allocation.
			 * If it fails then do a near or start bno
			 * allocation with alignment turned on.
			 */
			atype = args.type;
			tryagain = 1;
			args.type = XFS_ALLOCTYPE_THIS_BNO;
			args.alignment = 1;
			/*
			 * Compute the minlen+alignment for the
			 * next case.  Set slop so that the value
			 * of minlen+alignment+slop doesn't go up
			 * between the calls.
			 */
			if (blen > stripe_align && blen <= args.maxlen)
				nextminlen = blen - stripe_align;
			else
				nextminlen = args.minlen;
			if (nextminlen + stripe_align > args.minlen + 1)
				args.minalignslop =
					nextminlen + stripe_align -
					args.minlen - 1;
			else
				args.minalignslop = 0;
		}
	} else {
		args.alignment = 1;
		args.minalignslop = 0;
	}
	args.minleft = ap->minleft;
	args.wasdel = ap->wasdel;
	args.isfl = 0;
	args.userdata = ap->userdata;
	/* first allocation attempt with the constraints set up above */
	if ((error = xfs_alloc_vextent(&args)))
		return error;
	if (tryagain && args.fsbno == NULLFSBLOCK) {
		/*
		 * Exact allocation failed. Now try with alignment
		 * turned on.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = stripe_align;
		args.minlen = nextminlen;
		args.minalignslop = 0;
		isaligned = 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		/*
		 * allocation failed, so turn off alignment and
		 * try again.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = 0;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	/* still nothing: retry with the caller's minimum length */
	if (args.fsbno == NULLFSBLOCK && nullfb &&
	    args.minlen > ap->minlen) {
		args.minlen = ap->minlen;
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = ap->blkno;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	/* last resort: any AG, minimal total; enter low-space mode */
	if (args.fsbno == NULLFSBLOCK && nullfb) {
		args.fsbno = 0;
		args.type = XFS_ALLOCTYPE_FIRST_AG;
		args.total = ap->minlen;
		args.minleft = 0;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
		ap->flist->xbf_low = 1;
	}
	if (args.fsbno != NULLFSBLOCK) {
		/*
		 * check the allocation happened at the same or higher AG than
		 * the first block that was allocated.
		 */
		ASSERT(*ap->firstblock == NULLFSBLOCK ||
		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
		       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
		       (ap->flist->xbf_low &&
			XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
			XFS_FSB_TO_AGNO(mp, args.fsbno)));

		ap->blkno = args.fsbno;
		if (*ap->firstblock == NULLFSBLOCK)
			*ap->firstblock = args.fsbno;
		ASSERT(nullfb || fb_agno == args.agno ||
		       (ap->flist->xbf_low && fb_agno < args.agno));
		ap->length = args.len;
		ap->ip->i_d.di_nblocks += args.len;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= args.len;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
					XFS_TRANS_DQ_BCOUNT,
			(long) args.len);
	} else {
		/* nothing allocated - signal the caller with NULLFSBLOCK */
		ap->blkno = NULLFSBLOCK;
		ap->length = 0;
	}
	return 0;
}
3905
3906 /*
3907  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3908  * It figures out where to ask the underlying allocator to put the new extent.
3909  */
3910 STATIC int
3911 xfs_bmap_alloc(
3912         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3913 {
3914         if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
3915                 return xfs_bmap_rtalloc(ap);
3916         return xfs_bmap_btalloc(ap);
3917 }
3918
3919 /*
3920  * Trim the returned map to the required bounds
3921  */
3922 STATIC void
3923 xfs_bmapi_trim_map(
3924         struct xfs_bmbt_irec    *mval,
3925         struct xfs_bmbt_irec    *got,
3926         xfs_fileoff_t           *bno,
3927         xfs_filblks_t           len,
3928         xfs_fileoff_t           obno,
3929         xfs_fileoff_t           end,
3930         int                     n,
3931         int                     flags)
3932 {
3933         if ((flags & XFS_BMAPI_ENTIRE) ||
3934             got->br_startoff + got->br_blockcount <= obno) {
3935                 *mval = *got;
3936                 if (isnullstartblock(got->br_startblock))
3937                         mval->br_startblock = DELAYSTARTBLOCK;
3938                 return;
3939         }
3940
3941         if (obno > *bno)
3942                 *bno = obno;
3943         ASSERT((*bno >= obno) || (n == 0));
3944         ASSERT(*bno < end);
3945         mval->br_startoff = *bno;
3946         if (isnullstartblock(got->br_startblock))
3947                 mval->br_startblock = DELAYSTARTBLOCK;
3948         else
3949                 mval->br_startblock = got->br_startblock +
3950                                         (*bno - got->br_startoff);
3951         /*
3952          * Return the minimum of what we got and what we asked for for
3953          * the length.  We can use the len variable here because it is
3954          * modified below and we could have been there before coming
3955          * here if the first part of the allocation didn't overlap what
3956          * was asked for.
3957          */
3958         mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3959                         got->br_blockcount - (*bno - got->br_startoff));
3960         mval->br_state = got->br_state;
3961         ASSERT(mval->br_blockcount <= len);
3962         return;
3963 }
3964
/*
 * Update and validate the extent map to return.
 *
 * Advances *bno/*len past the record in *map and either merges it into
 * the previous returned entry (when the two are contiguous and
 * compatible) or commits it as a new entry by advancing *map and *n.
 */
STATIC void
xfs_bmapi_update_map(
	struct xfs_bmbt_irec	**map,
	xfs_fileoff_t		*bno,
	xfs_filblks_t		*len,
	xfs_fileoff_t		obno,
	xfs_fileoff_t		end,
	int			*n,
	int			flags)
{
	xfs_bmbt_irec_t	*mval = *map;

	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
	       ((mval->br_startoff + mval->br_blockcount) <= end));
	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
	       (mval->br_startoff < obno));

	/* consume this mapping from the remaining request range */
	*bno = mval->br_startoff + mval->br_blockcount;
	*len = end - *bno;
	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
		/* update previous map with new information */
		ASSERT(mval->br_startblock == mval[-1].br_startblock);
		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
		ASSERT(mval->br_state == mval[-1].br_state);
		mval[-1].br_blockcount = mval->br_blockcount;
		mval[-1].br_state = mval->br_state;
	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
		   mval[-1].br_startblock != HOLESTARTBLOCK &&
		   mval->br_startblock == mval[-1].br_startblock +
					  mval[-1].br_blockcount &&
		   ((flags & XFS_BMAPI_IGSTATE) ||
			mval[-1].br_state == mval->br_state)) {
		/*
		 * Physically contiguous real extents with compatible
		 * state: merge into the previous entry.
		 */
		ASSERT(mval->br_startoff ==
		       mval[-1].br_startoff + mval[-1].br_blockcount);
		mval[-1].br_blockcount += mval->br_blockcount;
	} else if (*n > 0 &&
		   mval->br_startblock == DELAYSTARTBLOCK &&
		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
		   mval->br_startoff ==
		   mval[-1].br_startoff + mval[-1].br_blockcount) {
		/* two adjacent delalloc mappings: merge them as well */
		mval[-1].br_blockcount += mval->br_blockcount;
		mval[-1].br_state = mval->br_state;
	} else if (!((*n == 0) &&
		     ((mval->br_startoff + mval->br_blockcount) <=
		      obno))) {
		/*
		 * Not mergeable: keep this entry, unless it is the first
		 * one and lies entirely before the requested range.
		 */
		mval++;
		(*n)++;
	}
	*map = mval;
}
4019
/*
 * Map file blocks to filesystem blocks without allocation.
 *
 * Walks the in-core extent list for the data (or attr, with
 * XFS_BMAPI_ATTRFORK) fork over the range [bno, bno + len), filling up
 * to *nmap entries in mval.  Ranges with no extent are reported as
 * HOLESTARTBLOCK; delayed allocations as DELAYSTARTBLOCK.  On return
 * *nmap holds the number of entries filled.  Caller must hold the
 * ilock (shared or exclusive).
 */
int
xfs_bmapi_read(
	struct xfs_inode	*ip,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	struct xfs_bmbt_irec	*mval,
	int			*nmap,
	int			flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got;	/* extent at/after bno */
	struct xfs_bmbt_irec	prev;	/* extent before bno */
	xfs_fileoff_t		obno;	/* original requested start */
	xfs_fileoff_t		end;	/* end of requested range */
	xfs_extnum_t		lastx;	/* index of current extent */
	int			error;
	int			eof;	/* walked past the last extent */
	int			n = 0;	/* number of mappings returned */
	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;

	ASSERT(*nmap >= 1);
	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
			   XFS_BMAPI_IGSTATE)));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));

	/* reject forks that are neither extent- nor btree-format */
	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	XFS_STATS_INC(xs_blk_mapr);

	ifp = XFS_IFORK_PTR(ip, whichfork);

	/* bring the extent list in-core if it isn't already */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, whichfork);
		if (error)
			return error;
	}

	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
	end = bno + len;
	obno = bno;

	while (bno < end && n < *nmap) {
		/* Reading past eof, act as though there's a hole up to end. */
		if (eof)
			got.br_startoff = end;
		if (got.br_startoff > bno) {
			/* Reading in a hole.  */
			mval->br_startoff = bno;
			mval->br_startblock = HOLESTARTBLOCK;
			mval->br_blockcount =
				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
			mval->br_state = XFS_EXT_NORM;
			bno += mval->br_blockcount;
			len -= mval->br_blockcount;
			mval++;
			n++;
			continue;
		}

		/* set up the extent map to return. */
		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/* If we're done, stop now. */
		if (bno >= end || n >= *nmap)
			break;

		/* Else go on to the next record. */
		if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
		else
			eof = 1;
	}
	*nmap = n;
	return 0;
}
4110
/*
 * Reserve space for a delayed allocation covering the hole at file offset
 * aoff, for up to len blocks.  Takes a transaction-less quota reservation
 * and decrements the in-core free-space counters, then inserts a delalloc
 * extent record (nullstartblock encoding the worst-case indirect block
 * count) into the data fork's in-core extent list.
 *
 * On entry, when !eof, *got describes the extent at or after aoff and
 * *prev the one before it.  On success *got is re-read to reflect the new
 * (possibly merged) delalloc extent and *lastx indexes it.
 * Returns 0 or a negative errno; on error all reservations are undone.
 */
STATIC int
xfs_bmapi_reserve_delalloc(
	struct xfs_inode	*ip,
	xfs_fileoff_t		aoff,	/* file offset of the hole */
	xfs_filblks_t		len,	/* length wanted */
	struct xfs_bmbt_irec	*got,	/* extent at/after aoff (if !eof) */
	struct xfs_bmbt_irec	*prev,	/* extent before aoff */
	xfs_extnum_t		*lastx,	/* in/out: extent index for *got */
	int			eof)	/* no extent at or after aoff */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	xfs_extlen_t		alen;	/* length actually reserved */
	xfs_extlen_t		indlen;	/* worst-case indirect (btree) blocks */
	char			rt = XFS_IS_REALTIME_INODE(ip);
	xfs_extlen_t		extsz;	/* extsize hint; later rt extent count */
	int			error;

	/* Clamp to the maximum extent length and to the following extent. */
	alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
	if (!eof)
		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);

	/* Figure out the extent size, adjust alen */
	extsz = xfs_get_extsz_hint(ip);
	if (extsz) {
		/*
		 * Make sure we don't exceed a single extent length when we
		 * align the extent by reducing length we are going to
		 * allocate by the maximum amount extent size aligment may
		 * require.
		 *
		 * NOTE(review): this restarts from the full request (len),
		 * discarding the clamp against the following extent done
		 * above -- presumably xfs_bmap_extsize_align() re-limits
		 * alen against *got/*prev; confirm before relying on it.
		 */
		alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
		error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
					       1, 0, &aoff, &alen);
		ASSERT(!error);
	}

	/* Realtime space is accounted in whole rt extents, not blocks. */
	if (rt)
		extsz = alen / mp->m_sb.sb_rextsize;

	/*
	 * Make a transaction-less quota reservation for delayed allocation
	 * blocks.  This number gets adjusted later.  We return if we haven't
	 * allocated blocks already inside this loop.
	 */
	error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
			rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	if (error)
		return error;

	/*
	 * Split changing sb for alen and indlen since they could be coming
	 * from different places.
	 */
	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
	ASSERT(indlen > 0);

	if (rt) {
		error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
					  -((int64_t)extsz), 0);
	} else {
		error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
						 -((int64_t)alen), 0);
	}

	if (error)
		goto out_unreserve_quota;

	/* Indirect blocks always come from the regular block pool. */
	error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
					 -((int64_t)indlen), 0);
	if (error)
		goto out_unreserve_blocks;


	ip->i_delayed_blks += alen;

	/* Insert the new delalloc record into the in-core extent list. */
	got->br_startoff = aoff;
	got->br_startblock = nullstartblock(indlen);
	got->br_blockcount = alen;
	got->br_state = XFS_EXT_NORM;
	xfs_bmap_add_extent_hole_delay(ip, lastx, got);

	/*
	 * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
	 * might have merged it into one of the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);

	ASSERT(got->br_startoff <= aoff);
	ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
	ASSERT(isnullstartblock(got->br_startblock));
	ASSERT(got->br_state == XFS_EXT_NORM);
	return 0;

out_unreserve_blocks:
	/* Undo the free-space accounting taken above. */
	if (rt)
		xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
	else
		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
out_unreserve_quota:
	if (XFS_IS_QUOTA_ON(mp))
		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	return error;
}
4216
4217 /*
4218  * Map file blocks to filesystem blocks, adding delayed allocations as needed.
4219  */
4220 int
4221 xfs_bmapi_delay(
4222         struct xfs_inode        *ip,    /* incore inode */
4223         xfs_fileoff_t           bno,    /* starting file offs. mapped */
4224         xfs_filblks_t           len,    /* length to map in file */
4225         struct xfs_bmbt_irec    *mval,  /* output: map values */
4226         int                     *nmap,  /* i/o: mval size/count */
4227         int                     flags)  /* XFS_BMAPI_... */
4228 {
4229         struct xfs_mount        *mp = ip->i_mount;
4230         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
4231         struct xfs_bmbt_irec    got;    /* current file extent record */
4232         struct xfs_bmbt_irec    prev;   /* previous file extent record */
4233         xfs_fileoff_t           obno;   /* old block number (offset) */
4234         xfs_fileoff_t           end;    /* end of mapped file region */
4235         xfs_extnum_t            lastx;  /* last useful extent number */
4236         int                     eof;    /* we've hit the end of extents */
4237         int                     n = 0;  /* current extent index */
4238         int                     error = 0;
4239
4240         ASSERT(*nmap >= 1);
4241         ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4242         ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
4243         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4244
4245         if (unlikely(XFS_TEST_ERROR(
4246             (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
4247              XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
4248              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4249                 XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
4250                 return -EFSCORRUPTED;
4251         }
4252
4253         if (XFS_FORCED_SHUTDOWN(mp))
4254                 return -EIO;
4255
4256         XFS_STATS_INC(xs_blk_mapw);
4257
4258         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4259                 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
4260                 if (error)
4261                         return error;
4262         }
4263
4264         xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
4265         end = bno + len;
4266         obno = bno;
4267
4268         while (bno < end && n < *nmap) {
4269                 if (eof || got.br_startoff > bno) {
4270                         error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
4271                                                            &prev, &lastx, eof);
4272                         if (error) {
4273                                 if (n == 0) {
4274                                         *nmap = 0;
4275                                         return error;
4276                                 }
4277                                 break;
4278                         }
4279                 }
4280
4281                 /* set up the extent map to return. */
4282                 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4283                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4284
4285                 /* If we're done, stop now. */
4286                 if (bno >= end || n >= *nmap)
4287                         break;
4288
4289                 /* Else go on to the next record. */
4290                 prev = got;
4291                 if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
4292                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
4293                 else
4294                         eof = 1;
4295         }
4296
4297         *nmap = n;
4298         return 0;
4299 }
4300
4301
/*
 * Allocate filesystem blocks for the range described in @bma (offset,
 * length, flags, and surrounding extent records) and update the in-core
 * extent list.  On return bma->got holds the resulting real extent,
 * possibly merged with its neighbours, or bma->blkno == NULLFSBLOCK when
 * no space could be found.  Helper for xfs_bmapi_write().
 */
static int
xfs_bmapi_allocate(
	struct xfs_bmalloca	*bma)
{
	struct xfs_mount	*mp = bma->ip->i_mount;
	int			whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	int			tmp_logflags = 0;
	int			error;

	ASSERT(bma->length > 0);

	/*
	 * For the wasdelay case, we could also just allocate the stuff asked
	 * for in this bmap call but that wouldn't be as good.
	 */
	if (bma->wasdel) {
		/* Convert the whole delalloc extent, not just the part asked
		 * for, and refresh bma->prev from the record before it. */
		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
		bma->offset = bma->got.br_startoff;
		if (bma->idx != NULLEXTNUM && bma->idx) {
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
					 &bma->prev);
		}
	} else {
		/* Clamp to the maximum extent length and, if a following
		 * extent exists, to the hole in front of it. */
		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
		if (!bma->eof)
			bma->length = XFS_FILBLKS_MIN(bma->length,
					bma->got.br_startoff - bma->offset);
	}

	/*
	 * Indicate if this is the first user data in the file, or just any
	 * user data.
	 */
	if (!(bma->flags & XFS_BMAPI_METADATA)) {
		bma->userdata = (bma->offset == 0) ?
			XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
	}

	/* XFS_BMAPI_CONTIG callers need the request in one single extent. */
	bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;

	/*
	 * Only want to do the alignment at the eof if it is userdata and
	 * allocation length is larger than a stripe unit.
	 */
	if (mp->m_dalign && bma->length >= mp->m_dalign &&
	    !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
		error = xfs_bmap_isaeof(bma, whichfork);
		if (error)
			return error;
	}

	error = xfs_bmap_alloc(bma);
	if (error)
		return error;

	/* In low-space mode no minimum free space can be guaranteed. */
	if (bma->flist->xbf_low)
		bma->minleft = 0;
	if (bma->cur)
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
	/* Allocation found no space: let the caller return a short map. */
	if (bma->blkno == NULLFSBLOCK)
		return 0;
	/* Btree-format forks need a cursor for the extent list update. */
	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
		bma->cur->bc_private.b.flist = bma->flist;
	}
	/*
	 * Bump the number of extents we've allocated
	 * in this call.
	 */
	bma->nallocs++;

	if (bma->cur)
		bma->cur->bc_private.b.flags =
			bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;

	bma->got.br_startoff = bma->offset;
	bma->got.br_startblock = bma->blkno;
	bma->got.br_blockcount = bma->length;
	bma->got.br_state = XFS_EXT_NORM;

	/*
	 * A wasdelay extent has been initialized, so shouldn't be flagged
	 * as unwritten.
	 */
	if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
	    xfs_sb_version_hasextflgbit(&mp->m_sb))
		bma->got.br_state = XFS_EXT_UNWRITTEN;

	if (bma->wasdel)
		error = xfs_bmap_add_extent_delay_real(bma);
	else
		error = xfs_bmap_add_extent_hole_real(bma, whichfork);

	/* NOTE(review): tmp_logflags is never set in this function, so this
	 * OR is a no-op as written; kept for symmetry with the callers. */
	bma->logflags |= tmp_logflags;
	if (error)
		return error;

	/*
	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
	 * the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);

	ASSERT(bma->got.br_startoff <= bma->offset);
	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
	       bma->offset + bma->length);
	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
	       bma->got.br_state == XFS_EXT_UNWRITTEN);
	return 0;
}
4416
4417 STATIC int
4418 xfs_bmapi_convert_unwritten(
4419         struct xfs_bmalloca     *bma,
4420         struct xfs_bmbt_irec    *mval,
4421         xfs_filblks_t           len,
4422         int                     flags)
4423 {
4424         int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4425                                                 XFS_ATTR_FORK : XFS_DATA_FORK;
4426         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4427         int                     tmp_logflags = 0;
4428         int                     error;
4429
4430         /* check if we need to do unwritten->real conversion */
4431         if (mval->br_state == XFS_EXT_UNWRITTEN &&
4432             (flags & XFS_BMAPI_PREALLOC))
4433                 return 0;
4434
4435         /* check if we need to do real->unwritten conversion */
4436         if (mval->br_state == XFS_EXT_NORM &&
4437             (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4438                         (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4439                 return 0;
4440
4441         /*
4442          * Modify (by adding) the state flag, if writing.
4443          */
4444         ASSERT(mval->br_blockcount <= len);
4445         if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4446                 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4447                                         bma->ip, whichfork);
4448                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4449                 bma->cur->bc_private.b.flist = bma->flist;
4450         }
4451         mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4452                                 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4453
4454         error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
4455                         &bma->cur, mval, bma->firstblock, bma->flist,
4456                         &tmp_logflags);
4457         bma->logflags |= tmp_logflags;
4458         if (error)
4459                 return error;
4460
4461         /*
4462          * Update our extent pointer, given that
4463          * xfs_bmap_add_extent_unwritten_real might have merged it into one
4464          * of the neighbouring ones.
4465          */
4466         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
4467
4468         /*
4469          * We may have combined previously unwritten space with written space,
4470          * so generate another request.
4471          */
4472         if (mval->br_blockcount < len)
4473                 return -EAGAIN;
4474         return 0;
4475 }
4476
/*
 * Map file blocks to filesystem blocks, and allocate blocks or convert the
 * extent state if necessary.  Detailed behaviour is controlled by the flags
 * parameter.  Only allocates blocks from a single allocation group, to avoid
 * locking problems.
 *
 * The returned value in "firstblock" from the first call in a transaction
 * must be remembered and presented to subsequent calls in "firstblock".
 * An upper bound for the number of blocks to be allocated is supplied to
 * the first call in "total"; if no allocation group has that many free
 * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
 */
int
xfs_bmapi_write(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		bno,		/* starting file offs. mapped */
	xfs_filblks_t		len,		/* length to map in file */
	int			flags,		/* XFS_BMAPI_... */
	xfs_fsblock_t		*firstblock,	/* first allocated block
						   controls a.g. for allocs */
	xfs_extlen_t		total,		/* total blocks needed */
	struct xfs_bmbt_irec	*mval,		/* output: map values */
	int			*nmap,		/* i/o: mval size/count */
	struct xfs_bmap_free	*flist)		/* i/o: list extents to free */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	struct xfs_bmalloca	bma = { NULL };	/* args for xfs_bmap_alloc */
	xfs_fileoff_t		end;		/* end of mapped file region */
	int			eof;		/* after the end of extents */
	int			error;		/* error return */
	int			n;		/* current extent index */
	xfs_fileoff_t		obno;		/* old block number (offset) */
	int			whichfork;	/* data or attr fork */
	char			inhole;		/* current location is hole in file */
	char			wasdelay;	/* old extent was delayed */

#ifdef DEBUG
	xfs_fileoff_t		orig_bno;	/* original block number value */
	int			orig_flags;	/* original flags arg value */
	xfs_filblks_t		orig_len;	/* original value of len arg */
	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
	int			orig_nmap;	/* original value of *nmap */

	orig_bno = bno;
	orig_len = len;
	orig_flags = flags;
	orig_mval = mval;
	orig_nmap = *nmap;
#endif
	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
		XFS_ATTR_FORK : XFS_DATA_FORK;

	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
	ASSERT(!(flags & XFS_BMAPI_IGSTATE));
	ASSERT(tp != NULL);
	ASSERT(len > 0);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);

	XFS_STATS_INC(xs_blk_mapw);

	/*
	 * When no firstblock has been chosen yet, leave room for a possible
	 * extents-to-btree conversion: one block per btree level, or a
	 * single block for an extents-format fork.
	 */
	if (*firstblock == NULLFSBLOCK) {
		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
		else
			bma.minleft = 1;
	} else {
		bma.minleft = 0;
	}

	/* Pull the extent list in core if it isn't already. */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			goto error0;
	}

	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
				&bma.prev);
	n = 0;
	end = bno + len;
	obno = bno;

	bma.tp = tp;
	bma.ip = ip;
	bma.total = total;
	bma.userdata = 0;
	bma.flist = flist;
	bma.firstblock = firstblock;

	while (bno < end && n < *nmap) {
		inhole = eof || bma.got.br_startoff > bno;
		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);

		/*
		 * First, deal with the hole before the allocated space
		 * that we found, if any.
		 */
		if (inhole || wasdelay) {
			bma.eof = eof;
			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
			bma.wasdel = wasdelay;
			bma.offset = bno;
			bma.flags = flags;

			/*
			 * There's a 32/64 bit type mismatch between the
			 * allocation length request (which can be 64 bits in
			 * length) and the bma length request, which is
			 * xfs_extlen_t and therefore 32 bits. Hence we have to
			 * check for 32-bit overflows and handle them here.
			 */
			if (len > (xfs_filblks_t)MAXEXTLEN)
				bma.length = MAXEXTLEN;
			else
				bma.length = len;

			ASSERT(len > 0);
			ASSERT(bma.length > 0);
			error = xfs_bmapi_allocate(&bma);
			if (error)
				goto error0;
			/* Out of space: return a short mapping to the caller. */
			if (bma.blkno == NULLFSBLOCK)
				break;
		}

		/* Deal with the allocated space we found.  */
		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
							end, n, flags);

		/* Execute unwritten extent conversion if necessary */
		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
		if (error == -EAGAIN)
			continue;	/* conversion merged extents; remap */
		if (error)
			goto error0;

		/* update the extent map to return */
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/*
		 * If we're done, stop now.  Stop when we've allocated
		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
		 * the transaction may get too big.
		 */
		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
			break;

		/* Else go on to the next record. */
		bma.prev = bma.got;
		if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
					 &bma.got);
		} else
			eof = 1;
	}
	*nmap = n;

	/*
	 * Transform from btree to extents, give it cur.
	 */
	if (xfs_bmap_wants_extents(ip, whichfork)) {
		int		tmp_logflags = 0;

		ASSERT(bma.cur);
		error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
			&tmp_logflags, whichfork);
		bma.logflags |= tmp_logflags;
		if (error)
			goto error0;
	}

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_NEXTENTS(ip, whichfork) >
		XFS_IFORK_MAXEXT(ip, whichfork));
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		bma.logflags &= ~xfs_ilog_fext(whichfork);
	else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		bma.logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log whatever the flags say, even if error.  Otherwise we might miss
	 * detecting a case where the data is changed, there's an error,
	 * and it's not logged so we don't shutdown when we should.
	 */
	if (bma.logflags)
		xfs_trans_log_inode(tp, ip, bma.logflags);

	if (bma.cur) {
		if (!error) {
			/*
			 * Within one transaction firstblock may only stay in
			 * the same AG, or move to a higher AG when the
			 * allocator dropped to low-space (xbf_low) mode.
			 */
			ASSERT(*firstblock == NULLFSBLOCK ||
			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
			       XFS_FSB_TO_AGNO(mp,
				       bma.cur->bc_private.b.firstblock) ||
			       (flist->xbf_low &&
				XFS_FSB_TO_AGNO(mp, *firstblock) <
				XFS_FSB_TO_AGNO(mp,
					bma.cur->bc_private.b.firstblock)));
			*firstblock = bma.cur->bc_private.b.firstblock;
		}
		xfs_btree_del_cursor(bma.cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	/*
	 * NOTE(review): the orig_* arguments only exist under DEBUG, so
	 * xfs_bmap_validate_ret is presumably a no-op macro otherwise --
	 * confirm against its definition.
	 */
	if (!error)
		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
			orig_nmap, *nmap);
	return error;
}
4707
4708 /*
4709  * Called by xfs_bmapi to update file extent records and the btree
4710  * after removing space (or undoing a delayed allocation).
4711  */
4712 STATIC int                              /* error */
4713 xfs_bmap_del_extent(
4714         xfs_inode_t             *ip,    /* incore inode pointer */
4715         xfs_trans_t             *tp,    /* current transaction pointer */
4716         xfs_extnum_t            *idx,   /* extent number to update/delete */
4717         xfs_bmap_free_t         *flist, /* list of extents to be freed */
4718         xfs_btree_cur_t         *cur,   /* if null, not a btree */
4719         xfs_bmbt_irec_t         *del,   /* data to remove from extents */
4720         int                     *logflagsp, /* inode logging flags */
4721         int                     whichfork) /* data or attr fork */
4722 {
4723         xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
4724         xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
4725         xfs_fsblock_t           del_endblock=0; /* first block past del */
4726         xfs_fileoff_t           del_endoff;     /* first offset past del */
4727         int                     delay;  /* current block is delayed allocated */
4728         int                     do_fx;  /* free extent at end of routine */
4729         xfs_bmbt_rec_host_t     *ep;    /* current extent entry pointer */
4730         int                     error;  /* error return value */
4731         int                     flags;  /* inode logging flags */
4732         xfs_bmbt_irec_t         got;    /* current extent entry */
4733         xfs_fileoff_t           got_endoff;     /* first offset past got */
4734         int                     i;      /* temp state */
4735         xfs_ifork_t             *ifp;   /* inode fork pointer */
4736         xfs_mount_t             *mp;    /* mount structure */
4737         xfs_filblks_t           nblks;  /* quota/sb block count */
4738         xfs_bmbt_irec_t         new;    /* new record to be inserted */
4739         /* REFERENCED */
4740         uint                    qfield; /* quota field to update */
4741         xfs_filblks_t           temp;   /* for indirect length calculations */
4742         xfs_filblks_t           temp2;  /* for indirect length calculations */
4743         int                     state = 0;
4744
4745         XFS_STATS_INC(xs_del_exlist);
4746
4747         if (whichfork == XFS_ATTR_FORK)
4748                 state |= BMAP_ATTRFORK;
4749
4750         mp = ip->i_mount;
4751         ifp = XFS_IFORK_PTR(ip, whichfork);
4752         ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
4753                 (uint)sizeof(xfs_bmbt_rec_t)));
4754         ASSERT(del->br_blockcount > 0);
4755         ep = xfs_iext_get_ext(ifp, *idx);
4756         xfs_bmbt_get_all(ep, &got);
4757         ASSERT(got.br_startoff <= del->br_startoff);
4758         del_endoff = del->br_startoff + del->br_blockcount;
4759         got_endoff = got.br_startoff + got.br_blockcount;
4760         ASSERT(got_endoff >= del_endoff);
4761         delay = isnullstartblock(got.br_startblock);
4762         ASSERT(isnullstartblock(del->br_startblock) == delay);
4763         flags = 0;
4764         qfield = 0;
4765         error = 0;
4766         /*
4767          * If deleting a real allocation, must free up the disk space.
4768          */
4769         if (!delay) {
4770                 flags = XFS_ILOG_CORE;
4771                 /*
4772                  * Realtime allocation.  Free it and record di_nblocks update.
4773                  */
4774                 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
4775                         xfs_fsblock_t   bno;
4776                         xfs_filblks_t   len;
4777
4778                         ASSERT(do_mod(del->br_blockcount,
4779                                       mp->m_sb.sb_rextsize) == 0);
4780                         ASSERT(do_mod(del->br_startblock,
4781                                       mp->m_sb.sb_rextsize) == 0);
4782                         bno = del->br_startblock;
4783                         len = del->br_blockcount;
4784                         do_div(bno, mp->m_sb.sb_rextsize);
4785                         do_div(len, mp->m_sb.sb_rextsize);
4786                         error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
4787                         if (error)
4788                                 goto done;
4789                         do_fx = 0;
4790                         nblks = len * mp->m_sb.sb_rextsize;
4791                         qfield = XFS_TRANS_DQ_RTBCOUNT;
4792                 }
4793                 /*
4794                  * Ordinary allocation.
4795                  */
4796                 else {
4797                         do_fx = 1;
4798                         nblks = del->br_blockcount;
4799                         qfield = XFS_TRANS_DQ_BCOUNT;
4800                 }
4801                 /*
4802                  * Set up del_endblock and cur for later.
4803                  */
4804                 del_endblock = del->br_startblock + del->br_blockcount;
4805                 if (cur) {
4806                         if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
4807                                         got.br_startblock, got.br_blockcount,
4808                                         &i)))
4809                                 goto done;
4810                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4811                 }
4812                 da_old = da_new = 0;
4813         } else {
4814                 da_old = startblockval(got.br_startblock);
4815                 da_new = 0;
4816                 nblks = 0;
4817                 do_fx = 0;
4818         }
4819         /*
4820          * Set flag value to use in switch statement.
4821          * Left-contig is 2, right-contig is 1.
4822          */
4823         switch (((got.br_startoff == del->br_startoff) << 1) |
4824                 (got_endoff == del_endoff)) {
4825         case 3:
4826                 /*
4827                  * Matches the whole extent.  Delete the entry.
4828                  */
4829                 xfs_iext_remove(ip, *idx, 1,
4830                                 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
4831                 --*idx;
4832                 if (delay)
4833                         break;
4834
4835                 XFS_IFORK_NEXT_SET(ip, whichfork,
4836                         XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
4837                 flags |= XFS_ILOG_CORE;
4838                 if (!cur) {
4839                         flags |= xfs_ilog_fext(whichfork);
4840                         break;
4841                 }
4842                 if ((error = xfs_btree_delete(cur, &i)))
4843                         goto done;
4844                 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4845                 break;
4846
4847         case 2:
4848                 /*
4849                  * Deleting the first part of the extent.
4850                  */
4851                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4852                 xfs_bmbt_set_startoff(ep, del_endoff);
4853                 temp = got.br_blockcount - del->br_blockcount;
4854                 xfs_bmbt_set_blockcount(ep, temp);
4855                 if (delay) {
4856                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
4857                                 da_old);
4858                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4859                         trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4860                         da_new = temp;
4861                         break;
4862                 }
4863                 xfs_bmbt_set_startblock(ep, del_endblock);
4864                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4865                 if (!cur) {
4866                         flags |= xfs_ilog_fext(whichfork);
4867                         break;
4868                 }
4869                 if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
4870                                 got.br_blockcount - del->br_blockcount,
4871                                 got.br_state)))
4872                         goto done;
4873                 break;
4874
4875         case 1:
4876                 /*
4877                  * Deleting the last part of the extent.
4878                  */
4879                 temp = got.br_blockcount - del->br_blockcount;
4880                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4881                 xfs_bmbt_set_blockcount(ep, temp);
4882                 if (delay) {
4883                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
4884                                 da_old);
4885                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4886                         trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4887                         da_new = temp;
4888                         break;
4889                 }
4890                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4891                 if (!cur) {
4892                         flags |= xfs_ilog_fext(whichfork);
4893                         break;
4894                 }
4895                 if ((error = xfs_bmbt_update(cur, got.br_startoff,
4896                                 got.br_startblock,
4897                                 got.br_blockcount - del->br_blockcount,
4898                                 got.br_state)))
4899                         goto done;
4900                 break;
4901
4902         case 0:
4903                 /*
4904                  * Deleting the middle of the extent.
4905                  */
4906                 temp = del->br_startoff - got.br_startoff;
4907                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4908                 xfs_bmbt_set_blockcount(ep, temp);
4909                 new.br_startoff = del_endoff;
4910                 temp2 = got_endoff - del_endoff;
4911                 new.br_blockcount = temp2;
4912                 new.br_state = got.br_state;
4913                 if (!delay) {
4914                         new.br_startblock = del_endblock;
4915                         flags |= XFS_ILOG_CORE;
4916                         if (cur) {
4917                                 if ((error = xfs_bmbt_update(cur,
4918                                                 got.br_startoff,
4919                                                 got.br_startblock, temp,
4920                                                 got.br_state)))
4921                                         goto done;
4922                                 if ((error = xfs_btree_increment(cur, 0, &i)))
4923                                         goto done;
4924                                 cur->bc_rec.b = new;
4925                                 error = xfs_btree_insert(cur, &i);
4926                                 if (error && error != -ENOSPC)
4927                                         goto done;
4928                                 /*
4929                                  * If get no-space back from btree insert,
4930                                  * it tried a split, and we have a zero
4931                                  * block reservation.
4932                                  * Fix up our state and return the error.
4933                                  */
4934                                 if (error == -ENOSPC) {
4935                                         /*
4936                                          * Reset the cursor, don't trust
4937                                          * it after any insert operation.
4938                                          */
4939                                         if ((error = xfs_bmbt_lookup_eq(cur,
4940                                                         got.br_startoff,
4941                                                         got.br_startblock,
4942                                                         temp, &i)))
4943                                                 goto done;
4944                                         XFS_WANT_CORRUPTED_GOTO(mp,
4945                                                                 i == 1, done);
4946                                         /*
4947                                          * Update the btree record back
4948                                          * to the original value.
4949                                          */
4950                                         if ((error = xfs_bmbt_update(cur,
4951                                                         got.br_startoff,
4952                                                         got.br_startblock,
4953                                                         got.br_blockcount,
4954                                                         got.br_state)))
4955                                                 goto done;
4956                                         /*
4957                                          * Reset the extent record back
4958                                          * to the original value.
4959                                          */
4960                                         xfs_bmbt_set_blockcount(ep,
4961                                                 got.br_blockcount);
4962                                         flags = 0;
4963                                         error = -ENOSPC;
4964                                         goto done;
4965                                 }
4966                                 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4967                         } else
4968                                 flags |= xfs_ilog_fext(whichfork);
4969                         XFS_IFORK_NEXT_SET(ip, whichfork,
4970                                 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
4971                 } else {
4972                         ASSERT(whichfork == XFS_DATA_FORK);
4973                         temp = xfs_bmap_worst_indlen(ip, temp);
4974                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4975                         temp2 = xfs_bmap_worst_indlen(ip, temp2);
4976                         new.br_startblock = nullstartblock((int)temp2);
4977                         da_new = temp + temp2;
4978                         while (da_new > da_old) {
4979                                 if (temp) {
4980                                         temp--;
4981                                         da_new--;
4982                                         xfs_bmbt_set_startblock(ep,
4983                                                 nullstartblock((int)temp));
4984                                 }
4985                                 if (da_new == da_old)
4986                                         break;
4987                                 if (temp2) {
4988                                         temp2--;
4989                                         da_new--;
4990                                         new.br_startblock =
4991                                                 nullstartblock((int)temp2);
4992                                 }
4993                         }
4994                 }
4995                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4996                 xfs_iext_insert(ip, *idx + 1, 1, &new, state);
4997                 ++*idx;
4998                 break;
4999         }
5000         /*
5001          * If we need to, add to list of extents to delete.
5002          */
5003         if (do_fx)
5004                 xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
5005                         mp);
5006         /*
5007          * Adjust inode # blocks in the file.
5008          */
5009         if (nblks)
5010                 ip->i_d.di_nblocks -= nblks;
5011         /*
5012          * Adjust quota data.
5013          */
5014         if (qfield)
5015                 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5016
5017         /*
5018          * Account for change in delayed indirect blocks.
5019          * Nothing to do for disk quota accounting here.
5020          */
5021         ASSERT(da_old >= da_new);
5022         if (da_old > da_new) {
5023                 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
5024                         (int64_t)(da_old - da_new), 0);
5025         }
5026 done:
5027         *logflagsp = flags;
5028         return error;
5029 }
5030
/*
 * Unmap (remove) blocks from a file.
 *
 * Walks the in-core extent list backwards from the end of the
 * [bno, bno + len) range, deleting (or trimming) each extent that
 * overlaps it.  If nexts is nonzero then the number of extents to
 * remove is limited to that value.  *done is set to 1 once the whole
 * range has been processed (or there was nothing to do), 0 otherwise.
 *
 * Realtime extents cannot be partially freed: pieces that are not
 * aligned to the realtime extent size are converted to unwritten
 * instead (or skipped entirely when the filesystem does not support
 * unwritten extents).
 */
int						/* error */
xfs_bunmapi(
	xfs_trans_t		*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		bno,		/* starting offset to unmap */
	xfs_filblks_t		len,		/* length to unmap in file */
	int			flags,		/* misc flags */
	xfs_extnum_t		nexts,		/* number of extents max */
	xfs_fsblock_t		*firstblock,	/* first allocated block
						   controls a.g. for allocs */
	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
	int			*done)		/* out: 1 when the unmap is complete */
{
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	xfs_bmbt_irec_t		del;		/* extent being deleted */
	int			eof;		/* is deleting at eof */
	xfs_bmbt_rec_host_t	*ep;		/* extent record pointer */
	int			error;		/* error return value */
	xfs_extnum_t		extno;		/* extent number in list */
	xfs_bmbt_irec_t		got;		/* current extent record */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	int			isrt;		/* freeing in rt area */
	xfs_extnum_t		lastx;		/* last extent index used */
	int			logflags;	/* transaction logging flags */
	xfs_extlen_t		mod;		/* rt extent offset */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_extnum_t		nextents;	/* number of file extents */
	xfs_bmbt_irec_t		prev;		/* previous extent record */
	xfs_fileoff_t		start;		/* first file offset deleted */
	int			tmp_logflags;	/* partial logging flags */
	int			wasdel;		/* was a delayed alloc extent */
	int			whichfork;	/* data or attribute fork */
	xfs_fsblock_t		sum;

	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);

	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
		XFS_ATTR_FORK : XFS_DATA_FORK;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Only extent-list and btree format forks can have extents removed. */
	if (unlikely(
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
				 ip->i_mount);
		return -EFSCORRUPTED;
	}
	mp = ip->i_mount;
	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(len > 0);
	ASSERT(nexts >= 0);

	/* Make sure the in-core extent list is populated before walking it. */
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	if (nextents == 0) {
		*done = 1;
		return 0;
	}
	XFS_STATS_INC(xs_blk_unmap);
	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
	start = bno;
	/* Work from the last offset of the range backwards. */
	bno = start + len - 1;
	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
		&prev);

	/*
	 * Check to see if the given block number is past the end of the
	 * file, back up to the last block if so...
	 */
	if (eof) {
		ep = xfs_iext_get_ext(ifp, --lastx);
		xfs_bmbt_get_all(ep, &got);
		bno = got.br_startoff + got.br_blockcount - 1;
	}
	logflags = 0;
	if (ifp->if_flags & XFS_IFBROOT) {
		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.flist = flist;
		cur->bc_private.b.flags = 0;
	} else
		cur = NULL;

	if (isrt) {
		/*
		 * Synchronize by locking the bitmap inode.
		 */
		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
	}

	/*
	 * Main loop: process extents from the highest file offset downwards
	 * until the range is exhausted, the list runs out, or the caller's
	 * extent-count limit (nexts) is reached.
	 */
	extno = 0;
	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
	       (nexts == 0 || extno < nexts)) {
		/*
		 * Is the found extent after a hole in which bno lives?
		 * Just back up to the previous extent, if so.
		 */
		if (got.br_startoff > bno) {
			if (--lastx < 0)
				break;
			ep = xfs_iext_get_ext(ifp, lastx);
			xfs_bmbt_get_all(ep, &got);
		}
		/*
		 * Is the last block of this extent before the range
		 * we're supposed to delete?  If so, we're done.
		 */
		bno = XFS_FILEOFF_MIN(bno,
			got.br_startoff + got.br_blockcount - 1);
		if (bno < start)
			break;
		/*
		 * Then deal with the (possibly delayed) allocated space
		 * we found.
		 */
		ASSERT(ep != NULL);
		del = got;
		wasdel = isnullstartblock(del.br_startblock);
		/* Trim del down to just the portion inside [start, bno]. */
		if (got.br_startoff < start) {
			del.br_startoff = start;
			del.br_blockcount -= start - got.br_startoff;
			if (!wasdel)
				del.br_startblock += start - got.br_startoff;
		}
		if (del.br_startoff + del.br_blockcount > bno + 1)
			del.br_blockcount = bno + 1 - del.br_startoff;
		/* One past the last block of del; rt extents must end on a
		 * realtime-extent-size boundary. */
		sum = del.br_startblock + del.br_blockcount;
		if (isrt &&
		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
			/*
			 * Realtime extent not lined up at the end.
			 * The extent could have been split into written
			 * and unwritten pieces, or we could just be
			 * unmapping part of it.  But we can't really
			 * get rid of part of a realtime extent.
			 */
			if (del.br_state == XFS_EXT_UNWRITTEN ||
			    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				/*
				 * This piece is unwritten, or we're not
				 * using unwritten extents.  Skip over it.
				 */
				ASSERT(bno >= mod);
				bno -= mod > del.br_blockcount ?
					del.br_blockcount : mod;
				if (bno < got.br_startoff) {
					if (--lastx >= 0)
						xfs_bmbt_get_all(xfs_iext_get_ext(
							ifp, lastx), &got);
				}
				continue;
			}
			/*
			 * It's written, turn it unwritten.
			 * This is better than zeroing it.
			 */
			ASSERT(del.br_state == XFS_EXT_NORM);
			ASSERT(xfs_trans_get_block_res(tp) > 0);
			/*
			 * If this spans a realtime extent boundary,
			 * chop it back to the start of the one we end at.
			 */
			if (del.br_blockcount > mod) {
				del.br_startoff += del.br_blockcount - mod;
				del.br_startblock += del.br_blockcount - mod;
				del.br_blockcount = mod;
			}
			del.br_state = XFS_EXT_UNWRITTEN;
			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
					&lastx, &cur, &del, firstblock, flist,
					&logflags);
			if (error)
				goto error0;
			goto nodelete;
		}
		if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
			/*
			 * Realtime extent is lined up at the end but not
			 * at the front.  We'll get rid of full extents if
			 * we can.
			 */
			mod = mp->m_sb.sb_rextsize - mod;
			if (del.br_blockcount > mod) {
				del.br_blockcount -= mod;
				del.br_startoff += mod;
				del.br_startblock += mod;
			} else if ((del.br_startoff == start &&
				    (del.br_state == XFS_EXT_UNWRITTEN ||
				     xfs_trans_get_block_res(tp) == 0)) ||
				   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				/*
				 * Can't make it unwritten.  There isn't
				 * a full extent here so just skip it.
				 */
				ASSERT(bno >= del.br_blockcount);
				bno -= del.br_blockcount;
				if (got.br_startoff > bno) {
					if (--lastx >= 0) {
						ep = xfs_iext_get_ext(ifp,
								      lastx);
						xfs_bmbt_get_all(ep, &got);
					}
				}
				continue;
			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
				/*
				 * This one is already unwritten.
				 * It must have a written left neighbor.
				 * Unwrite the killed part of that one and
				 * try again.
				 */
				ASSERT(lastx > 0);
				xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
						lastx - 1), &prev);
				ASSERT(prev.br_state == XFS_EXT_NORM);
				ASSERT(!isnullstartblock(prev.br_startblock));
				ASSERT(del.br_startblock ==
				       prev.br_startblock + prev.br_blockcount);
				if (prev.br_startoff < start) {
					mod = start - prev.br_startoff;
					prev.br_blockcount -= mod;
					prev.br_startblock += mod;
					prev.br_startoff = start;
				}
				prev.br_state = XFS_EXT_UNWRITTEN;
				lastx--;
				error = xfs_bmap_add_extent_unwritten_real(tp,
						ip, &lastx, &cur, &prev,
						firstblock, flist, &logflags);
				if (error)
					goto error0;
				goto nodelete;
			} else {
				ASSERT(del.br_state == XFS_EXT_NORM);
				del.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent_unwritten_real(tp,
						ip, &lastx, &cur, &del,
						firstblock, flist, &logflags);
				if (error)
					goto error0;
				goto nodelete;
			}
		}
		if (wasdel) {
			ASSERT(startblockval(del.br_startblock) > 0);
			/* Update realtime/data freespace, unreserve quota */
			if (isrt) {
				xfs_filblks_t rtexts;

				rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
				do_div(rtexts, mp->m_sb.sb_rextsize);
				xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
						(int64_t)rtexts, 0);
				(void)xfs_trans_reserve_quota_nblks(NULL,
					ip, -((long)del.br_blockcount), 0,
					XFS_QMOPT_RES_RTBLKS);
			} else {
				xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
						(int64_t)del.br_blockcount, 0);
				(void)xfs_trans_reserve_quota_nblks(NULL,
					ip, -((long)del.br_blockcount), 0,
					XFS_QMOPT_RES_REGBLKS);
			}
			ip->i_delayed_blks -= del.br_blockcount;
			if (cur)
				cur->bc_private.b.flags |=
					XFS_BTCUR_BPRV_WASDEL;
		} else if (cur)
			cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
		/*
		 * If it's the case where the directory code is running
		 * with no block reservation, and the deleted block is in
		 * the middle of its extent, and the resulting insert
		 * of an extent would cause transformation to btree format,
		 * then reject it.  The calling code will then swap
		 * blocks around instead.
		 * We have to do this now, rather than waiting for the
		 * conversion to btree format, since the transaction
		 * will be dirty.
		 */
		if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
		    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
		    XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
			XFS_IFORK_MAXEXT(ip, whichfork) &&
		    del.br_startoff > got.br_startoff &&
		    del.br_startoff + del.br_blockcount <
		    got.br_startoff + got.br_blockcount) {
			error = -ENOSPC;
			goto error0;
		}
		error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
				&tmp_logflags, whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
		/* Continue backwards from just before the piece we removed. */
		bno = del.br_startoff - 1;
nodelete:
		/*
		 * If not done go on to the next (previous) record.
		 */
		if (bno != (xfs_fileoff_t)-1 && bno >= start) {
			if (lastx >= 0) {
				ep = xfs_iext_get_ext(ifp, lastx);
				if (xfs_bmbt_get_startoff(ep) > bno) {
					if (--lastx >= 0)
						ep = xfs_iext_get_ext(ifp,
								      lastx);
				}
				xfs_bmbt_get_all(ep, &got);
			}
			extno++;
		}
	}
	/* Done when we've walked past the start of the range or off the list. */
	*done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
			&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * transform from btree to extents, give it cur
	 */
	else if (xfs_bmap_wants_extents(ip, whichfork)) {
		ASSERT(cur != NULL);
		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
			whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * transform from extents to local?
	 */
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		logflags &= ~xfs_ilog_fext(whichfork);
	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log inode even in the error case, if the transaction
	 * is dirty we'll need to shut down the filesystem.
	 */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (cur) {
		if (!error) {
			*firstblock = cur->bc_private.b.firstblock;
			cur->bc_private.b.allocated = 0;
		}
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	return error;
}
5411
5412 /*
5413  * Determine whether an extent shift can be accomplished by a merge with the
5414  * extent that precedes the target hole of the shift.
5415  */
5416 STATIC bool
5417 xfs_bmse_can_merge(
5418         struct xfs_bmbt_irec    *left,  /* preceding extent */
5419         struct xfs_bmbt_irec    *got,   /* current extent to shift */
5420         xfs_fileoff_t           shift)  /* shift fsb */
5421 {
5422         xfs_fileoff_t           startoff;
5423
5424         startoff = got->br_startoff - shift;
5425
5426         /*
5427          * The extent, once shifted, must be adjacent in-file and on-disk with
5428          * the preceding extent.
5429          */
5430         if ((left->br_startoff + left->br_blockcount != startoff) ||
5431             (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5432             (left->br_state != got->br_state) ||
5433             (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5434                 return false;
5435
5436         return true;
5437 }
5438
5439 /*
5440  * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5441  * hole in the file. If an extent shift would result in the extent being fully
5442  * adjacent to the extent that currently precedes the hole, we can merge with
5443  * the preceding extent rather than do the shift.
5444  *
5445  * This function assumes the caller has verified a shift-by-merge is possible
5446  * with the provided extents via xfs_bmse_can_merge().
5447  */
5448 STATIC int
5449 xfs_bmse_merge(
5450         struct xfs_inode                *ip,
5451         int                             whichfork,
5452         xfs_fileoff_t                   shift,          /* shift fsb */
5453         int                             current_ext,    /* idx of gotp */
5454         struct xfs_bmbt_rec_host        *gotp,          /* extent to shift */
5455         struct xfs_bmbt_rec_host        *leftp,         /* preceding extent */
5456         struct xfs_btree_cur            *cur,
5457         int                             *logflags)      /* output */
5458 {
5459         struct xfs_bmbt_irec            got;
5460         struct xfs_bmbt_irec            left;
5461         xfs_filblks_t                   blockcount;
5462         int                             error, i;
5463         struct xfs_mount                *mp = ip->i_mount;
5464
5465         xfs_bmbt_get_all(gotp, &got);
5466         xfs_bmbt_get_all(leftp, &left);
5467         blockcount = left.br_blockcount + got.br_blockcount;
5468
5469         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5470         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5471         ASSERT(xfs_bmse_can_merge(&left, &got, shift));
5472
5473         /*
5474          * Merge the in-core extents. Note that the host record pointers and
5475          * current_ext index are invalid once the extent has been removed via
5476          * xfs_iext_remove().
5477          */
5478         xfs_bmbt_set_blockcount(leftp, blockcount);
5479         xfs_iext_remove(ip, current_ext, 1, 0);
5480
5481         /*
5482          * Update the on-disk extent count, the btree if necessary and log the
5483          * inode.
5484          */
5485         XFS_IFORK_NEXT_SET(ip, whichfork,
5486                            XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5487         *logflags |= XFS_ILOG_CORE;
5488         if (!cur) {
5489                 *logflags |= XFS_ILOG_DEXT;
5490                 return 0;
5491         }
5492
5493         /* lookup and remove the extent to merge */
5494         error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
5495                                    got.br_blockcount, &i);
5496         if (error)
5497                 return error;
5498         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5499
5500         error = xfs_btree_delete(cur, &i);
5501         if (error)
5502                 return error;
5503         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5504
5505         /* lookup and update size of the previous extent */
5506         error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
5507                                    left.br_blockcount, &i);
5508         if (error)
5509                 return error;
5510         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5511
5512         left.br_blockcount = blockcount;
5513
5514         return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
5515                                left.br_blockcount, left.br_state);
5516 }
5517
5518 /*
5519  * Shift a single extent.
5520  */
5521 STATIC int
5522 xfs_bmse_shift_one(
5523         struct xfs_inode                *ip,
5524         int                             whichfork,
5525         xfs_fileoff_t                   offset_shift_fsb,
5526         int                             *current_ext,
5527         struct xfs_bmbt_rec_host        *gotp,
5528         struct xfs_btree_cur            *cur,
5529         int                             *logflags)
5530 {
5531         struct xfs_ifork                *ifp;
5532         struct xfs_mount                *mp;
5533         xfs_fileoff_t                   startoff;
5534         struct xfs_bmbt_rec_host        *leftp;
5535         struct xfs_bmbt_irec            got;
5536         struct xfs_bmbt_irec            left;
5537         int                             error;
5538         int                             i;
5539
5540         mp = ip->i_mount;
5541         ifp = XFS_IFORK_PTR(ip, whichfork);
5542
5543         xfs_bmbt_get_all(gotp, &got);
5544         startoff = got.br_startoff - offset_shift_fsb;
5545
5546         /* delalloc extents should be prevented by caller */
5547         XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
5548
5549         /*
5550          * Check for merge if we've got an extent to the left, otherwise make
5551          * sure there's enough room at the start of the file for the shift.
5552          */
5553         if (*current_ext) {
5554                 /* grab the left extent and check for a large enough hole */
5555                 leftp = xfs_iext_get_ext(ifp, *current_ext - 1);
5556                 xfs_bmbt_get_all(leftp, &left);
5557
5558                 if (startoff < left.br_startoff + left.br_blockcount)
5559                         return -EINVAL;
5560
5561                 /* check whether to merge the extent or shift it down */
5562                 if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) {
5563                         return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5564                                               *current_ext, gotp, leftp, cur,
5565                                               logflags);
5566                 }
5567         } else if (got.br_startoff < offset_shift_fsb)
5568                 return -EINVAL;
5569
5570         /*
5571          * Increment the extent index for the next iteration, update the start
5572          * offset of the in-core extent and update the btree if applicable.
5573          */
5574         (*current_ext)++;
5575         xfs_bmbt_set_startoff(gotp, startoff);
5576         *logflags |= XFS_ILOG_CORE;
5577         if (!cur) {
5578                 *logflags |= XFS_ILOG_DEXT;
5579                 return 0;
5580         }
5581
5582         error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
5583                                    got.br_blockcount, &i);
5584         if (error)
5585                 return error;
5586         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5587
5588         got.br_startoff = startoff;
5589         return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
5590                                 got.br_blockcount, got.br_state);
5591 }
5592
5593 /*
5594  * Shift extent records to the left to cover a hole.
5595  *
5596  * The maximum number of extents to be shifted in a single operation is
5597  * @num_exts. @start_fsb specifies the file offset to start the shift and the
5598  * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
5599  * is the length by which each extent is shifted. If there is no hole to shift
5600  * the extents into, this will be considered invalid operation and we abort
5601  * immediately.
5602  */
5603 int
5604 xfs_bmap_shift_extents(
5605         struct xfs_trans        *tp,
5606         struct xfs_inode        *ip,
5607         xfs_fileoff_t           start_fsb,
5608         xfs_fileoff_t           offset_shift_fsb,
5609         int                     *done,
5610         xfs_fileoff_t           *next_fsb,
5611         xfs_fsblock_t           *firstblock,
5612         struct xfs_bmap_free    *flist,
5613         int                     num_exts)
5614 {
5615         struct xfs_btree_cur            *cur = NULL;
5616         struct xfs_bmbt_rec_host        *gotp;
5617         struct xfs_bmbt_irec            got;
5618         struct xfs_mount                *mp = ip->i_mount;
5619         struct xfs_ifork                *ifp;
5620         xfs_extnum_t                    nexts = 0;
5621         xfs_extnum_t                    current_ext;
5622         int                             error = 0;
5623         int                             whichfork = XFS_DATA_FORK;
5624         int                             logflags = 0;
5625         int                             total_extents;
5626
5627         if (unlikely(XFS_TEST_ERROR(
5628             (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5629              XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5630              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5631                 XFS_ERROR_REPORT("xfs_bmap_shift_extents",
5632                                  XFS_ERRLEVEL_LOW, mp);
5633                 return -EFSCORRUPTED;
5634         }
5635
5636         if (XFS_FORCED_SHUTDOWN(mp))
5637                 return -EIO;
5638
5639         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5640         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5641
5642         ifp = XFS_IFORK_PTR(ip, whichfork);
5643         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5644                 /* Read in all the extents */
5645                 error = xfs_iread_extents(tp, ip, whichfork);
5646                 if (error)
5647                         return error;
5648         }
5649
5650         if (ifp->if_flags & XFS_IFBROOT) {
5651                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5652                 cur->bc_private.b.firstblock = *firstblock;
5653                 cur->bc_private.b.flist = flist;
5654                 cur->bc_private.b.flags = 0;
5655         }
5656
5657         /*
5658          * Look up the extent index for the fsb where we start shifting. We can
5659          * henceforth iterate with current_ext as extent list changes are locked
5660          * out via ilock.
5661          *
5662          * gotp can be null in 2 cases: 1) if there are no extents or 2)
5663          * start_fsb lies in a hole beyond which there are no extents. Either
5664          * way, we are done.
5665          */
5666         gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext);
5667         if (!gotp) {
5668                 *done = 1;
5669                 goto del_cursor;
5670         }
5671
5672         /*
5673          * There may be delalloc extents in the data fork before the range we
5674          * are collapsing out, so we cannot use the count of real extents here.
5675          * Instead we have to calculate it from the incore fork.
5676          */
5677         total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5678         while (nexts++ < num_exts && current_ext < total_extents) {
5679                 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5680                                         &current_ext, gotp, cur, &logflags);
5681                 if (error)
5682                         goto del_cursor;
5683
5684                 /* update total extent count and grab the next record */
5685                 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5686                 if (current_ext >= total_extents)
5687                         break;
5688                 gotp = xfs_iext_get_ext(ifp, current_ext);
5689         }
5690
5691         /* Check if we are done */
5692         if (current_ext == total_extents) {
5693                 *done = 1;
5694         } else if (next_fsb) {
5695                 xfs_bmbt_get_all(gotp, &got);
5696                 *next_fsb = got.br_startoff;
5697         }
5698
5699 del_cursor:
5700         if (cur)
5701                 xfs_btree_del_cursor(cur,
5702                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5703
5704         if (logflags)
5705                 xfs_trans_log_inode(tp, ip, logflags);
5706
5707         return error;
5708 }