Merge tag 'docs-5.6-2' of git://git.lwn.net/linux

[linux.git] / fs / xfs / xfs_buf.c
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c

index 0abba171aa89b6c948fc795201d2eee9734d6e06..217e4f82a44a2601351c8e46f7503512b9a5cc31 100644 (file)
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -198,20 +198,22 @@ xfs_buf_free_maps(
         }
  }
  
-static struct xfs_buf *
+static int
  _xfs_buf_alloc(
         struct xfs_buftarg      *target,
         struct xfs_buf_map      *map,
         int                     nmaps,
-       xfs_buf_flags_t         flags)
+       xfs_buf_flags_t         flags,
+       struct xfs_buf          **bpp)
  {
         struct xfs_buf          *bp;
         int                     error;
         int                     i;
  
+       *bpp = NULL;
         bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
         if (unlikely(!bp))
-               return NULL;
+               return -ENOMEM;
  
         /*
          * We don't want certain flags to appear in b_flags unless they are
@@ -238,8 +240,8 @@ _xfs_buf_alloc(
          */
         error = xfs_buf_get_maps(bp, nmaps);
         if (error)  {
-               kmem_zone_free(xfs_buf_zone, bp);
-               return NULL;
+               kmem_cache_free(xfs_buf_zone, bp);
+               return error;
         }
  
         bp->b_bn = map[0].bm_bn;
@@ -256,7 +258,8 @@ _xfs_buf_alloc(
         XFS_STATS_INC(bp->b_mount, xb_create);
         trace_xfs_buf_init(bp, _RET_IP_);
  
-       return bp;
+       *bpp = bp;
+       return 0;
  }
  
  /*
@@ -304,7 +307,7 @@ _xfs_buf_free_pages(
   *     The buffer must not be on any hash - use xfs_buf_rele instead for
   *     hashed and refcounted buffers
   */
-void
+static void
  xfs_buf_free(
         xfs_buf_t               *bp)
  {
@@ -328,7 +331,7 @@ xfs_buf_free(
                 kmem_free(bp->b_addr);
         _xfs_buf_free_pages(bp);
         xfs_buf_free_maps(bp);
-       kmem_zone_free(xfs_buf_zone, bp);
+       kmem_cache_free(xfs_buf_zone, bp);
  }
  
  /*
@@ -461,7 +464,7 @@ _xfs_buf_map_pages(
                 unsigned nofs_flag;
  
                 /*
-                * vm_map_ram() will allocate auxillary structures (e.g.
+                * vm_map_ram() will allocate auxiliary structures (e.g.
                  * pagetables) with GFP_KERNEL, yet we are likely to be under
                  * GFP_NOFS context here. Hence we need to tell memory reclaim
                  * that we are in such a context via PF_MEMALLOC_NOFS to prevent
@@ -682,53 +685,39 @@ xfs_buf_incore(
   * cache hits, as metadata intensive workloads will see 3 orders of magnitude
   * more hits than misses.
   */
-struct xfs_buf *
+int
  xfs_buf_get_map(
         struct xfs_buftarg      *target,
         struct xfs_buf_map      *map,
         int                     nmaps,
-       xfs_buf_flags_t         flags)
+       xfs_buf_flags_t         flags,
+       struct xfs_buf          **bpp)
  {
         struct xfs_buf          *bp;
         struct xfs_buf          *new_bp;
         int                     error = 0;
  
+       *bpp = NULL;
         error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
-
-       switch (error) {
-       case 0:
-               /* cache hit */
+       if (!error)
                 goto found;
-       case -EAGAIN:
-               /* cache hit, trylock failure, caller handles failure */
-               ASSERT(flags & XBF_TRYLOCK);
-               return NULL;
-       case -ENOENT:
-               /* cache miss, go for insert */
-               break;
-       case -EFSCORRUPTED:
-       default:
-               /*
-                * None of the higher layers understand failure types
-                * yet, so return NULL to signal a fatal lookup error.
-                */
-               return NULL;
-       }
+       if (error != -ENOENT)
+               return error;
  
-       new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
-       if (unlikely(!new_bp))
-               return NULL;
+       error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
+       if (error)
+               return error;
  
         error = xfs_buf_allocate_memory(new_bp, flags);
         if (error) {
                 xfs_buf_free(new_bp);
-               return NULL;
+               return error;
         }
  
         error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
         if (error) {
                 xfs_buf_free(new_bp);
-               return NULL;
+               return error;
         }
  
         if (bp != new_bp)
@@ -741,7 +730,7 @@ xfs_buf_get_map(
                         xfs_warn(target->bt_mount,
                                 "%s: failed to map pagesn", __func__);
                         xfs_buf_relse(bp);
-                       return NULL;
+                       return error;
                 }
         }
  
@@ -754,7 +743,8 @@ xfs_buf_get_map(
  
         XFS_STATS_INC(target->bt_mount, xb_get);
         trace_xfs_buf_get(bp, flags, _RET_IP_);
-       return bp;
+       *bpp = bp;
+       return 0;
  }
  
  STATIC int
@@ -806,46 +796,77 @@ xfs_buf_reverify(
         return bp->b_error;
  }
  
-xfs_buf_t *
+int
  xfs_buf_read_map(
         struct xfs_buftarg      *target,
         struct xfs_buf_map      *map,
         int                     nmaps,
         xfs_buf_flags_t         flags,
-       const struct xfs_buf_ops *ops)
+       struct xfs_buf          **bpp,
+       const struct xfs_buf_ops *ops,
+       xfs_failaddr_t          fa)
  {
         struct xfs_buf          *bp;
+       int                     error;
  
         flags |= XBF_READ;
+       *bpp = NULL;
  
-       bp = xfs_buf_get_map(target, map, nmaps, flags);
-       if (!bp)
-               return NULL;
+       error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
+       if (error)
+               return error;
  
         trace_xfs_buf_read(bp, flags, _RET_IP_);
  
         if (!(bp->b_flags & XBF_DONE)) {
+               /* Initiate the buffer read and wait. */
                 XFS_STATS_INC(target->bt_mount, xb_get_read);
                 bp->b_ops = ops;
-               _xfs_buf_read(bp, flags);
-               return bp;
+               error = _xfs_buf_read(bp, flags);
+
+               /* Readahead iodone already dropped the buffer, so exit. */
+               if (flags & XBF_ASYNC)
+                       return 0;
+       } else {
+               /* Buffer already read; all we need to do is check it. */
+               error = xfs_buf_reverify(bp, ops);
+
+               /* Readahead already finished; drop the buffer and exit. */
+               if (flags & XBF_ASYNC) {
+                       xfs_buf_relse(bp);
+                       return 0;
+               }
+
+               /* We do not want read in the flags */
+               bp->b_flags &= ~XBF_READ;
+               ASSERT(bp->b_ops != NULL || ops == NULL);
         }
  
-       xfs_buf_reverify(bp, ops);
+       /*
+        * If we've had a read error, then the contents of the buffer are
+        * invalid and should not be used. To ensure that a followup read tries
+        * to pull the buffer from disk again, we clear the XBF_DONE flag and
+        * mark the buffer stale. This ensures that anyone who has a current
+        * reference to the buffer will interpret it's contents correctly and
+        * future cache lookups will also treat it as an empty, uninitialised
+        * buffer.
+        */
+       if (error) {
+               if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
+                       xfs_buf_ioerror_alert(bp, fa);
  
-       if (flags & XBF_ASYNC) {
-               /*
-                * Read ahead call which is already satisfied,
-                * drop the buffer
-                */
+               bp->b_flags &= ~XBF_DONE;
+               xfs_buf_stale(bp);
                 xfs_buf_relse(bp);
-               return NULL;
+
+               /* bad CRC means corrupted metadata */
+               if (error == -EFSBADCRC)
+                       error = -EFSCORRUPTED;
+               return error;
         }
  
-       /* We do not want read in the flags */
-       bp->b_flags &= ~XBF_READ;
-       ASSERT(bp->b_ops != NULL || ops == NULL);
-       return bp;
+       *bpp = bp;
+       return 0;
  }
  
  /*
@@ -859,11 +880,14 @@ xfs_buf_readahead_map(
         int                     nmaps,
         const struct xfs_buf_ops *ops)
  {
+       struct xfs_buf          *bp;
+
         if (bdi_read_congested(target->bt_bdev->bd_bdi))
                 return;
  
         xfs_buf_read_map(target, map, nmaps,
-                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
+                    XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
+                    __this_address);
  }
  
  /*
@@ -880,12 +904,13 @@ xfs_buf_read_uncached(
         const struct xfs_buf_ops *ops)
  {
         struct xfs_buf          *bp;
+       int                     error;
  
         *bpp = NULL;
  
-       bp = xfs_buf_get_uncached(target, numblks, flags);
-       if (!bp)
-               return -ENOMEM;
+       error = xfs_buf_get_uncached(target, numblks, flags, &bp);
+       if (error)
+               return error;
  
         /* set up the buffer for a read IO */
         ASSERT(bp->b_map_count == 1);
@@ -896,7 +921,7 @@ xfs_buf_read_uncached(
  
         xfs_buf_submit(bp);
         if (bp->b_error) {
-               int     error = bp->b_error;
+               error = bp->b_error;
                 xfs_buf_relse(bp);
                 return error;
         }
@@ -905,20 +930,23 @@ xfs_buf_read_uncached(
         return 0;
  }
  
-xfs_buf_t *
+int
  xfs_buf_get_uncached(
         struct xfs_buftarg      *target,
         size_t                  numblks,
-       int                     flags)
+       int                     flags,
+       struct xfs_buf          **bpp)
  {
         unsigned long           page_count;
         int                     error, i;
         struct xfs_buf          *bp;
         DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
  
+       *bpp = NULL;
+
         /* flags might contain irrelevant bits, pass only what we care about */
-       bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
-       if (unlikely(bp == NULL))
+       error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
+       if (error)
                 goto fail;
  
         page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
@@ -928,8 +956,10 @@ xfs_buf_get_uncached(
  
         for (i = 0; i < page_count; i++) {
                 bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-               if (!bp->b_pages[i])
+               if (!bp->b_pages[i]) {
+                       error = -ENOMEM;
                         goto fail_free_mem;
+               }
         }
         bp->b_flags |= _XBF_PAGES;
  
@@ -941,7 +971,8 @@ xfs_buf_get_uncached(
         }
  
         trace_xfs_buf_get_uncached(bp, _RET_IP_);
-       return bp;
+       *bpp = bp;
+       return 0;
  
   fail_free_mem:
         while (--i >= 0)
@@ -949,9 +980,9 @@ xfs_buf_get_uncached(
         _xfs_buf_free_pages(bp);
   fail_free_buf:
         xfs_buf_free_maps(bp);
-       kmem_zone_free(xfs_buf_zone, bp);
+       kmem_cache_free(xfs_buf_zone, bp);
   fail:
-       return NULL;
+       return error;
  }
  
  /*
@@ -1205,10 +1236,10 @@ __xfs_buf_ioerror(
  void
  xfs_buf_ioerror_alert(
         struct xfs_buf          *bp,
-       const char              *func)
+       xfs_failaddr_t          func)
  {
         xfs_alert(bp->b_mount,
-"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
+"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
                         func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
                         -bp->b_error);
  }
@@ -1261,8 +1292,7 @@ xfs_buf_ioapply_map(
         int             map,
         int             *buf_offset,
         int             *count,
-       int             op,
-       int             op_flags)
+       int             op)
  {
         int             page_index;
         int             total_nr_pages = bp->b_page_count;
@@ -1297,7 +1327,7 @@ xfs_buf_ioapply_map(
         bio->bi_iter.bi_sector = sector;
         bio->bi_end_io = xfs_buf_bio_end_io;
         bio->bi_private = bp;
-       bio_set_op_attrs(bio, op, op_flags);
+       bio->bi_opf = op;
  
         for (; size && nr_pages; nr_pages--, page_index++) {
                 int     rbytes, nbytes = PAGE_SIZE - offset;
@@ -1342,7 +1372,6 @@ _xfs_buf_ioapply(
  {
         struct blk_plug plug;
         int             op;
-       int             op_flags = 0;
         int             offset;
         int             size;
         int             i;
@@ -1384,15 +1413,14 @@ _xfs_buf_ioapply(
                                 dump_stack();
                         }
                 }
-       } else if (bp->b_flags & XBF_READ_AHEAD) {
-               op = REQ_OP_READ;
-               op_flags = REQ_RAHEAD;
         } else {
                 op = REQ_OP_READ;
+               if (bp->b_flags & XBF_READ_AHEAD)
+                       op |= REQ_RAHEAD;
         }
  
         /* we only use the buffer cache for meta-data */
-       op_flags |= REQ_META;
+       op |= REQ_META;
  
         /*
          * Walk all the vectors issuing IO on them. Set up the initial offset
@@ -1404,7 +1432,7 @@ _xfs_buf_ioapply(
         size = BBTOB(bp->b_length);
         blk_start_plug(&plug);
         for (i = 0; i < bp->b_map_count; i++) {
-               xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
+               xfs_buf_ioapply_map(bp, i, &offset, &size, op);
                 if (bp->b_error)
                         break;
                 if (size <= 0)
@@ -2063,8 +2091,9 @@ xfs_buf_delwri_pushbuf(
  int __init
  xfs_buf_init(void)
  {
-       xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
-                                               KM_ZONE_HWALIGN, NULL);
+       xfs_buf_zone = kmem_cache_create("xfs_buf",
+                                        sizeof(struct xfs_buf), 0,
+                                        SLAB_HWCACHE_ALIGN, NULL);
         if (!xfs_buf_zone)
                 goto out;
  
@@ -2077,7 +2106,7 @@ xfs_buf_init(void)
  void
  xfs_buf_terminate(void)
  {
-       kmem_zone_destroy(xfs_buf_zone);
+       kmem_cache_destroy(xfs_buf_zone);
  }
  
  void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)