]> asedeno.scripts.mit.edu Git - linux.git/blob - fs/btrfs/delayed-inode.c
btrfs: convert to new i_version API
[linux.git] / fs / btrfs / delayed-inode.c
1 /*
2  * Copyright (C) 2011 Fujitsu.  All rights reserved.
3  * Written by Miao Xie <miaox@cn.fujitsu.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 021110-1307, USA.
18  */
19
20 #include <linux/slab.h>
21 #include <linux/iversion.h>
22 #include "delayed-inode.h"
23 #include "disk-io.h"
24 #include "transaction.h"
25 #include "ctree.h"
26
27 #define BTRFS_DELAYED_WRITEBACK         512
28 #define BTRFS_DELAYED_BACKGROUND        128
29 #define BTRFS_DELAYED_BATCH             16
30
31 static struct kmem_cache *delayed_node_cache;
32
33 int __init btrfs_delayed_inode_init(void)
34 {
35         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
36                                         sizeof(struct btrfs_delayed_node),
37                                         0,
38                                         SLAB_MEM_SPREAD,
39                                         NULL);
40         if (!delayed_node_cache)
41                 return -ENOMEM;
42         return 0;
43 }
44
45 void btrfs_delayed_inode_exit(void)
46 {
47         kmem_cache_destroy(delayed_node_cache);
48 }
49
50 static inline void btrfs_init_delayed_node(
51                                 struct btrfs_delayed_node *delayed_node,
52                                 struct btrfs_root *root, u64 inode_id)
53 {
54         delayed_node->root = root;
55         delayed_node->inode_id = inode_id;
56         refcount_set(&delayed_node->refs, 0);
57         delayed_node->ins_root = RB_ROOT;
58         delayed_node->del_root = RB_ROOT;
59         mutex_init(&delayed_node->mutex);
60         INIT_LIST_HEAD(&delayed_node->n_list);
61         INIT_LIST_HEAD(&delayed_node->p_list);
62 }
63
64 static inline int btrfs_is_continuous_delayed_item(
65                                         struct btrfs_delayed_item *item1,
66                                         struct btrfs_delayed_item *item2)
67 {
68         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
69             item1->key.objectid == item2->key.objectid &&
70             item1->key.type == item2->key.type &&
71             item1->key.offset + 1 == item2->key.offset)
72                 return 1;
73         return 0;
74 }
75
76 static struct btrfs_delayed_node *btrfs_get_delayed_node(
77                 struct btrfs_inode *btrfs_inode)
78 {
79         struct btrfs_root *root = btrfs_inode->root;
80         u64 ino = btrfs_ino(btrfs_inode);
81         struct btrfs_delayed_node *node;
82
83         node = READ_ONCE(btrfs_inode->delayed_node);
84         if (node) {
85                 refcount_inc(&node->refs);
86                 return node;
87         }
88
89         spin_lock(&root->inode_lock);
90         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
91         if (node) {
92                 if (btrfs_inode->delayed_node) {
93                         refcount_inc(&node->refs);      /* can be accessed */
94                         BUG_ON(btrfs_inode->delayed_node != node);
95                         spin_unlock(&root->inode_lock);
96                         return node;
97                 }
98                 btrfs_inode->delayed_node = node;
99                 /* can be accessed and cached in the inode */
100                 refcount_add(2, &node->refs);
101                 spin_unlock(&root->inode_lock);
102                 return node;
103         }
104         spin_unlock(&root->inode_lock);
105
106         return NULL;
107 }
108
109 /* Will return either the node or PTR_ERR(-ENOMEM) */
110 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
111                 struct btrfs_inode *btrfs_inode)
112 {
113         struct btrfs_delayed_node *node;
114         struct btrfs_root *root = btrfs_inode->root;
115         u64 ino = btrfs_ino(btrfs_inode);
116         int ret;
117
118 again:
119         node = btrfs_get_delayed_node(btrfs_inode);
120         if (node)
121                 return node;
122
123         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
124         if (!node)
125                 return ERR_PTR(-ENOMEM);
126         btrfs_init_delayed_node(node, root, ino);
127
128         /* cached in the btrfs inode and can be accessed */
129         refcount_set(&node->refs, 2);
130
131         ret = radix_tree_preload(GFP_NOFS);
132         if (ret) {
133                 kmem_cache_free(delayed_node_cache, node);
134                 return ERR_PTR(ret);
135         }
136
137         spin_lock(&root->inode_lock);
138         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
139         if (ret == -EEXIST) {
140                 spin_unlock(&root->inode_lock);
141                 kmem_cache_free(delayed_node_cache, node);
142                 radix_tree_preload_end();
143                 goto again;
144         }
145         btrfs_inode->delayed_node = node;
146         spin_unlock(&root->inode_lock);
147         radix_tree_preload_end();
148
149         return node;
150 }
151
152 /*
153  * Call it when holding delayed_node->mutex
154  *
155  * If mod = 1, add this node into the prepared list.
156  */
157 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
158                                      struct btrfs_delayed_node *node,
159                                      int mod)
160 {
161         spin_lock(&root->lock);
162         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
163                 if (!list_empty(&node->p_list))
164                         list_move_tail(&node->p_list, &root->prepare_list);
165                 else if (mod)
166                         list_add_tail(&node->p_list, &root->prepare_list);
167         } else {
168                 list_add_tail(&node->n_list, &root->node_list);
169                 list_add_tail(&node->p_list, &root->prepare_list);
170                 refcount_inc(&node->refs);      /* inserted into list */
171                 root->nodes++;
172                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
173         }
174         spin_unlock(&root->lock);
175 }
176
177 /* Call it when holding delayed_node->mutex */
178 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
179                                        struct btrfs_delayed_node *node)
180 {
181         spin_lock(&root->lock);
182         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
183                 root->nodes--;
184                 refcount_dec(&node->refs);      /* not in the list */
185                 list_del_init(&node->n_list);
186                 if (!list_empty(&node->p_list))
187                         list_del_init(&node->p_list);
188                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
189         }
190         spin_unlock(&root->lock);
191 }
192
193 static struct btrfs_delayed_node *btrfs_first_delayed_node(
194                         struct btrfs_delayed_root *delayed_root)
195 {
196         struct list_head *p;
197         struct btrfs_delayed_node *node = NULL;
198
199         spin_lock(&delayed_root->lock);
200         if (list_empty(&delayed_root->node_list))
201                 goto out;
202
203         p = delayed_root->node_list.next;
204         node = list_entry(p, struct btrfs_delayed_node, n_list);
205         refcount_inc(&node->refs);
206 out:
207         spin_unlock(&delayed_root->lock);
208
209         return node;
210 }
211
212 static struct btrfs_delayed_node *btrfs_next_delayed_node(
213                                                 struct btrfs_delayed_node *node)
214 {
215         struct btrfs_delayed_root *delayed_root;
216         struct list_head *p;
217         struct btrfs_delayed_node *next = NULL;
218
219         delayed_root = node->root->fs_info->delayed_root;
220         spin_lock(&delayed_root->lock);
221         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
222                 /* not in the list */
223                 if (list_empty(&delayed_root->node_list))
224                         goto out;
225                 p = delayed_root->node_list.next;
226         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
227                 goto out;
228         else
229                 p = node->n_list.next;
230
231         next = list_entry(p, struct btrfs_delayed_node, n_list);
232         refcount_inc(&next->refs);
233 out:
234         spin_unlock(&delayed_root->lock);
235
236         return next;
237 }
238
239 static void __btrfs_release_delayed_node(
240                                 struct btrfs_delayed_node *delayed_node,
241                                 int mod)
242 {
243         struct btrfs_delayed_root *delayed_root;
244
245         if (!delayed_node)
246                 return;
247
248         delayed_root = delayed_node->root->fs_info->delayed_root;
249
250         mutex_lock(&delayed_node->mutex);
251         if (delayed_node->count)
252                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
253         else
254                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
255         mutex_unlock(&delayed_node->mutex);
256
257         if (refcount_dec_and_test(&delayed_node->refs)) {
258                 bool free = false;
259                 struct btrfs_root *root = delayed_node->root;
260                 spin_lock(&root->inode_lock);
261                 if (refcount_read(&delayed_node->refs) == 0) {
262                         radix_tree_delete(&root->delayed_nodes_tree,
263                                           delayed_node->inode_id);
264                         free = true;
265                 }
266                 spin_unlock(&root->inode_lock);
267                 if (free)
268                         kmem_cache_free(delayed_node_cache, delayed_node);
269         }
270 }
271
/* Drop a reference on @node; calls __btrfs_release_delayed_node() with mod = 0. */
static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 0);
}
276
277 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
278                                         struct btrfs_delayed_root *delayed_root)
279 {
280         struct list_head *p;
281         struct btrfs_delayed_node *node = NULL;
282
283         spin_lock(&delayed_root->lock);
284         if (list_empty(&delayed_root->prepare_list))
285                 goto out;
286
287         p = delayed_root->prepare_list.next;
288         list_del_init(p);
289         node = list_entry(p, struct btrfs_delayed_node, p_list);
290         refcount_inc(&node->refs);
291 out:
292         spin_unlock(&delayed_root->lock);
293
294         return node;
295 }
296
/*
 * Drop a reference on @node; calls __btrfs_release_delayed_node() with
 * mod = 1, so a node that still has items goes back on the prepared list.
 */
static inline void btrfs_release_prepared_delayed_node(
					struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 1);
}
302
303 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
304 {
305         struct btrfs_delayed_item *item;
306         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
307         if (item) {
308                 item->data_len = data_len;
309                 item->ins_or_del = 0;
310                 item->bytes_reserved = 0;
311                 item->delayed_node = NULL;
312                 refcount_set(&item->refs, 1);
313         }
314         return item;
315 }
316
317 /*
318  * __btrfs_lookup_delayed_item - look up the delayed item by key
319  * @delayed_node: pointer to the delayed node
320  * @key:          the key to look up
321  * @prev:         used to store the prev item if the right item isn't found
322  * @next:         used to store the next item if the right item isn't found
323  *
324  * Note: if we don't find the right item, we will return the prev item and
325  * the next item.
326  */
327 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
328                                 struct rb_root *root,
329                                 struct btrfs_key *key,
330                                 struct btrfs_delayed_item **prev,
331                                 struct btrfs_delayed_item **next)
332 {
333         struct rb_node *node, *prev_node = NULL;
334         struct btrfs_delayed_item *delayed_item = NULL;
335         int ret = 0;
336
337         node = root->rb_node;
338
339         while (node) {
340                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
341                                         rb_node);
342                 prev_node = node;
343                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
344                 if (ret < 0)
345                         node = node->rb_right;
346                 else if (ret > 0)
347                         node = node->rb_left;
348                 else
349                         return delayed_item;
350         }
351
352         if (prev) {
353                 if (!prev_node)
354                         *prev = NULL;
355                 else if (ret < 0)
356                         *prev = delayed_item;
357                 else if ((node = rb_prev(prev_node)) != NULL) {
358                         *prev = rb_entry(node, struct btrfs_delayed_item,
359                                          rb_node);
360                 } else
361                         *prev = NULL;
362         }
363
364         if (next) {
365                 if (!prev_node)
366                         *next = NULL;
367                 else if (ret > 0)
368                         *next = delayed_item;
369                 else if ((node = rb_next(prev_node)) != NULL) {
370                         *next = rb_entry(node, struct btrfs_delayed_item,
371                                          rb_node);
372                 } else
373                         *next = NULL;
374         }
375         return NULL;
376 }
377
378 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
379                                         struct btrfs_delayed_node *delayed_node,
380                                         struct btrfs_key *key)
381 {
382         return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
383                                            NULL, NULL);
384 }
385
386 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
387                                     struct btrfs_delayed_item *ins,
388                                     int action)
389 {
390         struct rb_node **p, *node;
391         struct rb_node *parent_node = NULL;
392         struct rb_root *root;
393         struct btrfs_delayed_item *item;
394         int cmp;
395
396         if (action == BTRFS_DELAYED_INSERTION_ITEM)
397                 root = &delayed_node->ins_root;
398         else if (action == BTRFS_DELAYED_DELETION_ITEM)
399                 root = &delayed_node->del_root;
400         else
401                 BUG();
402         p = &root->rb_node;
403         node = &ins->rb_node;
404
405         while (*p) {
406                 parent_node = *p;
407                 item = rb_entry(parent_node, struct btrfs_delayed_item,
408                                  rb_node);
409
410                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
411                 if (cmp < 0)
412                         p = &(*p)->rb_right;
413                 else if (cmp > 0)
414                         p = &(*p)->rb_left;
415                 else
416                         return -EEXIST;
417         }
418
419         rb_link_node(node, parent_node, p);
420         rb_insert_color(node, root);
421         ins->delayed_node = delayed_node;
422         ins->ins_or_del = action;
423
424         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
425             action == BTRFS_DELAYED_INSERTION_ITEM &&
426             ins->key.offset >= delayed_node->index_cnt)
427                         delayed_node->index_cnt = ins->key.offset + 1;
428
429         delayed_node->count++;
430         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
431         return 0;
432 }
433
434 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
435                                               struct btrfs_delayed_item *item)
436 {
437         return __btrfs_add_delayed_item(node, item,
438                                         BTRFS_DELAYED_INSERTION_ITEM);
439 }
440
441 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
442                                              struct btrfs_delayed_item *item)
443 {
444         return __btrfs_add_delayed_item(node, item,
445                                         BTRFS_DELAYED_DELETION_ITEM);
446 }
447
448 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
449 {
450         int seq = atomic_inc_return(&delayed_root->items_seq);
451
452         /*
453          * atomic_dec_return implies a barrier for waitqueue_active
454          */
455         if ((atomic_dec_return(&delayed_root->items) <
456             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
457             waitqueue_active(&delayed_root->wait))
458                 wake_up(&delayed_root->wait);
459 }
460
461 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
462 {
463         struct rb_root *root;
464         struct btrfs_delayed_root *delayed_root;
465
466         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
467
468         BUG_ON(!delayed_root);
469         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
470                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
471
472         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
473                 root = &delayed_item->delayed_node->ins_root;
474         else
475                 root = &delayed_item->delayed_node->del_root;
476
477         rb_erase(&delayed_item->rb_node, root);
478         delayed_item->delayed_node->count--;
479
480         finish_one_item(delayed_root);
481 }
482
483 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
484 {
485         if (item) {
486                 __btrfs_remove_delayed_item(item);
487                 if (refcount_dec_and_test(&item->refs))
488                         kfree(item);
489         }
490 }
491
492 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
493                                         struct btrfs_delayed_node *delayed_node)
494 {
495         struct rb_node *p;
496         struct btrfs_delayed_item *item = NULL;
497
498         p = rb_first(&delayed_node->ins_root);
499         if (p)
500                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
501
502         return item;
503 }
504
505 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
506                                         struct btrfs_delayed_node *delayed_node)
507 {
508         struct rb_node *p;
509         struct btrfs_delayed_item *item = NULL;
510
511         p = rb_first(&delayed_node->del_root);
512         if (p)
513                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
514
515         return item;
516 }
517
518 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
519                                                 struct btrfs_delayed_item *item)
520 {
521         struct rb_node *p;
522         struct btrfs_delayed_item *next = NULL;
523
524         p = rb_next(&item->rb_node);
525         if (p)
526                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
527
528         return next;
529 }
530
531 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
532                                                struct btrfs_fs_info *fs_info,
533                                                struct btrfs_delayed_item *item)
534 {
535         struct btrfs_block_rsv *src_rsv;
536         struct btrfs_block_rsv *dst_rsv;
537         u64 num_bytes;
538         int ret;
539
540         if (!trans->bytes_reserved)
541                 return 0;
542
543         src_rsv = trans->block_rsv;
544         dst_rsv = &fs_info->delayed_block_rsv;
545
546         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
547         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
548         if (!ret) {
549                 trace_btrfs_space_reservation(fs_info, "delayed_item",
550                                               item->key.objectid,
551                                               num_bytes, 1);
552                 item->bytes_reserved = num_bytes;
553         }
554
555         return ret;
556 }
557
558 static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
559                                                 struct btrfs_delayed_item *item)
560 {
561         struct btrfs_block_rsv *rsv;
562
563         if (!item->bytes_reserved)
564                 return;
565
566         rsv = &fs_info->delayed_block_rsv;
567         trace_btrfs_space_reservation(fs_info, "delayed_item",
568                                       item->key.objectid, item->bytes_reserved,
569                                       0);
570         btrfs_block_rsv_release(fs_info, rsv,
571                                 item->bytes_reserved);
572 }
573
574 static int btrfs_delayed_inode_reserve_metadata(
575                                         struct btrfs_trans_handle *trans,
576                                         struct btrfs_root *root,
577                                         struct btrfs_inode *inode,
578                                         struct btrfs_delayed_node *node)
579 {
580         struct btrfs_fs_info *fs_info = root->fs_info;
581         struct btrfs_block_rsv *src_rsv;
582         struct btrfs_block_rsv *dst_rsv;
583         u64 num_bytes;
584         int ret;
585
586         src_rsv = trans->block_rsv;
587         dst_rsv = &fs_info->delayed_block_rsv;
588
589         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
590
591         /*
592          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
593          * which doesn't reserve space for speed.  This is a problem since we
594          * still need to reserve space for this update, so try to reserve the
595          * space.
596          *
597          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
598          * we always reserve enough to update the inode item.
599          */
600         if (!src_rsv || (!trans->bytes_reserved &&
601                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
602                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
603                                           BTRFS_RESERVE_NO_FLUSH);
604                 /*
605                  * Since we're under a transaction reserve_metadata_bytes could
606                  * try to commit the transaction which will make it return
607                  * EAGAIN to make us stop the transaction we have, so return
608                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
609                  */
610                 if (ret == -EAGAIN)
611                         ret = -ENOSPC;
612                 if (!ret) {
613                         node->bytes_reserved = num_bytes;
614                         trace_btrfs_space_reservation(fs_info,
615                                                       "delayed_inode",
616                                                       btrfs_ino(inode),
617                                                       num_bytes, 1);
618                 }
619                 return ret;
620         }
621
622         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
623         if (!ret) {
624                 trace_btrfs_space_reservation(fs_info, "delayed_inode",
625                                               btrfs_ino(inode), num_bytes, 1);
626                 node->bytes_reserved = num_bytes;
627         }
628
629         return ret;
630 }
631
632 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
633                                                 struct btrfs_delayed_node *node)
634 {
635         struct btrfs_block_rsv *rsv;
636
637         if (!node->bytes_reserved)
638                 return;
639
640         rsv = &fs_info->delayed_block_rsv;
641         trace_btrfs_space_reservation(fs_info, "delayed_inode",
642                                       node->inode_id, node->bytes_reserved, 0);
643         btrfs_block_rsv_release(fs_info, rsv,
644                                 node->bytes_reserved);
645         node->bytes_reserved = 0;
646 }
647
648 /*
649  * This helper will insert some continuous items into the same leaf according
650  * to the free space of the leaf.
651  */
652 static int btrfs_batch_insert_items(struct btrfs_root *root,
653                                     struct btrfs_path *path,
654                                     struct btrfs_delayed_item *item)
655 {
656         struct btrfs_fs_info *fs_info = root->fs_info;
657         struct btrfs_delayed_item *curr, *next;
658         int free_space;
659         int total_data_size = 0, total_size = 0;
660         struct extent_buffer *leaf;
661         char *data_ptr;
662         struct btrfs_key *keys;
663         u32 *data_size;
664         struct list_head head;
665         int slot;
666         int nitems;
667         int i;
668         int ret = 0;
669
670         BUG_ON(!path->nodes[0]);
671
672         leaf = path->nodes[0];
673         free_space = btrfs_leaf_free_space(fs_info, leaf);
674         INIT_LIST_HEAD(&head);
675
676         next = item;
677         nitems = 0;
678
679         /*
680          * count the number of the continuous items that we can insert in batch
681          */
682         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
683                free_space) {
684                 total_data_size += next->data_len;
685                 total_size += next->data_len + sizeof(struct btrfs_item);
686                 list_add_tail(&next->tree_list, &head);
687                 nitems++;
688
689                 curr = next;
690                 next = __btrfs_next_delayed_item(curr);
691                 if (!next)
692                         break;
693
694                 if (!btrfs_is_continuous_delayed_item(curr, next))
695                         break;
696         }
697
698         if (!nitems) {
699                 ret = 0;
700                 goto out;
701         }
702
703         /*
704          * we need allocate some memory space, but it might cause the task
705          * to sleep, so we set all locked nodes in the path to blocking locks
706          * first.
707          */
708         btrfs_set_path_blocking(path);
709
710         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
711         if (!keys) {
712                 ret = -ENOMEM;
713                 goto out;
714         }
715
716         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
717         if (!data_size) {
718                 ret = -ENOMEM;
719                 goto error;
720         }
721
722         /* get keys of all the delayed items */
723         i = 0;
724         list_for_each_entry(next, &head, tree_list) {
725                 keys[i] = next->key;
726                 data_size[i] = next->data_len;
727                 i++;
728         }
729
730         /* reset all the locked nodes in the patch to spinning locks. */
731         btrfs_clear_path_blocking(path, NULL, 0);
732
733         /* insert the keys of the items */
734         setup_items_for_insert(root, path, keys, data_size,
735                                total_data_size, total_size, nitems);
736
737         /* insert the dir index items */
738         slot = path->slots[0];
739         list_for_each_entry_safe(curr, next, &head, tree_list) {
740                 data_ptr = btrfs_item_ptr(leaf, slot, char);
741                 write_extent_buffer(leaf, &curr->data,
742                                     (unsigned long)data_ptr,
743                                     curr->data_len);
744                 slot++;
745
746                 btrfs_delayed_item_release_metadata(fs_info, curr);
747
748                 list_del(&curr->tree_list);
749                 btrfs_release_delayed_item(curr);
750         }
751
752 error:
753         kfree(data_size);
754         kfree(keys);
755 out:
756         return ret;
757 }
758
759 /*
760  * This helper can just do simple insertion that needn't extend item for new
761  * data, such as directory name index insertion, inode insertion.
762  */
763 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
764                                      struct btrfs_root *root,
765                                      struct btrfs_path *path,
766                                      struct btrfs_delayed_item *delayed_item)
767 {
768         struct btrfs_fs_info *fs_info = root->fs_info;
769         struct extent_buffer *leaf;
770         char *ptr;
771         int ret;
772
773         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
774                                       delayed_item->data_len);
775         if (ret < 0 && ret != -EEXIST)
776                 return ret;
777
778         leaf = path->nodes[0];
779
780         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
781
782         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
783                             delayed_item->data_len);
784         btrfs_mark_buffer_dirty(leaf);
785
786         btrfs_delayed_item_release_metadata(fs_info, delayed_item);
787         return 0;
788 }
789
790 /*
791  * we insert an item first, then if there are some continuous items, we try
792  * to insert those items into the same leaf.
793  */
794 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
795                                       struct btrfs_path *path,
796                                       struct btrfs_root *root,
797                                       struct btrfs_delayed_node *node)
798 {
799         struct btrfs_delayed_item *curr, *prev;
800         int ret = 0;
801
802 do_again:
803         mutex_lock(&node->mutex);
804         curr = __btrfs_first_delayed_insertion_item(node);
805         if (!curr)
806                 goto insert_end;
807
808         ret = btrfs_insert_delayed_item(trans, root, path, curr);
809         if (ret < 0) {
810                 btrfs_release_path(path);
811                 goto insert_end;
812         }
813
814         prev = curr;
815         curr = __btrfs_next_delayed_item(prev);
816         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
817                 /* insert the continuous items into the same leaf */
818                 path->slots[0]++;
819                 btrfs_batch_insert_items(root, path, curr);
820         }
821         btrfs_release_delayed_item(prev);
822         btrfs_mark_buffer_dirty(path->nodes[0]);
823
824         btrfs_release_path(path);
825         mutex_unlock(&node->mutex);
826         goto do_again;
827
828 insert_end:
829         mutex_unlock(&node->mutex);
830         return ret;
831 }
832
833 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
834                                     struct btrfs_root *root,
835                                     struct btrfs_path *path,
836                                     struct btrfs_delayed_item *item)
837 {
838         struct btrfs_fs_info *fs_info = root->fs_info;
839         struct btrfs_delayed_item *curr, *next;
840         struct extent_buffer *leaf;
841         struct btrfs_key key;
842         struct list_head head;
843         int nitems, i, last_item;
844         int ret = 0;
845
846         BUG_ON(!path->nodes[0]);
847
848         leaf = path->nodes[0];
849
850         i = path->slots[0];
851         last_item = btrfs_header_nritems(leaf) - 1;
852         if (i > last_item)
853                 return -ENOENT; /* FIXME: Is errno suitable? */
854
855         next = item;
856         INIT_LIST_HEAD(&head);
857         btrfs_item_key_to_cpu(leaf, &key, i);
858         nitems = 0;
859         /*
860          * count the number of the dir index items that we can delete in batch
861          */
862         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
863                 list_add_tail(&next->tree_list, &head);
864                 nitems++;
865
866                 curr = next;
867                 next = __btrfs_next_delayed_item(curr);
868                 if (!next)
869                         break;
870
871                 if (!btrfs_is_continuous_delayed_item(curr, next))
872                         break;
873
874                 i++;
875                 if (i > last_item)
876                         break;
877                 btrfs_item_key_to_cpu(leaf, &key, i);
878         }
879
880         if (!nitems)
881                 return 0;
882
883         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
884         if (ret)
885                 goto out;
886
887         list_for_each_entry_safe(curr, next, &head, tree_list) {
888                 btrfs_delayed_item_release_metadata(fs_info, curr);
889                 list_del(&curr->tree_list);
890                 btrfs_release_delayed_item(curr);
891         }
892
893 out:
894         return ret;
895 }
896
897 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
898                                       struct btrfs_path *path,
899                                       struct btrfs_root *root,
900                                       struct btrfs_delayed_node *node)
901 {
902         struct btrfs_delayed_item *curr, *prev;
903         int ret = 0;
904
905 do_again:
906         mutex_lock(&node->mutex);
907         curr = __btrfs_first_delayed_deletion_item(node);
908         if (!curr)
909                 goto delete_fail;
910
911         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
912         if (ret < 0)
913                 goto delete_fail;
914         else if (ret > 0) {
915                 /*
916                  * can't find the item which the node points to, so this node
917                  * is invalid, just drop it.
918                  */
919                 prev = curr;
920                 curr = __btrfs_next_delayed_item(prev);
921                 btrfs_release_delayed_item(prev);
922                 ret = 0;
923                 btrfs_release_path(path);
924                 if (curr) {
925                         mutex_unlock(&node->mutex);
926                         goto do_again;
927                 } else
928                         goto delete_fail;
929         }
930
931         btrfs_batch_delete_items(trans, root, path, curr);
932         btrfs_release_path(path);
933         mutex_unlock(&node->mutex);
934         goto do_again;
935
936 delete_fail:
937         btrfs_release_path(path);
938         mutex_unlock(&node->mutex);
939         return ret;
940 }
941
942 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
943 {
944         struct btrfs_delayed_root *delayed_root;
945
946         if (delayed_node &&
947             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
948                 BUG_ON(!delayed_node->root);
949                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
950                 delayed_node->count--;
951
952                 delayed_root = delayed_node->root->fs_info->delayed_root;
953                 finish_one_item(delayed_root);
954         }
955 }
956
957 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
958 {
959         struct btrfs_delayed_root *delayed_root;
960
961         ASSERT(delayed_node->root);
962         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
963         delayed_node->count--;
964
965         delayed_root = delayed_node->root->fs_info->delayed_root;
966         finish_one_item(delayed_root);
967 }
968
969 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
970                                         struct btrfs_root *root,
971                                         struct btrfs_path *path,
972                                         struct btrfs_delayed_node *node)
973 {
974         struct btrfs_fs_info *fs_info = root->fs_info;
975         struct btrfs_key key;
976         struct btrfs_inode_item *inode_item;
977         struct extent_buffer *leaf;
978         int mod;
979         int ret;
980
981         key.objectid = node->inode_id;
982         key.type = BTRFS_INODE_ITEM_KEY;
983         key.offset = 0;
984
985         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
986                 mod = -1;
987         else
988                 mod = 1;
989
990         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
991         if (ret > 0) {
992                 btrfs_release_path(path);
993                 return -ENOENT;
994         } else if (ret < 0) {
995                 return ret;
996         }
997
998         leaf = path->nodes[0];
999         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1000                                     struct btrfs_inode_item);
1001         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1002                             sizeof(struct btrfs_inode_item));
1003         btrfs_mark_buffer_dirty(leaf);
1004
1005         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1006                 goto no_iref;
1007
1008         path->slots[0]++;
1009         if (path->slots[0] >= btrfs_header_nritems(leaf))
1010                 goto search;
1011 again:
1012         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1013         if (key.objectid != node->inode_id)
1014                 goto out;
1015
1016         if (key.type != BTRFS_INODE_REF_KEY &&
1017             key.type != BTRFS_INODE_EXTREF_KEY)
1018                 goto out;
1019
1020         /*
1021          * Delayed iref deletion is for the inode who has only one link,
1022          * so there is only one iref. The case that several irefs are
1023          * in the same item doesn't exist.
1024          */
1025         btrfs_del_item(trans, root, path);
1026 out:
1027         btrfs_release_delayed_iref(node);
1028 no_iref:
1029         btrfs_release_path(path);
1030 err_out:
1031         btrfs_delayed_inode_release_metadata(fs_info, node);
1032         btrfs_release_delayed_inode(node);
1033
1034         return ret;
1035
1036 search:
1037         btrfs_release_path(path);
1038
1039         key.type = BTRFS_INODE_EXTREF_KEY;
1040         key.offset = -1;
1041         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1042         if (ret < 0)
1043                 goto err_out;
1044         ASSERT(ret);
1045
1046         ret = 0;
1047         leaf = path->nodes[0];
1048         path->slots[0]--;
1049         goto again;
1050 }
1051
1052 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1053                                              struct btrfs_root *root,
1054                                              struct btrfs_path *path,
1055                                              struct btrfs_delayed_node *node)
1056 {
1057         int ret;
1058
1059         mutex_lock(&node->mutex);
1060         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1061                 mutex_unlock(&node->mutex);
1062                 return 0;
1063         }
1064
1065         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1066         mutex_unlock(&node->mutex);
1067         return ret;
1068 }
1069
1070 static inline int
1071 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1072                                    struct btrfs_path *path,
1073                                    struct btrfs_delayed_node *node)
1074 {
1075         int ret;
1076
1077         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1078         if (ret)
1079                 return ret;
1080
1081         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1082         if (ret)
1083                 return ret;
1084
1085         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1086         return ret;
1087 }
1088
1089 /*
1090  * Called when committing the transaction.
1091  * Returns 0 on success.
1092  * Returns < 0 on error and returns with an aborted transaction with any
1093  * outstanding delayed items cleaned up.
1094  */
1095 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1096                                      struct btrfs_fs_info *fs_info, int nr)
1097 {
1098         struct btrfs_delayed_root *delayed_root;
1099         struct btrfs_delayed_node *curr_node, *prev_node;
1100         struct btrfs_path *path;
1101         struct btrfs_block_rsv *block_rsv;
1102         int ret = 0;
1103         bool count = (nr > 0);
1104
1105         if (trans->aborted)
1106                 return -EIO;
1107
1108         path = btrfs_alloc_path();
1109         if (!path)
1110                 return -ENOMEM;
1111         path->leave_spinning = 1;
1112
1113         block_rsv = trans->block_rsv;
1114         trans->block_rsv = &fs_info->delayed_block_rsv;
1115
1116         delayed_root = fs_info->delayed_root;
1117
1118         curr_node = btrfs_first_delayed_node(delayed_root);
1119         while (curr_node && (!count || (count && nr--))) {
1120                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1121                                                          curr_node);
1122                 if (ret) {
1123                         btrfs_release_delayed_node(curr_node);
1124                         curr_node = NULL;
1125                         btrfs_abort_transaction(trans, ret);
1126                         break;
1127                 }
1128
1129                 prev_node = curr_node;
1130                 curr_node = btrfs_next_delayed_node(curr_node);
1131                 btrfs_release_delayed_node(prev_node);
1132         }
1133
1134         if (curr_node)
1135                 btrfs_release_delayed_node(curr_node);
1136         btrfs_free_path(path);
1137         trans->block_rsv = block_rsv;
1138
1139         return ret;
1140 }
1141
/* Flush all delayed nodes of @fs_info; see __btrfs_run_delayed_items(). */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info)
{
        /* A non-positive count disables the per-call node limit. */
        const int no_limit = -1;

        return __btrfs_run_delayed_items(trans, fs_info, no_limit);
}
1147
/*
 * Flush at most @nr delayed nodes of @fs_info (all of them when @nr is not
 * positive); see __btrfs_run_delayed_items().
 */
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
                               struct btrfs_fs_info *fs_info, int nr)
{
        int ret;

        ret = __btrfs_run_delayed_items(trans, fs_info, nr);
        return ret;
}
1153
1154 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1155                                      struct btrfs_inode *inode)
1156 {
1157         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1158         struct btrfs_path *path;
1159         struct btrfs_block_rsv *block_rsv;
1160         int ret;
1161
1162         if (!delayed_node)
1163                 return 0;
1164
1165         mutex_lock(&delayed_node->mutex);
1166         if (!delayed_node->count) {
1167                 mutex_unlock(&delayed_node->mutex);
1168                 btrfs_release_delayed_node(delayed_node);
1169                 return 0;
1170         }
1171         mutex_unlock(&delayed_node->mutex);
1172
1173         path = btrfs_alloc_path();
1174         if (!path) {
1175                 btrfs_release_delayed_node(delayed_node);
1176                 return -ENOMEM;
1177         }
1178         path->leave_spinning = 1;
1179
1180         block_rsv = trans->block_rsv;
1181         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1182
1183         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1184
1185         btrfs_release_delayed_node(delayed_node);
1186         btrfs_free_path(path);
1187         trans->block_rsv = block_rsv;
1188
1189         return ret;
1190 }
1191
1192 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1193 {
1194         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
1195         struct btrfs_trans_handle *trans;
1196         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1197         struct btrfs_path *path;
1198         struct btrfs_block_rsv *block_rsv;
1199         int ret;
1200
1201         if (!delayed_node)
1202                 return 0;
1203
1204         mutex_lock(&delayed_node->mutex);
1205         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1206                 mutex_unlock(&delayed_node->mutex);
1207                 btrfs_release_delayed_node(delayed_node);
1208                 return 0;
1209         }
1210         mutex_unlock(&delayed_node->mutex);
1211
1212         trans = btrfs_join_transaction(delayed_node->root);
1213         if (IS_ERR(trans)) {
1214                 ret = PTR_ERR(trans);
1215                 goto out;
1216         }
1217
1218         path = btrfs_alloc_path();
1219         if (!path) {
1220                 ret = -ENOMEM;
1221                 goto trans_out;
1222         }
1223         path->leave_spinning = 1;
1224
1225         block_rsv = trans->block_rsv;
1226         trans->block_rsv = &fs_info->delayed_block_rsv;
1227
1228         mutex_lock(&delayed_node->mutex);
1229         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1230                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1231                                                    path, delayed_node);
1232         else
1233                 ret = 0;
1234         mutex_unlock(&delayed_node->mutex);
1235
1236         btrfs_free_path(path);
1237         trans->block_rsv = block_rsv;
1238 trans_out:
1239         btrfs_end_transaction(trans);
1240         btrfs_btree_balance_dirty(fs_info);
1241 out:
1242         btrfs_release_delayed_node(delayed_node);
1243
1244         return ret;
1245 }
1246
1247 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1248 {
1249         struct btrfs_delayed_node *delayed_node;
1250
1251         delayed_node = READ_ONCE(inode->delayed_node);
1252         if (!delayed_node)
1253                 return;
1254
1255         inode->delayed_node = NULL;
1256         btrfs_release_delayed_node(delayed_node);
1257 }
1258
/*
 * Context of one asynchronous flush request; allocated by
 * btrfs_wq_run_delayed_node() and freed by btrfs_async_run_delayed_root().
 */
struct btrfs_async_delayed_work {
        /* delayed root whose prepared nodes the worker flushes */
        struct btrfs_delayed_root *delayed_root;
        /*
         * number of nodes to process; 0 makes the worker use
         * BTRFS_DELAYED_WRITEBACK as the limit instead
         */
        int nr;
        /* work item queued on fs_info->delayed_workers */
        struct btrfs_work work;
};
1264
1265 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1266 {
1267         struct btrfs_async_delayed_work *async_work;
1268         struct btrfs_delayed_root *delayed_root;
1269         struct btrfs_trans_handle *trans;
1270         struct btrfs_path *path;
1271         struct btrfs_delayed_node *delayed_node = NULL;
1272         struct btrfs_root *root;
1273         struct btrfs_block_rsv *block_rsv;
1274         int total_done = 0;
1275
1276         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1277         delayed_root = async_work->delayed_root;
1278
1279         path = btrfs_alloc_path();
1280         if (!path)
1281                 goto out;
1282
1283 again:
1284         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
1285                 goto free_path;
1286
1287         delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1288         if (!delayed_node)
1289                 goto free_path;
1290
1291         path->leave_spinning = 1;
1292         root = delayed_node->root;
1293
1294         trans = btrfs_join_transaction(root);
1295         if (IS_ERR(trans))
1296                 goto release_path;
1297
1298         block_rsv = trans->block_rsv;
1299         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1300
1301         __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1302
1303         trans->block_rsv = block_rsv;
1304         btrfs_end_transaction(trans);
1305         btrfs_btree_balance_dirty_nodelay(root->fs_info);
1306
1307 release_path:
1308         btrfs_release_path(path);
1309         total_done++;
1310
1311         btrfs_release_prepared_delayed_node(delayed_node);
1312         if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
1313             total_done < async_work->nr)
1314                 goto again;
1315
1316 free_path:
1317         btrfs_free_path(path);
1318 out:
1319         wake_up(&delayed_root->wait);
1320         kfree(async_work);
1321 }
1322
1323
1324 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1325                                      struct btrfs_fs_info *fs_info, int nr)
1326 {
1327         struct btrfs_async_delayed_work *async_work;
1328
1329         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
1330             btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1331                 return 0;
1332
1333         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1334         if (!async_work)
1335                 return -ENOMEM;
1336
1337         async_work->delayed_root = delayed_root;
1338         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1339                         btrfs_async_run_delayed_root, NULL, NULL);
1340         async_work->nr = nr;
1341
1342         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1343         return 0;
1344 }
1345
1346 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1347 {
1348         WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1349 }
1350
1351 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1352 {
1353         int val = atomic_read(&delayed_root->items_seq);
1354
1355         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1356                 return 1;
1357
1358         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1359                 return 1;
1360
1361         return 0;
1362 }
1363
1364 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1365 {
1366         struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1367
1368         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1369                 return;
1370
1371         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1372                 int seq;
1373                 int ret;
1374
1375                 seq = atomic_read(&delayed_root->items_seq);
1376
1377                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1378                 if (ret)
1379                         return;
1380
1381                 wait_event_interruptible(delayed_root->wait,
1382                                          could_end_wait(delayed_root, seq));
1383                 return;
1384         }
1385
1386         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1387 }
1388
1389 /* Will return 0 or -ENOMEM */
1390 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1391                                    struct btrfs_fs_info *fs_info,
1392                                    const char *name, int name_len,
1393                                    struct btrfs_inode *dir,
1394                                    struct btrfs_disk_key *disk_key, u8 type,
1395                                    u64 index)
1396 {
1397         struct btrfs_delayed_node *delayed_node;
1398         struct btrfs_delayed_item *delayed_item;
1399         struct btrfs_dir_item *dir_item;
1400         int ret;
1401
1402         delayed_node = btrfs_get_or_create_delayed_node(dir);
1403         if (IS_ERR(delayed_node))
1404                 return PTR_ERR(delayed_node);
1405
1406         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1407         if (!delayed_item) {
1408                 ret = -ENOMEM;
1409                 goto release_node;
1410         }
1411
1412         delayed_item->key.objectid = btrfs_ino(dir);
1413         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1414         delayed_item->key.offset = index;
1415
1416         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1417         dir_item->location = *disk_key;
1418         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1419         btrfs_set_stack_dir_data_len(dir_item, 0);
1420         btrfs_set_stack_dir_name_len(dir_item, name_len);
1421         btrfs_set_stack_dir_type(dir_item, type);
1422         memcpy((char *)(dir_item + 1), name, name_len);
1423
1424         ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, delayed_item);
1425         /*
1426          * we have reserved enough space when we start a new transaction,
1427          * so reserving metadata failure is impossible
1428          */
1429         BUG_ON(ret);
1430
1431
1432         mutex_lock(&delayed_node->mutex);
1433         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1434         if (unlikely(ret)) {
1435                 btrfs_err(fs_info,
1436                           "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1437                           name_len, name, delayed_node->root->objectid,
1438                           delayed_node->inode_id, ret);
1439                 BUG();
1440         }
1441         mutex_unlock(&delayed_node->mutex);
1442
1443 release_node:
1444         btrfs_release_delayed_node(delayed_node);
1445         return ret;
1446 }
1447
1448 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1449                                                struct btrfs_delayed_node *node,
1450                                                struct btrfs_key *key)
1451 {
1452         struct btrfs_delayed_item *item;
1453
1454         mutex_lock(&node->mutex);
1455         item = __btrfs_lookup_delayed_insertion_item(node, key);
1456         if (!item) {
1457                 mutex_unlock(&node->mutex);
1458                 return 1;
1459         }
1460
1461         btrfs_delayed_item_release_metadata(fs_info, item);
1462         btrfs_release_delayed_item(item);
1463         mutex_unlock(&node->mutex);
1464         return 0;
1465 }
1466
1467 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1468                                    struct btrfs_fs_info *fs_info,
1469                                    struct btrfs_inode *dir, u64 index)
1470 {
1471         struct btrfs_delayed_node *node;
1472         struct btrfs_delayed_item *item;
1473         struct btrfs_key item_key;
1474         int ret;
1475
1476         node = btrfs_get_or_create_delayed_node(dir);
1477         if (IS_ERR(node))
1478                 return PTR_ERR(node);
1479
1480         item_key.objectid = btrfs_ino(dir);
1481         item_key.type = BTRFS_DIR_INDEX_KEY;
1482         item_key.offset = index;
1483
1484         ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key);
1485         if (!ret)
1486                 goto end;
1487
1488         item = btrfs_alloc_delayed_item(0);
1489         if (!item) {
1490                 ret = -ENOMEM;
1491                 goto end;
1492         }
1493
1494         item->key = item_key;
1495
1496         ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, item);
1497         /*
1498          * we have reserved enough space when we start a new transaction,
1499          * so reserving metadata failure is impossible.
1500          */
1501         BUG_ON(ret);
1502
1503         mutex_lock(&node->mutex);
1504         ret = __btrfs_add_delayed_deletion_item(node, item);
1505         if (unlikely(ret)) {
1506                 btrfs_err(fs_info,
1507                           "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1508                           index, node->root->objectid, node->inode_id, ret);
1509                 BUG();
1510         }
1511         mutex_unlock(&node->mutex);
1512 end:
1513         btrfs_release_delayed_node(node);
1514         return ret;
1515 }
1516
1517 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1518 {
1519         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1520
1521         if (!delayed_node)
1522                 return -ENOENT;
1523
1524         /*
1525          * Since we have held i_mutex of this directory, it is impossible that
1526          * a new directory index is added into the delayed node and index_cnt
1527          * is updated now. So we needn't lock the delayed node.
1528          */
1529         if (!delayed_node->index_cnt) {
1530                 btrfs_release_delayed_node(delayed_node);
1531                 return -EINVAL;
1532         }
1533
1534         inode->index_cnt = delayed_node->index_cnt;
1535         btrfs_release_delayed_node(delayed_node);
1536         return 0;
1537 }
1538
1539 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1540                                      struct list_head *ins_list,
1541                                      struct list_head *del_list)
1542 {
1543         struct btrfs_delayed_node *delayed_node;
1544         struct btrfs_delayed_item *item;
1545
1546         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1547         if (!delayed_node)
1548                 return false;
1549
1550         /*
1551          * We can only do one readdir with delayed items at a time because of
1552          * item->readdir_list.
1553          */
1554         inode_unlock_shared(inode);
1555         inode_lock(inode);
1556
1557         mutex_lock(&delayed_node->mutex);
1558         item = __btrfs_first_delayed_insertion_item(delayed_node);
1559         while (item) {
1560                 refcount_inc(&item->refs);
1561                 list_add_tail(&item->readdir_list, ins_list);
1562                 item = __btrfs_next_delayed_item(item);
1563         }
1564
1565         item = __btrfs_first_delayed_deletion_item(delayed_node);
1566         while (item) {
1567                 refcount_inc(&item->refs);
1568                 list_add_tail(&item->readdir_list, del_list);
1569                 item = __btrfs_next_delayed_item(item);
1570         }
1571         mutex_unlock(&delayed_node->mutex);
1572         /*
1573          * This delayed node is still cached in the btrfs inode, so refs
1574          * must be > 1 now, and we needn't check it is going to be freed
1575          * or not.
1576          *
1577          * Besides that, this function is used to read dir, we do not
1578          * insert/delete delayed items in this period. So we also needn't
1579          * requeue or dequeue this delayed node.
1580          */
1581         refcount_dec(&delayed_node->refs);
1582
1583         return true;
1584 }
1585
1586 void btrfs_readdir_put_delayed_items(struct inode *inode,
1587                                      struct list_head *ins_list,
1588                                      struct list_head *del_list)
1589 {
1590         struct btrfs_delayed_item *curr, *next;
1591
1592         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1593                 list_del(&curr->readdir_list);
1594                 if (refcount_dec_and_test(&curr->refs))
1595                         kfree(curr);
1596         }
1597
1598         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1599                 list_del(&curr->readdir_list);
1600                 if (refcount_dec_and_test(&curr->refs))
1601                         kfree(curr);
1602         }
1603
1604         /*
1605          * The VFS is going to do up_read(), so we need to downgrade back to a
1606          * read lock.
1607          */
1608         downgrade_write(&inode->i_rwsem);
1609 }
1610
1611 int btrfs_should_delete_dir_index(struct list_head *del_list,
1612                                   u64 index)
1613 {
1614         struct btrfs_delayed_item *curr, *next;
1615         int ret;
1616
1617         if (list_empty(del_list))
1618                 return 0;
1619
1620         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1621                 if (curr->key.offset > index)
1622                         break;
1623
1624                 list_del(&curr->readdir_list);
1625                 ret = (curr->key.offset == index);
1626
1627                 if (refcount_dec_and_test(&curr->refs))
1628                         kfree(curr);
1629
1630                 if (ret)
1631                         return 1;
1632                 else
1633                         continue;
1634         }
1635         return 0;
1636 }
1637
1638 /*
1639  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1640  *
1641  */
1642 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1643                                     struct list_head *ins_list)
1644 {
1645         struct btrfs_dir_item *di;
1646         struct btrfs_delayed_item *curr, *next;
1647         struct btrfs_key location;
1648         char *name;
1649         int name_len;
1650         int over = 0;
1651         unsigned char d_type;
1652
1653         if (list_empty(ins_list))
1654                 return 0;
1655
1656         /*
1657          * Changing the data of the delayed item is impossible. So
1658          * we needn't lock them. And we have held i_mutex of the
1659          * directory, nobody can delete any directory indexes now.
1660          */
1661         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1662                 list_del(&curr->readdir_list);
1663
1664                 if (curr->key.offset < ctx->pos) {
1665                         if (refcount_dec_and_test(&curr->refs))
1666                                 kfree(curr);
1667                         continue;
1668                 }
1669
1670                 ctx->pos = curr->key.offset;
1671
1672                 di = (struct btrfs_dir_item *)curr->data;
1673                 name = (char *)(di + 1);
1674                 name_len = btrfs_stack_dir_name_len(di);
1675
1676                 d_type = btrfs_filetype_table[di->type];
1677                 btrfs_disk_key_to_cpu(&location, &di->location);
1678
1679                 over = !dir_emit(ctx, name, name_len,
1680                                location.objectid, d_type);
1681
1682                 if (refcount_dec_and_test(&curr->refs))
1683                         kfree(curr);
1684
1685                 if (over)
1686                         return 1;
1687                 ctx->pos++;
1688         }
1689         return 0;
1690 }
1691
1692 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1693                                   struct btrfs_inode_item *inode_item,
1694                                   struct inode *inode)
1695 {
1696         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1697         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1698         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1699         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1700         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1701         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1702         btrfs_set_stack_inode_generation(inode_item,
1703                                          BTRFS_I(inode)->generation);
1704         btrfs_set_stack_inode_sequence(inode_item,
1705                                        inode_peek_iversion(inode));
1706         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1707         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1708         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1709         btrfs_set_stack_inode_block_group(inode_item, 0);
1710
1711         btrfs_set_stack_timespec_sec(&inode_item->atime,
1712                                      inode->i_atime.tv_sec);
1713         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1714                                       inode->i_atime.tv_nsec);
1715
1716         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1717                                      inode->i_mtime.tv_sec);
1718         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1719                                       inode->i_mtime.tv_nsec);
1720
1721         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1722                                      inode->i_ctime.tv_sec);
1723         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1724                                       inode->i_ctime.tv_nsec);
1725
1726         btrfs_set_stack_timespec_sec(&inode_item->otime,
1727                                      BTRFS_I(inode)->i_otime.tv_sec);
1728         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1729                                      BTRFS_I(inode)->i_otime.tv_nsec);
1730 }
1731
1732 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1733 {
1734         struct btrfs_delayed_node *delayed_node;
1735         struct btrfs_inode_item *inode_item;
1736
1737         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1738         if (!delayed_node)
1739                 return -ENOENT;
1740
1741         mutex_lock(&delayed_node->mutex);
1742         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1743                 mutex_unlock(&delayed_node->mutex);
1744                 btrfs_release_delayed_node(delayed_node);
1745                 return -ENOENT;
1746         }
1747
1748         inode_item = &delayed_node->inode_item;
1749
1750         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1751         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1752         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1753         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1754         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1755         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1756         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1757         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1758
1759         inode_set_iversion_queried(inode,
1760                                    btrfs_stack_inode_sequence(inode_item));
1761         inode->i_rdev = 0;
1762         *rdev = btrfs_stack_inode_rdev(inode_item);
1763         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1764
1765         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1766         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1767
1768         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1769         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1770
1771         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1772         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1773
1774         BTRFS_I(inode)->i_otime.tv_sec =
1775                 btrfs_stack_timespec_sec(&inode_item->otime);
1776         BTRFS_I(inode)->i_otime.tv_nsec =
1777                 btrfs_stack_timespec_nsec(&inode_item->otime);
1778
1779         inode->i_generation = BTRFS_I(inode)->generation;
1780         BTRFS_I(inode)->index_cnt = (u64)-1;
1781
1782         mutex_unlock(&delayed_node->mutex);
1783         btrfs_release_delayed_node(delayed_node);
1784         return 0;
1785 }
1786
1787 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1788                                struct btrfs_root *root, struct inode *inode)
1789 {
1790         struct btrfs_delayed_node *delayed_node;
1791         int ret = 0;
1792
1793         delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1794         if (IS_ERR(delayed_node))
1795                 return PTR_ERR(delayed_node);
1796
1797         mutex_lock(&delayed_node->mutex);
1798         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1799                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1800                 goto release_node;
1801         }
1802
1803         ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1804                                                    delayed_node);
1805         if (ret)
1806                 goto release_node;
1807
1808         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1809         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1810         delayed_node->count++;
1811         atomic_inc(&root->fs_info->delayed_root->items);
1812 release_node:
1813         mutex_unlock(&delayed_node->mutex);
1814         btrfs_release_delayed_node(delayed_node);
1815         return ret;
1816 }
1817
1818 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1819 {
1820         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
1821         struct btrfs_delayed_node *delayed_node;
1822
1823         /*
1824          * we don't do delayed inode updates during log recovery because it
1825          * leads to enospc problems.  This means we also can't do
1826          * delayed inode refs
1827          */
1828         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1829                 return -EAGAIN;
1830
1831         delayed_node = btrfs_get_or_create_delayed_node(inode);
1832         if (IS_ERR(delayed_node))
1833                 return PTR_ERR(delayed_node);
1834
1835         /*
1836          * We don't reserve space for inode ref deletion is because:
1837          * - We ONLY do async inode ref deletion for the inode who has only
1838          *   one link(i_nlink == 1), it means there is only one inode ref.
1839          *   And in most case, the inode ref and the inode item are in the
1840          *   same leaf, and we will deal with them at the same time.
1841          *   Since we are sure we will reserve the space for the inode item,
1842          *   it is unnecessary to reserve space for inode ref deletion.
1843          * - If the inode ref and the inode item are not in the same leaf,
1844          *   We also needn't worry about enospc problem, because we reserve
1845          *   much more space for the inode update than it needs.
1846          * - At the worst, we can steal some space from the global reservation.
1847          *   It is very rare.
1848          */
1849         mutex_lock(&delayed_node->mutex);
1850         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1851                 goto release_node;
1852
1853         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1854         delayed_node->count++;
1855         atomic_inc(&fs_info->delayed_root->items);
1856 release_node:
1857         mutex_unlock(&delayed_node->mutex);
1858         btrfs_release_delayed_node(delayed_node);
1859         return 0;
1860 }
1861
1862 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1863 {
1864         struct btrfs_root *root = delayed_node->root;
1865         struct btrfs_fs_info *fs_info = root->fs_info;
1866         struct btrfs_delayed_item *curr_item, *prev_item;
1867
1868         mutex_lock(&delayed_node->mutex);
1869         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1870         while (curr_item) {
1871                 btrfs_delayed_item_release_metadata(fs_info, curr_item);
1872                 prev_item = curr_item;
1873                 curr_item = __btrfs_next_delayed_item(prev_item);
1874                 btrfs_release_delayed_item(prev_item);
1875         }
1876
1877         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1878         while (curr_item) {
1879                 btrfs_delayed_item_release_metadata(fs_info, curr_item);
1880                 prev_item = curr_item;
1881                 curr_item = __btrfs_next_delayed_item(prev_item);
1882                 btrfs_release_delayed_item(prev_item);
1883         }
1884
1885         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1886                 btrfs_release_delayed_iref(delayed_node);
1887
1888         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1889                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node);
1890                 btrfs_release_delayed_inode(delayed_node);
1891         }
1892         mutex_unlock(&delayed_node->mutex);
1893 }
1894
/*
 * Discard all delayed work queued for @inode.  Does nothing when the
 * inode has no delayed node.
 */
void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
        struct btrfs_delayed_node *node = btrfs_get_delayed_node(inode);

        if (node) {
                __btrfs_kill_delayed_node(node);
                /* Drop the reference obtained by the lookup above. */
                btrfs_release_delayed_node(node);
        }
}
1906
1907 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1908 {
1909         u64 inode_id = 0;
1910         struct btrfs_delayed_node *delayed_nodes[8];
1911         int i, n;
1912
1913         while (1) {
1914                 spin_lock(&root->inode_lock);
1915                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1916                                            (void **)delayed_nodes, inode_id,
1917                                            ARRAY_SIZE(delayed_nodes));
1918                 if (!n) {
1919                         spin_unlock(&root->inode_lock);
1920                         break;
1921                 }
1922
1923                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1924
1925                 for (i = 0; i < n; i++)
1926                         refcount_inc(&delayed_nodes[i]->refs);
1927                 spin_unlock(&root->inode_lock);
1928
1929                 for (i = 0; i < n; i++) {
1930                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1931                         btrfs_release_delayed_node(delayed_nodes[i]);
1932                 }
1933         }
1934 }
1935
1936 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1937 {
1938         struct btrfs_delayed_node *curr_node, *prev_node;
1939
1940         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1941         while (curr_node) {
1942                 __btrfs_kill_delayed_node(curr_node);
1943
1944                 prev_node = curr_node;
1945                 curr_node = btrfs_next_delayed_node(curr_node);
1946                 btrfs_release_delayed_node(prev_node);
1947         }
1948 }
1949