// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011 Fujitsu.  All rights reserved.
 * Written by Miao Xie <miaox@cn.fujitsu.com>
 */

#include <linux/slab.h>
#include <linux/iversion.h>
#include "misc.h"
#include "delayed-inode.h"
#include "disk-io.h"
#include "transaction.h"
#include "ctree.h"
#include "qgroup.h"

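/*
 * Tuning knobs for the delayed-item machinery, used further down in
 * finish_one_item() and btrfs_balance_delayed_items(): background flushing
 * starts once BTRFS_DELAYED_BACKGROUND items are pending, callers start
 * blocking at BTRFS_DELAYED_WRITEBACK items, and waiters are woken at most
 * once per BTRFS_DELAYED_BATCH completed items.
 */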
#define BTRFS_DELAYED_WRITEBACK         512
#define BTRFS_DELAYED_BACKGROUND        128
#define BTRFS_DELAYED_BATCH             16

static struct kmem_cache *delayed_node_cache;

int __init btrfs_delayed_inode_init(void)
{
        delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
                                        sizeof(struct btrfs_delayed_node),
                                        0,
                                        SLAB_MEM_SPREAD,
                                        NULL);
        if (!delayed_node_cache)
                return -ENOMEM;
        return 0;
}

void __cold btrfs_delayed_inode_exit(void)
{
        kmem_cache_destroy(delayed_node_cache);
}

static inline void btrfs_init_delayed_node(
                                struct btrfs_delayed_node *delayed_node,
                                struct btrfs_root *root, u64 inode_id)
{
        delayed_node->root = root;
        delayed_node->inode_id = inode_id;
        refcount_set(&delayed_node->refs, 0);
        delayed_node->ins_root = RB_ROOT_CACHED;
        delayed_node->del_root = RB_ROOT_CACHED;
        mutex_init(&delayed_node->mutex);
        INIT_LIST_HEAD(&delayed_node->n_list);
        INIT_LIST_HEAD(&delayed_node->p_list);
}

static inline int btrfs_is_continuous_delayed_item(
                                        struct btrfs_delayed_item *item1,
                                        struct btrfs_delayed_item *item2)
{
        if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
            item1->key.objectid == item2->key.objectid &&
            item1->key.type == item2->key.type &&
            item1->key.offset + 1 == item2->key.offset)
                return 1;
        return 0;
}

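/*
 * Reference rules for delayed nodes: every helper below that returns a
 * node returns it with an extra reference held, and the caller must drop
 * that reference with btrfs_release_delayed_node().  A minimal usage
 * sketch:
 *
 *	node = btrfs_get_delayed_node(inode);
 *	if (node) {
 *		... use node ...
 *		btrfs_release_delayed_node(node);
 *	}
 *
 * A node is freed, and removed from the per-root radix tree, once its
 * refcount drops to zero.
 */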
static struct btrfs_delayed_node *btrfs_get_delayed_node(
                struct btrfs_inode *btrfs_inode)
{
        struct btrfs_root *root = btrfs_inode->root;
        u64 ino = btrfs_ino(btrfs_inode);
        struct btrfs_delayed_node *node;

        node = READ_ONCE(btrfs_inode->delayed_node);
        if (node) {
                refcount_inc(&node->refs);
                return node;
        }

        spin_lock(&root->inode_lock);
        node = radix_tree_lookup(&root->delayed_nodes_tree, ino);

        if (node) {
                if (btrfs_inode->delayed_node) {
                        refcount_inc(&node->refs);      /* can be accessed */
                        BUG_ON(btrfs_inode->delayed_node != node);
                        spin_unlock(&root->inode_lock);
                        return node;
                }

                /*
                 * It's possible that we're racing into the middle of removing
                 * this node from the radix tree.  In this case, the refcount
                 * was zero and it should never go back to one.  Just return
                 * NULL like it was never in the radix at all; our release
                 * function is in the process of removing it.
                 *
                 * Some implementations of refcount_inc refuse to bump the
                 * refcount once it has hit zero.  If we don't do this dance
                 * here, refcount_inc() may decide to just WARN_ONCE() instead
                 * of actually bumping the refcount.
                 *
                 * If this node is properly in the radix, we want to bump the
                 * refcount twice, once for the inode and once for this get
                 * operation.
                 */
                if (refcount_inc_not_zero(&node->refs)) {
                        refcount_inc(&node->refs);
                        btrfs_inode->delayed_node = node;
                } else {
                        node = NULL;
                }

                spin_unlock(&root->inode_lock);
                return node;
        }
        spin_unlock(&root->inode_lock);

        return NULL;
}

/* Will return either the node or PTR_ERR(-ENOMEM) */
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
                struct btrfs_inode *btrfs_inode)
{
        struct btrfs_delayed_node *node;
        struct btrfs_root *root = btrfs_inode->root;
        u64 ino = btrfs_ino(btrfs_inode);
        int ret;

again:
        node = btrfs_get_delayed_node(btrfs_inode);
        if (node)
                return node;

        node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
        if (!node)
                return ERR_PTR(-ENOMEM);
        btrfs_init_delayed_node(node, root, ino);

        /* cached in the btrfs inode and can be accessed */
        refcount_set(&node->refs, 2);

        ret = radix_tree_preload(GFP_NOFS);
        if (ret) {
                kmem_cache_free(delayed_node_cache, node);
                return ERR_PTR(ret);
        }

        spin_lock(&root->inode_lock);
        ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
        if (ret == -EEXIST) {
                spin_unlock(&root->inode_lock);
                kmem_cache_free(delayed_node_cache, node);
                radix_tree_preload_end();
                goto again;
        }
        btrfs_inode->delayed_node = node;
        spin_unlock(&root->inode_lock);
        radix_tree_preload_end();

        return node;
}

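/*
 * A delayed node can sit on two lists: n_list links it into
 * delayed_root->node_list, which holds every node that has pending work,
 * while p_list links it into delayed_root->prepare_list, which holds the
 * nodes that have been handed to the background flusher.
 */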
/*
 * Call it when holding delayed_node->mutex
 *
 * If mod = 1, add this node into the prepared list.
 */
static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
                                     struct btrfs_delayed_node *node,
                                     int mod)
{
        spin_lock(&root->lock);
        if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
                if (!list_empty(&node->p_list))
                        list_move_tail(&node->p_list, &root->prepare_list);
                else if (mod)
                        list_add_tail(&node->p_list, &root->prepare_list);
        } else {
                list_add_tail(&node->n_list, &root->node_list);
                list_add_tail(&node->p_list, &root->prepare_list);
                refcount_inc(&node->refs);      /* inserted into list */
                root->nodes++;
                set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
        }
        spin_unlock(&root->lock);
}

/* Call it when holding delayed_node->mutex */
static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
                                       struct btrfs_delayed_node *node)
{
        spin_lock(&root->lock);
        if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
                root->nodes--;
                refcount_dec(&node->refs);      /* not in the list */
                list_del_init(&node->n_list);
                if (!list_empty(&node->p_list))
                        list_del_init(&node->p_list);
                clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
        }
        spin_unlock(&root->lock);
}

static struct btrfs_delayed_node *btrfs_first_delayed_node(
                        struct btrfs_delayed_root *delayed_root)
{
        struct list_head *p;
        struct btrfs_delayed_node *node = NULL;

        spin_lock(&delayed_root->lock);
        if (list_empty(&delayed_root->node_list))
                goto out;

        p = delayed_root->node_list.next;
        node = list_entry(p, struct btrfs_delayed_node, n_list);
        refcount_inc(&node->refs);
out:
        spin_unlock(&delayed_root->lock);

        return node;
}

static struct btrfs_delayed_node *btrfs_next_delayed_node(
                                                struct btrfs_delayed_node *node)
{
        struct btrfs_delayed_root *delayed_root;
        struct list_head *p;
        struct btrfs_delayed_node *next = NULL;

        delayed_root = node->root->fs_info->delayed_root;
        spin_lock(&delayed_root->lock);
        if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
                /* not in the list */
                if (list_empty(&delayed_root->node_list))
                        goto out;
                p = delayed_root->node_list.next;
        } else if (list_is_last(&node->n_list, &delayed_root->node_list))
                goto out;
        else
                p = node->n_list.next;

        next = list_entry(p, struct btrfs_delayed_node, n_list);
        refcount_inc(&next->refs);
out:
        spin_unlock(&delayed_root->lock);

        return next;
}

static void __btrfs_release_delayed_node(
                                struct btrfs_delayed_node *delayed_node,
                                int mod)
{
        struct btrfs_delayed_root *delayed_root;

        if (!delayed_node)
                return;

        delayed_root = delayed_node->root->fs_info->delayed_root;

        mutex_lock(&delayed_node->mutex);
        if (delayed_node->count)
                btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
        else
                btrfs_dequeue_delayed_node(delayed_root, delayed_node);
        mutex_unlock(&delayed_node->mutex);

        if (refcount_dec_and_test(&delayed_node->refs)) {
                struct btrfs_root *root = delayed_node->root;

                spin_lock(&root->inode_lock);
                /*
                 * Once our refcount goes to zero, nobody is allowed to bump it
                 * back up.  We can delete it now.
                 */
                ASSERT(refcount_read(&delayed_node->refs) == 0);
                radix_tree_delete(&root->delayed_nodes_tree,
                                  delayed_node->inode_id);
                spin_unlock(&root->inode_lock);
                kmem_cache_free(delayed_node_cache, delayed_node);
        }
}

static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
{
        __btrfs_release_delayed_node(node, 0);
}

static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
                                        struct btrfs_delayed_root *delayed_root)
{
        struct list_head *p;
        struct btrfs_delayed_node *node = NULL;

        spin_lock(&delayed_root->lock);
        if (list_empty(&delayed_root->prepare_list))
                goto out;

        p = delayed_root->prepare_list.next;
        list_del_init(p);
        node = list_entry(p, struct btrfs_delayed_node, p_list);
        refcount_inc(&node->refs);
out:
        spin_unlock(&delayed_root->lock);

        return node;
}

static inline void btrfs_release_prepared_delayed_node(
                                        struct btrfs_delayed_node *node)
{
        __btrfs_release_delayed_node(node, 1);
}

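/*
 * Allocate a delayed item together with its payload: the item data lives
 * inline, directly behind the struct in the same allocation, and is
 * accessed through item->data (data_len bytes).
 */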
static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
{
        struct btrfs_delayed_item *item;
        item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
        if (item) {
                item->data_len = data_len;
                item->ins_or_del = 0;
                item->bytes_reserved = 0;
                item->delayed_node = NULL;
                refcount_set(&item->refs, 1);
        }
        return item;
}

/*
 * __btrfs_lookup_delayed_item - look up the delayed item by key
 * @root:         the rb-root of the delayed node to search in
 * @key:          the key to look up
 * @prev:         used to store the prev item if the right item isn't found
 * @next:         used to store the next item if the right item isn't found
 *
 * Note: if we don't find the right item, we will return the prev item and
 * the next item.
 */
static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
                                struct rb_root *root,
                                struct btrfs_key *key,
                                struct btrfs_delayed_item **prev,
                                struct btrfs_delayed_item **next)
{
        struct rb_node *node, *prev_node = NULL;
        struct btrfs_delayed_item *delayed_item = NULL;
        int ret = 0;

        node = root->rb_node;

        while (node) {
                delayed_item = rb_entry(node, struct btrfs_delayed_item,
                                        rb_node);
                prev_node = node;
                ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
                if (ret < 0)
                        node = node->rb_right;
                else if (ret > 0)
                        node = node->rb_left;
                else
                        return delayed_item;
        }

        if (prev) {
                if (!prev_node)
                        *prev = NULL;
                else if (ret < 0)
                        *prev = delayed_item;
                else if ((node = rb_prev(prev_node)) != NULL) {
                        *prev = rb_entry(node, struct btrfs_delayed_item,
                                         rb_node);
                } else
                        *prev = NULL;
        }

        if (next) {
                if (!prev_node)
                        *next = NULL;
                else if (ret > 0)
                        *next = delayed_item;
                else if ((node = rb_next(prev_node)) != NULL) {
                        *next = rb_entry(node, struct btrfs_delayed_item,
                                         rb_node);
                } else
                        *next = NULL;
        }
        return NULL;
}

static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
                                        struct btrfs_delayed_node *delayed_node,
                                        struct btrfs_key *key)
{
        return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
                                           NULL, NULL);
}

static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
                                    struct btrfs_delayed_item *ins,
                                    int action)
{
        struct rb_node **p, *node;
        struct rb_node *parent_node = NULL;
        struct rb_root_cached *root;
        struct btrfs_delayed_item *item;
        int cmp;
        bool leftmost = true;

        if (action == BTRFS_DELAYED_INSERTION_ITEM)
                root = &delayed_node->ins_root;
        else if (action == BTRFS_DELAYED_DELETION_ITEM)
                root = &delayed_node->del_root;
        else
                BUG();
        p = &root->rb_root.rb_node;
        node = &ins->rb_node;

        while (*p) {
                parent_node = *p;
                item = rb_entry(parent_node, struct btrfs_delayed_item,
                                 rb_node);

                cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
                if (cmp < 0) {
                        p = &(*p)->rb_right;
                        leftmost = false;
                } else if (cmp > 0) {
                        p = &(*p)->rb_left;
                } else {
                        return -EEXIST;
                }
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color_cached(node, root, leftmost);
        ins->delayed_node = delayed_node;
        ins->ins_or_del = action;

        if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
            action == BTRFS_DELAYED_INSERTION_ITEM &&
            ins->key.offset >= delayed_node->index_cnt)
                delayed_node->index_cnt = ins->key.offset + 1;

        delayed_node->count++;
        atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
        return 0;
}

static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
                                              struct btrfs_delayed_item *item)
{
        return __btrfs_add_delayed_item(node, item,
                                        BTRFS_DELAYED_INSERTION_ITEM);
}

static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
                                             struct btrfs_delayed_item *item)
{
        return __btrfs_add_delayed_item(node, item,
                                        BTRFS_DELAYED_DELETION_ITEM);
}

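/*
 * Account one finished item: bump items_seq and drop the pending item
 * count.  Waiters are only woken once the backlog falls below
 * BTRFS_DELAYED_BACKGROUND, or once every BTRFS_DELAYED_BATCH completions,
 * so a burst of finished items doesn't turn into a burst of wakeups.
 */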
static void finish_one_item(struct btrfs_delayed_root *delayed_root)
{
        int seq = atomic_inc_return(&delayed_root->items_seq);

        /* atomic_dec_return implies a barrier */
        if ((atomic_dec_return(&delayed_root->items) <
            BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
                cond_wake_up_nomb(&delayed_root->wait);
}

static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
        struct rb_root_cached *root;
        struct btrfs_delayed_root *delayed_root;

        /* Not associated with any delayed_node */
        if (!delayed_item->delayed_node)
                return;
        delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;

        BUG_ON(!delayed_root);
        BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
               delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);

        if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
                root = &delayed_item->delayed_node->ins_root;
        else
                root = &delayed_item->delayed_node->del_root;

        rb_erase_cached(&delayed_item->rb_node, root);
        delayed_item->delayed_node->count--;

        finish_one_item(delayed_root);
}

static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
{
        if (item) {
                __btrfs_remove_delayed_item(item);
                if (refcount_dec_and_test(&item->refs))
                        kfree(item);
        }
}

static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
                                        struct btrfs_delayed_node *delayed_node)
{
        struct rb_node *p;
        struct btrfs_delayed_item *item = NULL;

        p = rb_first_cached(&delayed_node->ins_root);
        if (p)
                item = rb_entry(p, struct btrfs_delayed_item, rb_node);

        return item;
}

static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
                                        struct btrfs_delayed_node *delayed_node)
{
        struct rb_node *p;
        struct btrfs_delayed_item *item = NULL;

        p = rb_first_cached(&delayed_node->del_root);
        if (p)
                item = rb_entry(p, struct btrfs_delayed_item, rb_node);

        return item;
}

static struct btrfs_delayed_item *__btrfs_next_delayed_item(
                                                struct btrfs_delayed_item *item)
{
        struct rb_node *p;
        struct btrfs_delayed_item *next = NULL;

        p = rb_next(&item->rb_node);
        if (p)
                next = rb_entry(p, struct btrfs_delayed_item, rb_node);

        return next;
}

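/*
 * Metadata reservation for delayed items: every item needs space for one
 * tree insertion.  Instead of reserving fresh space, the bytes are migrated
 * from the transaction's already-reserved block_rsv into
 * fs_info->delayed_block_rsv, and handed back through
 * btrfs_delayed_item_release_metadata() once the item has made it into (or
 * has been dropped from) the tree.
 */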
static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
                                               struct btrfs_root *root,
                                               struct btrfs_delayed_item *item)
{
        struct btrfs_block_rsv *src_rsv;
        struct btrfs_block_rsv *dst_rsv;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 num_bytes;
        int ret;

        if (!trans->bytes_reserved)
                return 0;

        src_rsv = trans->block_rsv;
        dst_rsv = &fs_info->delayed_block_rsv;

        num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);

        /*
         * Here we migrate space rsv from the transaction rsv, since we have
         * already reserved space when starting a transaction.  So no need to
         * reserve qgroup space here.
         */
        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
        if (!ret) {
                trace_btrfs_space_reservation(fs_info, "delayed_item",
                                              item->key.objectid,
                                              num_bytes, 1);
                item->bytes_reserved = num_bytes;
        }

        return ret;
}

static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
                                                struct btrfs_delayed_item *item)
{
        struct btrfs_block_rsv *rsv;
        struct btrfs_fs_info *fs_info = root->fs_info;

        if (!item->bytes_reserved)
                return;

        rsv = &fs_info->delayed_block_rsv;
        /*
         * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
         * to release/reserve qgroup space.
         */
        trace_btrfs_space_reservation(fs_info, "delayed_item",
                                      item->key.objectid, item->bytes_reserved,
                                      0);
        btrfs_block_rsv_release(fs_info, rsv,
                                item->bytes_reserved);
}

static int btrfs_delayed_inode_reserve_metadata(
                                        struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
                                        struct btrfs_inode *inode,
                                        struct btrfs_delayed_node *node)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_block_rsv *src_rsv;
        struct btrfs_block_rsv *dst_rsv;
        u64 num_bytes;
        int ret;

        src_rsv = trans->block_rsv;
        dst_rsv = &fs_info->delayed_block_rsv;

        num_bytes = btrfs_calc_metadata_size(fs_info, 1);

        /*
         * btrfs_dirty_inode will update the inode under btrfs_join_transaction
         * which doesn't reserve space for speed.  This is a problem since we
         * still need to reserve space for this update, so try to reserve the
         * space.
         *
         * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
         * we always reserve enough to update the inode item.
         */
        if (!src_rsv || (!trans->bytes_reserved &&
                         src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
                ret = btrfs_qgroup_reserve_meta_prealloc(root,
                                fs_info->nodesize, true);
                if (ret < 0)
                        return ret;
                ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
                                          BTRFS_RESERVE_NO_FLUSH);
                /*
                 * Since we're under a transaction reserve_metadata_bytes could
                 * try to commit the transaction which will make it return
                 * EAGAIN to make us stop the transaction we have, so return
                 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
                 */
                if (ret == -EAGAIN) {
                        ret = -ENOSPC;
                        btrfs_qgroup_free_meta_prealloc(root, num_bytes);
                }
                if (!ret) {
                        node->bytes_reserved = num_bytes;
                        trace_btrfs_space_reservation(fs_info,
                                                      "delayed_inode",
                                                      btrfs_ino(inode),
                                                      num_bytes, 1);
                } else {
                        btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
                }
                return ret;
        }

        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
        if (!ret) {
                trace_btrfs_space_reservation(fs_info, "delayed_inode",
                                              btrfs_ino(inode), num_bytes, 1);
                node->bytes_reserved = num_bytes;
        }

        return ret;
}

static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
                                                struct btrfs_delayed_node *node,
                                                bool qgroup_free)
{
        struct btrfs_block_rsv *rsv;

        if (!node->bytes_reserved)
                return;

        rsv = &fs_info->delayed_block_rsv;
        trace_btrfs_space_reservation(fs_info, "delayed_inode",
                                      node->inode_id, node->bytes_reserved, 0);
        btrfs_block_rsv_release(fs_info, rsv,
                                node->bytes_reserved);
        if (qgroup_free)
                btrfs_qgroup_free_meta_prealloc(node->root,
                                node->bytes_reserved);
        else
                btrfs_qgroup_convert_reserved_meta(node->root,
                                node->bytes_reserved);
        node->bytes_reserved = 0;
}

/*
 * This helper will insert some continuous items into the same leaf according
 * to the free space of the leaf.
 */
static int btrfs_batch_insert_items(struct btrfs_root *root,
                                    struct btrfs_path *path,
                                    struct btrfs_delayed_item *item)
{
        struct btrfs_delayed_item *curr, *next;
        int free_space;
        int total_data_size = 0, total_size = 0;
        struct extent_buffer *leaf;
        char *data_ptr;
        struct btrfs_key *keys;
        u32 *data_size;
        struct list_head head;
        int slot;
        int nitems;
        int i;
        int ret = 0;

        BUG_ON(!path->nodes[0]);

        leaf = path->nodes[0];
        free_space = btrfs_leaf_free_space(leaf);
        INIT_LIST_HEAD(&head);

        next = item;
        nitems = 0;

        /*
         * Count the number of continuous items that we can insert in one
         * batch.
         */
        while (total_size + next->data_len + sizeof(struct btrfs_item) <=
               free_space) {
                total_data_size += next->data_len;
                total_size += next->data_len + sizeof(struct btrfs_item);
                list_add_tail(&next->tree_list, &head);
                nitems++;

                curr = next;
                next = __btrfs_next_delayed_item(curr);
                if (!next)
                        break;

                if (!btrfs_is_continuous_delayed_item(curr, next))
                        break;
        }

        if (!nitems) {
                ret = 0;
                goto out;
        }

        /*
         * We need to allocate some memory, which might cause the task to
         * sleep, so set all the locked nodes in the path to blocking locks
         * first.
         */
        btrfs_set_path_blocking(path);

        keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
        if (!keys) {
                ret = -ENOMEM;
                goto out;
        }

        data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
        if (!data_size) {
                ret = -ENOMEM;
                goto error;
        }

        /* get keys of all the delayed items */
        i = 0;
        list_for_each_entry(next, &head, tree_list) {
                keys[i] = next->key;
                data_size[i] = next->data_len;
                i++;
        }

        /* insert the keys of the items */
        setup_items_for_insert(root, path, keys, data_size,
                               total_data_size, total_size, nitems);

        /* insert the dir index items */
        slot = path->slots[0];
        list_for_each_entry_safe(curr, next, &head, tree_list) {
                data_ptr = btrfs_item_ptr(leaf, slot, char);
                write_extent_buffer(leaf, &curr->data,
                                    (unsigned long)data_ptr,
                                    curr->data_len);
                slot++;

                btrfs_delayed_item_release_metadata(root, curr);

                list_del(&curr->tree_list);
                btrfs_release_delayed_item(curr);
        }

error:
        kfree(data_size);
        kfree(keys);
out:
        return ret;
}

/*
 * This helper handles simple insertions that don't need to extend an item
 * for new data, such as directory name index or inode insertions.
 */
static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct btrfs_path *path,
                                     struct btrfs_delayed_item *delayed_item)
{
        struct extent_buffer *leaf;
        char *ptr;
        int ret;

        ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
                                      delayed_item->data_len);
        if (ret < 0 && ret != -EEXIST)
                return ret;

        leaf = path->nodes[0];

        ptr = btrfs_item_ptr(leaf, path->slots[0], char);

        write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
                            delayed_item->data_len);
        btrfs_mark_buffer_dirty(leaf);

        btrfs_delayed_item_release_metadata(root, delayed_item);
        return 0;
}

/*
 * We insert an item first, and then, if there are some continuous items, we
 * try to insert those items into the same leaf.
 */
static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
                                      struct btrfs_path *path,
                                      struct btrfs_root *root,
                                      struct btrfs_delayed_node *node)
{
        struct btrfs_delayed_item *curr, *prev;
        int ret = 0;

do_again:
        mutex_lock(&node->mutex);
        curr = __btrfs_first_delayed_insertion_item(node);
        if (!curr)
                goto insert_end;

        ret = btrfs_insert_delayed_item(trans, root, path, curr);
        if (ret < 0) {
                btrfs_release_path(path);
                goto insert_end;
        }

        prev = curr;
        curr = __btrfs_next_delayed_item(prev);
        if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
                /* insert the continuous items into the same leaf */
                path->slots[0]++;
                btrfs_batch_insert_items(root, path, curr);
        }
        btrfs_release_delayed_item(prev);
        btrfs_mark_buffer_dirty(path->nodes[0]);

        btrfs_release_path(path);
        mutex_unlock(&node->mutex);
        goto do_again;

insert_end:
        mutex_unlock(&node->mutex);
        return ret;
}

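/*
 * The deletion counterpart of btrfs_batch_insert_items(): starting at the
 * slot the path points to, walk forward while the delayed deletion items
 * stay continuous and keep matching the keys in the leaf, then remove the
 * whole run with a single btrfs_del_items() call.
 */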
static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root,
                                    struct btrfs_path *path,
                                    struct btrfs_delayed_item *item)
{
        struct btrfs_delayed_item *curr, *next;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        struct list_head head;
        int nitems, i, last_item;
        int ret = 0;

        BUG_ON(!path->nodes[0]);

        leaf = path->nodes[0];

        i = path->slots[0];
        last_item = btrfs_header_nritems(leaf) - 1;
        if (i > last_item)
                return -ENOENT; /* FIXME: Is errno suitable? */

        next = item;
        INIT_LIST_HEAD(&head);
        btrfs_item_key_to_cpu(leaf, &key, i);
        nitems = 0;
        /*
         * Count the number of dir index items that we can delete in one
         * batch.
         */
        while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
                list_add_tail(&next->tree_list, &head);
                nitems++;

                curr = next;
                next = __btrfs_next_delayed_item(curr);
                if (!next)
                        break;

                if (!btrfs_is_continuous_delayed_item(curr, next))
                        break;

                i++;
                if (i > last_item)
                        break;
                btrfs_item_key_to_cpu(leaf, &key, i);
        }

        if (!nitems)
                return 0;

        ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
        if (ret)
                goto out;

        list_for_each_entry_safe(curr, next, &head, tree_list) {
                btrfs_delayed_item_release_metadata(root, curr);
                list_del(&curr->tree_list);
                btrfs_release_delayed_item(curr);
        }

out:
        return ret;
}

static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
                                      struct btrfs_path *path,
                                      struct btrfs_root *root,
                                      struct btrfs_delayed_node *node)
{
        struct btrfs_delayed_item *curr, *prev;
        int ret = 0;

do_again:
        mutex_lock(&node->mutex);
        curr = __btrfs_first_delayed_deletion_item(node);
        if (!curr)
                goto delete_fail;

        ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
        if (ret < 0)
                goto delete_fail;
        else if (ret > 0) {
                /*
                 * We can't find the item which this node points to, so the
                 * node is invalid; just drop it.
                 */
                prev = curr;
                curr = __btrfs_next_delayed_item(prev);
                btrfs_release_delayed_item(prev);
                ret = 0;
                btrfs_release_path(path);
                if (curr) {
                        mutex_unlock(&node->mutex);
                        goto do_again;
                } else
                        goto delete_fail;
        }

        btrfs_batch_delete_items(trans, root, path, curr);
        btrfs_release_path(path);
        mutex_unlock(&node->mutex);
        goto do_again;

delete_fail:
        btrfs_release_path(path);
        mutex_unlock(&node->mutex);
        return ret;
}

static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
{
        struct btrfs_delayed_root *delayed_root;

        if (delayed_node &&
            test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
                BUG_ON(!delayed_node->root);
                clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
                delayed_node->count--;

                delayed_root = delayed_node->root->fs_info->delayed_root;
                finish_one_item(delayed_root);
        }
}

static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
{
        struct btrfs_delayed_root *delayed_root;

        ASSERT(delayed_node->root);
        clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
        delayed_node->count--;

        delayed_root = delayed_node->root->fs_info->delayed_root;
        finish_one_item(delayed_root);
}

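/*
 * Write the dirty in-memory copy of the inode item back into the fs tree.
 * When BTRFS_DELAYED_NODE_DEL_IREF is set, the inode item is looked up for
 * deletion (mod == -1) so that the single INODE_REF/INODE_EXTREF item that
 * belongs to this inode can be removed in the same pass.
 */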
static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
                                        struct btrfs_path *path,
                                        struct btrfs_delayed_node *node)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key key;
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
        int mod;
        int ret;

        key.objectid = node->inode_id;
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;

        if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
                mod = -1;
        else
                mod = 1;

        ret = btrfs_lookup_inode(trans, root, path, &key, mod);
        if (ret > 0) {
                btrfs_release_path(path);
                return -ENOENT;
        } else if (ret < 0) {
                return ret;
        }

        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
        write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
                            sizeof(struct btrfs_inode_item));
        btrfs_mark_buffer_dirty(leaf);

        if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
                goto no_iref;

        path->slots[0]++;
        if (path->slots[0] >= btrfs_header_nritems(leaf))
                goto search;
again:
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        if (key.objectid != node->inode_id)
                goto out;

        if (key.type != BTRFS_INODE_REF_KEY &&
            key.type != BTRFS_INODE_EXTREF_KEY)
                goto out;

        /*
         * Delayed iref deletion is for an inode that has only one link, so
         * there is only one iref.  The case where several irefs live in the
         * same item doesn't exist.
         */
        btrfs_del_item(trans, root, path);
out:
        btrfs_release_delayed_iref(node);
no_iref:
        btrfs_release_path(path);
err_out:
        btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
        btrfs_release_delayed_inode(node);

        return ret;

search:
        btrfs_release_path(path);

        key.type = BTRFS_INODE_EXTREF_KEY;
        key.offset = -1;
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
                goto err_out;
        ASSERT(ret);

        ret = 0;
        leaf = path->nodes[0];
        path->slots[0]--;
        goto again;
}

static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
                                             struct btrfs_root *root,
                                             struct btrfs_path *path,
                                             struct btrfs_delayed_node *node)
{
        int ret;

        mutex_lock(&node->mutex);
        if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
                mutex_unlock(&node->mutex);
                return 0;
        }

        ret = __btrfs_update_delayed_inode(trans, root, path, node);
        mutex_unlock(&node->mutex);
        return ret;
}

static inline int
__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
                                   struct btrfs_path *path,
                                   struct btrfs_delayed_node *node)
{
        int ret;

        ret = btrfs_insert_delayed_items(trans, path, node->root, node);
        if (ret)
                return ret;

        ret = btrfs_delete_delayed_items(trans, path, node->root, node);
        if (ret)
                return ret;

        ret = btrfs_update_delayed_inode(trans, node->root, path, node);
        return ret;
}

/*
 * Called when committing the transaction.
 * Returns 0 on success.
 * Returns < 0 on error and returns with an aborted transaction with any
 * outstanding delayed items cleaned up.
 */
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_root *delayed_root;
        struct btrfs_delayed_node *curr_node, *prev_node;
        struct btrfs_path *path;
        struct btrfs_block_rsv *block_rsv;
        int ret = 0;
        bool count = (nr > 0);

        if (trans->aborted)
                return -EIO;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->leave_spinning = 1;

        block_rsv = trans->block_rsv;
        trans->block_rsv = &fs_info->delayed_block_rsv;

        delayed_root = fs_info->delayed_root;

        curr_node = btrfs_first_delayed_node(delayed_root);
        while (curr_node && (!count || (count && nr--))) {
                ret = __btrfs_commit_inode_delayed_items(trans, path,
                                                         curr_node);
                if (ret) {
                        btrfs_release_delayed_node(curr_node);
                        curr_node = NULL;
                        btrfs_abort_transaction(trans, ret);
                        break;
                }

                prev_node = curr_node;
                curr_node = btrfs_next_delayed_node(curr_node);
                btrfs_release_delayed_node(prev_node);
        }

        if (curr_node)
                btrfs_release_delayed_node(curr_node);
        btrfs_free_path(path);
        trans->block_rsv = block_rsv;

        return ret;
}

int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
{
        return __btrfs_run_delayed_items(trans, -1);
}

int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
{
        return __btrfs_run_delayed_items(trans, nr);
}

int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
                                     struct btrfs_inode *inode)
{
        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
        struct btrfs_path *path;
        struct btrfs_block_rsv *block_rsv;
        int ret;

        if (!delayed_node)
                return 0;

        mutex_lock(&delayed_node->mutex);
        if (!delayed_node->count) {
                mutex_unlock(&delayed_node->mutex);
                btrfs_release_delayed_node(delayed_node);
                return 0;
        }
        mutex_unlock(&delayed_node->mutex);

        path = btrfs_alloc_path();
        if (!path) {
                btrfs_release_delayed_node(delayed_node);
                return -ENOMEM;
        }
        path->leave_spinning = 1;

        block_rsv = trans->block_rsv;
        trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;

        ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);

        btrfs_release_delayed_node(delayed_node);
        btrfs_free_path(path);
        trans->block_rsv = block_rsv;

        return ret;
}

int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
{
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_trans_handle *trans;
        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
        struct btrfs_path *path;
        struct btrfs_block_rsv *block_rsv;
        int ret;

        if (!delayed_node)
                return 0;

        mutex_lock(&delayed_node->mutex);
        if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
                mutex_unlock(&delayed_node->mutex);
                btrfs_release_delayed_node(delayed_node);
                return 0;
        }
        mutex_unlock(&delayed_node->mutex);

        trans = btrfs_join_transaction(delayed_node->root);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto trans_out;
        }
        path->leave_spinning = 1;

        block_rsv = trans->block_rsv;
        trans->block_rsv = &fs_info->delayed_block_rsv;

        mutex_lock(&delayed_node->mutex);
        if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
                ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
                                                   path, delayed_node);
        else
                ret = 0;
        mutex_unlock(&delayed_node->mutex);

        btrfs_free_path(path);
        trans->block_rsv = block_rsv;
trans_out:
        btrfs_end_transaction(trans);
        btrfs_btree_balance_dirty(fs_info);
out:
        btrfs_release_delayed_node(delayed_node);

        return ret;
}

void btrfs_remove_delayed_node(struct btrfs_inode *inode)
{
        struct btrfs_delayed_node *delayed_node;

        delayed_node = READ_ONCE(inode->delayed_node);
        if (!delayed_node)
                return;

        inode->delayed_node = NULL;
        btrfs_release_delayed_node(delayed_node);
}

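/*
 * Asynchronous flushing: a work item drains prepared delayed nodes in the
 * background.  With nr == 0 it keeps going until the backlog drops below
 * BTRFS_DELAYED_BACKGROUND / 2 (or BTRFS_DELAYED_WRITEBACK nodes have been
 * handled); otherwise it processes at most nr nodes.
 */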
struct btrfs_async_delayed_work {
        struct btrfs_delayed_root *delayed_root;
        int nr;
        struct btrfs_work work;
};

static void btrfs_async_run_delayed_root(struct btrfs_work *work)
{
        struct btrfs_async_delayed_work *async_work;
        struct btrfs_delayed_root *delayed_root;
        struct btrfs_trans_handle *trans;
        struct btrfs_path *path;
        struct btrfs_delayed_node *delayed_node = NULL;
        struct btrfs_root *root;
        struct btrfs_block_rsv *block_rsv;
        int total_done = 0;

        async_work = container_of(work, struct btrfs_async_delayed_work, work);
        delayed_root = async_work->delayed_root;

        path = btrfs_alloc_path();
        if (!path)
                goto out;

        do {
                if (atomic_read(&delayed_root->items) <
                    BTRFS_DELAYED_BACKGROUND / 2)
                        break;

                delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
                if (!delayed_node)
                        break;

                path->leave_spinning = 1;
                root = delayed_node->root;

                trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
                        btrfs_release_path(path);
                        btrfs_release_prepared_delayed_node(delayed_node);
                        total_done++;
                        continue;
                }

                block_rsv = trans->block_rsv;
                trans->block_rsv = &root->fs_info->delayed_block_rsv;

                __btrfs_commit_inode_delayed_items(trans, path, delayed_node);

                trans->block_rsv = block_rsv;
                btrfs_end_transaction(trans);
                btrfs_btree_balance_dirty_nodelay(root->fs_info);

                btrfs_release_path(path);
                btrfs_release_prepared_delayed_node(delayed_node);
                total_done++;

        } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
                 || total_done < async_work->nr);

        btrfs_free_path(path);
out:
        wake_up(&delayed_root->wait);
        kfree(async_work);
}

static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
                                     struct btrfs_fs_info *fs_info, int nr)
{
        struct btrfs_async_delayed_work *async_work;

        async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
        if (!async_work)
                return -ENOMEM;

        async_work->delayed_root = delayed_root;
        btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
                        btrfs_async_run_delayed_root, NULL, NULL);
        async_work->nr = nr;

        btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
        return 0;
}

void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
{
        WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
}

static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
{
        int val = atomic_read(&delayed_root->items_seq);

        if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
                return 1;

        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
                return 1;

        return 0;
}

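/*
 * Throttle delayed-item producers.  Below BTRFS_DELAYED_BACKGROUND pending
 * items this is a no-op; at BTRFS_DELAYED_WRITEBACK or more, a full
 * asynchronous flush is kicked off and we wait until the backlog shrinks
 * or a batch completes; in between, an asynchronous flush of one
 * BTRFS_DELAYED_BATCH is scheduled without waiting.
 */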
void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
{
        struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;

        if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
            btrfs_workqueue_normal_congested(fs_info->delayed_workers))
                return;

        if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
                int seq;
                int ret;

                seq = atomic_read(&delayed_root->items_seq);

                ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
                if (ret)
                        return;

                wait_event_interruptible(delayed_root->wait,
                                         could_end_wait(delayed_root, seq));
                return;
        }

        btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}

/* Will return 0 or -ENOMEM */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
                                   const char *name, int name_len,
                                   struct btrfs_inode *dir,
                                   struct btrfs_disk_key *disk_key, u8 type,
                                   u64 index)
{
        struct btrfs_delayed_node *delayed_node;
        struct btrfs_delayed_item *delayed_item;
        struct btrfs_dir_item *dir_item;
        int ret;

        delayed_node = btrfs_get_or_create_delayed_node(dir);
        if (IS_ERR(delayed_node))
                return PTR_ERR(delayed_node);

        delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
        if (!delayed_item) {
                ret = -ENOMEM;
                goto release_node;
        }

        delayed_item->key.objectid = btrfs_ino(dir);
        delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
        delayed_item->key.offset = index;

        dir_item = (struct btrfs_dir_item *)delayed_item->data;
        dir_item->location = *disk_key;
        btrfs_set_stack_dir_transid(dir_item, trans->transid);
        btrfs_set_stack_dir_data_len(dir_item, 0);
        btrfs_set_stack_dir_name_len(dir_item, name_len);
        btrfs_set_stack_dir_type(dir_item, type);
        memcpy((char *)(dir_item + 1), name, name_len);

        ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
        /*
         * We reserved enough space when we started the transaction, so a
         * metadata reservation failure here is impossible.
         */
        BUG_ON(ret);

        mutex_lock(&delayed_node->mutex);
        ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
        if (unlikely(ret)) {
                btrfs_err(trans->fs_info,
                          "failed to add delayed dir index item (name: %.*s) into the insertion tree of the delayed node (root id: %llu, inode id: %llu, errno: %d)",
                          name_len, name, delayed_node->root->root_key.objectid,
                          delayed_node->inode_id, ret);
                BUG();
        }
        mutex_unlock(&delayed_node->mutex);

release_node:
        btrfs_release_delayed_node(delayed_node);
        return ret;
}

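/*
 * If the dir index item that is about to be deleted is still pending in
 * the insertion tree, the insert and the delete cancel out: drop the
 * queued insertion item and return 0, so the caller doesn't need to queue
 * a deletion item at all.  Returns 1 if no pending insertion was found.
 */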
1479 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1480                                                struct btrfs_delayed_node *node,
1481                                                struct btrfs_key *key)
1482 {
1483         struct btrfs_delayed_item *item;
1484
1485         mutex_lock(&node->mutex);
1486         item = __btrfs_lookup_delayed_insertion_item(node, key);
1487         if (!item) {
1488                 mutex_unlock(&node->mutex);
1489                 return 1;
1490         }
1491
1492         btrfs_delayed_item_release_metadata(node->root, item);
1493         btrfs_release_delayed_item(item);
1494         mutex_unlock(&node->mutex);
1495         return 0;
1496 }
1497
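/*
 * Queue the deletion of a dir index item.  If the matching insertion is
 * still pending, it is simply cancelled; otherwise a deletion item is
 * reserved and added to the delayed node's deletion tree.
 */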
1498 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1499                                    struct btrfs_inode *dir, u64 index)
1500 {
1501         struct btrfs_delayed_node *node;
1502         struct btrfs_delayed_item *item;
1503         struct btrfs_key item_key;
1504         int ret;
1505
1506         node = btrfs_get_or_create_delayed_node(dir);
1507         if (IS_ERR(node))
1508                 return PTR_ERR(node);
1509
1510         item_key.objectid = btrfs_ino(dir);
1511         item_key.type = BTRFS_DIR_INDEX_KEY;
1512         item_key.offset = index;
1513
1514         ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
1515                                                   &item_key);
1516         if (!ret)
1517                 goto end;
1518
1519         item = btrfs_alloc_delayed_item(0);
1520         if (!item) {
1521                 ret = -ENOMEM;
1522                 goto end;
1523         }
1524
1525         item->key = item_key;
1526
1527         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
1528         /*
1529          * We reserved enough space when we started the transaction, so
1530          * a metadata reservation failure here is impossible.
1531          */
1532         if (ret < 0) {
1533                 btrfs_err(trans->fs_info,
1534 "metadata reservation failed for delayed dir item deltiona, should have been reserved");
1535                 btrfs_release_delayed_item(item);
1536                 goto end;
1537         }
1538
1539         mutex_lock(&node->mutex);
1540         ret = __btrfs_add_delayed_deletion_item(node, item);
1541         if (unlikely(ret)) {
1542                 btrfs_err(trans->fs_info,
1543                           "failed to add delayed dir index item (index: %llu) into the deletion tree of the delayed node (root id: %llu, inode id: %llu, errno: %d)",
1544                           index, node->root->root_key.objectid,
1545                           node->inode_id, ret);
1546                 btrfs_delayed_item_release_metadata(dir->root, item);
1547                 btrfs_release_delayed_item(item);
1548         }
1549         mutex_unlock(&node->mutex);
1550 end:
1551         btrfs_release_delayed_node(node);
1552         return ret;
1553 }
1554
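/*
 * Copy the cached directory index counter from the delayed node into the
 * in-memory inode.  Returns -ENOENT if there is no delayed node and
 * -EINVAL if no index_cnt has been recorded yet.
 */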
1555 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1556 {
1557         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1558
1559         if (!delayed_node)
1560                 return -ENOENT;
1561
1562         /*
1563          * Since we hold the i_mutex of this directory, no new directory
1564          * index can be added to the delayed node and index_cnt cannot be
1565          * updated concurrently, so we needn't lock the delayed node.
1566          */
1567         if (!delayed_node->index_cnt) {
1568                 btrfs_release_delayed_node(delayed_node);
1569                 return -EINVAL;
1570         }
1571
1572         inode->index_cnt = delayed_node->index_cnt;
1573         btrfs_release_delayed_node(delayed_node);
1574         return 0;
1575 }
1576
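/*
 * Take references on all pending insertion and deletion items of this
 * inode's delayed node so that readdir can merge them with the on-disk
 * entries.  The inode lock is upgraded to exclusive because only one
 * readdir at a time may use item->readdir_list.  Returns false if the
 * inode has no delayed node.
 */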
1577 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1578                                      struct list_head *ins_list,
1579                                      struct list_head *del_list)
1580 {
1581         struct btrfs_delayed_node *delayed_node;
1582         struct btrfs_delayed_item *item;
1583
1584         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1585         if (!delayed_node)
1586                 return false;
1587
1588         /*
1589          * We can only do one readdir with delayed items at a time because of
1590          * item->readdir_list.
1591          */
1592         inode_unlock_shared(inode);
1593         inode_lock(inode);
1594
1595         mutex_lock(&delayed_node->mutex);
1596         item = __btrfs_first_delayed_insertion_item(delayed_node);
1597         while (item) {
1598                 refcount_inc(&item->refs);
1599                 list_add_tail(&item->readdir_list, ins_list);
1600                 item = __btrfs_next_delayed_item(item);
1601         }
1602
1603         item = __btrfs_first_delayed_deletion_item(delayed_node);
1604         while (item) {
1605                 refcount_inc(&item->refs);
1606                 list_add_tail(&item->readdir_list, del_list);
1607                 item = __btrfs_next_delayed_item(item);
1608         }
1609         mutex_unlock(&delayed_node->mutex);
1610         /*
1611          * This delayed node is still cached in the btrfs inode, so refs
1612          * must be > 1 now, and we needn't check whether it is going to be
1613          * freed or not.
1614          *
1615          * Besides that, this function is used only to read the directory,
1616          * so no delayed items are inserted or deleted in the meantime and
1617          * we needn't requeue or dequeue this delayed node.
1618          */
1619         refcount_dec(&delayed_node->refs);
1620
1621         return true;
1622 }
1623
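/*
 * Drop the item references taken by btrfs_readdir_get_delayed_items()
 * and downgrade the inode lock back to shared for the VFS.
 */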
1624 void btrfs_readdir_put_delayed_items(struct inode *inode,
1625                                      struct list_head *ins_list,
1626                                      struct list_head *del_list)
1627 {
1628         struct btrfs_delayed_item *curr, *next;
1629
1630         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1631                 list_del(&curr->readdir_list);
1632                 if (refcount_dec_and_test(&curr->refs))
1633                         kfree(curr);
1634         }
1635
1636         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1637                 list_del(&curr->readdir_list);
1638                 if (refcount_dec_and_test(&curr->refs))
1639                         kfree(curr);
1640         }
1641
1642         /*
1643          * The VFS is going to do up_read(), so we need to downgrade back to a
1644          * read lock.
1645          */
1646         downgrade_write(&inode->i_rwsem);
1647 }
1648
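/*
 * Return 1 if the given index is covered by a pending delayed deletion,
 * so readdir must skip the on-disk entry.  The del_list is sorted by
 * index, which lets us stop scanning early.
 */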
1649 int btrfs_should_delete_dir_index(struct list_head *del_list,
1650                                   u64 index)
1651 {
1652         struct btrfs_delayed_item *curr;
1653         int ret = 0;
1654
1655         list_for_each_entry(curr, del_list, readdir_list) {
1656                 if (curr->key.offset > index)
1657                         break;
1658                 if (curr->key.offset == index) {
1659                         ret = 1;
1660                         break;
1661                 }
1662         }
1663         return ret;
1664 }
1665
1666 /*
1667  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1668  * Returns 1 if the dir_context buffer filled up, otherwise 0.
1669  */
1670 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1671                                     struct list_head *ins_list)
1672 {
1673         struct btrfs_dir_item *di;
1674         struct btrfs_delayed_item *curr, *next;
1675         struct btrfs_key location;
1676         char *name;
1677         int name_len;
1678         int over = 0;
1679         unsigned char d_type;
1680
1681         if (list_empty(ins_list))
1682                 return 0;
1683
1684         /*
1685          * The data of the delayed items can no longer change, so we
1686          * needn't lock them.  And since we hold the i_mutex of the
1687          * directory, nobody can delete any directory index now.
1688          */
1689         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1690                 list_del(&curr->readdir_list);
1691
1692                 if (curr->key.offset < ctx->pos) {
1693                         if (refcount_dec_and_test(&curr->refs))
1694                                 kfree(curr);
1695                         continue;
1696                 }
1697
1698                 ctx->pos = curr->key.offset;
1699
1700                 di = (struct btrfs_dir_item *)curr->data;
1701                 name = (char *)(di + 1);
1702                 name_len = btrfs_stack_dir_name_len(di);
1703
1704                 d_type = fs_ftype_to_dtype(di->type);
1705                 btrfs_disk_key_to_cpu(&location, &di->location);
1706
1707                 over = !dir_emit(ctx, name, name_len,
1708                                location.objectid, d_type);
1709
1710                 if (refcount_dec_and_test(&curr->refs))
1711                         kfree(curr);
1712
1713                 if (over)
1714                         return 1;
1715                 ctx->pos++;
1716         }
1717         return 0;
1718 }
1719
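/*
 * Copy the fields of an in-memory inode into the stack inode item that
 * is cached in the delayed node until it is flushed to the fs tree.
 */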
1720 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1721                                   struct btrfs_inode_item *inode_item,
1722                                   struct inode *inode)
1723 {
1724         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1725         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1726         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1727         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1728         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1729         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1730         btrfs_set_stack_inode_generation(inode_item,
1731                                          BTRFS_I(inode)->generation);
1732         btrfs_set_stack_inode_sequence(inode_item,
1733                                        inode_peek_iversion(inode));
1734         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1735         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1736         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1737         btrfs_set_stack_inode_block_group(inode_item, 0);
1738
1739         btrfs_set_stack_timespec_sec(&inode_item->atime,
1740                                      inode->i_atime.tv_sec);
1741         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1742                                       inode->i_atime.tv_nsec);
1743
1744         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1745                                      inode->i_mtime.tv_sec);
1746         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1747                                       inode->i_mtime.tv_nsec);
1748
1749         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1750                                      inode->i_ctime.tv_sec);
1751         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1752                                       inode->i_ctime.tv_nsec);
1753
1754         btrfs_set_stack_timespec_sec(&inode_item->otime,
1755                                      BTRFS_I(inode)->i_otime.tv_sec);
1756         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1757                                      BTRFS_I(inode)->i_otime.tv_nsec);
1758 }
1759
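/*
 * The counterpart of fill_stack_inode_item(): if the delayed node holds
 * a dirty inode item, populate the in-memory inode from it instead of
 * reading the inode item from disk.  Returns -ENOENT if nothing is
 * cached.
 */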
1760 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1761 {
1762         struct btrfs_delayed_node *delayed_node;
1763         struct btrfs_inode_item *inode_item;
1764
1765         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1766         if (!delayed_node)
1767                 return -ENOENT;
1768
1769         mutex_lock(&delayed_node->mutex);
1770         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1771                 mutex_unlock(&delayed_node->mutex);
1772                 btrfs_release_delayed_node(delayed_node);
1773                 return -ENOENT;
1774         }
1775
1776         inode_item = &delayed_node->inode_item;
1777
1778         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1779         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1780         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1781         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1782         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1783         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1784         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1785         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1786
1787         inode_set_iversion_queried(inode,
1788                                    btrfs_stack_inode_sequence(inode_item));
1789         inode->i_rdev = 0;
1790         *rdev = btrfs_stack_inode_rdev(inode_item);
1791         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1792
1793         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1794         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1795
1796         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1797         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1798
1799         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1800         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1801
1802         BTRFS_I(inode)->i_otime.tv_sec =
1803                 btrfs_stack_timespec_sec(&inode_item->otime);
1804         BTRFS_I(inode)->i_otime.tv_nsec =
1805                 btrfs_stack_timespec_nsec(&inode_item->otime);
1806
1807         inode->i_generation = BTRFS_I(inode)->generation;
1808         BTRFS_I(inode)->index_cnt = (u64)-1;
1809
1810         mutex_unlock(&delayed_node->mutex);
1811         btrfs_release_delayed_node(delayed_node);
1812         return 0;
1813 }
1814
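/*
 * Record an inode update in the delayed node rather than updating the fs
 * tree immediately.  The first update of a node reserves metadata space
 * and marks the cached inode item dirty; later updates just refresh the
 * cached copy.
 */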
1815 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1816                                struct btrfs_root *root, struct inode *inode)
1817 {
1818         struct btrfs_delayed_node *delayed_node;
1819         int ret = 0;
1820
1821         delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1822         if (IS_ERR(delayed_node))
1823                 return PTR_ERR(delayed_node);
1824
1825         mutex_lock(&delayed_node->mutex);
1826         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1827                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1828                 goto release_node;
1829         }
1830
1831         ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1832                                                    delayed_node);
1833         if (ret)
1834                 goto release_node;
1835
1836         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1837         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1838         delayed_node->count++;
1839         atomic_inc(&root->fs_info->delayed_root->items);
1840 release_node:
1841         mutex_unlock(&delayed_node->mutex);
1842         btrfs_release_delayed_node(delayed_node);
1843         return ret;
1844 }
1845
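/*
 * Queue the deletion of the inode's single inode ref so that it can be
 * done together with the delayed inode item.  Returns -EAGAIN during log
 * recovery, when delayed inode updates are not allowed.
 */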
1846 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1847 {
1848         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1849         struct btrfs_delayed_node *delayed_node;
1850
1851         /*
1852          * We don't do delayed inode updates during log recovery because it
1853          * leads to enospc problems.  This means we also can't do delayed
1854          * inode refs.
1855          */
1856         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1857                 return -EAGAIN;
1858
1859         delayed_node = btrfs_get_or_create_delayed_node(inode);
1860         if (IS_ERR(delayed_node))
1861                 return PTR_ERR(delayed_node);
1862
1863         /*
1864          * We don't reserve space for inode ref deletion is because:
1865          * - We ONLY do async inode ref deletion for the inode who has only
1866          *   one link(i_nlink == 1), it means there is only one inode ref.
1867          *   And in most case, the inode ref and the inode item are in the
1868          *   same leaf, and we will deal with them at the same time.
1869          *   Since we are sure we will reserve the space for the inode item,
1870          *   it is unnecessary to reserve space for inode ref deletion.
1871          * - If the inode ref and the inode item are not in the same leaf,
1872          *   We also needn't worry about enospc problem, because we reserve
1873          *   much more space for the inode update than it needs.
1874          * - At the worst, we can steal some space from the global reservation.
1875          *   It is very rare.
1876          */
1877         mutex_lock(&delayed_node->mutex);
1878         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1879                 goto release_node;
1880
1881         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1882         delayed_node->count++;
1883         atomic_inc(&fs_info->delayed_root->items);
1884 release_node:
1885         mutex_unlock(&delayed_node->mutex);
1886         btrfs_release_delayed_node(delayed_node);
1887         return 0;
1888 }
1889
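/*
 * Throw away everything queued on a delayed node: release the reserved
 * metadata of all pending insertion and deletion items, the delayed
 * iref and the dirty inode item.
 */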
1890 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1891 {
1892         struct btrfs_root *root = delayed_node->root;
1893         struct btrfs_fs_info *fs_info = root->fs_info;
1894         struct btrfs_delayed_item *curr_item, *prev_item;
1895
1896         mutex_lock(&delayed_node->mutex);
1897         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1898         while (curr_item) {
1899                 btrfs_delayed_item_release_metadata(root, curr_item);
1900                 prev_item = curr_item;
1901                 curr_item = __btrfs_next_delayed_item(prev_item);
1902                 btrfs_release_delayed_item(prev_item);
1903         }
1904
1905         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1906         while (curr_item) {
1907                 btrfs_delayed_item_release_metadata(root, curr_item);
1908                 prev_item = curr_item;
1909                 curr_item = __btrfs_next_delayed_item(prev_item);
1910                 btrfs_release_delayed_item(prev_item);
1911         }
1912
1913         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1914                 btrfs_release_delayed_iref(delayed_node);
1915
1916         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1917                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
1918                 btrfs_release_delayed_inode(delayed_node);
1919         }
1920         mutex_unlock(&delayed_node->mutex);
1921 }
1922
1923 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
1924 {
1925         struct btrfs_delayed_node *delayed_node;
1926
1927         delayed_node = btrfs_get_delayed_node(inode);
1928         if (!delayed_node)
1929                 return;
1930
1931         __btrfs_kill_delayed_node(delayed_node);
1932         btrfs_release_delayed_node(delayed_node);
1933 }
1934
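/*
 * Kill the delayed nodes of a whole root.  The radix tree is scanned in
 * batches of up to 8 nodes; each batch is referenced under inode_lock
 * and then killed and released outside of it.
 */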
1935 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1936 {
1937         u64 inode_id = 0;
1938         struct btrfs_delayed_node *delayed_nodes[8];
1939         int i, n;
1940
1941         while (1) {
1942                 spin_lock(&root->inode_lock);
1943                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1944                                            (void **)delayed_nodes, inode_id,
1945                                            ARRAY_SIZE(delayed_nodes));
1946                 if (!n) {
1947                         spin_unlock(&root->inode_lock);
1948                         break;
1949                 }
1950
1951                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1952
1953                 for (i = 0; i < n; i++)
1954                         refcount_inc(&delayed_nodes[i]->refs);
1955                 spin_unlock(&root->inode_lock);
1956
1957                 for (i = 0; i < n; i++) {
1958                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1959                         btrfs_release_delayed_node(delayed_nodes[i]);
1960                 }
1961         }
1962 }
1963
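/*
 * Walk the fs-wide list of delayed nodes and discard all of their
 * pending work.
 */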
1964 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1965 {
1966         struct btrfs_delayed_node *curr_node, *prev_node;
1967
1968         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1969         while (curr_node) {
1970                 __btrfs_kill_delayed_node(curr_node);
1971
1972                 prev_node = curr_node;
1973                 curr_node = btrfs_next_delayed_node(curr_node);
1974                 btrfs_release_delayed_node(prev_node);
1975         }
1976 }
1977