From 004b68621897f06aa2817e7438469d23f4a3a284 Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Fri, 14 Apr 2017 23:24:55 +0800
Subject: [PATCH] f2fs: use rb-tree to track pending discard commands

Introduce rb-tree based discard cache infrastructure to speed up
lookup and merge operation of discard entry.

Signed-off-by: Chao Yu
[Jaegeuk Kim: initialize dc to avoid build warning]
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/extent_cache.c |  15 +--
 fs/f2fs/f2fs.h         |  48 ++++++++-
 fs/f2fs/segment.c      | 223 +++++++++++++++++++++++++++++++++--------
 3 files changed, 236 insertions(+), 50 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 68e649a31c7d..221ad086ee00 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -49,7 +49,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
 	return NULL;
 }
 
-static struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+struct rb_entry *__lookup_rb_tree(struct rb_root *root,
 				struct rb_entry *cached_re, unsigned int ofs)
 {
 	struct rb_entry *re;
@@ -61,7 +61,7 @@ static struct rb_entry *__lookup_rb_tree(struct rb_root *root,
 	return re;
 }
 
-static struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
 				struct rb_root *root, struct rb_node **parent,
 				unsigned int ofs)
 {
@@ -92,13 +92,14 @@ static struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
  * in order to simpfy the insertion after.
  * tree must stay unchanged between lookup and insertion.
  */
-static struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
 				struct rb_entry *cached_re,
 				unsigned int ofs,
 				struct rb_entry **prev_entry,
 				struct rb_entry **next_entry,
 				struct rb_node ***insert_p,
-				struct rb_node **insert_parent)
+				struct rb_node **insert_parent,
+				bool force)
 {
 	struct rb_node **pnode = &root->rb_node;
 	struct rb_node *parent = NULL, *tmp_node;
@@ -145,12 +146,12 @@ static struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
 	return NULL;
 
 lookup_neighbors:
-	if (ofs == re->ofs) {
+	if (ofs == re->ofs || force) {
 		/* lookup prev node for merging backward later */
 		tmp_node = rb_prev(&re->rb_node);
 		*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
 	}
-	if (ofs == re->ofs + re->len - 1) {
+	if (ofs == re->ofs + re->len - 1 || force) {
 		/* lookup next node for merging frontward later */
 		tmp_node = rb_next(&re->rb_node);
 		*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
@@ -481,7 +482,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
 				(struct rb_entry *)et->cached_en, fofs,
 				(struct rb_entry **)&prev_en,
 				(struct rb_entry **)&next_en,
-				&insert_p, &insert_parent);
+				&insert_p, &insert_parent, false);
 
 	if (!en)
 		en = next_en;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 562db8989a4e..ee7d6105a7a5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -194,13 +194,26 @@ enum {
 	D_DONE,
 };
 
+struct discard_info {
+	block_t lstart;			/* logical start address */
+	block_t len;			/* length */
+	block_t start;			/* actual start address in dev */
+};
+
 struct discard_cmd {
+	struct rb_node rb_node;		/* rb node located in rb-tree */
+	union {
+		struct {
+			block_t lstart;	/* logical start address */
+			block_t len;	/* length */
+			block_t start;	/* actual start address in dev */
+		};
+		struct discard_info di;	/* discard info */
+
+	};
 	struct list_head list;		/* command list */
 	struct completion wait;		/* compleation */
 	struct block_device *bdev;	/* bdev */
-	block_t lstart;			/* logical start address */
-	block_t start;			/* actual start address in dev */
-	block_t len;			/* length */
 	int state;			/* state */
 	int error;			/* bio error */
 };
@@ -217,6 +230,7 @@ struct discard_cmd_control {
 	atomic_t issued_discard;	/* # of issued discard */
 	atomic_t issing_discard;	/* # of issing discard */
 	atomic_t discard_cmd_cnt;	/* # of cached cmd count */
+	struct rb_root root;		/* root of discard rb-tree */
 };
 
 /* for the list of fsync inodes, used only during recovery */
@@ -517,6 +531,24 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
 	ei->len = len;
 }
 
+static inline bool __is_discard_mergeable(struct discard_info *back,
+						struct discard_info *front)
+{
+	return back->lstart + back->len == front->lstart;
+}
+
+static inline bool __is_discard_back_mergeable(struct discard_info *cur,
+						struct discard_info *back)
+{
+	return __is_discard_mergeable(back, cur);
+}
+
+static inline bool __is_discard_front_mergeable(struct discard_info *cur,
+						struct discard_info *front)
+{
+	return __is_discard_mergeable(cur, front);
+}
+
 static inline bool __is_extent_mergeable(struct extent_info *back,
 						struct extent_info *front)
 {
@@ -2573,6 +2605,16 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
 /*
  * extent_cache.c
  */
+struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+				struct rb_entry *cached_re, unsigned int ofs);
+struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+				struct rb_root *root, struct rb_node **parent,
+				unsigned int ofs);
+struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+				struct rb_entry *cached_re, unsigned int ofs,
+				struct rb_entry **prev_entry, struct rb_entry **next_entry,
+				struct rb_node ***insert_p, struct rb_node **insert_parent,
+				bool force);
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
 bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
 void f2fs_drop_extent_tree(struct inode *inode);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 58cfbe3d4dc7..d137a08ec3a0 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -672,7 +672,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
-static void __add_discard_cmd(struct f2fs_sb_info *sbi,
+static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t lstart,
 		block_t start, block_t len)
 {
@@ -689,18 +689,46 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi,
 	dc->state = D_PREP;
 	dc->error = 0;
 	init_completion(&dc->wait);
-
-	mutex_lock(&dcc->cmd_lock);
 	list_add_tail(&dc->list, pend_list);
-	mutex_unlock(&dcc->cmd_lock);
-
 	atomic_inc(&dcc->discard_cmd_cnt);
+
+	return dc;
+}
+
+static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
+				struct block_device *bdev, block_t lstart,
+				block_t start, block_t len,
+				struct rb_node *parent, struct rb_node **p)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	struct discard_cmd *dc;
+
+	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
+
+	rb_link_node(&dc->rb_node, parent, p);
+	rb_insert_color(&dc->rb_node, &dcc->root);
+
+	return dc;
 }
 
-static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
+static void __detach_discard_cmd(struct discard_cmd_control *dcc,
+							struct discard_cmd *dc)
 {
 	if (dc->state == D_DONE)
-		atomic_dec(&(SM_I(sbi)->dcc_info->issing_discard));
+		atomic_dec(&dcc->issing_discard);
+
+	list_del(&dc->list);
+	rb_erase(&dc->rb_node, &dcc->root);
+
+	kmem_cache_free(discard_cmd_slab, dc);
+
+	atomic_dec(&dcc->discard_cmd_cnt);
+}
+
+static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
+							struct discard_cmd *dc)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 
 	if (dc->error == -EOPNOTSUPP)
 		dc->error = 0;
@@ -708,9 +736,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d
 	if (dc->error)
 		f2fs_msg(sbi->sb, KERN_INFO,
 				"Issue discard failed, ret: %d", dc->error);
-	list_del(&dc->list);
-	kmem_cache_free(discard_cmd_slab, dc);
-	atomic_dec(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
+	__detach_discard_cmd(dcc, dc);
 }
 
 static void f2fs_submit_discard_endio(struct bio *bio)
@@ -754,62 +780,178 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
 	}
 }
 
-static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
-		struct block_device *bdev, block_t blkstart, block_t blklen)
+static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
+				struct block_device *bdev, block_t lstart,
+				block_t start, block_t len,
+				struct rb_node **insert_p,
+				struct rb_node *insert_parent)
 {
-	block_t lblkstart = blkstart;
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	struct rb_node **p = &dcc->root.rb_node;
+	struct rb_node *parent = NULL;
+	struct discard_cmd *dc = NULL;
 
-	trace_f2fs_issue_discard(bdev, blkstart, blklen);
+	if (insert_p && insert_parent) {
+		parent = insert_parent;
+		p = insert_p;
+		goto do_insert;
+	}
 
-	if (sbi->s_ndevs) {
-		int devi = f2fs_target_device_index(sbi, blkstart);
+	p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
+do_insert:
+	dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
+	if (!dc)
+		return NULL;
 
-		blkstart -= FDEV(devi).start_blk;
-	}
-	__add_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
-	wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
-	return 0;
+	return dc;
 }
 
 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
 				struct discard_cmd *dc, block_t blkaddr)
 {
-	block_t end_block = START_BLOCK(sbi, GET_SEGNO(sbi, blkaddr) + 1);
+	struct discard_info di = dc->di;
+	bool modified = false;
 
-	if (dc->state == D_DONE || dc->lstart + dc->len <= end_block) {
+	if (dc->state == D_DONE || dc->len == 1) {
 		__remove_discard_cmd(sbi, dc);
 		return;
 	}
 
-	if (blkaddr - dc->lstart < dc->lstart + dc->len - end_block) {
-		dc->start += (end_block - dc->lstart);
-		dc->len -= (end_block - dc->lstart);
-		dc->lstart = end_block;
-	} else {
+	if (blkaddr > di.lstart) {
 		dc->len = blkaddr - dc->lstart;
+		modified = true;
+	}
+
+	if (blkaddr < di.lstart + di.len - 1) {
+		if (modified) {
+			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
+					di.start + blkaddr + 1 - di.lstart,
+					di.lstart + di.len - 1 - blkaddr,
+					NULL, NULL);
+		} else {
+			dc->lstart++;
+			dc->len--;
+			dc->start++;
+		}
 	}
 }
 
-/* This should be covered by global mutex, &sit_i->sentry_lock */
-void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
+static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
+				struct block_device *bdev, block_t lstart,
+				block_t start, block_t len)
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-	struct list_head *pend_list = &(dcc->discard_pend_list);
-	struct list_head *wait_list = &(dcc->discard_wait_list);
-	struct discard_cmd *dc, *tmp;
+	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+	struct discard_cmd *dc;
+	struct discard_info di = {0};
+	struct rb_node **insert_p = NULL, *insert_parent = NULL;
+	block_t end = lstart + len;
 
 	mutex_lock(&dcc->cmd_lock);
 
-	list_for_each_entry_safe(dc, tmp, pend_list, list) {
-		if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len)
-			__punch_discard_cmd(sbi, dc, blkaddr);
+	dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+					NULL, lstart,
+					(struct rb_entry **)&prev_dc,
+					(struct rb_entry **)&next_dc,
+					&insert_p, &insert_parent, true);
+	if (dc)
+		prev_dc = dc;
+
+	if (!prev_dc) {
+		di.lstart = lstart;
+		di.len = next_dc ? next_dc->lstart - lstart : len;
+		di.len = min(di.len, len);
+		di.start = start;
 	}
 
-	list_for_each_entry_safe(dc, tmp, wait_list, list) {
-		if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
-			wait_for_completion_io(&dc->wait);
-			__punch_discard_cmd(sbi, dc, blkaddr);
+	while (1) {
+		struct rb_node *node;
+		bool merged = false;
+		struct discard_cmd *tdc = NULL;
+
+		if (prev_dc) {
+			di.lstart = prev_dc->lstart + prev_dc->len;
+			if (di.lstart < lstart)
+				di.lstart = lstart;
+			if (di.lstart >= end)
+				break;
+
+			if (!next_dc || next_dc->lstart > end)
+				di.len = end - di.lstart;
+			else
+				di.len = next_dc->lstart - di.lstart;
+			di.start = start + di.lstart - lstart;
+		}
+
+		if (!di.len)
+			goto next;
+
+		if (prev_dc && prev_dc->state == D_PREP &&
+			prev_dc->bdev == bdev &&
+			__is_discard_back_mergeable(&di, &prev_dc->di)) {
+			prev_dc->di.len += di.len;
+			di = prev_dc->di;
+			tdc = prev_dc;
+			merged = true;
+		}
+
+		if (next_dc && next_dc->state == D_PREP &&
+			next_dc->bdev == bdev &&
+			__is_discard_front_mergeable(&di, &next_dc->di)) {
+			next_dc->di.lstart = di.lstart;
+			next_dc->di.len += di.len;
+			next_dc->di.start = di.start;
+			if (tdc)
+				__remove_discard_cmd(sbi, tdc);
+
+			merged = true;
 		}
+
+		if (!merged)
+			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
+							di.len, NULL, NULL);
+ next:
+		prev_dc = next_dc;
+		if (!prev_dc)
+			break;
+
+		node = rb_next(&prev_dc->rb_node);
+		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+	}
+
+	mutex_unlock(&dcc->cmd_lock);
+}
+
+static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
+		struct block_device *bdev, block_t blkstart, block_t blklen)
+{
+	block_t lblkstart = blkstart;
+
+	trace_f2fs_issue_discard(bdev, blkstart, blklen);
+
+	if (sbi->s_ndevs) {
+		int devi = f2fs_target_device_index(sbi, blkstart);
+
+		blkstart -= FDEV(devi).start_blk;
+	}
+	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
+	wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
+	return 0;
+}
+
+/* This should be covered by global mutex, &sit_i->sentry_lock */
+void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	struct discard_cmd *dc;
+
+	mutex_lock(&dcc->cmd_lock);
+
+	dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr);
+	if (dc) {
+		if (dc->state != D_PREP)
+			wait_for_completion_io(&dc->wait);
+		__punch_discard_cmd(sbi, dc, blkaddr);
 	}
 
 	mutex_unlock(&dcc->cmd_lock);
@@ -1178,6 +1320,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 	atomic_set(&dcc->discard_cmd_cnt, 0);
 	dcc->nr_discards = 0;
 	dcc->max_discards = 0;
+	dcc->root = RB_ROOT;
 	init_waitqueue_head(&dcc->discard_wait_queue);
 
 	SM_I(sbi)->dcc_info = dcc;
-- 
2.45.2
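
For illustration only (not part of the patch): the rb-tree path above merges a new discard request into an existing pending command only when the two ranges are exactly adjacent in the logical address space, which is what the __is_discard_mergeable() helpers added to f2fs.h check. Below is a minimal, self-contained userspace C sketch of that back-merge rule. The struct layout and helper name mirror the patch; main() and the sample block numbers are made-up values for demonstration.

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int block_t;

/* Mirrors struct discard_info from the patch: one pending discard range. */
struct discard_info {
	block_t lstart;	/* logical start address */
	block_t len;	/* length */
	block_t start;	/* actual start address in dev */
};

/* "back" ends exactly where "front" begins, so the two ranges can merge. */
static bool __is_discard_mergeable(struct discard_info *back,
				   struct discard_info *front)
{
	return back->lstart + back->len == front->lstart;
}

int main(void)
{
	/* Sample values, chosen only to demonstrate a back merge. */
	struct discard_info prev = { .lstart = 100, .len = 28, .start = 1124 };
	struct discard_info cur  = { .lstart = 128, .len = 16, .start = 1152 };

	if (__is_discard_mergeable(&prev, &cur)) {
		/* Extend the existing entry instead of inserting a new node,
		 * analogous to the back-merge case in
		 * __update_discard_tree_range(). */
		prev.len += cur.len;
	}
	printf("merged range: lstart=%u len=%u\n", prev.lstart, prev.len);
	return 0;
}

The front-merge case works the same way with the roles swapped, and when both neighbors match, the patch folds the back-merged node into the front one and erases the duplicate from the rb-tree.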