1 /*
2  * Copyright (C) 2016 CNEX Labs
3  * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4  *                  Matias Bjorling <matias@cnexlabs.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License version
8  * 2 as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * pblk-core.c - pblk's core functionality
16  *
17  */
18
19 #include "pblk.h"
20
21 static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
22                          struct ppa_addr *ppa)
23 {
24         struct nvm_tgt_dev *dev = pblk->dev;
25         struct nvm_geo *geo = &dev->geo;
26         int pos = pblk_dev_ppa_to_pos(geo, *ppa);
27
28         pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
29         atomic_long_inc(&pblk->erase_failed);
30
31         atomic_dec(&line->blk_in_line);
32         if (test_and_set_bit(pos, line->blk_bitmap))
33                 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
34                                                         line->id, pos);
35
36         pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
37 }
38
39 static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
40 {
41         struct pblk_line *line;
42
43         line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)];
44         atomic_dec(&line->left_seblks);
45
46         if (rqd->error) {
47                 struct ppa_addr *ppa;
48
49                 ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
50                 if (!ppa)
51                         return;
52
53                 *ppa = rqd->ppa_addr;
54                 pblk_mark_bb(pblk, line, ppa);
55         }
56 }
57
58 /* Erase completion assumes that only one block is erased at a time */
59 static void pblk_end_io_erase(struct nvm_rq *rqd)
60 {
61         struct pblk *pblk = rqd->private;
62
63         __pblk_end_io_erase(pblk, rqd);
64         mempool_free(rqd, pblk->g_rq_pool);
65 }
66
67 void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
68                            u64 paddr)
69 {
70         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
71         struct list_head *move_list = NULL;
72
73         /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
74          * table is modified with reclaimed sectors, a check is done to ensure
75          * that newer updates are not overwritten.
76          */
77         spin_lock(&line->lock);
78         if (line->state == PBLK_LINESTATE_GC ||
79                                         line->state == PBLK_LINESTATE_FREE) {
80                 spin_unlock(&line->lock);
81                 return;
82         }
83
84         if (test_and_set_bit(paddr, line->invalid_bitmap)) {
85                 WARN_ONCE(1, "pblk: double invalidate\n");
86                 spin_unlock(&line->lock);
87                 return;
88         }
89         le32_add_cpu(line->vsc, -1);
90
91         if (line->state == PBLK_LINESTATE_CLOSED)
92                 move_list = pblk_line_gc_list(pblk, line);
93         spin_unlock(&line->lock);
94
95         if (move_list) {
96                 spin_lock(&l_mg->gc_lock);
97                 spin_lock(&line->lock);
98                 /* Prevent moving a line that has just been chosen for GC */
99                 if (line->state == PBLK_LINESTATE_GC ||
100                                         line->state == PBLK_LINESTATE_FREE) {
101                         spin_unlock(&line->lock);
102                         spin_unlock(&l_mg->gc_lock);
103                         return;
104                 }
105                 spin_unlock(&line->lock);
106
107                 list_move_tail(&line->list, move_list);
108                 spin_unlock(&l_mg->gc_lock);
109         }
110 }
111
112 void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
113 {
114         struct pblk_line *line;
115         u64 paddr;
116         int line_id;
117
118 #ifdef CONFIG_NVM_DEBUG
119         /* Callers must ensure that the ppa points to a device address */
120         BUG_ON(pblk_addr_in_cache(ppa));
121         BUG_ON(pblk_ppa_empty(ppa));
122 #endif
123
124         line_id = pblk_tgt_ppa_to_line(ppa);
125         line = &pblk->lines[line_id];
126         paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
127
128         __pblk_map_invalidate(pblk, line, paddr);
129 }
130
131 static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
132                                   unsigned int nr_secs)
133 {
134         sector_t lba;
135
136         spin_lock(&pblk->trans_lock);
137         for (lba = slba; lba < slba + nr_secs; lba++) {
138                 struct ppa_addr ppa;
139
140                 ppa = pblk_trans_map_get(pblk, lba);
141
142                 if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
143                         pblk_map_invalidate(pblk, ppa);
144
145                 pblk_ppa_set_empty(&ppa);
146                 pblk_trans_map_set(pblk, lba, ppa);
147         }
148         spin_unlock(&pblk->trans_lock);
149 }
150
151 struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
152 {
153         mempool_t *pool;
154         struct nvm_rq *rqd;
155         int rq_size;
156
157         if (rw == WRITE) {
158                 pool = pblk->w_rq_pool;
159                 rq_size = pblk_w_rq_size;
160         } else {
161                 pool = pblk->g_rq_pool;
162                 rq_size = pblk_g_rq_size;
163         }
164
165         rqd = mempool_alloc(pool, GFP_KERNEL);
166         memset(rqd, 0, rq_size);
167
168         return rqd;
169 }
170
171 void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
172 {
173         mempool_t *pool;
174
175         if (rw == WRITE)
176                 pool = pblk->w_rq_pool;
177         else
178                 pool = pblk->g_rq_pool;
179
180         mempool_free(rqd, pool);
181 }
182
183 void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
184                          int nr_pages)
185 {
186         struct bio_vec bv;
187         int i;
188
189         WARN_ON(off + nr_pages != bio->bi_vcnt);
190
191         bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
192         for (i = off; i < nr_pages + off; i++) {
193                 bv = bio->bi_io_vec[i];
194                 mempool_free(bv.bv_page, pblk->page_pool);
195         }
196 }
197
198 int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
199                        int nr_pages)
200 {
201         struct request_queue *q = pblk->dev->q;
202         struct page *page;
203         int i, ret;
204
205         for (i = 0; i < nr_pages; i++) {
206                 page = mempool_alloc(pblk->page_pool, flags);
207                 if (!page)
208                         goto err;
209
210                 ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
211                 if (ret != PBLK_EXPOSED_PAGE_SIZE) {
212                         pr_err("pblk: could not add page to bio\n");
213                         mempool_free(page, pblk->page_pool);
214                         goto err;
215                 }
216         }
217
218         return 0;
219 err:
220         pblk_bio_free_pages(pblk, bio, 0, i); /* i pages were added before the failure */
221         return -1;
222 }
223
224 static void pblk_write_kick(struct pblk *pblk)
225 {
226         wake_up_process(pblk->writer_ts);
227         mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
228 }
229
230 void pblk_write_timer_fn(unsigned long data)
231 {
232         struct pblk *pblk = (struct pblk *)data;
233
234         /* kick the write thread every tick to flush outstanding data */
235         pblk_write_kick(pblk);
236 }
237
238 void pblk_write_should_kick(struct pblk *pblk)
239 {
240         unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
241
242         if (secs_avail >= pblk->min_write_pgs)
243                 pblk_write_kick(pblk);
244 }
245
246 void pblk_end_bio_sync(struct bio *bio)
247 {
248         struct completion *waiting = bio->bi_private;
249
250         complete(waiting);
251 }
252
253 void pblk_end_io_sync(struct nvm_rq *rqd)
254 {
255         struct completion *waiting = rqd->private;
256
257         complete(waiting);
258 }
259
260 void pblk_flush_writer(struct pblk *pblk)
261 {
262         struct bio *bio;
263         int ret;
264         DECLARE_COMPLETION_ONSTACK(wait);
265
266         bio = bio_alloc(GFP_KERNEL, 1);
267         if (!bio)
268                 return;
269
270         bio->bi_iter.bi_sector = 0; /* internal bio */
271         bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_PREFLUSH);
272         bio->bi_private = &wait;
273         bio->bi_end_io = pblk_end_bio_sync;
274
275         ret = pblk_write_to_cache(pblk, bio, 0);
276         if (ret == NVM_IO_OK) {
277                 if (!wait_for_completion_io_timeout(&wait,
278                                 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
279                         pr_err("pblk: flush cache timed out\n");
280                 }
281         } else if (ret != NVM_IO_DONE) {
282                 pr_err("pblk: tear down bio failed\n");
283         }
284
285         if (bio->bi_status)
286                 pr_err("pblk: flush sync write failed (%u)\n", bio->bi_status);
287
288         bio_put(bio);
289 }
290
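/* Pick the GC list that matches the line's valid sector count (vsc): full
 * (no valid sectors left), high/mid/low invalidity, or empty (all sectors
 * still valid). A vsc larger than sec_in_line is impossible and marks the
 * line as corrupt.
 */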
291 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
292 {
293         struct pblk_line_meta *lm = &pblk->lm;
294         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
295         struct list_head *move_list = NULL;
296         int vsc = le32_to_cpu(*line->vsc);
297
298         lockdep_assert_held(&line->lock);
299
300         if (!vsc) {
301                 if (line->gc_group != PBLK_LINEGC_FULL) {
302                         line->gc_group = PBLK_LINEGC_FULL;
303                         move_list = &l_mg->gc_full_list;
304                 }
305         } else if (vsc < lm->high_thrs) {
306                 if (line->gc_group != PBLK_LINEGC_HIGH) {
307                         line->gc_group = PBLK_LINEGC_HIGH;
308                         move_list = &l_mg->gc_high_list;
309                 }
310         } else if (vsc < lm->mid_thrs) {
311                 if (line->gc_group != PBLK_LINEGC_MID) {
312                         line->gc_group = PBLK_LINEGC_MID;
313                         move_list = &l_mg->gc_mid_list;
314                 }
315         } else if (vsc < line->sec_in_line) {
316                 if (line->gc_group != PBLK_LINEGC_LOW) {
317                         line->gc_group = PBLK_LINEGC_LOW;
318                         move_list = &l_mg->gc_low_list;
319                 }
320         } else if (vsc == line->sec_in_line) {
321                 if (line->gc_group != PBLK_LINEGC_EMPTY) {
322                         line->gc_group = PBLK_LINEGC_EMPTY;
323                         move_list = &l_mg->gc_empty_list;
324                 }
325         } else {
326                 line->state = PBLK_LINESTATE_CORRUPT;
327                 line->gc_group = PBLK_LINEGC_NONE;
328                 move_list =  &l_mg->corrupt_list;
329                 pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
330                                                 line->id, vsc,
331                                                 line->sec_in_line,
332                                                 lm->high_thrs, lm->mid_thrs);
333         }
334
335         return move_list;
336 }
337
338 void pblk_discard(struct pblk *pblk, struct bio *bio)
339 {
340         sector_t slba = pblk_get_lba(bio);
341         sector_t nr_secs = pblk_get_secs(bio);
342
343         pblk_invalidate_range(pblk, slba, nr_secs);
344 }
345
346 struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
347 {
348         struct ppa_addr ppa;
349
350         spin_lock(&pblk->trans_lock);
351         ppa = pblk_trans_map_get(pblk, lba);
352         spin_unlock(&pblk->trans_lock);
353
354         return ppa;
355 }
356
357 void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
358 {
359         atomic_long_inc(&pblk->write_failed);
360 #ifdef CONFIG_NVM_DEBUG
361         pblk_print_failed_rqd(pblk, rqd, rqd->error);
362 #endif
363 }
364
365 void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
366 {
367         /* Empty page read is not necessarily an error (e.g., L2P recovery) */
368         if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
369                 atomic_long_inc(&pblk->read_empty);
370                 return;
371         }
372
373         switch (rqd->error) {
374         case NVM_RSP_WARN_HIGHECC:
375                 atomic_long_inc(&pblk->read_high_ecc);
376                 break;
377         case NVM_RSP_ERR_FAILECC:
378         case NVM_RSP_ERR_FAILCRC:
379                 atomic_long_inc(&pblk->read_failed);
380                 break;
381         default:
382                 pr_err("pblk: unknown read error:%d\n", rqd->error);
383         }
384 #ifdef CONFIG_NVM_DEBUG
385         pblk_print_failed_rqd(pblk, rqd, rqd->error);
386 #endif
387 }
388
389 void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
390 {
391         pblk->sec_per_write = sec_per_write;
392 }
393
394 int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
395 {
396         struct nvm_tgt_dev *dev = pblk->dev;
397
398 #ifdef CONFIG_NVM_DEBUG
399         struct ppa_addr *ppa_list;
400
401         ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
402         if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
403                 WARN_ON(1);
404                 return -EINVAL;
405         }
406
407         if (rqd->opcode == NVM_OP_PWRITE) {
408                 struct pblk_line *line;
409                 struct ppa_addr ppa;
410                 int i;
411
412                 for (i = 0; i < rqd->nr_ppas; i++) {
413                         ppa = ppa_list[i];
414                         line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
415
416                         spin_lock(&line->lock);
417                         if (line->state != PBLK_LINESTATE_OPEN) {
418                                 pr_err("pblk: bad ppa: line:%d,state:%d\n",
419                                                         line->id, line->state);
420                                 WARN_ON(1);
421                                 spin_unlock(&line->lock);
422                                 return -EINVAL;
423                         }
424                         spin_unlock(&line->lock);
425                 }
426         }
427 #endif
428         return nvm_submit_io(dev, rqd);
429 }
430
431 struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
432                               unsigned int nr_secs, unsigned int len,
433                               gfp_t gfp_mask)
434 {
435         struct nvm_tgt_dev *dev = pblk->dev;
436         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
437         void *kaddr = data;
438         struct page *page;
439         struct bio *bio;
440         int i, ret;
441
442         if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
443                 return bio_map_kern(dev->q, kaddr, len, gfp_mask);
444
445         bio = bio_kmalloc(gfp_mask, nr_secs);
446         if (!bio)
447                 return ERR_PTR(-ENOMEM);
448
449         for (i = 0; i < nr_secs; i++) {
450                 page = vmalloc_to_page(kaddr);
451                 if (!page) {
452                         pr_err("pblk: could not map vmalloc bio\n");
453                         bio_put(bio);
454                         bio = ERR_PTR(-ENOMEM);
455                         goto out;
456                 }
457
458                 ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
459                 if (ret != PAGE_SIZE) {
460                         pr_err("pblk: could not add page to bio\n");
461                         bio_put(bio);
462                         bio = ERR_PTR(-ENOMEM);
463                         goto out;
464                 }
465
466                 kaddr += PAGE_SIZE;
467         }
468 out:
469         return bio;
470 }
471
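/* Decide how many sectors to write next: a full sec_per_write chunk when
 * enough data is buffered, the largest multiple of the minimum write size
 * otherwise, or one (padded) minimum write when a flush is pending.
 */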
472 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
473                    unsigned long secs_to_flush)
474 {
475         int max = pblk->sec_per_write;
476         int min = pblk->min_write_pgs;
477         int secs_to_sync = 0;
478
479         if (secs_avail >= max)
480                 secs_to_sync = max;
481         else if (secs_avail >= min)
482                 secs_to_sync = min * (secs_avail / min);
483         else if (secs_to_flush)
484                 secs_to_sync = min;
485
486         return secs_to_sync;
487 }
488
489 void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
490 {
491         u64 addr;
492         int i;
493
494         addr = find_next_zero_bit(line->map_bitmap,
495                                         pblk->lm.sec_per_line, line->cur_sec);
496         line->cur_sec = addr - nr_secs;
497
498         for (i = 0; i < nr_secs; i++, line->cur_sec--)
499                 WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
500 }
501
502 u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
503 {
504         u64 addr;
505         int i;
506
507         lockdep_assert_held(&line->lock);
508
509         /* logic error: ppa out-of-bounds. Prevent generating bad address */
510         if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
511                 WARN(1, "pblk: page allocation out of bounds\n");
512                 nr_secs = pblk->lm.sec_per_line - line->cur_sec;
513         }
514
515         line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
516                                         pblk->lm.sec_per_line, line->cur_sec);
517         for (i = 0; i < nr_secs; i++, line->cur_sec++)
518                 WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));
519
520         return addr;
521 }
522
523 u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
524 {
525         u64 addr;
526
527         /* Lock needed in case a write fails and a recovery needs to remap
528          * failed write buffer entries
529          */
530         spin_lock(&line->lock);
531         addr = __pblk_alloc_page(pblk, line, nr_secs);
532         line->left_msecs -= nr_secs;
533         WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
534         spin_unlock(&line->lock);
535
536         return addr;
537 }
538
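/* Peek at the line's next free sector without allocating it */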
539 u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
540 {
541         u64 paddr;
542
543         spin_lock(&line->lock);
544         paddr = find_next_zero_bit(line->map_bitmap,
545                                         pblk->lm.sec_per_line, line->cur_sec);
546         spin_unlock(&line->lock);
547
548         return paddr;
549 }
550
551 /*
552  * Submit emeta to one LUN in the raid line at a time to avoid a deadlock when
553  * taking the per LUN semaphore.
554  */
555 static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
556                                      void *emeta_buf, u64 paddr, int dir)
557 {
558         struct nvm_tgt_dev *dev = pblk->dev;
559         struct nvm_geo *geo = &dev->geo;
560         struct pblk_line_meta *lm = &pblk->lm;
561         void *ppa_list, *meta_list;
562         struct bio *bio;
563         struct nvm_rq rqd;
564         dma_addr_t dma_ppa_list, dma_meta_list;
565         int min = pblk->min_write_pgs;
566         int left_ppas = lm->emeta_sec[0];
567         int id = line->id;
568         int rq_ppas, rq_len;
569         int cmd_op, bio_op;
570         int i, j;
571         int ret;
572         DECLARE_COMPLETION_ONSTACK(wait);
573
574         if (dir == WRITE) {
575                 bio_op = REQ_OP_WRITE;
576                 cmd_op = NVM_OP_PWRITE;
577         } else if (dir == READ) {
578                 bio_op = REQ_OP_READ;
579                 cmd_op = NVM_OP_PREAD;
580         } else
581                 return -EINVAL;
582
583         meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
584                                                         &dma_meta_list);
585         if (!meta_list)
586                 return -ENOMEM;
587
588         ppa_list = meta_list + pblk_dma_meta_size;
589         dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
590
591 next_rq:
592         memset(&rqd, 0, sizeof(struct nvm_rq));
593
594         rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
595         rq_len = rq_ppas * geo->sec_size;
596
597         bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, GFP_KERNEL);
598         if (IS_ERR(bio)) {
599                 ret = PTR_ERR(bio);
600                 goto free_rqd_dma;
601         }
602
603         bio->bi_iter.bi_sector = 0; /* internal bio */
604         bio_set_op_attrs(bio, bio_op, 0);
605
606         rqd.bio = bio;
607         rqd.meta_list = meta_list;
608         rqd.ppa_list = ppa_list;
609         rqd.dma_meta_list = dma_meta_list;
610         rqd.dma_ppa_list = dma_ppa_list;
611         rqd.opcode = cmd_op;
612         rqd.nr_ppas = rq_ppas;
613         rqd.end_io = pblk_end_io_sync;
614         rqd.private = &wait;
615
616         if (dir == WRITE) {
617                 struct pblk_sec_meta *meta_list = rqd.meta_list;
618
619                 rqd.flags = pblk_set_progr_mode(pblk, WRITE);
620                 for (i = 0; i < rqd.nr_ppas; ) {
621                         spin_lock(&line->lock);
622                         paddr = __pblk_alloc_page(pblk, line, min);
623                         spin_unlock(&line->lock);
624                         for (j = 0; j < min; j++, i++, paddr++) {
625                                 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
626                                 rqd.ppa_list[i] =
627                                         addr_to_gen_ppa(pblk, paddr, id);
628                         }
629                 }
630         } else {
631                 for (i = 0; i < rqd.nr_ppas; ) {
632                         struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
633                         int pos = pblk_dev_ppa_to_pos(geo, ppa);
634                         int read_type = PBLK_READ_RANDOM;
635
636                         if (pblk_io_aligned(pblk, rq_ppas))
637                                 read_type = PBLK_READ_SEQUENTIAL;
638                         rqd.flags = pblk_set_read_mode(pblk, read_type);
639
640                         while (test_bit(pos, line->blk_bitmap)) {
641                                 paddr += min;
642                                 if (pblk_boundary_paddr_checks(pblk, paddr)) {
643                                         pr_err("pblk: corrupt emeta line:%d\n",
644                                                                 line->id);
645                                         bio_put(bio);
646                                         ret = -EINTR;
647                                         goto free_rqd_dma;
648                                 }
649
650                                 ppa = addr_to_gen_ppa(pblk, paddr, id);
651                                 pos = pblk_dev_ppa_to_pos(geo, ppa);
652                         }
653
654                         if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
655                                 pr_err("pblk: corrupt emeta line:%d\n",
656                                                                 line->id);
657                                 bio_put(bio);
658                                 ret = -EINTR;
659                                 goto free_rqd_dma;
660                         }
661
662                         for (j = 0; j < min; j++, i++, paddr++)
663                                 rqd.ppa_list[i] =
664                                         addr_to_gen_ppa(pblk, paddr, line->id);
665                 }
666         }
667
668         ret = pblk_submit_io(pblk, &rqd);
669         if (ret) {
670                 pr_err("pblk: emeta I/O submission failed: %d\n", ret);
671                 bio_put(bio);
672                 goto free_rqd_dma;
673         }
674
675         if (!wait_for_completion_io_timeout(&wait,
676                                 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
677                 pr_err("pblk: emeta I/O timed out\n");
678         }
679         reinit_completion(&wait);
680
681         if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
682                 bio_put(bio);
683
684         if (rqd.error) {
685                 if (dir == WRITE)
686                         pblk_log_write_err(pblk, &rqd);
687                 else
688                         pblk_log_read_err(pblk, &rqd);
689         }
690
691         emeta_buf += rq_len;
692         left_ppas -= rq_ppas;
693         if (left_ppas)
694                 goto next_rq;
695 free_rqd_dma:
696         nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
697         return ret;
698 }
699
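/* smeta is stored at the start of the first good block in the line */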
700 u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
701 {
702         struct nvm_tgt_dev *dev = pblk->dev;
703         struct nvm_geo *geo = &dev->geo;
704         struct pblk_line_meta *lm = &pblk->lm;
705         int bit;
706
707         /* This usually only happens on bad lines */
708         bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
709         if (bit >= lm->blk_per_line)
710                 return -1;
711
712         return bit * geo->sec_per_pl;
713 }
714
715 static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
716                                      u64 paddr, int dir)
717 {
718         struct nvm_tgt_dev *dev = pblk->dev;
719         struct pblk_line_meta *lm = &pblk->lm;
720         struct bio *bio;
721         struct nvm_rq rqd;
722         __le64 *lba_list = NULL;
723         int i, ret;
724         int cmd_op, bio_op;
725         int flags;
726         DECLARE_COMPLETION_ONSTACK(wait);
727
728         if (dir == WRITE) {
729                 bio_op = REQ_OP_WRITE;
730                 cmd_op = NVM_OP_PWRITE;
731                 flags = pblk_set_progr_mode(pblk, WRITE);
732                 lba_list = emeta_to_lbas(pblk, line->emeta->buf);
733         } else if (dir == READ) {
734                 bio_op = REQ_OP_READ;
735                 cmd_op = NVM_OP_PREAD;
736                 flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
737         } else
738                 return -EINVAL;
739
740         memset(&rqd, 0, sizeof(struct nvm_rq));
741
742         rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
743                                                         &rqd.dma_meta_list);
744         if (!rqd.meta_list)
745                 return -ENOMEM;
746
747         rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
748         rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
749
750         bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
751         if (IS_ERR(bio)) {
752                 ret = PTR_ERR(bio);
753                 goto free_ppa_list;
754         }
755
756         bio->bi_iter.bi_sector = 0; /* internal bio */
757         bio_set_op_attrs(bio, bio_op, 0);
758
759         rqd.bio = bio;
760         rqd.opcode = cmd_op;
761         rqd.flags = flags;
762         rqd.nr_ppas = lm->smeta_sec;
763         rqd.end_io = pblk_end_io_sync;
764         rqd.private = &wait;
765
766         for (i = 0; i < lm->smeta_sec; i++, paddr++) {
767                 struct pblk_sec_meta *meta_list = rqd.meta_list;
768
769                 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
770
771                 if (dir == WRITE) {
772                         u64 addr_empty = cpu_to_le64(ADDR_EMPTY);
773
774                         meta_list[i].lba = lba_list[paddr] = addr_empty;
775                 }
776         }
777
778         /*
779          * This I/O is sent by the write thread when a line is replaced. Since
780          * the write thread is the only one sending write and erase commands,
781          * there is no need to take the LUN semaphore.
782          */
783         ret = pblk_submit_io(pblk, &rqd);
784         if (ret) {
785                 pr_err("pblk: smeta I/O submission failed: %d\n", ret);
786                 bio_put(bio);
787                 goto free_ppa_list;
788         }
789
790         if (!wait_for_completion_io_timeout(&wait,
791                                 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
792                 pr_err("pblk: smeta I/O timed out\n");
793         }
794
795         if (rqd.error) {
796                 if (dir == WRITE)
797                         pblk_log_write_err(pblk, &rqd);
798                 else
799                         pblk_log_read_err(pblk, &rqd);
800         }
801
802 free_ppa_list:
803         nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
804
805         return ret;
806 }
807
808 int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
809 {
810         u64 bpaddr = pblk_line_smeta_start(pblk, line);
811
812         return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
813 }
814
815 int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
816                          void *emeta_buf)
817 {
818         return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
819                                                 line->emeta_ssec, READ);
820 }
821
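/* Prepare an erase request for a single block at the given ppa */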
822 static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
823                             struct ppa_addr ppa)
824 {
825         rqd->opcode = NVM_OP_ERASE;
826         rqd->ppa_addr = ppa;
827         rqd->nr_ppas = 1;
828         rqd->flags = pblk_set_progr_mode(pblk, ERASE);
829         rqd->bio = NULL;
830 }
831
832 static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
833 {
834         struct nvm_rq rqd;
835         int ret;
836         DECLARE_COMPLETION_ONSTACK(wait);
837
838         memset(&rqd, 0, sizeof(struct nvm_rq));
839
840         pblk_setup_e_rq(pblk, &rqd, ppa);
841
842         rqd.end_io = pblk_end_io_sync;
843         rqd.private = &wait;
844
845         /* The write thread schedules erases so that it minimizes disturbances
846          * with writes. Thus, there is no need to take the LUN semaphore.
847          */
848         ret = pblk_submit_io(pblk, &rqd);
849         if (ret) {
850                 struct nvm_tgt_dev *dev = pblk->dev;
851                 struct nvm_geo *geo = &dev->geo;
852
853                 pr_err("pblk: could not sync erase line:%d,blk:%d\n",
854                                         pblk_dev_ppa_to_line(ppa),
855                                         pblk_dev_ppa_to_pos(geo, ppa));
856
857                 rqd.error = ret;
858                 goto out;
859         }
860
861         if (!wait_for_completion_io_timeout(&wait,
862                                 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
863                 pr_err("pblk: sync erase timed out\n");
864         }
865
866 out:
867         rqd.private = pblk;
868         __pblk_end_io_erase(pblk, &rqd);
869
870         return 0;
871 }
872
873 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
874 {
875         struct pblk_line_meta *lm = &pblk->lm;
876         struct ppa_addr ppa;
877         int bit = -1;
878
879         /* Erase only good blocks, one at a time */
880         do {
881                 spin_lock(&line->lock);
882                 bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
883                                                                 bit + 1);
884                 if (bit >= lm->blk_per_line) {
885                         spin_unlock(&line->lock);
886                         break;
887                 }
888
889                 ppa = pblk->luns[bit].bppa; /* set ch and lun */
890                 ppa.g.blk = line->id;
891
892                 atomic_dec(&line->left_eblks);
893                 WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
894                 spin_unlock(&line->lock);
895
896                 if (pblk_blk_erase_sync(pblk, ppa)) {
897                         pr_err("pblk: failed to erase line %d\n", line->id);
898                         return -ENOMEM;
899                 }
900         } while (1);
901
902         return 0;
903 }
904
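/* Reserve one of the pre-allocated smeta/emeta buffer slots for the line.
 * Called with l_mg->free_lock held; the lock is dropped while waiting for a
 * slot to become available.
 */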
905 static void pblk_line_setup_metadata(struct pblk_line *line,
906                                      struct pblk_line_mgmt *l_mg,
907                                      struct pblk_line_meta *lm)
908 {
909         int meta_line;
910
911 retry_meta:
912         meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
913         if (meta_line == PBLK_DATA_LINES) {
914                 spin_unlock(&l_mg->free_lock);
915                 io_schedule();
916                 spin_lock(&l_mg->free_lock);
917                 goto retry_meta;
918         }
919
920         set_bit(meta_line, &l_mg->meta_bitmap);
921         line->meta_line = meta_line;
922
923         line->smeta = l_mg->sline_meta[meta_line];
924         line->emeta = l_mg->eline_meta[meta_line];
925
926         memset(line->smeta, 0, lm->smeta_len);
927         memset(line->emeta->buf, 0, lm->emeta_len[0]);
928
929         line->emeta->mem = 0;
930         atomic_set(&line->emeta->sync, 0);
931 }
932
933 /* For now, lines are always assumed to be full. Thus, smeta former and current
934  * lun bitmaps are omitted.
935  */
936 static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
937                                   struct pblk_line *cur)
938 {
939         struct nvm_tgt_dev *dev = pblk->dev;
940         struct nvm_geo *geo = &dev->geo;
941         struct pblk_line_meta *lm = &pblk->lm;
942         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
943         struct pblk_emeta *emeta = line->emeta;
944         struct line_emeta *emeta_buf = emeta->buf;
945         struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
946         int nr_blk_line;
947
948         /* After erasing the line, new bad blocks might appear and we risk
949          * having an invalid line
950          */
951         nr_blk_line = lm->blk_per_line -
952                         bitmap_weight(line->blk_bitmap, lm->blk_per_line);
953         if (nr_blk_line < lm->min_blk_line) {
954                 spin_lock(&l_mg->free_lock);
955                 spin_lock(&line->lock);
956                 line->state = PBLK_LINESTATE_BAD;
957                 spin_unlock(&line->lock);
958
959                 list_add_tail(&line->list, &l_mg->bad_list);
960                 spin_unlock(&l_mg->free_lock);
961
962                 pr_debug("pblk: line %d is bad\n", line->id);
963
964                 return 0;
965         }
966
967         /* Run-time metadata */
968         line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
969
970         /* Mark LUNs allocated in this line (all for now) */
971         bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
972
973         smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
974         memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
975         smeta_buf->header.id = cpu_to_le32(line->id);
976         smeta_buf->header.type = cpu_to_le16(line->type);
977         smeta_buf->header.version = cpu_to_le16(1);
978
979         /* Start metadata */
980         smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
981         smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
982
983         /* Fill metadata among lines */
984         if (cur) {
985                 memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
986                 smeta_buf->prev_id = cpu_to_le32(cur->id);
987                 cur->emeta->buf->next_id = cpu_to_le32(line->id);
988         } else {
989                 smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
990         }
991
992         /* All smeta must be set at this point */
993         smeta_buf->header.crc = cpu_to_le32(
994                         pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
995         smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
996
997         /* End metadata */
998         memcpy(&emeta_buf->header, &smeta_buf->header,
999                                                 sizeof(struct line_header));
1000         emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
1001         emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
1002         emeta_buf->nr_valid_lbas = cpu_to_le64(0);
1003         emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
1004         emeta_buf->crc = cpu_to_le32(0);
1005         emeta_buf->prev_id = smeta_buf->prev_id;
1006
1007         return 1;
1008 }
1009
1010 /* For now, lines are always assumed to be full. Thus, smeta former and current
1011  * lun bitmaps are omitted.
1012  */
1013 static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
1014                              int init)
1015 {
1016         struct nvm_tgt_dev *dev = pblk->dev;
1017         struct nvm_geo *geo = &dev->geo;
1018         struct pblk_line_meta *lm = &pblk->lm;
1019         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1020         int nr_bb = 0;
1021         u64 off;
1022         int bit = -1;
1023
1024         line->sec_in_line = lm->sec_per_line;
1025
1026         /* Capture bad block information on line mapping bitmaps */
1027         while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
1028                                         bit + 1)) < lm->blk_per_line) {
1029                 off = bit * geo->sec_per_pl;
1030                 bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
1031                                                         lm->sec_per_line);
1032                 bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
1033                                                         lm->sec_per_line);
1034                 line->sec_in_line -= geo->sec_per_blk;
1035                 if (bit >= lm->emeta_bb)
1036                         nr_bb++;
1037         }
1038
1039         /* Mark smeta metadata sectors as bad sectors */
1040         bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
1041         off = bit * geo->sec_per_pl;
1042 retry_smeta:
1043         bitmap_set(line->map_bitmap, off, lm->smeta_sec);
1044         line->sec_in_line -= lm->smeta_sec;
1045         line->smeta_ssec = off;
1046         line->cur_sec = off + lm->smeta_sec;
1047
1048         if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
1049                 pr_debug("pblk: line smeta I/O failed. Retry\n");
1050                 off += geo->sec_per_pl;
1051                 goto retry_smeta;
1052         }
1053
1054         bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
1055
1056         /* Mark emeta metadata sectors as bad sectors. We need to consider bad
1057          * blocks to make sure that there are enough sectors to store emeta
1058          */
1059         bit = lm->sec_per_line;
1060         off = lm->sec_per_line - lm->emeta_sec[0];
1061         bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
1062         while (nr_bb) {
1063                 off -= geo->sec_per_pl;
1064                 if (!test_bit(off, line->invalid_bitmap)) {
1065                         bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
1066                         nr_bb--;
1067                 }
1068         }
1069
1070         line->sec_in_line -= lm->emeta_sec[0];
1071         line->emeta_ssec = off;
1072         line->nr_valid_lbas = 0;
1073         line->left_msecs = line->sec_in_line;
1074         *line->vsc = cpu_to_le32(line->sec_in_line);
1075
1076         if (lm->sec_per_line - line->sec_in_line !=
1077                 bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
1078                 spin_lock(&line->lock);
1079                 line->state = PBLK_LINESTATE_BAD;
1080                 spin_unlock(&line->lock);
1081
1082                 list_add_tail(&line->list, &l_mg->bad_list);
1083                 pr_err("pblk: unexpected line %d is bad\n", line->id);
1084
1085                 return 0;
1086         }
1087
1088         return 1;
1089 }
1090
1091 static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1092 {
1093         struct pblk_line_meta *lm = &pblk->lm;
1094         int blk_in_line = atomic_read(&line->blk_in_line);
1095
1096         line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
1097         if (!line->map_bitmap)
1098                 return -ENOMEM;
1099         memset(line->map_bitmap, 0, lm->sec_bitmap_len);
1100
1101         /* invalid_bitmap is special since it is used when the line is closed. No
1102          * need to zero it; it will be initialized using the bb info from
1103          * map_bitmap
1104          */
1105         line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
1106         if (!line->invalid_bitmap) {
1107                 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1108                 return -ENOMEM;
1109         }
1110
1111         spin_lock(&line->lock);
1112         if (line->state != PBLK_LINESTATE_FREE) {
1113                 spin_unlock(&line->lock);
1114                 WARN(1, "pblk: corrupted line state\n");
1115                 return -EINTR;
1116         }
1117         line->state = PBLK_LINESTATE_OPEN;
1118
1119         atomic_set(&line->left_eblks, blk_in_line);
1120         atomic_set(&line->left_seblks, blk_in_line);
1121
1122         line->meta_distance = lm->meta_distance;
1123         spin_unlock(&line->lock);
1124
1125         /* Bad blocks do not need to be erased */
1126         bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
1127
1128         kref_init(&line->ref);
1129
1130         return 0;
1131 }
1132
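/* Recovery path: claim a specific line as the data line and prepare its
 * bitmaps without writing smeta.
 */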
1133 int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
1134 {
1135         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1136         int ret;
1137
1138         spin_lock(&l_mg->free_lock);
1139         l_mg->data_line = line;
1140         list_del(&line->list);
1141
1142         ret = pblk_line_prepare(pblk, line);
1143         if (ret) {
1144                 list_add(&line->list, &l_mg->free_list);
1145                 spin_unlock(&l_mg->free_lock);
1146                 return ret;
1147         }
1148         spin_unlock(&l_mg->free_lock);
1149
1150         pblk_rl_free_lines_dec(&pblk->rl, line);
1151
1152         if (!pblk_line_init_bb(pblk, line, 0)) {
1153                 list_add(&line->list, &l_mg->free_list);
1154                 return -EINTR;
1155         }
1156
1157         return 0;
1158 }
1159
1160 void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
1161 {
1162         mempool_free(line->map_bitmap, pblk->line_meta_pool);
1163         line->map_bitmap = NULL;
1164         line->smeta = NULL;
1165         line->emeta = NULL;
1166 }
1167
1168 struct pblk_line *pblk_line_get(struct pblk *pblk)
1169 {
1170         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1171         struct pblk_line_meta *lm = &pblk->lm;
1172         struct pblk_line *line = NULL;
1173         int bit;
1174
1175         lockdep_assert_held(&l_mg->free_lock);
1176
1177 retry_get:
1178         if (list_empty(&l_mg->free_list)) {
1179                 pr_err("pblk: no free lines\n");
1180                 goto out;
1181         }
1182
1183         line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
1184         list_del(&line->list);
1185         l_mg->nr_free_lines--;
1186
1187         bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
1188         if (unlikely(bit >= lm->blk_per_line)) {
1189                 spin_lock(&line->lock);
1190                 line->state = PBLK_LINESTATE_BAD;
1191                 spin_unlock(&line->lock);
1192
1193                 list_add_tail(&line->list, &l_mg->bad_list);
1194
1195                 pr_debug("pblk: line %d is bad\n", line->id);
1196                 goto retry_get;
1197         }
1198
1199         if (pblk_line_prepare(pblk, line)) {
1200                 pr_err("pblk: failed to prepare line %d\n", line->id);
1201                 list_add(&line->list, &l_mg->free_list);
1202                 l_mg->nr_free_lines++;
1203                 return NULL;
1204         }
1205
1206 out:
1207         return line;
1208 }
1209
1210 static struct pblk_line *pblk_line_retry(struct pblk *pblk,
1211                                          struct pblk_line *line)
1212 {
1213         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1214         struct pblk_line *retry_line;
1215
1216         spin_lock(&l_mg->free_lock);
1217         retry_line = pblk_line_get(pblk);
1218         if (!retry_line) {
1219                 l_mg->data_line = NULL;
1220                 spin_unlock(&l_mg->free_lock);
1221                 return NULL;
1222         }
1223
1224         retry_line->smeta = line->smeta;
1225         retry_line->emeta = line->emeta;
1226         retry_line->meta_line = line->meta_line;
1227
1228         pblk_line_free(pblk, line);
1229         l_mg->data_line = retry_line;
1230         spin_unlock(&l_mg->free_lock);
1231
1232         if (pblk_line_erase(pblk, retry_line)) {
1233                 spin_lock(&l_mg->free_lock);
1234                 l_mg->data_line = NULL;
1235                 spin_unlock(&l_mg->free_lock);
1236                 return NULL;
1237         }
1238
1239         pblk_rl_free_lines_dec(&pblk->rl, retry_line);
1240
1241         return retry_line;
1242 }
1243
1244 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
1245 {
1246         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1247         struct pblk_line *line;
1248         int is_next = 0;
1249
1250         spin_lock(&l_mg->free_lock);
1251         line = pblk_line_get(pblk);
1252         if (!line) {
1253                 spin_unlock(&l_mg->free_lock);
1254                 return NULL;
1255         }
1256
1257         line->seq_nr = l_mg->d_seq_nr++;
1258         line->type = PBLK_LINETYPE_DATA;
1259         l_mg->data_line = line;
1260
1261         pblk_line_setup_metadata(line, l_mg, &pblk->lm);
1262
1263         /* Allocate next line for preparation */
1264         l_mg->data_next = pblk_line_get(pblk);
1265         if (l_mg->data_next) {
1266                 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1267                 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1268                 is_next = 1;
1269         }
1270         spin_unlock(&l_mg->free_lock);
1271
1272         pblk_rl_free_lines_dec(&pblk->rl, line);
1273         if (is_next)
1274                 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1275
1276         if (pblk_line_erase(pblk, line))
1277                 return NULL;
1278
1279 retry_setup:
1280         if (!pblk_line_init_metadata(pblk, line, NULL)) {
1281                 line = pblk_line_retry(pblk, line);
1282                 if (!line)
1283                         return NULL;
1284
1285                 goto retry_setup;
1286         }
1287
1288         if (!pblk_line_init_bb(pblk, line, 1)) {
1289                 line = pblk_line_retry(pblk, line);
1290                 if (!line)
1291                         return NULL;
1292
1293                 goto retry_setup;
1294         }
1295
1296         return line;
1297 }
1298
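/* Promote the prepared next line to be the current data line: wait for it to
 * be fully erased, set up its metadata, and allocate a new next line.
 */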
1299 struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
1300 {
1301         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1302         struct pblk_line *cur, *new;
1303         unsigned int left_seblks;
1304         int is_next = 0;
1305
1306         cur = l_mg->data_line;
1307         new = l_mg->data_next;
1308         if (!new)
1309                 return NULL;
1310         l_mg->data_line = new;
1311
1312 retry_line:
1313         left_seblks = atomic_read(&new->left_seblks);
1314         if (left_seblks) {
1315                 /* If line is not fully erased, erase it */
1316                 if (atomic_read(&new->left_eblks)) {
1317                         if (pblk_line_erase(pblk, new))
1318                                 return NULL;
1319                 } else {
1320                         io_schedule();
1321                 }
1322                 goto retry_line;
1323         }
1324
1325         spin_lock(&l_mg->free_lock);
1326         /* Allocate next line for preparation */
1327         l_mg->data_next = pblk_line_get(pblk);
1328         if (l_mg->data_next) {
1329                 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1330                 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1331                 is_next = 1;
1332         }
1333
1334         pblk_line_setup_metadata(new, l_mg, &pblk->lm);
1335         spin_unlock(&l_mg->free_lock);
1336
1337         if (is_next)
1338                 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1339
1340 retry_setup:
1341         if (!pblk_line_init_metadata(pblk, new, cur)) {
1342                 new = pblk_line_retry(pblk, new);
1343                 if (!new)
1344                         return NULL;
1345
1346                 goto retry_setup;
1347         }
1348
1349         if (!pblk_line_init_bb(pblk, new, 1)) {
1350                 new = pblk_line_retry(pblk, new);
1351                 if (!new)
1352                         return NULL;
1353
1354                 goto retry_setup;
1355         }
1356
1357         return new;
1358 }
1359
1360 void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
1361 {
1362         if (line->map_bitmap)
1363                 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1364         if (line->invalid_bitmap)
1365                 mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
1366
1367         *line->vsc = cpu_to_le32(EMPTY_ENTRY);
1368
1369         line->map_bitmap = NULL;
1370         line->invalid_bitmap = NULL;
1371         line->smeta = NULL;
1372         line->emeta = NULL;
1373 }
1374
1375 void pblk_line_put(struct kref *ref)
1376 {
1377         struct pblk_line *line = container_of(ref, struct pblk_line, ref);
1378         struct pblk *pblk = line->pblk;
1379         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1380
1381         spin_lock(&line->lock);
1382         WARN_ON(line->state != PBLK_LINESTATE_GC);
1383         line->state = PBLK_LINESTATE_FREE;
1384         line->gc_group = PBLK_LINEGC_NONE;
1385         pblk_line_free(pblk, line);
1386         spin_unlock(&line->lock);
1387
1388         spin_lock(&l_mg->free_lock);
1389         list_add_tail(&line->list, &l_mg->free_list);
1390         l_mg->nr_free_lines++;
1391         spin_unlock(&l_mg->free_lock);
1392
1393         pblk_rl_free_lines_inc(&pblk->rl, line);
1394 }
1395
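/* Submit an erase without waiting for it; completion is handled by
 * pblk_end_io_erase.
 */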
1396 int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
1397 {
1398         struct nvm_rq *rqd;
1399         int err;
1400
1401         rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
1402         memset(rqd, 0, pblk_g_rq_size);
1403
1404         pblk_setup_e_rq(pblk, rqd, ppa);
1405
1406         rqd->end_io = pblk_end_io_erase;
1407         rqd->private = pblk;
1408
1409         /* The write thread schedules erases so that it minimizes disturbances
1410          * with writes. Thus, there is no need to take the LUN semaphore.
1411          */
1412         err = pblk_submit_io(pblk, rqd);
1413         if (err) {
1414                 struct nvm_tgt_dev *dev = pblk->dev;
1415                 struct nvm_geo *geo = &dev->geo;
1416
1417                 pr_err("pblk: could not async erase line:%d,blk:%d\n",
1418                                         pblk_dev_ppa_to_line(ppa),
1419                                         pblk_dev_ppa_to_pos(geo, ppa));
1420         }
1421
1422         return err;
1423 }
1424
1425 struct pblk_line *pblk_line_get_data(struct pblk *pblk)
1426 {
1427         return pblk->l_mg.data_line;
1428 }
1429
1430 /* For now, always erase next line */
1431 struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
1432 {
1433         return pblk->l_mg.data_next;
1434 }
1435
1436 int pblk_line_is_full(struct pblk_line *line)
1437 {
1438         return (line->left_msecs == 0);
1439 }
1440
1441 void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1442 {
1443         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1444         struct pblk_line_meta *lm = &pblk->lm;
1445         struct list_head *move_list;
1446
1447         WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
1448                                 "pblk: corrupt closed line %d\n", line->id);
1449
1450         spin_lock(&l_mg->free_lock);
1451         WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
1452         spin_unlock(&l_mg->free_lock);
1453
1454         spin_lock(&l_mg->gc_lock);
1455         spin_lock(&line->lock);
1456         WARN_ON(line->state != PBLK_LINESTATE_OPEN);
1457         line->state = PBLK_LINESTATE_CLOSED;
1458         move_list = pblk_line_gc_list(pblk, line);
1459
1460         list_add_tail(&line->list, move_list);
1461
1462         mempool_free(line->map_bitmap, pblk->line_meta_pool);
1463         line->map_bitmap = NULL;
1464         line->smeta = NULL;
1465         line->emeta = NULL;
1466
1467         spin_unlock(&line->lock);
1468         spin_unlock(&l_mg->gc_lock);
1469
1470         pblk_gc_should_kick(pblk);
1471 }
1472
1473 void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
1474 {
1475         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1476         struct pblk_line_meta *lm = &pblk->lm;
1477         struct pblk_emeta *emeta = line->emeta;
1478         struct line_emeta *emeta_buf = emeta->buf;
1479
1480         /* No need for an exact vsc value; avoid a big line lock and approximate */
1481         memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
1482         memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
1483
1484         emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
1485         emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
1486
1487         spin_lock(&l_mg->close_lock);
1488         spin_lock(&line->lock);
1489         list_add_tail(&line->list, &l_mg->emeta_list);
1490         spin_unlock(&line->lock);
1491         spin_unlock(&l_mg->close_lock);
1492 }
1493
1494 void pblk_line_close_ws(struct work_struct *work)
1495 {
1496         struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
1497                                                                         ws);
1498         struct pblk *pblk = line_ws->pblk;
1499         struct pblk_line *line = line_ws->line;
1500
1501         pblk_line_close(pblk, line);
1502         mempool_free(line_ws, pblk->line_ws_pool);
1503 }
1504
1505 void pblk_line_mark_bb(struct work_struct *work)
1506 {
1507         struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
1508                                                                         ws);
1509         struct pblk *pblk = line_ws->pblk;
1510         struct nvm_tgt_dev *dev = pblk->dev;
1511         struct ppa_addr *ppa = line_ws->priv;
1512         int ret;
1513
1514         ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
1515         if (ret) {
1516                 struct pblk_line *line;
1517                 int pos;
1518
1519                 line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
1520                 pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);
1521
1522                 pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
1523                                 line->id, pos);
1524         }
1525
1526         kfree(ppa);
1527         mempool_free(line_ws, pblk->line_ws_pool);
1528 }
1529
1530 void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
1531                       void (*work)(struct work_struct *))
1532 {
1533         struct pblk_line_ws *line_ws;
1534
1535         line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
1536         if (!line_ws)
1537                 return;
1538
1539         line_ws->pblk = pblk;
1540         line_ws->line = line;
1541         line_ws->priv = priv;
1542
1543         INIT_WORK(&line_ws->ws, work);
1544         queue_work(pblk->kw_wq, &line_ws->ws);
1545 }
1546
1547 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1548                   unsigned long *lun_bitmap)
1549 {
1550         struct nvm_tgt_dev *dev = pblk->dev;
1551         struct nvm_geo *geo = &dev->geo;
1552         struct pblk_lun *rlun;
1553         int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
1554         int ret;
1555
1556         /*
1557          * Only send one inflight I/O per LUN. Since we map at a page
1558          * granularity, all ppas in the I/O will map to the same LUN
1559          */
1560 #ifdef CONFIG_NVM_DEBUG
1561         int i;
1562
1563         for (i = 1; i < nr_ppas; i++)
1564                 WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
1565                                 ppa_list[0].g.ch != ppa_list[i].g.ch);
1566 #endif
1567         /* If the LUN has been locked for this same request, do not attempt to
1568          * lock it again
1569          */
1570         if (test_and_set_bit(pos, lun_bitmap))
1571                 return;
1572
1573         rlun = &pblk->luns[pos];
1574         ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
1575         if (ret) {
1576                 switch (ret) {
1577                 case -ETIME:
1578                         pr_err("pblk: lun semaphore timed out\n");
1579                         break;
1580                 case -EINTR:
1581                         pr_err("pblk: lun semaphore interrupted\n");
1582                         break;
1583                 }
1584         }
1585 }
1586
1587 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1588                 unsigned long *lun_bitmap)
1589 {
1590         struct nvm_tgt_dev *dev = pblk->dev;
1591         struct nvm_geo *geo = &dev->geo;
1592         struct pblk_lun *rlun;
1593         int nr_luns = geo->nr_luns;
1594         int bit = -1;
1595
1596         while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
1597                 rlun = &pblk->luns[bit];
1598                 up(&rlun->wr_sem);
1599         }
1600
1601         kfree(lun_bitmap);
1602 }
1603
1604 void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
1605 {
1606         struct ppa_addr l2p_ppa;
1607
1608         /* logic error: lba out-of-bounds. Ignore update */
1609         if (!(lba < pblk->rl.nr_secs)) {
1610                 WARN(1, "pblk: corrupted L2P map request\n");
1611                 return;
1612         }
1613
1614         spin_lock(&pblk->trans_lock);
1615         l2p_ppa = pblk_trans_map_get(pblk, lba);
1616
1617         if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa))
1618                 pblk_map_invalidate(pblk, l2p_ppa);
1619
1620         pblk_trans_map_set(pblk, lba, ppa);
1621         spin_unlock(&pblk->trans_lock);
1622 }
1623
1624 void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
1625 {
1626 #ifdef CONFIG_NVM_DEBUG
1627         /* Callers must ensure that the ppa points to a cache address */
1628         BUG_ON(!pblk_addr_in_cache(ppa));
1629         BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
1630 #endif
1631
1632         pblk_update_map(pblk, lba, ppa);
1633 }
1634
1635 int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
1636                        struct pblk_line *gc_line)
1637 {
1638         struct ppa_addr l2p_ppa;
1639         int ret = 1;
1640
1641 #ifdef CONFIG_NVM_DEBUG
1642         /* Callers must ensure that the ppa points to a cache address */
1643         BUG_ON(!pblk_addr_in_cache(ppa));
1644         BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
1645 #endif
1646
1647         /* logic error: lba out-of-bounds. Ignore update */
1648         if (!(lba < pblk->rl.nr_secs)) {
1649                 WARN(1, "pblk: corrupted L2P map request\n");
1650                 return 0;
1651         }
1652
1653         spin_lock(&pblk->trans_lock);
1654         l2p_ppa = pblk_trans_map_get(pblk, lba);
1655
1656         /* Prevent updated entries from being overwritten by GC */
1657         if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) ||
1658                                 pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) {
1659                 ret = 0;
1660                 goto out;
1661         }
1662
1663         pblk_trans_map_set(pblk, lba, ppa);
1664 out:
1665         spin_unlock(&pblk->trans_lock);
1666         return ret;
1667 }
1668
1669 void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
1670                          struct ppa_addr entry_line)
1671 {
1672         struct ppa_addr l2p_line;
1673
1674 #ifdef CONFIG_NVM_DEBUG
1675         /* Callers must ensure that the ppa points to a device address */
1676         BUG_ON(pblk_addr_in_cache(ppa));
1677 #endif
1678         /* Invalidate and discard padded entries */
1679         if (lba == ADDR_EMPTY) {
1680 #ifdef CONFIG_NVM_DEBUG
1681                 atomic_long_inc(&pblk->padded_wb);
1682 #endif
1683                 pblk_map_invalidate(pblk, ppa);
1684                 return;
1685         }
1686
1687         /* logic error: lba out-of-bounds. Ignore update */
1688         if (!(lba < pblk->rl.nr_secs)) {
1689                 WARN(1, "pblk: corrupted L2P map request\n");
1690                 return;
1691         }
1692
1693         spin_lock(&pblk->trans_lock);
1694         l2p_line = pblk_trans_map_get(pblk, lba);
1695
1696         /* Do not update L2P if the cacheline has been updated. In this case,
1697          * the mapped ppa must be invalidated
1698          */
1699         if (l2p_line.ppa != entry_line.ppa) {
1700                 if (!pblk_ppa_empty(ppa))
1701                         pblk_map_invalidate(pblk, ppa);
1702                 goto out;
1703         }
1704
1705 #ifdef CONFIG_NVM_DEBUG
1706         WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
1707 #endif
1708
1709         pblk_trans_map_set(pblk, lba, ppa);
1710 out:
1711         spin_unlock(&pblk->trans_lock);
1712 }
1713
1714 void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
1715                          sector_t blba, int nr_secs)
1716 {
1717         int i;
1718
1719         spin_lock(&pblk->trans_lock);
1720         for (i = 0; i < nr_secs; i++)
1721                 ppas[i] = pblk_trans_map_get(pblk, blba + i);
1722         spin_unlock(&pblk->trans_lock);
1723 }
1724
1725 void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
1726                           u64 *lba_list, int nr_secs)
1727 {
1728         sector_t lba;
1729         int i;
1730
1731         spin_lock(&pblk->trans_lock);
1732         for (i = 0; i < nr_secs; i++) {
1733                 lba = lba_list[i];
1734                 if (lba == ADDR_EMPTY) {
1735                         ppas[i].ppa = ADDR_EMPTY;
1736                 } else {
1737                         /* logic error: lba out-of-bounds. Ignore update */
1738                         if (!(lba < pblk->rl.nr_secs)) {
1739                                 WARN(1, "pblk: corrupted L2P map request\n");
1740                                 continue;
1741                         }
1742                         ppas[i] = pblk_trans_map_get(pblk, lba);
1743                 }
1744         }
1745         spin_unlock(&pblk->trans_lock);
1746 }