/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"

static u32 hw_index_to_key(unsigned long ind)
{
        return (u32)(ind >> 24) | (ind << 8);
}

unsigned long key_to_hw_index(u32 key)
{
        return (key << 24) | (key >> 8);
}

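/*
 * Illustrative example (not part of the driver logic): the two helpers
 * above are 32-bit rotations of each other. For a hardware index of 0x1,
 * hw_index_to_key() yields (0x1 >> 24) | (0x1 << 8) = 0x100, and
 * key_to_hw_index(0x100) gives (0x100 << 24) | (0x100 >> 8) = 0x1 in
 * 32-bit arithmetic, recovering the index. The low byte of the key thus
 * acts as a variable tag while the MPT index lives in the upper bits.
 */
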
static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
                                  struct hns_roce_cmd_mailbox *mailbox,
                                  unsigned long mpt_index)
{
        return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
                                 HNS_ROCE_CMD_CREATE_MPT,
                                 HNS_ROCE_CMD_TIMEOUT_MSECS);
}

int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
                            struct hns_roce_cmd_mailbox *mailbox,
                            unsigned long mpt_index)
{
        return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
                                 mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
                                 HNS_ROCE_CMD_TIMEOUT_MSECS);
}

static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
                                unsigned long *seg)
{
        int o;
        u32 m;

        spin_lock(&buddy->lock);

        for (o = order; o <= buddy->max_order; ++o) {
                if (buddy->num_free[o]) {
                        m = 1 << (buddy->max_order - o);
                        *seg = find_first_bit(buddy->bits[o], m);
                        if (*seg < m)
                                goto found;
                }
        }
        spin_unlock(&buddy->lock);
        return -EINVAL;

 found:
        clear_bit(*seg, buddy->bits[o]);
        --buddy->num_free[o];

        while (o > order) {
                --o;
                *seg <<= 1;
                set_bit(*seg ^ 1, buddy->bits[o]);
                ++buddy->num_free[o];
        }

        spin_unlock(&buddy->lock);

        *seg <<= order;
        return 0;
}

static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
                                int order)
{
        seg >>= order;

        spin_lock(&buddy->lock);

        while (test_bit(seg ^ 1, buddy->bits[order])) {
                clear_bit(seg ^ 1, buddy->bits[order]);
                --buddy->num_free[order];
                seg >>= 1;
                ++order;
        }

        set_bit(seg, buddy->bits[order]);
        ++buddy->num_free[order];

        spin_unlock(&buddy->lock);
}

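/*
 * Worked example (illustrative): with max_order = 2 the allocator starts
 * with one free block at order 2 (bit 0 of bits[2] set). An order-0
 * request walks up to order 2, clears that bit, then splits twice on the
 * way back down, marking the buddy of each half free, so bits[1] and
 * bits[0] each gain one free buddy and seg = 0 is returned (scaled by
 * "<<= order"). Freeing seg 0 at order 0 re-coalesces with those buddies
 * step by step until the single order-2 block is restored.
 */
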
static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
{
        int i, s;

        buddy->max_order = max_order;
        spin_lock_init(&buddy->lock);
        buddy->bits = kcalloc(buddy->max_order + 1,
                              sizeof(*buddy->bits),
                              GFP_KERNEL);
        buddy->num_free = kcalloc(buddy->max_order + 1,
                                  sizeof(*buddy->num_free),
                                  GFP_KERNEL);
        if (!buddy->bits || !buddy->num_free)
                goto err_out;

        for (i = 0; i <= buddy->max_order; ++i) {
                s = BITS_TO_LONGS(1 << (buddy->max_order - i));
                buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
                                         __GFP_NOWARN);
                if (!buddy->bits[i]) {
                        buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
                        if (!buddy->bits[i])
                                goto err_out_free;
                }
        }

        set_bit(0, buddy->bits[buddy->max_order]);
        buddy->num_free[buddy->max_order] = 1;

        return 0;

err_out_free:
        for (i = 0; i <= buddy->max_order; ++i)
                kvfree(buddy->bits[i]);

err_out:
        kfree(buddy->bits);
        kfree(buddy->num_free);
        return -ENOMEM;
}

static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
{
        int i;

        for (i = 0; i <= buddy->max_order; ++i)
                kvfree(buddy->bits[i]);

        kfree(buddy->bits);
        kfree(buddy->num_free);
}

static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
                                    unsigned long *seg, u32 mtt_type)
{
        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
        struct hns_roce_hem_table *table;
        struct hns_roce_buddy *buddy;
        int ret;

        switch (mtt_type) {
        case MTT_TYPE_WQE:
                buddy = &mr_table->mtt_buddy;
                table = &mr_table->mtt_table;
                break;
        case MTT_TYPE_CQE:
                buddy = &mr_table->mtt_cqe_buddy;
                table = &mr_table->mtt_cqe_table;
                break;
        case MTT_TYPE_SRQWQE:
                buddy = &mr_table->mtt_srqwqe_buddy;
                table = &mr_table->mtt_srqwqe_table;
                break;
        case MTT_TYPE_IDX:
                buddy = &mr_table->mtt_idx_buddy;
                table = &mr_table->mtt_idx_table;
                break;
        default:
                dev_err(hr_dev->dev, "Unsupported MTT table type: %d\n",
                        mtt_type);
                return -EINVAL;
        }

        ret = hns_roce_buddy_alloc(buddy, order, seg);
        if (ret)
                return ret;

        ret = hns_roce_table_get_range(hr_dev, table, *seg,
                                       *seg + (1 << order) - 1);
        if (ret) {
                hns_roce_buddy_free(buddy, *seg, order);
                return ret;
        }

        return 0;
}

int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
                      struct hns_roce_mtt *mtt)
{
        int ret;
        int i;

        /* A page count of zero corresponds to a DMA memory registration */
        if (!npages) {
                mtt->order = -1;
                mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
                return 0;
        }

        /* Note: a page_shift of zero means fast memory registration */
        mtt->page_shift = page_shift;

        /* Compute the order needed to cover npages MTT entries */
        for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
             i <<= 1)
                ++mtt->order;

        /* Allocate MTT entries */
        ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
                                       mtt->mtt_type);
        if (ret)
                return ret;

        return 0;
}

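/*
 * Example (illustrative): with HNS_ROCE_MTT_ENTRY_PER_SEG entries per
 * segment (8 in this driver at the time of writing), a request for
 * npages = 50 walks i = 8, 16, 32, 64 and gives mtt->order = 3: eight
 * contiguous segments (64 entries) are reserved from the buddy, the
 * smallest power-of-two block covering the 50 pages.
 */
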
void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
{
        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

        if (mtt->order < 0)
                return;

        switch (mtt->mtt_type) {
        case MTT_TYPE_WQE:
                hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
                                    mtt->order);
                hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
                                        mtt->first_seg,
                                        mtt->first_seg + (1 << mtt->order) - 1);
                break;
        case MTT_TYPE_CQE:
                hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
                                    mtt->order);
                hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
                                        mtt->first_seg,
                                        mtt->first_seg + (1 << mtt->order) - 1);
                break;
        case MTT_TYPE_SRQWQE:
                hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
                                    mtt->order);
                hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
                                        mtt->first_seg,
                                        mtt->first_seg + (1 << mtt->order) - 1);
                break;
        case MTT_TYPE_IDX:
                hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
                                    mtt->order);
                hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
                                        mtt->first_seg,
                                        mtt->first_seg + (1 << mtt->order) - 1);
                break;
        default:
                dev_err(hr_dev->dev,
                        "Unsupported mtt type %d, clean mtt failed\n",
                        mtt->mtt_type);
                break;
        }
}

static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
                               struct hns_roce_mr *mr, int err_loop_index,
                               int loop_i, int loop_j)
{
        struct device *dev = hr_dev->dev;
        u32 mhop_num;
        u32 pbl_bt_sz;
        u64 bt_idx;
        int i, j;

        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
        mhop_num = hr_dev->caps.pbl_hop_num;

        i = loop_i;
        if (mhop_num == 3 && err_loop_index == 2) {
                for (; i >= 0; i--) {
                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
                                          mr->pbl_l1_dma_addr[i]);

                        for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
                                if (i == loop_i && j >= loop_j)
                                        break;

                                bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
                                dma_free_coherent(dev, pbl_bt_sz,
                                                  mr->pbl_bt_l2[bt_idx],
                                                  mr->pbl_l2_dma_addr[bt_idx]);
                        }
                }
        } else if (mhop_num == 3 && err_loop_index == 1) {
                for (i -= 1; i >= 0; i--) {
                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
                                          mr->pbl_l1_dma_addr[i]);

                        for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
                                bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
                                dma_free_coherent(dev, pbl_bt_sz,
                                                  mr->pbl_bt_l2[bt_idx],
                                                  mr->pbl_l2_dma_addr[bt_idx]);
                        }
                }
        } else if (mhop_num == 2 && err_loop_index == 1) {
                for (i -= 1; i >= 0; i--)
                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
                                          mr->pbl_l1_dma_addr[i]);
        } else {
                dev_warn(dev, "unsupported case: mhop_num=%d, err_loop_index=%d.\n",
                         mhop_num, err_loop_index);
                return;
        }

        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
        mr->pbl_bt_l0 = NULL;
        mr->pbl_l0_dma_addr = 0;
}

static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
                               struct hns_roce_mr *mr, u32 pbl_bt_sz)
{
        struct device *dev = hr_dev->dev;

        if (npages > pbl_bt_sz / 8) {
                dev_err(dev, "npages %d exceeds the capacity of one BT page!\n",
                        npages);
                return -EINVAL;
        }
        mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
                                         &(mr->pbl_dma_addr),
                                         GFP_KERNEL);
        if (!mr->pbl_buf)
                return -ENOMEM;

        mr->pbl_size = npages;
        mr->pbl_ba = mr->pbl_dma_addr;
        mr->pbl_hop_num = 1;
        mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
        mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
        return 0;
}

static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
                               struct hns_roce_mr *mr, u32 pbl_bt_sz)
{
        struct device *dev = hr_dev->dev;
        int npages_allocated;
        u64 pbl_last_bt_num;
        u64 pbl_bt_cnt = 0;
        u64 size;
        int i;

        pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);

        /* alloc L1 BT */
        for (i = 0; i < pbl_bt_sz / 8; i++) {
                if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
                        size = pbl_bt_sz;
                } else {
                        npages_allocated = i * (pbl_bt_sz / 8);
                        size = (npages - npages_allocated) * 8;
                }
                mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
                                            &(mr->pbl_l1_dma_addr[i]),
                                            GFP_KERNEL);
                if (!mr->pbl_bt_l1[i]) {
                        hns_roce_loop_free(hr_dev, mr, 1, i, 0);
                        return -ENOMEM;
                }

                *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];

                pbl_bt_cnt++;
                if (pbl_bt_cnt >= pbl_last_bt_num)
                        break;
        }

        mr->l0_chunk_last_num = i + 1;

        return 0;
}

static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
                               struct hns_roce_mr *mr, u32 pbl_bt_sz)
{
        struct device *dev = hr_dev->dev;
        int mr_alloc_done = 0;
        int npages_allocated;
        u64 pbl_last_bt_num;
        u64 pbl_bt_cnt = 0;
        u64 bt_idx;
        u64 size;
        int i;
        int j = 0;

        pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);

        mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
                                      sizeof(*mr->pbl_l2_dma_addr),
                                      GFP_KERNEL);
        if (!mr->pbl_l2_dma_addr)
                return -ENOMEM;

        mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
                                sizeof(*mr->pbl_bt_l2),
                                GFP_KERNEL);
        if (!mr->pbl_bt_l2)
                goto err_kcalloc_bt_l2;

        /* alloc L1, L2 BT */
        for (i = 0; i < pbl_bt_sz / 8; i++) {
                mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
                                            &(mr->pbl_l1_dma_addr[i]),
                                            GFP_KERNEL);
                if (!mr->pbl_bt_l1[i]) {
                        hns_roce_loop_free(hr_dev, mr, 1, i, 0);
                        goto err_dma_alloc_l0;
                }

                *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];

                for (j = 0; j < pbl_bt_sz / 8; j++) {
                        bt_idx = i * pbl_bt_sz / 8 + j;

                        if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
                                size = pbl_bt_sz;
                        } else {
                                npages_allocated = bt_idx *
                                                   (pbl_bt_sz / 8);
                                size = (npages - npages_allocated) * 8;
                        }
                        mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
                                      dev, size,
                                      &(mr->pbl_l2_dma_addr[bt_idx]),
                                      GFP_KERNEL);
                        if (!mr->pbl_bt_l2[bt_idx]) {
                                hns_roce_loop_free(hr_dev, mr, 2, i, j);
                                goto err_dma_alloc_l0;
                        }

                        *(mr->pbl_bt_l1[i] + j) =
                                        mr->pbl_l2_dma_addr[bt_idx];

                        pbl_bt_cnt++;
                        if (pbl_bt_cnt >= pbl_last_bt_num) {
                                mr_alloc_done = 1;
                                break;
                        }
                }

                if (mr_alloc_done)
                        break;
        }

        mr->l0_chunk_last_num = i + 1;
        mr->l1_chunk_last_num = j + 1;

        return 0;

err_dma_alloc_l0:
        kfree(mr->pbl_bt_l2);
        mr->pbl_bt_l2 = NULL;

err_kcalloc_bt_l2:
        kfree(mr->pbl_l2_dma_addr);
        mr->pbl_l2_dma_addr = NULL;

        return -ENOMEM;
}

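/*
 * Layout sketch (illustrative): with pbl_bt_sz = 4096 each base-address
 * table (BT) holds 512 8-byte entries. One hop points the MPT straight
 * at a PBL of page addresses; two hops add an L0 BT whose entries point
 * at up to 512 L1 PBLs (512 * 512 pages); three hops insert an L2 level
 * beneath L1 for another factor of 512. The pbl_Nhop_alloc() helpers
 * above build exactly these trees.
 */
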
/* PBL multi hop addressing */
static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
                               struct hns_roce_mr *mr)
{
        struct device *dev = hr_dev->dev;
        u32 pbl_bt_sz;
        u32 mhop_num;

        mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);

        if (mhop_num == HNS_ROCE_HOP_NUM_0)
                return 0;

        if (mhop_num == 1)
                return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);

        mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
                                      sizeof(*mr->pbl_l1_dma_addr),
                                      GFP_KERNEL);
        if (!mr->pbl_l1_dma_addr)
                return -ENOMEM;

        mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
                                GFP_KERNEL);
        if (!mr->pbl_bt_l1)
                goto err_kcalloc_bt_l1;

        /* alloc L0 BT */
        mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
                                           &(mr->pbl_l0_dma_addr),
                                           GFP_KERNEL);
        if (!mr->pbl_bt_l0)
                goto err_kcalloc_l2_dma;

        if (mhop_num == 2) {
                if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
                        goto err_kcalloc_l2_dma;
        }

        if (mhop_num == 3) {
                if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
                        goto err_kcalloc_l2_dma;
        }

        mr->pbl_size = npages;
        mr->pbl_ba = mr->pbl_l0_dma_addr;
        mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
        mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
        mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

        return 0;

err_kcalloc_l2_dma:
        kfree(mr->pbl_bt_l1);
        mr->pbl_bt_l1 = NULL;

err_kcalloc_bt_l1:
        kfree(mr->pbl_l1_dma_addr);
        mr->pbl_l1_dma_addr = NULL;

        return -ENOMEM;
}

static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
                             u64 size, u32 access, int npages,
                             struct hns_roce_mr *mr)
{
        struct device *dev = hr_dev->dev;
        unsigned long index = 0;
        int ret;

        /* Allocate a key for mr from mr_table */
        ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
        if (ret)
                return -ENOMEM;

        mr->iova = iova;                        /* MR va starting addr */
        mr->size = size;                        /* MR addr range */
        mr->pd = pd;                            /* MR's protection domain */
        mr->access = access;                    /* MR access permission */
        mr->enabled = 0;                        /* MR active status */
        mr->key = hw_index_to_key(index);       /* MR key */

        if (size == ~0ull) {
                mr->pbl_buf = NULL;
                mr->pbl_dma_addr = 0;
                /* PBL multi-hop addressing parameters */
                mr->pbl_bt_l2 = NULL;
                mr->pbl_bt_l1 = NULL;
                mr->pbl_bt_l0 = NULL;
                mr->pbl_l2_dma_addr = NULL;
                mr->pbl_l1_dma_addr = NULL;
                mr->pbl_l0_dma_addr = 0;
        } else {
                if (!hr_dev->caps.pbl_hop_num) {
                        mr->pbl_buf = dma_alloc_coherent(dev,
                                                         npages * BA_BYTE_LEN,
                                                         &(mr->pbl_dma_addr),
                                                         GFP_KERNEL);
                        if (!mr->pbl_buf)
                                return -ENOMEM;
                } else {
                        ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
                }
        }

        return ret;
}

static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
                               struct hns_roce_mr *mr)
{
        struct device *dev = hr_dev->dev;
        int npages_allocated;
        int npages;
        int i, j;
        u32 pbl_bt_sz;
        u32 mhop_num;
        u64 bt_idx;

        npages = mr->pbl_size;
        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
        mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;

        if (mhop_num == HNS_ROCE_HOP_NUM_0)
                return;

        if (mhop_num == 1) {
                dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN),
                                  mr->pbl_buf, mr->pbl_dma_addr);
                return;
        }

        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
                          mr->pbl_l0_dma_addr);

        if (mhop_num == 2) {
                for (i = 0; i < mr->l0_chunk_last_num; i++) {
                        if (i == mr->l0_chunk_last_num - 1) {
                                npages_allocated =
                                                i * (pbl_bt_sz / BA_BYTE_LEN);

                                dma_free_coherent(dev,
                                      (npages - npages_allocated) * BA_BYTE_LEN,
                                       mr->pbl_bt_l1[i],
                                       mr->pbl_l1_dma_addr[i]);

                                break;
                        }

                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
                                          mr->pbl_l1_dma_addr[i]);
                }
        } else if (mhop_num == 3) {
                for (i = 0; i < mr->l0_chunk_last_num; i++) {
                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
                                          mr->pbl_l1_dma_addr[i]);

                        for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
                                bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j;

                                if ((i == mr->l0_chunk_last_num - 1)
                                    && j == mr->l1_chunk_last_num - 1) {
                                        npages_allocated = bt_idx *
                                                      (pbl_bt_sz / BA_BYTE_LEN);

                                        dma_free_coherent(dev,
                                              (npages - npages_allocated) *
                                              BA_BYTE_LEN,
                                              mr->pbl_bt_l2[bt_idx],
                                              mr->pbl_l2_dma_addr[bt_idx]);

                                        break;
                                }

                                dma_free_coherent(dev, pbl_bt_sz,
                                                mr->pbl_bt_l2[bt_idx],
                                                mr->pbl_l2_dma_addr[bt_idx]);
                        }
                }
        }

        kfree(mr->pbl_bt_l1);
        kfree(mr->pbl_l1_dma_addr);
        mr->pbl_bt_l1 = NULL;
        mr->pbl_l1_dma_addr = NULL;
        if (mhop_num == 3) {
                kfree(mr->pbl_bt_l2);
                kfree(mr->pbl_l2_dma_addr);
                mr->pbl_bt_l2 = NULL;
                mr->pbl_l2_dma_addr = NULL;
        }
}

static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
                             struct hns_roce_mr *mr)
{
        struct device *dev = hr_dev->dev;
        int npages = 0;
        int ret;

        if (mr->enabled) {
                ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
                                              key_to_hw_index(mr->key) &
                                              (hr_dev->caps.num_mtpts - 1));
                if (ret)
                        dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
        }

        if (mr->size != ~0ULL) {
                if (mr->type == MR_TYPE_MR)
                        npages = ib_umem_page_count(mr->umem);

                if (!hr_dev->caps.pbl_hop_num)
                        dma_free_coherent(dev,
                                          (unsigned int)(npages * BA_BYTE_LEN),
                                          mr->pbl_buf, mr->pbl_dma_addr);
                else
                        hns_roce_mhop_free(hr_dev, mr);
        }

        if (mr->enabled)
                hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
                                   key_to_hw_index(mr->key));

        hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
                             key_to_hw_index(mr->key), BITMAP_NO_RR);
}

static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
                              struct hns_roce_mr *mr)
{
        int ret;
        unsigned long mtpt_idx = key_to_hw_index(mr->key);
        struct device *dev = hr_dev->dev;
        struct hns_roce_cmd_mailbox *mailbox;
        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

        /* Prepare HEM entry memory */
        ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
        if (ret)
                return ret;

        /* Allocate mailbox memory */
        mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
        if (IS_ERR(mailbox)) {
                ret = PTR_ERR(mailbox);
                goto err_table;
        }

        if (mr->type != MR_TYPE_FRMR)
                ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
        else
                ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
        if (ret) {
                dev_err(dev, "Write mtpt failed!\n");
                goto err_page;
        }

        ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
                                     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
        if (ret) {
                dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
                goto err_page;
        }

        mr->enabled = 1;
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);

        return 0;

err_page:
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
        hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
        return ret;
}

static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
                                    struct hns_roce_mtt *mtt, u32 start_index,
                                    u32 npages, u64 *page_list)
{
        struct hns_roce_hem_table *table;
        dma_addr_t dma_handle;
        __le64 *mtts;
        u32 bt_page_size;
        u32 i;

        switch (mtt->mtt_type) {
        case MTT_TYPE_WQE:
                table = &hr_dev->mr_table.mtt_table;
                bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
                break;
        case MTT_TYPE_CQE:
                table = &hr_dev->mr_table.mtt_cqe_table;
                bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
                break;
        case MTT_TYPE_SRQWQE:
                table = &hr_dev->mr_table.mtt_srqwqe_table;
                bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
                break;
        case MTT_TYPE_IDX:
                table = &hr_dev->mr_table.mtt_idx_table;
                bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
                break;
        default:
                return -EINVAL;
        }

        /* All MTTs must fit in the same page */
        if (start_index / (bt_page_size / sizeof(u64)) !=
                (start_index + npages - 1) / (bt_page_size / sizeof(u64)))
                return -EINVAL;

        if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
                return -EINVAL;

        mtts = hns_roce_table_find(hr_dev, table,
                                mtt->first_seg +
                                start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
                                &dma_handle);
        if (!mtts)
                return -ENOMEM;

        /* Save page addr, low 12 bits : 0 */
        for (i = 0; i < npages; ++i) {
                if (!hr_dev->caps.mtt_hop_num)
                        mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
                else
                        mtts[i] = cpu_to_le64(page_list[i]);
        }

        return 0;
}

static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
                              struct hns_roce_mtt *mtt, u32 start_index,
                              u32 npages, u64 *page_list)
{
        int chunk;
        int ret;
        u32 bt_page_size;

        if (mtt->order < 0)
                return -EINVAL;

        switch (mtt->mtt_type) {
        case MTT_TYPE_WQE:
                bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
                break;
        case MTT_TYPE_CQE:
                bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
                break;
        case MTT_TYPE_SRQWQE:
                bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
                break;
        case MTT_TYPE_IDX:
                bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
                break;
        default:
                dev_err(hr_dev->dev,
                        "Unsupported mtt type %d, write mtt failed\n",
                        mtt->mtt_type);
                return -EINVAL;
        }

        while (npages > 0) {
                chunk = min_t(int, bt_page_size / sizeof(u64), npages);

                ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
                                               page_list);
                if (ret)
                        return ret;

                npages -= chunk;
                start_index += chunk;
                page_list += chunk;
        }

        return 0;
}

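/*
 * Example (illustrative): writing 2000 page addresses with a 4 KiB BT
 * page gives chunks of at most 4096 / sizeof(u64) = 512 entries, so the
 * loop above issues four hns_roce_write_mtt_chunk() calls (512, 512,
 * 512, 464), each guaranteed to stay within a single BT page as the
 * chunk helper requires.
 */
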
int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
                           struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
{
        u64 *page_list;
        int ret;
        u32 i;

        page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
        if (!page_list)
                return -ENOMEM;

        for (i = 0; i < buf->npages; ++i) {
                if (buf->nbufs == 1)
                        page_list[i] = buf->direct.map + (i << buf->page_shift);
                else
                        page_list[i] = buf->page_list[i].map;
        }

        ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);

        kfree(page_list);

        return ret;
}

int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
{
        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
        int ret;

        ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
                                   hr_dev->caps.num_mtpts,
                                   hr_dev->caps.num_mtpts - 1,
                                   hr_dev->caps.reserved_mrws, 0);
        if (ret)
                return ret;

        ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
                                  ilog2(hr_dev->caps.num_mtt_segs));
        if (ret)
                goto err_buddy;

        if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
                ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
                                          ilog2(hr_dev->caps.num_cqe_segs));
                if (ret)
                        goto err_buddy_cqe;
        }

        if (hr_dev->caps.num_srqwqe_segs) {
                ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
                                          ilog2(hr_dev->caps.num_srqwqe_segs));
                if (ret)
                        goto err_buddy_srqwqe;
        }

        if (hr_dev->caps.num_idx_segs) {
                ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
                                          ilog2(hr_dev->caps.num_idx_segs));
                if (ret)
                        goto err_buddy_idx;
        }

        return 0;

err_buddy_idx:
        if (hr_dev->caps.num_srqwqe_segs)
                hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);

err_buddy_srqwqe:
        if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
                hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);

err_buddy_cqe:
        hns_roce_buddy_cleanup(&mr_table->mtt_buddy);

err_buddy:
        hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
        return ret;
}

void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
{
        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

        if (hr_dev->caps.num_idx_segs)
                hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
        if (hr_dev->caps.num_srqwqe_segs)
                hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
        hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
        if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
                hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
        hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
}

struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct hns_roce_mr *mr;
        int ret;

        mr = kmalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        mr->type = MR_TYPE_DMA;

        /* Allocate memory region key */
        ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
                                ~0ULL, acc, 0, mr);
        if (ret)
                goto err_free;

        ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
        if (ret)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
        mr->umem = NULL;

        return &mr->ibmr;

err_mr:
        hns_roce_mr_free(to_hr_dev(pd->device), mr);

err_free:
        kfree(mr);
        return ERR_PTR(ret);
}

int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
                               struct hns_roce_mtt *mtt, struct ib_umem *umem)
{
        struct device *dev = hr_dev->dev;
        struct sg_dma_page_iter sg_iter;
        unsigned int order;
        int npage = 0;
        int ret = 0;
        int i;
        u64 page_addr;
        u64 *pages;
        u32 bt_page_size;
        u32 n;

        switch (mtt->mtt_type) {
        case MTT_TYPE_WQE:
                order = hr_dev->caps.mtt_ba_pg_sz;
                break;
        case MTT_TYPE_CQE:
                order = hr_dev->caps.cqe_ba_pg_sz;
                break;
        case MTT_TYPE_SRQWQE:
                order = hr_dev->caps.srqwqe_ba_pg_sz;
                break;
        case MTT_TYPE_IDX:
                order = hr_dev->caps.idx_ba_pg_sz;
                break;
        default:
                dev_err(dev, "Unsupported mtt type %d, write mtt failed\n",
                        mtt->mtt_type);
                return -EINVAL;
        }

        bt_page_size = 1 << (order + PAGE_SHIFT);

        pages = (u64 *)__get_free_pages(GFP_KERNEL, order);
        if (!pages)
                return -ENOMEM;

        i = n = 0;

        for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
                page_addr = sg_page_iter_dma_address(&sg_iter);
                if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
                        if (page_addr & ((1 << mtt->page_shift) - 1)) {
                                dev_err(dev,
                                        "page_addr is not aligned to page_shift %d!\n",
                                        mtt->page_shift);
                                ret = -EINVAL;
                                goto out;
                        }
                        pages[i++] = page_addr;
                }
                npage++;
                if (i == bt_page_size / sizeof(u64)) {
                        ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
                        if (ret)
                                goto out;
                        n += i;
                        i = 0;
                }
        }

        if (i)
                ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);

out:
        free_pages((unsigned long)pages, order);
        return ret;
}

static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
                                     struct hns_roce_mr *mr,
                                     struct ib_umem *umem)
{
        struct sg_dma_page_iter sg_iter;
        int i = 0, j = 0;
        u64 page_addr;
        u32 pbl_bt_sz;

        if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
                return 0;

        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
        for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
                page_addr = sg_page_iter_dma_address(&sg_iter);
                if (!hr_dev->caps.pbl_hop_num) {
                        /* for hip06, page addr is aligned to 4K */
                        mr->pbl_buf[i++] = page_addr >> 12;
                } else if (hr_dev->caps.pbl_hop_num == 1) {
                        mr->pbl_buf[i++] = page_addr;
                } else {
                        if (hr_dev->caps.pbl_hop_num == 2)
                                mr->pbl_bt_l1[i][j] = page_addr;
                        else if (hr_dev->caps.pbl_hop_num == 3)
                                mr->pbl_bt_l2[i][j] = page_addr;

                        j++;
                        if (j >= (pbl_bt_sz / BA_BYTE_LEN)) {
                                i++;
                                j = 0;
                        }
                }
        }

        /* Ensure the PBL updates are visible before the MPT is written */
        mb();

        return 0;
}

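/*
 * Indexing sketch (illustrative): for multi-hop PBLs the walk above
 * fills one leaf table at a time. With pbl_bt_sz = 4096 each leaf holds
 * 512 entries, so j wraps to 0 and i advances to the next L1 (two hops)
 * or L2 (three hops) table after every 512 pages.
 */
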
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                   u64 virt_addr, int access_flags,
                                   struct ib_udata *udata)
{
        struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
        struct device *dev = hr_dev->dev;
        struct hns_roce_mr *mr;
        int bt_size;
        int ret;
        int n;
        int i;

        mr = kmalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        mr->umem = ib_umem_get(pd->device, start, length, access_flags);
        if (IS_ERR(mr->umem)) {
                ret = PTR_ERR(mr->umem);
                goto err_free;
        }

        n = ib_umem_page_count(mr->umem);

        if (!hr_dev->caps.pbl_hop_num) {
                if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
                        dev_err(dev,
                                "MR len %lld err. MR is limited to 4G at most!\n",
                                length);
                        ret = -EINVAL;
                        goto err_umem;
                }
        } else {
                u64 pbl_size = 1;

                bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) /
                          BA_BYTE_LEN;
                for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
                        pbl_size *= bt_size;
                if (n > pbl_size) {
                        dev_err(dev,
                                "MR len %lld err. MR page num is limited to %lld!\n",
                                length, pbl_size);
                        ret = -EINVAL;
                        goto err_umem;
                }
        }

        mr->type = MR_TYPE_MR;

        ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
                                access_flags, n, mr);
        if (ret)
                goto err_umem;

        ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
        if (ret)
                goto err_mr;

        ret = hns_roce_mr_enable(hr_dev, mr);
        if (ret)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->key;

        return &mr->ibmr;

err_mr:
        hns_roce_mr_free(hr_dev, mr);

err_umem:
        ib_umem_release(mr->umem);

err_free:
        kfree(mr);
        return ERR_PTR(ret);
}

static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
                          u64 start, u64 length,
                          u64 virt_addr, int mr_access_flags,
                          struct hns_roce_cmd_mailbox *mailbox,
                          u32 pdn, struct ib_udata *udata)
{
        struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
        struct hns_roce_mr *mr = to_hr_mr(ibmr);
        struct device *dev = hr_dev->dev;
        int npages;
        int ret;

        if (mr->size != ~0ULL) {
                npages = ib_umem_page_count(mr->umem);

                if (hr_dev->caps.pbl_hop_num)
                        hns_roce_mhop_free(hr_dev, mr);
                else
                        dma_free_coherent(dev, npages * 8,
                                          mr->pbl_buf, mr->pbl_dma_addr);
        }
        ib_umem_release(mr->umem);

        mr->umem = ib_umem_get(ibmr->device, start, length, mr_access_flags);
        if (IS_ERR(mr->umem)) {
                ret = PTR_ERR(mr->umem);
                mr->umem = NULL;
                return ret;
        }
        npages = ib_umem_page_count(mr->umem);

        if (hr_dev->caps.pbl_hop_num) {
                ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
                if (ret)
                        goto release_umem;
        } else {
                mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
                                                 &(mr->pbl_dma_addr),
                                                 GFP_KERNEL);
                if (!mr->pbl_buf) {
                        ret = -ENOMEM;
                        goto release_umem;
                }
        }

        ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
                                           mr_access_flags, virt_addr,
                                           length, mailbox->buf);
        if (ret)
                goto release_umem;

        ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
        if (ret) {
                if (mr->size != ~0ULL) {
                        npages = ib_umem_page_count(mr->umem);

                        if (hr_dev->caps.pbl_hop_num)
                                hns_roce_mhop_free(hr_dev, mr);
                        else
                                dma_free_coherent(dev, npages * 8,
                                                  mr->pbl_buf,
                                                  mr->pbl_dma_addr);
                }

                goto release_umem;
        }

        return 0;

release_umem:
        ib_umem_release(mr->umem);
        return ret;
}

int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
                           u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
                           struct ib_udata *udata)
{
        struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
        struct hns_roce_mr *mr = to_hr_mr(ibmr);
        struct hns_roce_cmd_mailbox *mailbox;
        struct device *dev = hr_dev->dev;
        unsigned long mtpt_idx;
        u32 pdn = 0;
        int ret;

        if (!mr->enabled)
                return -EINVAL;

        mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);

        mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
        ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
                                HNS_ROCE_CMD_QUERY_MPT,
                                HNS_ROCE_CMD_TIMEOUT_MSECS);
        if (ret)
                goto free_cmd_mbox;

        ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
        if (ret)
                dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);

        mr->enabled = 0;

        if (flags & IB_MR_REREG_PD)
                pdn = to_hr_pd(pd)->pdn;

        if (flags & IB_MR_REREG_TRANS) {
                ret = rereg_mr_trans(ibmr, flags,
                                     start, length,
                                     virt_addr, mr_access_flags,
                                     mailbox, pdn, udata);
                if (ret)
                        goto free_cmd_mbox;
        } else {
                ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
                                                   mr_access_flags, virt_addr,
                                                   length, mailbox->buf);
                if (ret)
                        goto free_cmd_mbox;
        }

        ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
        if (ret) {
                dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
                ib_umem_release(mr->umem);
                goto free_cmd_mbox;
        }

        mr->enabled = 1;
        if (flags & IB_MR_REREG_ACCESS)
                mr->access = mr_access_flags;

        hns_roce_free_cmd_mailbox(hr_dev, mailbox);

        return 0;

free_cmd_mbox:
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);

        return ret;
}

int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
        struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
        struct hns_roce_mr *mr = to_hr_mr(ibmr);
        int ret = 0;

        if (hr_dev->hw->dereg_mr) {
                ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
        } else {
                hns_roce_mr_free(hr_dev, mr);

                ib_umem_release(mr->umem);
                kfree(mr);
        }

        return ret;
}

struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
                                u32 max_num_sg, struct ib_udata *udata)
{
        struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
        struct device *dev = hr_dev->dev;
        struct hns_roce_mr *mr;
        u64 length;
        u32 page_size;
        int ret;

        page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
        length = max_num_sg * page_size;

        if (mr_type != IB_MR_TYPE_MEM_REG)
                return ERR_PTR(-EINVAL);

        if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
                dev_err(dev, "max_num_sg larger than %d\n",
                        HNS_ROCE_FRMR_MAX_PA);
                return ERR_PTR(-EINVAL);
        }

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        mr->type = MR_TYPE_FRMR;

        /* Allocate memory region key */
        ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
                                0, max_num_sg, mr);
        if (ret)
                goto err_free;

        ret = hns_roce_mr_enable(hr_dev, mr);
        if (ret)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
        mr->umem = NULL;

        return &mr->ibmr;

err_mr:
        hns_roce_mr_free(hr_dev, mr);

err_free:
        kfree(mr);
        return ERR_PTR(ret);
}

static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct hns_roce_mr *mr = to_hr_mr(ibmr);

        mr->pbl_buf[mr->npages++] = addr;

        return 0;
}

int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
                       unsigned int *sg_offset)
{
        struct hns_roce_mr *mr = to_hr_mr(ibmr);

        mr->npages = 0;

        return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
}

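/*
 * Usage sketch (illustrative, standard kernel FRMR flow): a consumer
 * calls ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, n) -- which lands in
 * hns_roce_alloc_mr() above -- then ib_map_mr_sg() to run the
 * scatterlist through hns_roce_set_page(), and finally posts an
 * IB_WR_REG_MR work request to bind the mapped pages to the rkey.
 */
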
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
                             struct hns_roce_mw *mw)
{
        struct device *dev = hr_dev->dev;
        int ret;

        if (mw->enabled) {
                ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
                                              key_to_hw_index(mw->rkey) &
                                              (hr_dev->caps.num_mtpts - 1));
                if (ret)
                        dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);

                hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
                                   key_to_hw_index(mw->rkey));
        }

        hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
                             key_to_hw_index(mw->rkey), BITMAP_NO_RR);
}

static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
                              struct hns_roce_mw *mw)
{
        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
        struct hns_roce_cmd_mailbox *mailbox;
        struct device *dev = hr_dev->dev;
        unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
        int ret;

        /* prepare HEM entry memory */
        ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
        if (ret)
                return ret;

        mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
        if (IS_ERR(mailbox)) {
                ret = PTR_ERR(mailbox);
                goto err_table;
        }

        ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
        if (ret) {
                dev_err(dev, "MW write mtpt failed!\n");
                goto err_page;
        }

        ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
                                     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
        if (ret) {
                dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
                goto err_page;
        }

        mw->enabled = 1;

        hns_roce_free_cmd_mailbox(hr_dev, mailbox);

        return 0;

err_page:
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
        hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);

        return ret;
}

struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
                                struct ib_udata *udata)
{
        struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
        struct hns_roce_mw *mw;
        unsigned long index = 0;
        int ret;

        mw = kmalloc(sizeof(*mw), GFP_KERNEL);
        if (!mw)
                return ERR_PTR(-ENOMEM);

        /* Allocate a key for mw from bitmap */
        ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
        if (ret)
                goto err_bitmap;

        mw->rkey = hw_index_to_key(index);

        mw->ibmw.rkey = mw->rkey;
        mw->ibmw.type = type;
        mw->pdn = to_hr_pd(ib_pd)->pdn;
        mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
        mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
        mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

        ret = hns_roce_mw_enable(hr_dev, mw);
        if (ret)
                goto err_mw;

        return &mw->ibmw;

err_mw:
        hns_roce_mw_free(hr_dev, mw);

err_bitmap:
        kfree(mw);

        return ERR_PTR(ret);
}

int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
        struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
        struct hns_roce_mw *mw = to_hr_mw(ibmw);

        hns_roce_mw_free(hr_dev, mw);
        kfree(mw);

        return 0;
}

void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
                       int buf_pg_shift)
{
        hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift);
        mtr->buf_pg_shift = buf_pg_shift;
}

void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
                          struct hns_roce_mtr *mtr)
{
        hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
}

static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
                              struct hns_roce_mtr *mtr, dma_addr_t *bufs,
                              struct hns_roce_buf_region *r)
{
        int offset;
        int count;
        int npage;
        u64 *mtts;
        int end;
        int i;

        offset = r->offset;
        end = offset + r->count;
        npage = 0;
        while (offset < end) {
                mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
                                                  offset, &count, NULL);
                if (!mtts)
                        return -ENOBUFS;

                /* Save page addr, low 12 bits : 0 */
                for (i = 0; i < count; i++) {
                        if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
                                mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT;
                        else
                                mtts[i] = bufs[npage];

                        npage++;
                }
                offset += count;
        }

        return 0;
}

int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
                        dma_addr_t **bufs, struct hns_roce_buf_region *regions,
                        int region_cnt)
{
        struct hns_roce_buf_region *r;
        int ret;
        int i;

        ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions,
                                        region_cnt);
        if (ret)
                return ret;

        for (i = 0; i < region_cnt; i++) {
                r = &regions[i];
                ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r);
                if (ret) {
                        dev_err(hr_dev->dev,
                                "write mtr[%d/%d] err %d, offset=%d.\n",
                                i, region_cnt, ret, r->offset);
                        goto err_write;
                }
        }

        return 0;

err_write:
        hns_roce_hem_list_release(hr_dev, &mtr->hem_list);

        return ret;
}

int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
                      int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
        u64 *mtts = mtt_buf;
        int mtt_count;
        int total = 0;
        u64 *addr;
        int npage;
        int left;

        if (!mtts || mtt_max < 1)
                goto done;

        left = mtt_max;
        while (left > 0) {
                mtt_count = 0;
                addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
                                                  offset + total,
                                                  &mtt_count, NULL);
                if (!addr || !mtt_count)
                        goto done;

                npage = min(mtt_count, left);
                memcpy(&mtts[total], addr, BA_BYTE_LEN * npage);
                left -= npage;
                total += npage;
        }

done:
        if (base_addr)
                *base_addr = mtr->hem_list.root_ba;

        return total;
}
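
/*
 * Example (illustrative): hns_roce_mtr_find() copies up to mtt_max base
 * addresses starting at "offset" into mtt_buf, looping because the HEM
 * list returns at most one contiguous run per lookup; callers (e.g. the
 * queue setup paths) use the returned count together with root_ba to
 * program hardware contexts.
 */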