]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/infiniband/hw/efa/efa_com.c
Merge tag 'irqchip-fixes-5.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux.git] / drivers / infiniband / hw / efa / efa_com.c
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
2 /*
3  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
4  */
5
6 #include "efa_com.h"
7 #include "efa_regs_defs.h"
8
/* Admin command timeout used when the CAPS register reports none */
#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */

/* NOTE(review): not referenced in this part of the file - presumably the
 * default for mmio_read_timeout; confirm against the mmio init code.
 */
#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */
/* Value returned by efa_com_reg_read32() when the read could not be done */
#define EFA_MMIO_READ_INVALID 0xffffffff

/* Sleep between admin CQ polls while in polling mode */
#define EFA_POLL_INTERVAL_MS 100 /* msecs */

/* Number of entries in the async event queue and in the admin SQ/CQ */
#define EFA_ASYNC_QUEUE_DEPTH 16
#define EFA_ADMIN_QUEUE_DEPTH 32

/* Admin API version packed in the layout of the version register */
#define MIN_EFA_VER \
	((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \
	 (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK))

/* Controller version components: major.minor.sub_minor */
#define EFA_CTRL_MAJOR          0
#define EFA_CTRL_MINOR          0
#define EFA_CTRL_SUB_MINOR      1

/* Controller version packed in the layout of the controller version register */
#define MIN_EFA_CTRL_VER \
	(((EFA_CTRL_MAJOR) << \
	(EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
	((EFA_CTRL_MINOR) << \
	(EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
	(EFA_CTRL_SUB_MINOR))

/* Split a DMA address into the two 32-bit halves written to the device */
#define EFA_DMA_ADDR_TO_UINT32_LOW(x)   ((u32)((u64)(x)))
#define EFA_DMA_ADDR_TO_UINT32_HIGH(x)  ((u32)(((u64)(x)) >> 32))

/* Written to the interrupt mask register when polling mode is enabled */
#define EFA_REGS_ADMIN_INTR_MASK 1
38
/* Progress of an admin command, tracked in its completion context */
enum efa_cmd_status {
	/* Command was placed on the admin submission queue */
	EFA_CMD_SUBMITTED,
	/* A completion entry for the command has been handled */
	EFA_CMD_COMPLETED,
};
43
44 struct efa_comp_ctx {
45         struct completion wait_event;
46         struct efa_admin_acq_entry *user_cqe;
47         u32 comp_size;
48         enum efa_cmd_status status;
49         /* status from the device */
50         u8 comp_status;
51         u8 cmd_opcode;
52         u8 occupied;
53 };
54
55 static const char *efa_com_cmd_str(u8 cmd)
56 {
57 #define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
58
59         switch (cmd) {
60         EFA_CMD_STR_CASE(CREATE_QP);
61         EFA_CMD_STR_CASE(MODIFY_QP);
62         EFA_CMD_STR_CASE(QUERY_QP);
63         EFA_CMD_STR_CASE(DESTROY_QP);
64         EFA_CMD_STR_CASE(CREATE_AH);
65         EFA_CMD_STR_CASE(DESTROY_AH);
66         EFA_CMD_STR_CASE(REG_MR);
67         EFA_CMD_STR_CASE(DEREG_MR);
68         EFA_CMD_STR_CASE(CREATE_CQ);
69         EFA_CMD_STR_CASE(DESTROY_CQ);
70         EFA_CMD_STR_CASE(GET_FEATURE);
71         EFA_CMD_STR_CASE(SET_FEATURE);
72         EFA_CMD_STR_CASE(GET_STATS);
73         EFA_CMD_STR_CASE(ALLOC_PD);
74         EFA_CMD_STR_CASE(DEALLOC_PD);
75         EFA_CMD_STR_CASE(ALLOC_UAR);
76         EFA_CMD_STR_CASE(DEALLOC_UAR);
77         default: return "unknown command opcode";
78         }
79 #undef EFA_CMD_STR_CASE
80 }
81
82 static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
83 {
84         struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
85         struct efa_admin_mmio_req_read_less_resp *read_resp;
86         unsigned long exp_time;
87         u32 mmio_read_reg;
88         u32 err;
89
90         read_resp = mmio_read->read_resp;
91
92         spin_lock(&mmio_read->lock);
93         mmio_read->seq_num++;
94
95         /* trash DMA req_id to identify when hardware is done */
96         read_resp->req_id = mmio_read->seq_num + 0x9aL;
97         mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
98                         EFA_REGS_MMIO_REG_READ_REG_OFF_MASK;
99         mmio_read_reg |= mmio_read->seq_num &
100                          EFA_REGS_MMIO_REG_READ_REQ_ID_MASK;
101
102         writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
103
104         exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
105         do {
106                 if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
107                         break;
108                 udelay(1);
109         } while (time_is_after_jiffies(exp_time));
110
111         if (read_resp->req_id != mmio_read->seq_num) {
112                 ibdev_err_ratelimited(
113                         edev->efa_dev,
114                         "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
115                         mmio_read->seq_num, offset, read_resp->req_id,
116                         read_resp->reg_off);
117                 err = EFA_MMIO_READ_INVALID;
118                 goto out;
119         }
120
121         if (read_resp->reg_off != offset) {
122                 ibdev_err_ratelimited(
123                         edev->efa_dev,
124                         "Reading register failed: wrong offset provided\n");
125                 err = EFA_MMIO_READ_INVALID;
126                 goto out;
127         }
128
129         err = read_resp->reg_val;
130 out:
131         spin_unlock(&mmio_read->lock);
132         return err;
133 }
134
135 static int efa_com_admin_init_sq(struct efa_com_dev *edev)
136 {
137         struct efa_com_admin_queue *aq = &edev->aq;
138         struct efa_com_admin_sq *sq = &aq->sq;
139         u16 size = aq->depth * sizeof(*sq->entries);
140         u32 addr_high;
141         u32 addr_low;
142         u32 aq_caps;
143
144         sq->entries =
145                 dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
146         if (!sq->entries)
147                 return -ENOMEM;
148
149         spin_lock_init(&sq->lock);
150
151         sq->cc = 0;
152         sq->pc = 0;
153         sq->phase = 1;
154
155         sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
156
157         addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
158         addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
159
160         writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
161         writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
162
163         aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
164         aq_caps |= (sizeof(struct efa_admin_aq_entry) <<
165                         EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
166                         EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
167
168         writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
169
170         return 0;
171 }
172
173 static int efa_com_admin_init_cq(struct efa_com_dev *edev)
174 {
175         struct efa_com_admin_queue *aq = &edev->aq;
176         struct efa_com_admin_cq *cq = &aq->cq;
177         u16 size = aq->depth * sizeof(*cq->entries);
178         u32 addr_high;
179         u32 addr_low;
180         u32 acq_caps;
181
182         cq->entries =
183                 dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
184         if (!cq->entries)
185                 return -ENOMEM;
186
187         spin_lock_init(&cq->lock);
188
189         cq->cc = 0;
190         cq->phase = 1;
191
192         addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
193         addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
194
195         writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
196         writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
197
198         acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
199         acq_caps |= (sizeof(struct efa_admin_acq_entry) <<
200                         EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
201                         EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
202         acq_caps |= (aq->msix_vector_idx <<
203                         EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) &
204                         EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK;
205
206         writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
207
208         return 0;
209 }
210
211 static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
212                                    struct efa_aenq_handlers *aenq_handlers)
213 {
214         struct efa_com_aenq *aenq = &edev->aenq;
215         u32 addr_low, addr_high, aenq_caps;
216         u16 size;
217
218         if (!aenq_handlers) {
219                 ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
220                 return -EINVAL;
221         }
222
223         size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
224         aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
225                                            GFP_KERNEL);
226         if (!aenq->entries)
227                 return -ENOMEM;
228
229         aenq->aenq_handlers = aenq_handlers;
230         aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
231         aenq->cc = 0;
232         aenq->phase = 1;
233
234         addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
235         addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
236
237         writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
238         writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
239
240         aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
241         aenq_caps |= (sizeof(struct efa_admin_aenq_entry) <<
242                 EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
243                 EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
244         aenq_caps |= (aenq->msix_vector_idx
245                       << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) &
246                      EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK;
247         writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
248
249         /*
250          * Init cons_db to mark that all entries in the queue
251          * are initially available
252          */
253         writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
254
255         return 0;
256 }
257
258 /* ID to be used with efa_com_get_comp_ctx */
259 static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
260 {
261         u16 ctx_id;
262
263         spin_lock(&aq->comp_ctx_lock);
264         ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
265         aq->comp_ctx_pool_next++;
266         spin_unlock(&aq->comp_ctx_lock);
267
268         return ctx_id;
269 }
270
271 static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
272                                    u16 ctx_id)
273 {
274         spin_lock(&aq->comp_ctx_lock);
275         aq->comp_ctx_pool_next--;
276         aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
277         spin_unlock(&aq->comp_ctx_lock);
278 }
279
280 static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
281                                         struct efa_comp_ctx *comp_ctx)
282 {
283         u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command &
284                      EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
285         u16 ctx_id = cmd_id & (aq->depth - 1);
286
287         ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
288         comp_ctx->occupied = 0;
289         efa_com_dealloc_ctx_id(aq, ctx_id);
290 }
291
292 static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
293                                                  u16 cmd_id, bool capture)
294 {
295         u16 ctx_id = cmd_id & (aq->depth - 1);
296
297         if (aq->comp_ctx[ctx_id].occupied && capture) {
298                 ibdev_err_ratelimited(
299                         aq->efa_dev,
300                         "Completion context for command_id %#x is occupied\n",
301                         cmd_id);
302                 return NULL;
303         }
304
305         if (capture) {
306                 aq->comp_ctx[ctx_id].occupied = 1;
307                 ibdev_dbg(aq->efa_dev,
308                           "Take completion ctxt for command_id %#x\n", cmd_id);
309         }
310
311         return &aq->comp_ctx[ctx_id];
312 }
313
314 static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
315                                                        struct efa_admin_aq_entry *cmd,
316                                                        size_t cmd_size_in_bytes,
317                                                        struct efa_admin_acq_entry *comp,
318                                                        size_t comp_size_in_bytes)
319 {
320         struct efa_admin_aq_entry *aqe;
321         struct efa_comp_ctx *comp_ctx;
322         u16 queue_size_mask;
323         u16 cmd_id;
324         u16 ctx_id;
325         u16 pi;
326
327         queue_size_mask = aq->depth - 1;
328         pi = aq->sq.pc & queue_size_mask;
329
330         ctx_id = efa_com_alloc_ctx_id(aq);
331
332         /* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
333         cmd_id = ctx_id & queue_size_mask;
334         cmd_id |= aq->sq.pc & ~queue_size_mask;
335         cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
336
337         cmd->aq_common_descriptor.command_id = cmd_id;
338         cmd->aq_common_descriptor.flags |= aq->sq.phase &
339                 EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
340
341         comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
342         if (!comp_ctx) {
343                 efa_com_dealloc_ctx_id(aq, ctx_id);
344                 return ERR_PTR(-EINVAL);
345         }
346
347         comp_ctx->status = EFA_CMD_SUBMITTED;
348         comp_ctx->comp_size = comp_size_in_bytes;
349         comp_ctx->user_cqe = comp;
350         comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
351
352         reinit_completion(&comp_ctx->wait_event);
353
354         aqe = &aq->sq.entries[pi];
355         memset(aqe, 0, sizeof(*aqe));
356         memcpy(aqe, cmd, cmd_size_in_bytes);
357
358         aq->sq.pc++;
359         atomic64_inc(&aq->stats.submitted_cmd);
360
361         if ((aq->sq.pc & queue_size_mask) == 0)
362                 aq->sq.phase = !aq->sq.phase;
363
364         /* barrier not needed in case of writel */
365         writel(aq->sq.pc, aq->sq.db_addr);
366
367         return comp_ctx;
368 }
369
370 static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
371 {
372         size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
373         size_t size = aq->depth * sizeof(struct efa_comp_ctx);
374         struct efa_comp_ctx *comp_ctx;
375         u16 i;
376
377         aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
378         aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
379         if (!aq->comp_ctx || !aq->comp_ctx_pool) {
380                 devm_kfree(aq->dmadev, aq->comp_ctx_pool);
381                 devm_kfree(aq->dmadev, aq->comp_ctx);
382                 return -ENOMEM;
383         }
384
385         for (i = 0; i < aq->depth; i++) {
386                 comp_ctx = efa_com_get_comp_ctx(aq, i, false);
387                 if (comp_ctx)
388                         init_completion(&comp_ctx->wait_event);
389
390                 aq->comp_ctx_pool[i] = i;
391         }
392
393         spin_lock_init(&aq->comp_ctx_lock);
394
395         aq->comp_ctx_pool_next = 0;
396
397         return 0;
398 }
399
400 static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
401                                                      struct efa_admin_aq_entry *cmd,
402                                                      size_t cmd_size_in_bytes,
403                                                      struct efa_admin_acq_entry *comp,
404                                                      size_t comp_size_in_bytes)
405 {
406         struct efa_comp_ctx *comp_ctx;
407
408         spin_lock(&aq->sq.lock);
409         if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
410                 ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
411                 spin_unlock(&aq->sq.lock);
412                 return ERR_PTR(-ENODEV);
413         }
414
415         comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
416                                               comp_size_in_bytes);
417         spin_unlock(&aq->sq.lock);
418         if (IS_ERR(comp_ctx))
419                 clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
420
421         return comp_ctx;
422 }
423
424 static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
425                                                    struct efa_admin_acq_entry *cqe)
426 {
427         struct efa_comp_ctx *comp_ctx;
428         u16 cmd_id;
429
430         cmd_id = cqe->acq_common_descriptor.command &
431                  EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
432
433         comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
434         if (!comp_ctx) {
435                 ibdev_err(aq->efa_dev,
436                           "comp_ctx is NULL. Changing the admin queue running state\n");
437                 clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
438                 return;
439         }
440
441         comp_ctx->status = EFA_CMD_COMPLETED;
442         comp_ctx->comp_status = cqe->acq_common_descriptor.status;
443         if (comp_ctx->user_cqe)
444                 memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
445
446         if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
447                 complete(&comp_ctx->wait_event);
448 }
449
450 static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
451 {
452         struct efa_admin_acq_entry *cqe;
453         u16 queue_size_mask;
454         u16 comp_num = 0;
455         u8 phase;
456         u16 ci;
457
458         queue_size_mask = aq->depth - 1;
459
460         ci = aq->cq.cc & queue_size_mask;
461         phase = aq->cq.phase;
462
463         cqe = &aq->cq.entries[ci];
464
465         /* Go over all the completions */
466         while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
467                 EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
468                 /*
469                  * Do not read the rest of the completion entry before the
470                  * phase bit was validated
471                  */
472                 dma_rmb();
473                 efa_com_handle_single_admin_completion(aq, cqe);
474
475                 ci++;
476                 comp_num++;
477                 if (ci == aq->depth) {
478                         ci = 0;
479                         phase = !phase;
480                 }
481
482                 cqe = &aq->cq.entries[ci];
483         }
484
485         aq->cq.cc += comp_num;
486         aq->cq.phase = phase;
487         aq->sq.cc += comp_num;
488         atomic64_add(comp_num, &aq->stats.completed_cmd);
489 }
490
491 static int efa_com_comp_status_to_errno(u8 comp_status)
492 {
493         switch (comp_status) {
494         case EFA_ADMIN_SUCCESS:
495                 return 0;
496         case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
497                 return -ENOMEM;
498         case EFA_ADMIN_UNSUPPORTED_OPCODE:
499                 return -EOPNOTSUPP;
500         case EFA_ADMIN_BAD_OPCODE:
501         case EFA_ADMIN_MALFORMED_REQUEST:
502         case EFA_ADMIN_ILLEGAL_PARAMETER:
503         case EFA_ADMIN_UNKNOWN_ERROR:
504                 return -EINVAL;
505         default:
506                 return -EINVAL;
507         }
508 }
509
510 static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
511                                                      struct efa_com_admin_queue *aq)
512 {
513         unsigned long timeout;
514         unsigned long flags;
515         int err;
516
517         timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
518
519         while (1) {
520                 spin_lock_irqsave(&aq->cq.lock, flags);
521                 efa_com_handle_admin_completion(aq);
522                 spin_unlock_irqrestore(&aq->cq.lock, flags);
523
524                 if (comp_ctx->status != EFA_CMD_SUBMITTED)
525                         break;
526
527                 if (time_is_before_jiffies(timeout)) {
528                         ibdev_err_ratelimited(
529                                 aq->efa_dev,
530                                 "Wait for completion (polling) timeout\n");
531                         /* EFA didn't have any completion */
532                         atomic64_inc(&aq->stats.no_completion);
533
534                         clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
535                         err = -ETIME;
536                         goto out;
537                 }
538
539                 msleep(aq->poll_interval);
540         }
541
542         err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
543 out:
544         efa_com_put_comp_ctx(aq, comp_ctx);
545         return err;
546 }
547
548 static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
549                                                         struct efa_com_admin_queue *aq)
550 {
551         unsigned long flags;
552         int err;
553
554         wait_for_completion_timeout(&comp_ctx->wait_event,
555                                     usecs_to_jiffies(aq->completion_timeout));
556
557         /*
558          * In case the command wasn't completed find out the root cause.
559          * There might be 2 kinds of errors
560          * 1) No completion (timeout reached)
561          * 2) There is completion but the device didn't get any msi-x interrupt.
562          */
563         if (comp_ctx->status == EFA_CMD_SUBMITTED) {
564                 spin_lock_irqsave(&aq->cq.lock, flags);
565                 efa_com_handle_admin_completion(aq);
566                 spin_unlock_irqrestore(&aq->cq.lock, flags);
567
568                 atomic64_inc(&aq->stats.no_completion);
569
570                 if (comp_ctx->status == EFA_CMD_COMPLETED)
571                         ibdev_err_ratelimited(
572                                 aq->efa_dev,
573                                 "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
574                                 efa_com_cmd_str(comp_ctx->cmd_opcode),
575                                 comp_ctx->cmd_opcode, comp_ctx->status,
576                                 comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
577                 else
578                         ibdev_err_ratelimited(
579                                 aq->efa_dev,
580                                 "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
581                                 efa_com_cmd_str(comp_ctx->cmd_opcode),
582                                 comp_ctx->cmd_opcode, comp_ctx->status,
583                                 comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
584
585                 clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
586                 err = -ETIME;
587                 goto out;
588         }
589
590         err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
591 out:
592         efa_com_put_comp_ctx(aq, comp_ctx);
593         return err;
594 }
595
596 /*
597  * There are two types to wait for completion.
598  * Polling mode - wait until the completion is available.
599  * Async mode - wait on wait queue until the completion is ready
600  * (or the timeout expired).
601  * It is expected that the IRQ called efa_com_handle_admin_completion
602  * to mark the completions.
603  */
604 static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
605                                              struct efa_com_admin_queue *aq)
606 {
607         if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
608                 return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
609
610         return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
611 }
612
613 /**
614  * efa_com_cmd_exec - Execute admin command
615  * @aq: admin queue.
616  * @cmd: the admin command to execute.
617  * @cmd_size: the command size.
618  * @comp: command completion return entry.
619  * @comp_size: command completion size.
620  * Submit an admin command and then wait until the device will return a
621  * completion.
622  * The completion will be copied into comp.
623  *
624  * @return - 0 on success, negative value on failure.
625  */
626 int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
627                      struct efa_admin_aq_entry *cmd,
628                      size_t cmd_size,
629                      struct efa_admin_acq_entry *comp,
630                      size_t comp_size)
631 {
632         struct efa_comp_ctx *comp_ctx;
633         int err;
634
635         might_sleep();
636
637         /* In case of queue FULL */
638         down(&aq->avail_cmds);
639
640         ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
641                   efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
642                   cmd->aq_common_descriptor.opcode);
643         comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
644         if (IS_ERR(comp_ctx)) {
645                 ibdev_err_ratelimited(
646                         aq->efa_dev,
647                         "Failed to submit command %s (opcode %u) err %ld\n",
648                         efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
649                         cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
650
651                 up(&aq->avail_cmds);
652                 return PTR_ERR(comp_ctx);
653         }
654
655         err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
656         if (err)
657                 ibdev_err_ratelimited(
658                         aq->efa_dev,
659                         "Failed to process command %s (opcode %u) comp_status %d err %d\n",
660                         efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
661                         cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
662                         err);
663
664         up(&aq->avail_cmds);
665
666         return err;
667 }
668
669 /**
670  * efa_com_admin_destroy - Destroy the admin and the async events queues.
671  * @edev: EFA communication layer struct
672  */
673 void efa_com_admin_destroy(struct efa_com_dev *edev)
674 {
675         struct efa_com_admin_queue *aq = &edev->aq;
676         struct efa_com_aenq *aenq = &edev->aenq;
677         struct efa_com_admin_cq *cq = &aq->cq;
678         struct efa_com_admin_sq *sq = &aq->sq;
679         u16 size;
680
681         clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
682
683         devm_kfree(edev->dmadev, aq->comp_ctx_pool);
684         devm_kfree(edev->dmadev, aq->comp_ctx);
685
686         size = aq->depth * sizeof(*sq->entries);
687         dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
688
689         size = aq->depth * sizeof(*cq->entries);
690         dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
691
692         size = aenq->depth * sizeof(*aenq->entries);
693         dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
694 }
695
696 /**
697  * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
698  * @edev: EFA communication layer struct
699  * @polling: Enable/Disable polling mode
700  *
701  * Set the admin completion mode.
702  */
703 void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
704 {
705         u32 mask_value = 0;
706
707         if (polling)
708                 mask_value = EFA_REGS_ADMIN_INTR_MASK;
709
710         writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
711         if (polling)
712                 set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
713         else
714                 clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
715 }
716
717 static void efa_com_stats_init(struct efa_com_dev *edev)
718 {
719         atomic64_t *s = (atomic64_t *)&edev->aq.stats;
720         int i;
721
722         for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
723                 atomic64_set(s, 0);
724 }
725
726 /**
727  * efa_com_admin_init - Init the admin and the async queues
728  * @edev: EFA communication layer struct
729  * @aenq_handlers: Those handlers to be called upon event.
730  *
731  * Initialize the admin submission and completion queues.
732  * Initialize the asynchronous events notification queues.
733  *
734  * @return - 0 on success, negative value on failure.
735  */
736 int efa_com_admin_init(struct efa_com_dev *edev,
737                        struct efa_aenq_handlers *aenq_handlers)
738 {
739         struct efa_com_admin_queue *aq = &edev->aq;
740         u32 timeout;
741         u32 dev_sts;
742         u32 cap;
743         int err;
744
745         dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
746         if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) {
747                 ibdev_err(edev->efa_dev,
748                           "Device isn't ready, abort com init %#x\n", dev_sts);
749                 return -ENODEV;
750         }
751
752         aq->depth = EFA_ADMIN_QUEUE_DEPTH;
753
754         aq->dmadev = edev->dmadev;
755         aq->efa_dev = edev->efa_dev;
756         set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
757
758         sema_init(&aq->avail_cmds, aq->depth);
759
760         efa_com_stats_init(edev);
761
762         err = efa_com_init_comp_ctxt(aq);
763         if (err)
764                 return err;
765
766         err = efa_com_admin_init_sq(edev);
767         if (err)
768                 goto err_destroy_comp_ctxt;
769
770         err = efa_com_admin_init_cq(edev);
771         if (err)
772                 goto err_destroy_sq;
773
774         efa_com_set_admin_polling_mode(edev, false);
775
776         err = efa_com_admin_init_aenq(edev, aenq_handlers);
777         if (err)
778                 goto err_destroy_cq;
779
780         cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
781         timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
782                   EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
783         if (timeout)
784                 /* the resolution of timeout reg is 100ms */
785                 aq->completion_timeout = timeout * 100000;
786         else
787                 aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
788
789         aq->poll_interval = EFA_POLL_INTERVAL_MS;
790
791         set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
792
793         return 0;
794
795 err_destroy_cq:
796         dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
797                           aq->cq.entries, aq->cq.dma_addr);
798 err_destroy_sq:
799         dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
800                           aq->sq.entries, aq->sq.dma_addr);
801 err_destroy_comp_ctxt:
802         devm_kfree(edev->dmadev, aq->comp_ctx);
803
804         return err;
805 }
806
807 /**
808  * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
809  * @edev: EFA communication layer struct
810  *
811  * This method goes over the admin completion queue and wakes up
812  * all the pending threads that wait on the commands wait event.
813  *
814  * @note: Should be called after MSI-X interrupt.
815  */
816 void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
817 {
818         unsigned long flags;
819
820         spin_lock_irqsave(&edev->aq.cq.lock, flags);
821         efa_com_handle_admin_completion(&edev->aq);
822         spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
823 }
824
825 /*
826  * efa_handle_specific_aenq_event:
827  * return the handler that is relevant to the specific event group
828  */
829 static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
830                                                      u16 group)
831 {
832         struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
833
834         if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
835                 return aenq_handlers->handlers[group];
836
837         return aenq_handlers->unimplemented_handler;
838 }
839
840 /**
841  * efa_com_aenq_intr_handler - AENQ interrupt handler
842  * @edev: EFA communication layer struct
843  * @data: Data of interrupt handler.
844  *
845  * Go over the async event notification queue and call the proper aenq handler.
846  */
847 void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
848 {
849         struct efa_admin_aenq_common_desc *aenq_common;
850         struct efa_com_aenq *aenq = &edev->aenq;
851         struct efa_admin_aenq_entry *aenq_e;
852         efa_aenq_handler handler_cb;
853         u32 processed = 0;
854         u8 phase;
855         u32 ci;
856
857         ci = aenq->cc & (aenq->depth - 1);
858         phase = aenq->phase;
859         aenq_e = &aenq->entries[ci]; /* Get first entry */
860         aenq_common = &aenq_e->aenq_common_desc;
861
862         /* Go over all the events */
863         while ((READ_ONCE(aenq_common->flags) &
864                 EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
865                 /*
866                  * Do not read the rest of the completion entry before the
867                  * phase bit was validated
868                  */
869                 dma_rmb();
870
871                 /* Handle specific event*/
872                 handler_cb = efa_com_get_specific_aenq_cb(edev,
873                                                           aenq_common->group);
874                 handler_cb(data, aenq_e); /* call the actual event handler*/
875
876                 /* Get next event entry */
877                 ci++;
878                 processed++;
879
880                 if (ci == aenq->depth) {
881                         ci = 0;
882                         phase = !phase;
883                 }
884                 aenq_e = &aenq->entries[ci];
885                 aenq_common = &aenq_e->aenq_common_desc;
886         }
887
888         aenq->cc += processed;
889         aenq->phase = phase;
890
891         /* Don't update aenq doorbell if there weren't any processed events */
892         if (!processed)
893                 return;
894
895         /* barrier not needed in case of writel */
896         writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
897 }
898
899 static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
900 {
901         struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
902         u32 addr_high;
903         u32 addr_low;
904
905         /* dma_addr_bits is unknown at this point */
906         addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
907         addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
908
909         writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
910         writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
911 }
912
913 int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
914 {
915         struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
916
917         spin_lock_init(&mmio_read->lock);
918         mmio_read->read_resp =
919                 dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
920                                    &mmio_read->read_resp_dma_addr, GFP_KERNEL);
921         if (!mmio_read->read_resp)
922                 return -ENOMEM;
923
924         efa_com_mmio_reg_read_resp_addr_init(edev);
925
926         mmio_read->read_resp->req_id = 0;
927         mmio_read->seq_num = 0;
928         mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
929
930         return 0;
931 }
932
933 void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
934 {
935         struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
936
937         dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
938                           mmio_read->read_resp, mmio_read->read_resp_dma_addr);
939 }
940
941 int efa_com_validate_version(struct efa_com_dev *edev)
942 {
943         u32 ctrl_ver_masked;
944         u32 ctrl_ver;
945         u32 ver;
946
947         /*
948          * Make sure the EFA version and the controller version are at least
949          * as the driver expects
950          */
951         ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
952         ctrl_ver = efa_com_reg_read32(edev,
953                                       EFA_REGS_CONTROLLER_VERSION_OFF);
954
955         ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
956                   (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >>
957                           EFA_REGS_VERSION_MAJOR_VERSION_SHIFT,
958                   ver & EFA_REGS_VERSION_MINOR_VERSION_MASK);
959
960         if (ver < MIN_EFA_VER) {
961                 ibdev_err(edev->efa_dev,
962                           "EFA version is lower than the minimal version the driver supports\n");
963                 return -EOPNOTSUPP;
964         }
965
966         ibdev_dbg(edev->efa_dev,
967                   "efa controller version: %d.%d.%d implementation version %d\n",
968                   (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
969                           EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
970                   (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
971                           EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
972                   (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
973                   (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
974                           EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
975
976         ctrl_ver_masked =
977                 (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
978                 (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
979                 (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
980
981         /* Validate the ctrl version without the implementation ID */
982         if (ctrl_ver_masked < MIN_EFA_CTRL_VER) {
983                 ibdev_err(edev->efa_dev,
984                           "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
985                 return -EOPNOTSUPP;
986         }
987
988         return 0;
989 }
990
991 /**
992  * efa_com_get_dma_width - Retrieve physical dma address width the device
993  * supports.
994  * @edev: EFA communication layer struct
995  *
996  * Retrieve the maximum physical address bits the device can handle.
997  *
998  * @return: > 0 on Success and negative value otherwise.
999  */
1000 int efa_com_get_dma_width(struct efa_com_dev *edev)
1001 {
1002         u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1003         int width;
1004
1005         width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
1006                 EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
1007
1008         ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
1009
1010         if (width < 32 || width > 64) {
1011                 ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
1012                 return -EINVAL;
1013         }
1014
1015         edev->dma_addr_bits = width;
1016
1017         return width;
1018 }
1019
1020 static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
1021                                 u16 exp_state)
1022 {
1023         u32 val, i;
1024
1025         for (i = 0; i < timeout; i++) {
1026                 val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1027
1028                 if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
1029                     exp_state)
1030                         return 0;
1031
1032                 ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
1033                 msleep(EFA_POLL_INTERVAL_MS);
1034         }
1035
1036         return -ETIME;
1037 }
1038
1039 /**
1040  * efa_com_dev_reset - Perform device FLR to the device.
1041  * @edev: EFA communication layer struct
1042  * @reset_reason: Specify what is the trigger for the reset in case of an error.
1043  *
1044  * @return - 0 on success, negative value on failure.
1045  */
1046 int efa_com_dev_reset(struct efa_com_dev *edev,
1047                       enum efa_regs_reset_reason_types reset_reason)
1048 {
1049         u32 stat, timeout, cap, reset_val;
1050         int err;
1051
1052         stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1053         cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1054
1055         if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) {
1056                 ibdev_err(edev->efa_dev,
1057                           "Device isn't ready, can't reset device\n");
1058                 return -EINVAL;
1059         }
1060
1061         timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
1062                   EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
1063         if (!timeout) {
1064                 ibdev_err(edev->efa_dev, "Invalid timeout value\n");
1065                 return -EINVAL;
1066         }
1067
1068         /* start reset */
1069         reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK;
1070         reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
1071                      EFA_REGS_DEV_CTL_RESET_REASON_MASK;
1072         writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1073
1074         /* reset clears the mmio readless address, restore it */
1075         efa_com_mmio_reg_read_resp_addr_init(edev);
1076
1077         err = wait_for_reset_state(edev, timeout,
1078                                    EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
1079         if (err) {
1080                 ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
1081                 return err;
1082         }
1083
1084         /* reset done */
1085         writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1086         err = wait_for_reset_state(edev, timeout, 0);
1087         if (err) {
1088                 ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
1089                 return err;
1090         }
1091
1092         timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
1093                   EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
1094         if (timeout)
1095                 /* the resolution of timeout reg is 100ms */
1096                 edev->aq.completion_timeout = timeout * 100000;
1097         else
1098                 edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
1099
1100         return 0;
1101 }