]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/crypto/ccp/ccp-dev-v5.c
Merge branches 'pm-core', 'pm-qos', 'pm-domains' and 'pm-opp'
[linux.git] / drivers / crypto / ccp / ccp-dev-v5.c
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2016 Advanced Micro Devices, Inc.
5  *
6  * Author: Gary R Hook <gary.hook@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/kthread.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/interrupt.h>
19 #include <linux/compiler.h>
20 #include <linux/ccp.h>
21
22 #include "ccp-dev.h"
23
24 /* Allocate the requested number of contiguous LSB slots
25  * from the LSB bitmap. Look in the private range for this
26  * queue first; failing that, check the public area.
27  * If no space is available, wait around.
28  * Return: first slot number
29  */
30 static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
31 {
32         struct ccp_device *ccp;
33         int start;
34
35         /* First look at the map for the queue */
36         if (cmd_q->lsb >= 0) {
37                 start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
38                                                         LSB_SIZE,
39                                                         0, count, 0);
40                 if (start < LSB_SIZE) {
41                         bitmap_set(cmd_q->lsbmap, start, count);
42                         return start + cmd_q->lsb * LSB_SIZE;
43                 }
44         }
45
46         /* No joy; try to get an entry from the shared blocks */
47         ccp = cmd_q->ccp;
48         for (;;) {
49                 mutex_lock(&ccp->sb_mutex);
50
51                 start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
52                                                         MAX_LSB_CNT * LSB_SIZE,
53                                                         0,
54                                                         count, 0);
55                 if (start <= MAX_LSB_CNT * LSB_SIZE) {
56                         bitmap_set(ccp->lsbmap, start, count);
57
58                         mutex_unlock(&ccp->sb_mutex);
59                         return start;
60                 }
61
62                 ccp->sb_avail = 0;
63
64                 mutex_unlock(&ccp->sb_mutex);
65
66                 /* Wait for KSB entries to become available */
67                 if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
68                         return 0;
69         }
70 }
71
72 /* Free a number of LSB slots from the bitmap, starting at
73  * the indicated starting slot number.
74  */
75 static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
76                          unsigned int count)
77 {
78         if (!start)
79                 return;
80
81         if (cmd_q->lsb == start) {
82                 /* An entry from the private LSB */
83                 bitmap_clear(cmd_q->lsbmap, start, count);
84         } else {
85                 /* From the shared LSBs */
86                 struct ccp_device *ccp = cmd_q->ccp;
87
88                 mutex_lock(&ccp->sb_mutex);
89                 bitmap_clear(ccp->lsbmap, start, count);
90                 ccp->sb_avail = 1;
91                 mutex_unlock(&ccp->sb_mutex);
92                 wake_up_interruptible_all(&ccp->sb_queue);
93         }
94 }
95
/* CCP version 5: Union to define the function field (cmd_reg1/dword0).
 *
 * Each engine gets its own bit-field view of the same 16-bit storage;
 * "raw" exposes the whole value so that a field built through one of
 * the views can be cleared or copied as a single u16. The bit-field
 * widths and their order are part of the layout and must not be
 * rearranged.
 */
union ccp_function {
        /* AES: size, encrypt/decrypt flag, mode and type */
        struct {
                u16 size:7;
                u16 encrypt:1;
                u16 mode:5;
                u16 type:2;
        } aes;
        /* XTS-AES: same positions as AES, with the mode bits reserved */
        struct {
                u16 size:7;
                u16 encrypt:1;
                u16 rsvd:5;
                u16 type:2;
        } aes_xts;
        /* SHA: only the type field is used */
        struct {
                u16 rsvd1:10;
                u16 type:4;
                u16 rsvd2:1;
        } sha;
        /* RSA: mode and size */
        struct {
                u16 mode:3;
                u16 size:12;
        } rsa;
        /* Pass-through: byte swap, bitwise operation and reflect */
        struct {
                u16 byteswap:2;
                u16 bitwise:3;
                u16 reflect:2;
                u16 rsvd:8;
        } pt;
        /* zlib: no fields defined, all bits reserved */
        struct  {
                u16 rsvd:13;
        } zlib;
        /* ECC: size, type and mode */
        struct {
                u16 size:10;
                u16 type:2;
                u16 mode:3;
        } ecc;
        /* The complete function field as one value */
        u16 raw;
};
135
/* Accessors for the engine-specific views of union ccp_function.
 * "p" is a pointer to a union ccp_function; each macro expands to an
 * lvalue, so it can be used on either side of an assignment.
 */
#define CCP_AES_SIZE(p)         ((p)->aes.size)
#define CCP_AES_ENCRYPT(p)      ((p)->aes.encrypt)
#define CCP_AES_MODE(p)         ((p)->aes.mode)
#define CCP_AES_TYPE(p)         ((p)->aes.type)
#define CCP_XTS_SIZE(p)         ((p)->aes_xts.size)
#define CCP_XTS_ENCRYPT(p)      ((p)->aes_xts.encrypt)
#define CCP_SHA_TYPE(p)         ((p)->sha.type)
#define CCP_RSA_SIZE(p)         ((p)->rsa.size)
#define CCP_PT_BYTESWAP(p)      ((p)->pt.byteswap)
#define CCP_PT_BITWISE(p)       ((p)->pt.bitwise)
#define CCP_ECC_MODE(p)         ((p)->ecc.mode)
/* NOTE(review): the ecc view of union ccp_function in this file has
 * size/type/mode members only; there is no "one" member, so expanding
 * this macro would not compile. Confirm the intended field before use.
 */
#define CCP_ECC_AFFINE(p)       ((p)->ecc.one)
148
/* Accessors for the words of a version 5 command descriptor.
 * "p" is a pointer to the descriptor (this file passes a
 * struct ccp5_desc *, declared outside this file); each macro expands
 * to an lvalue naming one member of it.
 */

/* Word 0: control bits, function field and engine selector */
#define CCP5_CMD_DW0(p)         ((p)->dw0)
#define CCP5_CMD_SOC(p)         (CCP5_CMD_DW0(p).soc)
#define CCP5_CMD_IOC(p)         (CCP5_CMD_DW0(p).ioc)
#define CCP5_CMD_INIT(p)        (CCP5_CMD_DW0(p).init)
#define CCP5_CMD_EOM(p)         (CCP5_CMD_DW0(p).eom)
#define CCP5_CMD_FUNCTION(p)    (CCP5_CMD_DW0(p).function)
#define CCP5_CMD_ENGINE(p)      (CCP5_CMD_DW0(p).engine)
#define CCP5_CMD_PROT(p)        (CCP5_CMD_DW0(p).prot)

/* Word 1: length */
#define CCP5_CMD_DW1(p)         ((p)->length)
#define CCP5_CMD_LEN(p)         (CCP5_CMD_DW1(p))

/* Word 2: low part of the source address */
#define CCP5_CMD_DW2(p)         ((p)->src_lo)
#define CCP5_CMD_SRC_LO(p)      (CCP5_CMD_DW2(p))

/* Word 3: source memory type, high source address, LSB context id */
#define CCP5_CMD_DW3(p)         ((p)->dw3)
#define CCP5_CMD_SRC_MEM(p)     ((p)->dw3.src_mem)
#define CCP5_CMD_SRC_HI(p)      ((p)->dw3.src_hi)
#define CCP5_CMD_LSB_ID(p)      ((p)->dw3.lsb_cxt_id)
#define CCP5_CMD_FIX_SRC(p)     ((p)->dw3.fixed)

/* Words 4/5: destination address and memory type; the SHA_LO/SHA_HI
 * accessors name alternative members of the same two words
 */
#define CCP5_CMD_DW4(p)         ((p)->dw4)
#define CCP5_CMD_DST_LO(p)      (CCP5_CMD_DW4(p).dst_lo)
#define CCP5_CMD_DW5(p)         ((p)->dw5.fields.dst_hi)
#define CCP5_CMD_DST_HI(p)      (CCP5_CMD_DW5(p))
#define CCP5_CMD_DST_MEM(p)     ((p)->dw5.fields.dst_mem)
#define CCP5_CMD_FIX_DST(p)     ((p)->dw5.fields.fixed)
#define CCP5_CMD_SHA_LO(p)      ((p)->dw4.sha_len_lo)
#define CCP5_CMD_SHA_HI(p)      ((p)->dw5.sha_len_hi)

/* Word 6/7: key address and key memory type */
#define CCP5_CMD_DW6(p)         ((p)->key_lo)
#define CCP5_CMD_KEY_LO(p)      (CCP5_CMD_DW6(p))
#define CCP5_CMD_DW7(p)         ((p)->dw7)
#define CCP5_CMD_KEY_HI(p)      ((p)->dw7.key_hi)
#define CCP5_CMD_KEY_MEM(p)     ((p)->dw7.key_mem)
190
191 static inline u32 low_address(unsigned long addr)
192 {
193         return (u64)addr & 0x0ffffffff;
194 }
195
196 static inline u32 high_address(unsigned long addr)
197 {
198         return ((u64)addr >> 32) & 0x00000ffff;
199 }
200
201 static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
202 {
203         unsigned int head_idx, n;
204         u32 head_lo, queue_start;
205
206         queue_start = low_address(cmd_q->qdma_tail);
207         head_lo = ioread32(cmd_q->reg_head_lo);
208         head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc);
209
210         n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
211
212         return n % COMMANDS_PER_QUEUE; /* Always one unused spot */
213 }
214
215 static int ccp5_do_cmd(struct ccp5_desc *desc,
216                        struct ccp_cmd_queue *cmd_q)
217 {
218         u32 *mP;
219         __le32 *dP;
220         u32 tail;
221         int     i;
222         int ret = 0;
223
224         if (CCP5_CMD_SOC(desc)) {
225                 CCP5_CMD_IOC(desc) = 1;
226                 CCP5_CMD_SOC(desc) = 0;
227         }
228         mutex_lock(&cmd_q->q_mutex);
229
230         mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
231         dP = (__le32 *) desc;
232         for (i = 0; i < 8; i++)
233                 mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
234
235         cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
236
237         /* The data used by this command must be flushed to memory */
238         wmb();
239
240         /* Write the new tail address back to the queue register */
241         tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
242         iowrite32(tail, cmd_q->reg_tail_lo);
243
244         /* Turn the queue back on using our cached control register */
245         iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
246         mutex_unlock(&cmd_q->q_mutex);
247
248         if (CCP5_CMD_IOC(desc)) {
249                 /* Wait for the job to complete */
250                 ret = wait_event_interruptible(cmd_q->int_queue,
251                                                cmd_q->int_rcvd);
252                 if (ret || cmd_q->cmd_error) {
253                         if (cmd_q->cmd_error)
254                                 ccp_log_error(cmd_q->ccp,
255                                               cmd_q->cmd_error);
256                         /* A version 5 device doesn't use Job IDs... */
257                         if (!ret)
258                                 ret = -EIO;
259                 }
260                 cmd_q->int_rcvd = 0;
261         }
262
263         return 0;
264 }
265
266 static int ccp5_perform_aes(struct ccp_op *op)
267 {
268         struct ccp5_desc desc;
269         union ccp_function function;
270         u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
271
272         /* Zero out all the fields of the command desc */
273         memset(&desc, 0, Q_DESC_SIZE);
274
275         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
276
277         CCP5_CMD_SOC(&desc) = op->soc;
278         CCP5_CMD_IOC(&desc) = 1;
279         CCP5_CMD_INIT(&desc) = op->init;
280         CCP5_CMD_EOM(&desc) = op->eom;
281         CCP5_CMD_PROT(&desc) = 0;
282
283         function.raw = 0;
284         CCP_AES_ENCRYPT(&function) = op->u.aes.action;
285         CCP_AES_MODE(&function) = op->u.aes.mode;
286         CCP_AES_TYPE(&function) = op->u.aes.type;
287         if (op->u.aes.mode == CCP_AES_MODE_CFB)
288                 CCP_AES_SIZE(&function) = 0x7f;
289
290         CCP5_CMD_FUNCTION(&desc) = function.raw;
291
292         CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
293
294         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
295         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
296         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
297
298         CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
299         CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
300         CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
301
302         CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
303         CCP5_CMD_KEY_HI(&desc) = 0;
304         CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
305         CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
306
307         return ccp5_do_cmd(&desc, op->cmd_q);
308 }
309
310 static int ccp5_perform_xts_aes(struct ccp_op *op)
311 {
312         struct ccp5_desc desc;
313         union ccp_function function;
314         u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
315
316         /* Zero out all the fields of the command desc */
317         memset(&desc, 0, Q_DESC_SIZE);
318
319         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
320
321         CCP5_CMD_SOC(&desc) = op->soc;
322         CCP5_CMD_IOC(&desc) = 1;
323         CCP5_CMD_INIT(&desc) = op->init;
324         CCP5_CMD_EOM(&desc) = op->eom;
325         CCP5_CMD_PROT(&desc) = 0;
326
327         function.raw = 0;
328         CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
329         CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
330         CCP5_CMD_FUNCTION(&desc) = function.raw;
331
332         CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
333
334         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
335         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
336         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
337
338         CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
339         CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
340         CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
341
342         CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
343         CCP5_CMD_KEY_HI(&desc) =  0;
344         CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
345         CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
346
347         return ccp5_do_cmd(&desc, op->cmd_q);
348 }
349
350 static int ccp5_perform_sha(struct ccp_op *op)
351 {
352         struct ccp5_desc desc;
353         union ccp_function function;
354
355         /* Zero out all the fields of the command desc */
356         memset(&desc, 0, Q_DESC_SIZE);
357
358         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
359
360         CCP5_CMD_SOC(&desc) = op->soc;
361         CCP5_CMD_IOC(&desc) = 1;
362         CCP5_CMD_INIT(&desc) = 1;
363         CCP5_CMD_EOM(&desc) = op->eom;
364         CCP5_CMD_PROT(&desc) = 0;
365
366         function.raw = 0;
367         CCP_SHA_TYPE(&function) = op->u.sha.type;
368         CCP5_CMD_FUNCTION(&desc) = function.raw;
369
370         CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
371
372         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
373         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
374         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
375
376         CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
377
378         if (op->eom) {
379                 CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
380                 CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
381         } else {
382                 CCP5_CMD_SHA_LO(&desc) = 0;
383                 CCP5_CMD_SHA_HI(&desc) = 0;
384         }
385
386         return ccp5_do_cmd(&desc, op->cmd_q);
387 }
388
389 static int ccp5_perform_rsa(struct ccp_op *op)
390 {
391         struct ccp5_desc desc;
392         union ccp_function function;
393
394         /* Zero out all the fields of the command desc */
395         memset(&desc, 0, Q_DESC_SIZE);
396
397         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
398
399         CCP5_CMD_SOC(&desc) = op->soc;
400         CCP5_CMD_IOC(&desc) = 1;
401         CCP5_CMD_INIT(&desc) = 0;
402         CCP5_CMD_EOM(&desc) = 1;
403         CCP5_CMD_PROT(&desc) = 0;
404
405         function.raw = 0;
406         CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3;
407         CCP5_CMD_FUNCTION(&desc) = function.raw;
408
409         CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
410
411         /* Source is from external memory */
412         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
413         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
414         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
415
416         /* Destination is in external memory */
417         CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
418         CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
419         CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
420
421         /* Exponent is in LSB memory */
422         CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE;
423         CCP5_CMD_KEY_HI(&desc) = 0;
424         CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
425
426         return ccp5_do_cmd(&desc, op->cmd_q);
427 }
428
429 static int ccp5_perform_passthru(struct ccp_op *op)
430 {
431         struct ccp5_desc desc;
432         union ccp_function function;
433         struct ccp_dma_info *saddr = &op->src.u.dma;
434         struct ccp_dma_info *daddr = &op->dst.u.dma;
435
436         memset(&desc, 0, Q_DESC_SIZE);
437
438         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
439
440         CCP5_CMD_SOC(&desc) = 0;
441         CCP5_CMD_IOC(&desc) = 1;
442         CCP5_CMD_INIT(&desc) = 0;
443         CCP5_CMD_EOM(&desc) = op->eom;
444         CCP5_CMD_PROT(&desc) = 0;
445
446         function.raw = 0;
447         CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
448         CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
449         CCP5_CMD_FUNCTION(&desc) = function.raw;
450
451         /* Length of source data is always 256 bytes */
452         if (op->src.type == CCP_MEMTYPE_SYSTEM)
453                 CCP5_CMD_LEN(&desc) = saddr->length;
454         else
455                 CCP5_CMD_LEN(&desc) = daddr->length;
456
457         if (op->src.type == CCP_MEMTYPE_SYSTEM) {
458                 CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
459                 CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
460                 CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
461
462                 if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
463                         CCP5_CMD_LSB_ID(&desc) = op->sb_key;
464         } else {
465                 u32 key_addr = op->src.u.sb * CCP_SB_BYTES;
466
467                 CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
468                 CCP5_CMD_SRC_HI(&desc) = 0;
469                 CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
470         }
471
472         if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
473                 CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
474                 CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
475                 CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
476         } else {
477                 u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;
478
479                 CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
480                 CCP5_CMD_DST_HI(&desc) = 0;
481                 CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
482         }
483
484         return ccp5_do_cmd(&desc, op->cmd_q);
485 }
486
487 static int ccp5_perform_ecc(struct ccp_op *op)
488 {
489         struct ccp5_desc desc;
490         union ccp_function function;
491
492         /* Zero out all the fields of the command desc */
493         memset(&desc, 0, Q_DESC_SIZE);
494
495         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
496
497         CCP5_CMD_SOC(&desc) = 0;
498         CCP5_CMD_IOC(&desc) = 1;
499         CCP5_CMD_INIT(&desc) = 0;
500         CCP5_CMD_EOM(&desc) = 1;
501         CCP5_CMD_PROT(&desc) = 0;
502
503         function.raw = 0;
504         function.ecc.mode = op->u.ecc.function;
505         CCP5_CMD_FUNCTION(&desc) = function.raw;
506
507         CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
508
509         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
510         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
511         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
512
513         CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
514         CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
515         CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
516
517         return ccp5_do_cmd(&desc, op->cmd_q);
518 }
519
520 static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
521 {
522         int q_mask = 1 << cmd_q->id;
523         int queues = 0;
524         int j;
525
526         /* Build a bit mask to know which LSBs this queue has access to.
527          * Don't bother with segment 0 as it has special privileges.
528          */
529         for (j = 1; j < MAX_LSB_CNT; j++) {
530                 if (status & q_mask)
531                         bitmap_set(cmd_q->lsbmask, j, 1);
532                 status >>= LSB_REGION_WIDTH;
533         }
534         queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
535         dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
536                  cmd_q->id, queues);
537
538         return queues ? 0 : -EINVAL;
539 }
540
541
542 static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
543                                         int lsb_cnt, int n_lsbs,
544                                         unsigned long *lsb_pub)
545 {
546         DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
547         int bitno;
548         int qlsb_wgt;
549         int i;
550
551         /* For each queue:
552          * If the count of potential LSBs available to a queue matches the
553          * ordinal given to us in lsb_cnt:
554          * Copy the mask of possible LSBs for this queue into "qlsb";
555          * For each bit in qlsb, see if the corresponding bit in the
556          * aggregation mask is set; if so, we have a match.
557          *     If we have a match, clear the bit in the aggregation to
558          *     mark it as no longer available.
559          *     If there is no match, clear the bit in qlsb and keep looking.
560          */
561         for (i = 0; i < ccp->cmd_q_count; i++) {
562                 struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
563
564                 qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
565
566                 if (qlsb_wgt == lsb_cnt) {
567                         bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
568
569                         bitno = find_first_bit(qlsb, MAX_LSB_CNT);
570                         while (bitno < MAX_LSB_CNT) {
571                                 if (test_bit(bitno, lsb_pub)) {
572                                         /* We found an available LSB
573                                          * that this queue can access
574                                          */
575                                         cmd_q->lsb = bitno;
576                                         bitmap_clear(lsb_pub, bitno, 1);
577                                         dev_info(ccp->dev,
578                                                  "Queue %d gets LSB %d\n",
579                                                  i, bitno);
580                                         break;
581                                 }
582                                 bitmap_clear(qlsb, bitno, 1);
583                                 bitno = find_first_bit(qlsb, MAX_LSB_CNT);
584                         }
585                         if (bitno >= MAX_LSB_CNT)
586                                 return -EINVAL;
587                         n_lsbs--;
588                 }
589         }
590         return n_lsbs;
591 }
592
593 /* For each queue, from the most- to least-constrained:
594  * find an LSB that can be assigned to the queue. If there are N queues that
595  * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
596  * dedicated LSB. Remaining LSB regions become a shared resource.
597  * If we have fewer LSBs than queues, all LSB regions become shared resources.
598  */
599 static int ccp_assign_lsbs(struct ccp_device *ccp)
600 {
601         DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
602         DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
603         int n_lsbs = 0;
604         int bitno;
605         int i, lsb_cnt;
606         int rc = 0;
607
608         bitmap_zero(lsb_pub, MAX_LSB_CNT);
609
610         /* Create an aggregate bitmap to get a total count of available LSBs */
611         for (i = 0; i < ccp->cmd_q_count; i++)
612                 bitmap_or(lsb_pub,
613                           lsb_pub, ccp->cmd_q[i].lsbmask,
614                           MAX_LSB_CNT);
615
616         n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
617
618         if (n_lsbs >= ccp->cmd_q_count) {
619                 /* We have enough LSBS to give every queue a private LSB.
620                  * Brute force search to start with the queues that are more
621                  * constrained in LSB choice. When an LSB is privately
622                  * assigned, it is removed from the public mask.
623                  * This is an ugly N squared algorithm with some optimization.
624                  */
625                 for (lsb_cnt = 1;
626                      n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
627                      lsb_cnt++) {
628                         rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
629                                                           lsb_pub);
630                         if (rc < 0)
631                                 return -EINVAL;
632                         n_lsbs = rc;
633                 }
634         }
635
636         rc = 0;
637         /* What's left of the LSBs, according to the public mask, now become
638          * shared. Any zero bits in the lsb_pub mask represent an LSB region
639          * that can't be used as a shared resource, so mark the LSB slots for
640          * them as "in use".
641          */
642         bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
643
644         bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
645         while (bitno < MAX_LSB_CNT) {
646                 bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
647                 bitmap_set(qlsb, bitno, 1);
648                 bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
649         }
650
651         return rc;
652 }
653
654 static int ccp5_init(struct ccp_device *ccp)
655 {
656         struct device *dev = ccp->dev;
657         struct ccp_cmd_queue *cmd_q;
658         struct dma_pool *dma_pool;
659         char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
660         unsigned int qmr, qim, i;
661         u64 status;
662         u32 status_lo, status_hi;
663         int ret;
664
665         /* Find available queues */
666         qim = 0;
667         qmr = ioread32(ccp->io_regs + Q_MASK_REG);
668         for (i = 0; i < MAX_HW_QUEUES; i++) {
669
670                 if (!(qmr & (1 << i)))
671                         continue;
672
673                 /* Allocate a dma pool for this queue */
674                 snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
675                          ccp->name, i);
676                 dma_pool = dma_pool_create(dma_pool_name, dev,
677                                            CCP_DMAPOOL_MAX_SIZE,
678                                            CCP_DMAPOOL_ALIGN, 0);
679                 if (!dma_pool) {
680                         dev_err(dev, "unable to allocate dma pool\n");
681                         ret = -ENOMEM;
682                 }
683
684                 cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
685                 ccp->cmd_q_count++;
686
687                 cmd_q->ccp = ccp;
688                 cmd_q->id = i;
689                 cmd_q->dma_pool = dma_pool;
690                 mutex_init(&cmd_q->q_mutex);
691
692                 /* Page alignment satisfies our needs for N <= 128 */
693                 BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
694                 cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
695                 cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
696                                                    &cmd_q->qbase_dma,
697                                                    GFP_KERNEL);
698                 if (!cmd_q->qbase) {
699                         dev_err(dev, "unable to allocate command queue\n");
700                         ret = -ENOMEM;
701                         goto e_pool;
702                 }
703
704                 cmd_q->qidx = 0;
705                 /* Preset some register values and masks that are queue
706                  * number dependent
707                  */
708                 cmd_q->reg_control = ccp->io_regs +
709                                      CMD5_Q_STATUS_INCR * (i + 1);
710                 cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
711                 cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
712                 cmd_q->reg_int_enable = cmd_q->reg_control +
713                                         CMD5_Q_INT_ENABLE_BASE;
714                 cmd_q->reg_interrupt_status = cmd_q->reg_control +
715                                               CMD5_Q_INTERRUPT_STATUS_BASE;
716                 cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
717                 cmd_q->reg_int_status = cmd_q->reg_control +
718                                         CMD5_Q_INT_STATUS_BASE;
719                 cmd_q->reg_dma_status = cmd_q->reg_control +
720                                         CMD5_Q_DMA_STATUS_BASE;
721                 cmd_q->reg_dma_read_status = cmd_q->reg_control +
722                                              CMD5_Q_DMA_READ_STATUS_BASE;
723                 cmd_q->reg_dma_write_status = cmd_q->reg_control +
724                                               CMD5_Q_DMA_WRITE_STATUS_BASE;
725
726                 init_waitqueue_head(&cmd_q->int_queue);
727
728                 dev_dbg(dev, "queue #%u available\n", i);
729         }
730         if (ccp->cmd_q_count == 0) {
731                 dev_notice(dev, "no command queues available\n");
732                 ret = -EIO;
733                 goto e_pool;
734         }
735         dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
736
737         /* Turn off the queues and disable interrupts until ready */
738         for (i = 0; i < ccp->cmd_q_count; i++) {
739                 cmd_q = &ccp->cmd_q[i];
740
741                 cmd_q->qcontrol = 0; /* Start with nothing */
742                 iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
743
744                 /* Disable the interrupts */
745                 iowrite32(0x00, cmd_q->reg_int_enable);
746                 ioread32(cmd_q->reg_int_status);
747                 ioread32(cmd_q->reg_status);
748
749                 /* Clear the interrupts */
750                 iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
751         }
752
753         dev_dbg(dev, "Requesting an IRQ...\n");
754         /* Request an irq */
755         ret = ccp->get_irq(ccp);
756         if (ret) {
757                 dev_err(dev, "unable to allocate an IRQ\n");
758                 goto e_pool;
759         }
760
761         dev_dbg(dev, "Loading LSB map...\n");
762         /* Copy the private LSB mask to the public registers */
763         status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
764         status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
765         iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
766         iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
767         status = ((u64)status_hi<<30) | (u64)status_lo;
768
769         dev_dbg(dev, "Configuring virtual queues...\n");
770         /* Configure size of each virtual queue accessible to host */
771         for (i = 0; i < ccp->cmd_q_count; i++) {
772                 u32 dma_addr_lo;
773                 u32 dma_addr_hi;
774
775                 cmd_q = &ccp->cmd_q[i];
776
777                 cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
778                 cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
779
780                 cmd_q->qdma_tail = cmd_q->qbase_dma;
781                 dma_addr_lo = low_address(cmd_q->qdma_tail);
782                 iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
783                 iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
784
785                 dma_addr_hi = high_address(cmd_q->qdma_tail);
786                 cmd_q->qcontrol |= (dma_addr_hi << 16);
787                 iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
788
789                 /* Find the LSB regions accessible to the queue */
790                 ccp_find_lsb_regions(cmd_q, status);
791                 cmd_q->lsb = -1; /* Unassigned value */
792         }
793
794         dev_dbg(dev, "Assigning LSBs...\n");
795         ret = ccp_assign_lsbs(ccp);
796         if (ret) {
797                 dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
798                 goto e_irq;
799         }
800
801         /* Optimization: pre-allocate LSB slots for each queue */
802         for (i = 0; i < ccp->cmd_q_count; i++) {
803                 ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
804                 ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
805         }
806
807         dev_dbg(dev, "Starting threads...\n");
808         /* Create a kthread for each queue */
809         for (i = 0; i < ccp->cmd_q_count; i++) {
810                 struct task_struct *kthread;
811
812                 cmd_q = &ccp->cmd_q[i];
813
814                 kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
815                                          "%s-q%u", ccp->name, cmd_q->id);
816                 if (IS_ERR(kthread)) {
817                         dev_err(dev, "error creating queue thread (%ld)\n",
818                                 PTR_ERR(kthread));
819                         ret = PTR_ERR(kthread);
820                         goto e_kthread;
821                 }
822
823                 cmd_q->kthread = kthread;
824                 wake_up_process(kthread);
825         }
826
827         dev_dbg(dev, "Enabling interrupts...\n");
828         /* Enable interrupts */
829         for (i = 0; i < ccp->cmd_q_count; i++) {
830                 cmd_q = &ccp->cmd_q[i];
831                 iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
832         }
833
834         dev_dbg(dev, "Registering device...\n");
835         /* Put this on the unit list to make it available */
836         ccp_add_device(ccp);
837
838         ret = ccp_register_rng(ccp);
839         if (ret)
840                 goto e_kthread;
841
842         /* Register the DMA engine support */
843         ret = ccp_dmaengine_register(ccp);
844         if (ret)
845                 goto e_hwrng;
846
847         return 0;
848
849 e_hwrng:
850         ccp_unregister_rng(ccp);
851
852 e_kthread:
853         for (i = 0; i < ccp->cmd_q_count; i++)
854                 if (ccp->cmd_q[i].kthread)
855                         kthread_stop(ccp->cmd_q[i].kthread);
856
857 e_irq:
858         ccp->free_irq(ccp);
859
860 e_pool:
861         for (i = 0; i < ccp->cmd_q_count; i++)
862                 dma_pool_destroy(ccp->cmd_q[i].dma_pool);
863
864         return ret;
865 }
866
867 static void ccp5_destroy(struct ccp_device *ccp)
868 {
869         struct device *dev = ccp->dev;
870         struct ccp_cmd_queue *cmd_q;
871         struct ccp_cmd *cmd;
872         unsigned int i;
873
874         /* Unregister the DMA engine */
875         ccp_dmaengine_unregister(ccp);
876
877         /* Unregister the RNG */
878         ccp_unregister_rng(ccp);
879
880         /* Remove this device from the list of available units first */
881         ccp_del_device(ccp);
882
883         /* Disable and clear interrupts */
884         for (i = 0; i < ccp->cmd_q_count; i++) {
885                 cmd_q = &ccp->cmd_q[i];
886
887                 /* Turn off the run bit */
888                 iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
889
890                 /* Disable the interrupts */
891                 iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
892
893                 /* Clear the interrupt status */
894                 iowrite32(0x00, cmd_q->reg_int_enable);
895                 ioread32(cmd_q->reg_int_status);
896                 ioread32(cmd_q->reg_status);
897         }
898
899         /* Stop the queue kthreads */
900         for (i = 0; i < ccp->cmd_q_count; i++)
901                 if (ccp->cmd_q[i].kthread)
902                         kthread_stop(ccp->cmd_q[i].kthread);
903
904         ccp->free_irq(ccp);
905
906         for (i = 0; i < ccp->cmd_q_count; i++) {
907                 cmd_q = &ccp->cmd_q[i];
908                 dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
909                                   cmd_q->qbase_dma);
910         }
911
912         /* Flush the cmd and backlog queue */
913         while (!list_empty(&ccp->cmd)) {
914                 /* Invoke the callback directly with an error code */
915                 cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
916                 list_del(&cmd->entry);
917                 cmd->callback(cmd->data, -ENODEV);
918         }
919         while (!list_empty(&ccp->backlog)) {
920                 /* Invoke the callback directly with an error code */
921                 cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
922                 list_del(&cmd->entry);
923                 cmd->callback(cmd->data, -ENODEV);
924         }
925 }
926
927 static irqreturn_t ccp5_irq_handler(int irq, void *data)
928 {
929         struct device *dev = data;
930         struct ccp_device *ccp = dev_get_drvdata(dev);
931         u32 status;
932         unsigned int i;
933
934         for (i = 0; i < ccp->cmd_q_count; i++) {
935                 struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
936
937                 status = ioread32(cmd_q->reg_interrupt_status);
938
939                 if (status) {
940                         cmd_q->int_status = status;
941                         cmd_q->q_status = ioread32(cmd_q->reg_status);
942                         cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
943
944                         /* On error, only save the first error value */
945                         if ((status & INT_ERROR) && !cmd_q->cmd_error)
946                                 cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
947
948                         cmd_q->int_rcvd = 1;
949
950                         /* Acknowledge the interrupt and wake the kthread */
951                         iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
952                         wake_up_interruptible(&cmd_q->int_queue);
953                 }
954         }
955
956         return IRQ_HANDLED;
957 }
958
959 static void ccp5_config(struct ccp_device *ccp)
960 {
961         /* Public side */
962         iowrite32(0x0, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
963 }
964
965 static void ccp5other_config(struct ccp_device *ccp)
966 {
967         int i;
968         u32 rnd;
969
970         /* We own all of the queues on the NTB CCP */
971
972         iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
973         iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
974         for (i = 0; i < 12; i++) {
975                 rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
976                 iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
977         }
978
979         iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
980         iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
981         iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
982
983         iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
984         iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
985
986         iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
987
988         ccp5_config(ccp);
989 }
990
/* Version 5 adds some functionality, but is essentially the same as v3 */
992 static const struct ccp_actions ccp5_actions = {
993         .aes = ccp5_perform_aes,
994         .xts_aes = ccp5_perform_xts_aes,
995         .sha = ccp5_perform_sha,
996         .rsa = ccp5_perform_rsa,
997         .passthru = ccp5_perform_passthru,
998         .ecc = ccp5_perform_ecc,
999         .sballoc = ccp_lsb_alloc,
1000         .sbfree = ccp_lsb_free,
1001         .init = ccp5_init,
1002         .destroy = ccp5_destroy,
1003         .get_free_slots = ccp5_get_free_slots,
1004         .irqhandler = ccp5_irq_handler,
1005 };
1006
1007 const struct ccp_vdata ccpv5a = {
1008         .version = CCP_VERSION(5, 0),
1009         .setup = ccp5_config,
1010         .perform = &ccp5_actions,
1011         .bar = 2,
1012         .offset = 0x0,
1013 };
1014
1015 const struct ccp_vdata ccpv5b = {
1016         .version = CCP_VERSION(5, 0),
1017         .setup = ccp5other_config,
1018         .perform = &ccp5_actions,
1019         .bar = 2,
1020         .offset = 0x0,
1021 };