]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/staging/media/tegra-vde/tegra-vde.c
media: staging: tegra-vde: Align bitstream size to 16K
[linux.git] / drivers / staging / media / tegra-vde / tegra-vde.c
1 /*
2  * NVIDIA Tegra Video decoder driver
3  *
4  * Copyright (C) 2016-2017 Dmitry Osipenko <digetx@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/clk.h>
13 #include <linux/dma-buf.h>
14 #include <linux/genalloc.h>
15 #include <linux/interrupt.h>
16 #include <linux/iopoll.h>
17 #include <linux/miscdevice.h>
18 #include <linux/module.h>
19 #include <linux/platform_device.h>
20 #include <linux/pm_runtime.h>
21 #include <linux/reset.h>
22 #include <linux/slab.h>
23 #include <linux/uaccess.h>
24
25 #include <soc/tegra/pmc.h>
26
27 #include "uapi.h"
28
/* Register offsets used relative to the vde->bsev mapping. */
#define ICMDQUE_WR		0x00
#define CMDQUE_CONTROL		0x08
#define INTR_STATUS		0x18
#define BSE_INT_ENB		0x40
#define BSE_CONFIG		0x44

/* Bits polled in the INTR_STATUS register by tegra_vde_wait_bsev(). */
#define BSE_ICMDQUE_EMPTY	BIT(3)
#define BSE_DMA_BUSY		BIT(23)
37
/*
 * VDE_WR - log a 32-bit register write via dev_dbg() and then perform it
 * with writel_relaxed().
 * @__data: value to write
 * @__addr: destination address
 *
 * A "struct tegra_vde *vde" must be in scope at the call site: the macro
 * uses vde->miscdev.parent as the device for the debug message. The
 * address argument is stringified into that message exactly as it is
 * spelled at the call site.
 *
 * @__data is evaluated exactly once, so the value that is logged is always
 * the value that is written, even if the argument has side effects.
 */
#define VDE_WR(__data, __addr)					\
do {								\
	u32 __vde_wr_val = (__data);				\
								\
	dev_dbg(vde->miscdev.parent,				\
		"%s: %d: 0x%08X => " #__addr "\n",		\
		__func__, __LINE__, __vde_wr_val);		\
	writel_relaxed(__vde_wr_val, (__addr));			\
} while (0)
45
46 struct video_frame {
47         struct dma_buf_attachment *y_dmabuf_attachment;
48         struct dma_buf_attachment *cb_dmabuf_attachment;
49         struct dma_buf_attachment *cr_dmabuf_attachment;
50         struct dma_buf_attachment *aux_dmabuf_attachment;
51         struct sg_table *y_sgt;
52         struct sg_table *cb_sgt;
53         struct sg_table *cr_sgt;
54         struct sg_table *aux_sgt;
55         dma_addr_t y_addr;
56         dma_addr_t cb_addr;
57         dma_addr_t cr_addr;
58         dma_addr_t aux_addr;
59         u32 frame_num;
60         u32 flags;
61 };
62
63 struct tegra_vde {
64         void __iomem *sxe;
65         void __iomem *bsev;
66         void __iomem *mbe;
67         void __iomem *ppe;
68         void __iomem *mce;
69         void __iomem *tfe;
70         void __iomem *ppb;
71         void __iomem *vdma;
72         void __iomem *frameid;
73         struct mutex lock;
74         struct miscdevice miscdev;
75         struct reset_control *rst;
76         struct gen_pool *iram_pool;
77         struct completion decode_completion;
78         struct clk *clk;
79         dma_addr_t iram_lists_addr;
80         u32 *iram;
81 };
82
83 static void tegra_vde_set_bits(struct tegra_vde *vde,
84                                u32 mask, void __iomem *regs)
85 {
86         u32 value = readl_relaxed(regs);
87
88         VDE_WR(value | mask, regs);
89 }
90
91 static int tegra_vde_wait_mbe(struct tegra_vde *vde)
92 {
93         u32 tmp;
94
95         return readl_relaxed_poll_timeout(vde->mbe + 0x8C, tmp,
96                                           (tmp >= 0x10), 1, 100);
97 }
98
99 static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde *vde,
100                                          unsigned int refs_nb,
101                                          bool setup_refs)
102 {
103         u32 frame_idx_enb_mask = 0;
104         u32 value;
105         unsigned int frame_idx;
106         unsigned int idx;
107         int err;
108
109         VDE_WR(0xD0000000 | (0 << 23), vde->mbe + 0x80);
110         VDE_WR(0xD0200000 | (0 << 23), vde->mbe + 0x80);
111
112         err = tegra_vde_wait_mbe(vde);
113         if (err)
114                 return err;
115
116         if (!setup_refs)
117                 return 0;
118
119         for (idx = 0, frame_idx = 1; idx < refs_nb; idx++, frame_idx++) {
120                 VDE_WR(0xD0000000 | (frame_idx << 23), vde->mbe + 0x80);
121                 VDE_WR(0xD0200000 | (frame_idx << 23), vde->mbe + 0x80);
122
123                 frame_idx_enb_mask |= frame_idx << (6 * (idx % 4));
124
125                 if (idx % 4 == 3 || idx == refs_nb - 1) {
126                         value = 0xC0000000;
127                         value |= (idx >> 2) << 24;
128                         value |= frame_idx_enb_mask;
129
130                         VDE_WR(value, vde->mbe + 0x80);
131
132                         err = tegra_vde_wait_mbe(vde);
133                         if (err)
134                                 return err;
135
136                         frame_idx_enb_mask = 0;
137                 }
138         }
139
140         return 0;
141 }
142
143 static void tegra_vde_mbe_set_0xa_reg(struct tegra_vde *vde, int reg, u32 val)
144 {
145         VDE_WR(0xA0000000 | (reg << 24) | (val & 0xFFFF), vde->mbe + 0x80);
146         VDE_WR(0xA0000000 | ((reg + 1) << 24) | (val >> 16), vde->mbe + 0x80);
147 }
148
149 static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
150 {
151         struct device *dev = vde->miscdev.parent;
152         u32 value;
153         int err;
154
155         err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
156                                          !(value & BIT(2)), 1, 100);
157         if (err) {
158                 dev_err(dev, "BSEV unknown bit timeout\n");
159                 return err;
160         }
161
162         err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
163                                          (value & BSE_ICMDQUE_EMPTY), 1, 100);
164         if (err) {
165                 dev_err(dev, "BSEV ICMDQUE flush timeout\n");
166                 return err;
167         }
168
169         if (!wait_dma)
170                 return 0;
171
172         err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
173                                          !(value & BSE_DMA_BUSY), 1, 100);
174         if (err) {
175                 dev_err(dev, "BSEV DMA timeout\n");
176                 return err;
177         }
178
179         return 0;
180 }
181
182 static int tegra_vde_push_to_bsev_icmdqueue(struct tegra_vde *vde,
183                                             u32 value, bool wait_dma)
184 {
185         VDE_WR(value, vde->bsev + ICMDQUE_WR);
186
187         return tegra_vde_wait_bsev(vde, wait_dma);
188 }
189
190 static void tegra_vde_setup_frameid(struct tegra_vde *vde,
191                                     struct video_frame *frame,
192                                     unsigned int frameid,
193                                     u32 mbs_width, u32 mbs_height)
194 {
195         u32 y_addr  = frame ? frame->y_addr  : 0x6CDEAD00;
196         u32 cb_addr = frame ? frame->cb_addr : 0x6CDEAD00;
197         u32 cr_addr = frame ? frame->cr_addr : 0x6CDEAD00;
198         u32 value1 = frame ? ((mbs_width << 16) | mbs_height) : 0;
199         u32 value2 = frame ? ((((mbs_width + 1) >> 1) << 6) | 1) : 0;
200
201         VDE_WR(y_addr  >> 8, vde->frameid + 0x000 + frameid * 4);
202         VDE_WR(cb_addr >> 8, vde->frameid + 0x100 + frameid * 4);
203         VDE_WR(cr_addr >> 8, vde->frameid + 0x180 + frameid * 4);
204         VDE_WR(value1,       vde->frameid + 0x080 + frameid * 4);
205         VDE_WR(value2,       vde->frameid + 0x280 + frameid * 4);
206 }
207
208 static void tegra_setup_frameidx(struct tegra_vde *vde,
209                                  struct video_frame *frames,
210                                  unsigned int frames_nb,
211                                  u32 mbs_width, u32 mbs_height)
212 {
213         unsigned int idx;
214
215         for (idx = 0; idx < frames_nb; idx++)
216                 tegra_vde_setup_frameid(vde, &frames[idx], idx,
217                                         mbs_width, mbs_height);
218
219         for (; idx < 17; idx++)
220                 tegra_vde_setup_frameid(vde, NULL, idx, 0, 0);
221 }
222
223 static void tegra_vde_setup_iram_entry(struct tegra_vde *vde,
224                                        unsigned int table,
225                                        unsigned int row,
226                                        u32 value1, u32 value2)
227 {
228         u32 *iram_tables = vde->iram;
229
230         dev_dbg(vde->miscdev.parent, "IRAM table %u: row %u: 0x%08X 0x%08X\n",
231                 table, row, value1, value2);
232
233         iram_tables[0x20 * table + row * 2] = value1;
234         iram_tables[0x20 * table + row * 2 + 1] = value2;
235 }
236
237 static void tegra_vde_setup_iram_tables(struct tegra_vde *vde,
238                                         struct video_frame *dpb_frames,
239                                         unsigned int ref_frames_nb,
240                                         unsigned int with_earlier_poc_nb)
241 {
242         struct video_frame *frame;
243         u32 value, aux_addr;
244         int with_later_poc_nb;
245         unsigned int i, k;
246
247         dev_dbg(vde->miscdev.parent, "DPB: Frame 0: frame_num = %d\n",
248                 dpb_frames[0].frame_num);
249
250         dev_dbg(vde->miscdev.parent, "REF L0:\n");
251
252         for (i = 0; i < 16; i++) {
253                 if (i < ref_frames_nb) {
254                         frame = &dpb_frames[i + 1];
255
256                         aux_addr = frame->aux_addr;
257
258                         value  = (i + 1) << 26;
259                         value |= !(frame->flags & FLAG_B_FRAME) << 25;
260                         value |= 1 << 24;
261                         value |= frame->frame_num;
262
263                         dev_dbg(vde->miscdev.parent,
264                                 "\tFrame %d: frame_num = %d B_frame = %d\n",
265                                 i + 1, frame->frame_num,
266                                 (frame->flags & FLAG_B_FRAME));
267                 } else {
268                         aux_addr = 0x6ADEAD00;
269                         value = 0;
270                 }
271
272                 tegra_vde_setup_iram_entry(vde, 0, i, value, aux_addr);
273                 tegra_vde_setup_iram_entry(vde, 1, i, value, aux_addr);
274                 tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
275                 tegra_vde_setup_iram_entry(vde, 3, i, value, aux_addr);
276         }
277
278         if (!(dpb_frames[0].flags & FLAG_B_FRAME))
279                 return;
280
281         if (with_earlier_poc_nb >= ref_frames_nb)
282                 return;
283
284         with_later_poc_nb = ref_frames_nb - with_earlier_poc_nb;
285
286         dev_dbg(vde->miscdev.parent,
287                 "REF L1: with_later_poc_nb %d with_earlier_poc_nb %d\n",
288                  with_later_poc_nb, with_earlier_poc_nb);
289
290         for (i = 0, k = with_earlier_poc_nb; i < with_later_poc_nb; i++, k++) {
291                 frame = &dpb_frames[k + 1];
292
293                 aux_addr = frame->aux_addr;
294
295                 value  = (k + 1) << 26;
296                 value |= !(frame->flags & FLAG_B_FRAME) << 25;
297                 value |= 1 << 24;
298                 value |= frame->frame_num;
299
300                 dev_dbg(vde->miscdev.parent,
301                         "\tFrame %d: frame_num = %d\n",
302                         k + 1, frame->frame_num);
303
304                 tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
305         }
306
307         for (k = 0; i < ref_frames_nb; i++, k++) {
308                 frame = &dpb_frames[k + 1];
309
310                 aux_addr = frame->aux_addr;
311
312                 value  = (k + 1) << 26;
313                 value |= !(frame->flags & FLAG_B_FRAME) << 25;
314                 value |= 1 << 24;
315                 value |= frame->frame_num;
316
317                 dev_dbg(vde->miscdev.parent,
318                         "\tFrame %d: frame_num = %d\n",
319                         k + 1, frame->frame_num);
320
321                 tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
322         }
323 }
324
325 static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
326                                       struct tegra_vde_h264_decoder_ctx *ctx,
327                                       struct video_frame *dpb_frames,
328                                       dma_addr_t bitstream_data_addr,
329                                       size_t bitstream_data_size,
330                                       unsigned int macroblocks_nb)
331 {
332         struct device *dev = vde->miscdev.parent;
333         u32 value;
334         int err;
335
336         tegra_vde_set_bits(vde, 0x000A, vde->sxe + 0xF0);
337         tegra_vde_set_bits(vde, 0x000B, vde->bsev + CMDQUE_CONTROL);
338         tegra_vde_set_bits(vde, 0x8002, vde->mbe + 0x50);
339         tegra_vde_set_bits(vde, 0x000A, vde->mbe + 0xA0);
340         tegra_vde_set_bits(vde, 0x000A, vde->ppe + 0x14);
341         tegra_vde_set_bits(vde, 0x000A, vde->ppe + 0x28);
342         tegra_vde_set_bits(vde, 0x0A00, vde->mce + 0x08);
343         tegra_vde_set_bits(vde, 0x000A, vde->tfe + 0x00);
344         tegra_vde_set_bits(vde, 0x0005, vde->vdma + 0x04);
345
346         VDE_WR(0x00000000, vde->vdma + 0x1C);
347         VDE_WR(0x00000000, vde->vdma + 0x00);
348         VDE_WR(0x00000007, vde->vdma + 0x04);
349         VDE_WR(0x00000007, vde->frameid + 0x200);
350         VDE_WR(0x00000005, vde->tfe + 0x04);
351         VDE_WR(0x00000000, vde->mbe + 0x84);
352         VDE_WR(0x00000010, vde->sxe + 0x08);
353         VDE_WR(0x00000150, vde->sxe + 0x54);
354         VDE_WR(0x0000054C, vde->sxe + 0x58);
355         VDE_WR(0x00000E34, vde->sxe + 0x5C);
356         VDE_WR(0x063C063C, vde->mce + 0x10);
357         VDE_WR(0x0003FC00, vde->bsev + INTR_STATUS);
358         VDE_WR(0x0000150D, vde->bsev + BSE_CONFIG);
359         VDE_WR(0x00000100, vde->bsev + BSE_INT_ENB);
360         VDE_WR(0x00000000, vde->bsev + 0x98);
361         VDE_WR(0x00000060, vde->bsev + 0x9C);
362
363         memset(vde->iram + 128, 0, macroblocks_nb / 2);
364
365         tegra_setup_frameidx(vde, dpb_frames, ctx->dpb_frames_nb,
366                              ctx->pic_width_in_mbs, ctx->pic_height_in_mbs);
367
368         tegra_vde_setup_iram_tables(vde, dpb_frames,
369                                     ctx->dpb_frames_nb - 1,
370                                     ctx->dpb_ref_frames_with_earlier_poc_nb);
371         wmb();
372
373         VDE_WR(0x00000000, vde->bsev + 0x8C);
374         VDE_WR(bitstream_data_addr + bitstream_data_size,
375                vde->bsev + 0x54);
376
377         value = ctx->pic_width_in_mbs << 11 | ctx->pic_height_in_mbs << 3;
378
379         VDE_WR(value, vde->bsev + 0x88);
380
381         err = tegra_vde_wait_bsev(vde, false);
382         if (err)
383                 return err;
384
385         err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x800003FC, false);
386         if (err)
387                 return err;
388
389         value = 0x01500000;
390         value |= ((vde->iram_lists_addr + 512) >> 2) & 0xFFFF;
391
392         err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
393         if (err)
394                 return err;
395
396         err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x840F054C, false);
397         if (err)
398                 return err;
399
400         err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x80000080, false);
401         if (err)
402                 return err;
403
404         value = 0x0E340000 | ((vde->iram_lists_addr >> 2) & 0xFFFF);
405
406         err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
407         if (err)
408                 return err;
409
410         value = 0x00800005;
411         value |= ctx->pic_width_in_mbs << 11;
412         value |= ctx->pic_height_in_mbs << 3;
413
414         VDE_WR(value, vde->sxe + 0x10);
415
416         value = !ctx->baseline_profile << 17;
417         value |= ctx->level_idc << 13;
418         value |= ctx->log2_max_pic_order_cnt_lsb << 7;
419         value |= ctx->pic_order_cnt_type << 5;
420         value |= ctx->log2_max_frame_num;
421
422         VDE_WR(value, vde->sxe + 0x40);
423
424         value = ctx->pic_init_qp << 25;
425         value |= !!(ctx->deblocking_filter_control_present_flag) << 2;
426         value |= !!ctx->pic_order_present_flag;
427
428         VDE_WR(value, vde->sxe + 0x44);
429
430         value = ctx->chroma_qp_index_offset;
431         value |= ctx->num_ref_idx_l0_active_minus1 << 5;
432         value |= ctx->num_ref_idx_l1_active_minus1 << 10;
433         value |= !!ctx->constrained_intra_pred_flag << 15;
434
435         VDE_WR(value, vde->sxe + 0x48);
436
437         value = 0x0C000000;
438         value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 24;
439
440         VDE_WR(value, vde->sxe + 0x4C);
441
442         value = 0x03800000;
443         value |= bitstream_data_size & GENMASK(19, 15);
444
445         VDE_WR(value, vde->sxe + 0x68);
446
447         VDE_WR(bitstream_data_addr, vde->sxe + 0x6C);
448
449         value = 0x10000005;
450         value |= ctx->pic_width_in_mbs << 11;
451         value |= ctx->pic_height_in_mbs << 3;
452
453         VDE_WR(value, vde->mbe + 0x80);
454
455         value = 0x26800000;
456         value |= ctx->level_idc << 4;
457         value |= !ctx->baseline_profile << 1;
458         value |= !!ctx->direct_8x8_inference_flag;
459
460         VDE_WR(value, vde->mbe + 0x80);
461
462         VDE_WR(0xF4000001, vde->mbe + 0x80);
463         VDE_WR(0x20000000, vde->mbe + 0x80);
464         VDE_WR(0xF4000101, vde->mbe + 0x80);
465
466         value = 0x20000000;
467         value |= ctx->chroma_qp_index_offset << 8;
468
469         VDE_WR(value, vde->mbe + 0x80);
470
471         err = tegra_vde_setup_mbe_frame_idx(vde,
472                                             ctx->dpb_frames_nb - 1,
473                                             ctx->pic_order_cnt_type == 0);
474         if (err) {
475                 dev_err(dev, "MBE frames setup failed %d\n", err);
476                 return err;
477         }
478
479         tegra_vde_mbe_set_0xa_reg(vde, 0, 0x000009FC);
480         tegra_vde_mbe_set_0xa_reg(vde, 2, 0x61DEAD00);
481         tegra_vde_mbe_set_0xa_reg(vde, 4, 0x62DEAD00);
482         tegra_vde_mbe_set_0xa_reg(vde, 6, 0x63DEAD00);
483         tegra_vde_mbe_set_0xa_reg(vde, 8, dpb_frames[0].aux_addr);
484
485         value = 0xFC000000;
486         value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 2;
487
488         if (!ctx->baseline_profile)
489                 value |= !!(dpb_frames[0].flags & FLAG_REFERENCE) << 1;
490
491         VDE_WR(value, vde->mbe + 0x80);
492
493         err = tegra_vde_wait_mbe(vde);
494         if (err) {
495                 dev_err(dev, "MBE programming failed %d\n", err);
496                 return err;
497         }
498
499         return 0;
500 }
501
502 static void tegra_vde_decode_frame(struct tegra_vde *vde,
503                                    unsigned int macroblocks_nb)
504 {
505         reinit_completion(&vde->decode_completion);
506
507         VDE_WR(0x00000001, vde->bsev + 0x8C);
508         VDE_WR(0x20000000 | (macroblocks_nb - 1), vde->sxe + 0x00);
509 }
510
511 static void tegra_vde_detach_and_put_dmabuf(struct dma_buf_attachment *a,
512                                             struct sg_table *sgt,
513                                             enum dma_data_direction dma_dir)
514 {
515         struct dma_buf *dmabuf = a->dmabuf;
516
517         dma_buf_unmap_attachment(a, sgt, dma_dir);
518         dma_buf_detach(dmabuf, a);
519         dma_buf_put(dmabuf);
520 }
521
522 static int tegra_vde_attach_dmabuf(struct device *dev,
523                                    int fd,
524                                    unsigned long offset,
525                                    size_t min_size,
526                                    size_t align_size,
527                                    struct dma_buf_attachment **a,
528                                    dma_addr_t *addr,
529                                    struct sg_table **s,
530                                    size_t *size,
531                                    enum dma_data_direction dma_dir)
532 {
533         struct dma_buf_attachment *attachment;
534         struct dma_buf *dmabuf;
535         struct sg_table *sgt;
536         int err;
537
538         dmabuf = dma_buf_get(fd);
539         if (IS_ERR(dmabuf)) {
540                 dev_err(dev, "Invalid dmabuf FD\n");
541                 return PTR_ERR(dmabuf);
542         }
543
544         if (dmabuf->size & (align_size - 1)) {
545                 dev_err(dev, "Unaligned dmabuf 0x%zX, "
546                              "should be aligned to 0x%zX\n",
547                         dmabuf->size, align_size);
548                 return -EINVAL;
549         }
550
551         if ((u64)offset + min_size > dmabuf->size) {
552                 dev_err(dev, "Too small dmabuf size %zu @0x%lX, "
553                              "should be at least %zu\n",
554                         dmabuf->size, offset, min_size);
555                 return -EINVAL;
556         }
557
558         attachment = dma_buf_attach(dmabuf, dev);
559         if (IS_ERR(attachment)) {
560                 dev_err(dev, "Failed to attach dmabuf\n");
561                 err = PTR_ERR(attachment);
562                 goto err_put;
563         }
564
565         sgt = dma_buf_map_attachment(attachment, dma_dir);
566         if (IS_ERR(sgt)) {
567                 dev_err(dev, "Failed to get dmabufs sg_table\n");
568                 err = PTR_ERR(sgt);
569                 goto err_detach;
570         }
571
572         if (sgt->nents != 1) {
573                 dev_err(dev, "Sparse DMA region is unsupported\n");
574                 err = -EINVAL;
575                 goto err_unmap;
576         }
577
578         *addr = sg_dma_address(sgt->sgl) + offset;
579         *a = attachment;
580         *s = sgt;
581
582         if (size)
583                 *size = dmabuf->size - offset;
584
585         return 0;
586
587 err_unmap:
588         dma_buf_unmap_attachment(attachment, sgt, dma_dir);
589 err_detach:
590         dma_buf_detach(dmabuf, attachment);
591 err_put:
592         dma_buf_put(dmabuf);
593
594         return err;
595 }
596
597 static int tegra_vde_attach_dmabufs_to_frame(struct device *dev,
598                                              struct video_frame *frame,
599                                              struct tegra_vde_h264_frame *src,
600                                              enum dma_data_direction dma_dir,
601                                              bool baseline_profile,
602                                              size_t csize)
603 {
604         int err;
605
606         err = tegra_vde_attach_dmabuf(dev, src->y_fd,
607                                       src->y_offset, csize * 4, SZ_256,
608                                       &frame->y_dmabuf_attachment,
609                                       &frame->y_addr,
610                                       &frame->y_sgt,
611                                       NULL, dma_dir);
612         if (err)
613                 return err;
614
615         err = tegra_vde_attach_dmabuf(dev, src->cb_fd,
616                                       src->cb_offset, csize, SZ_256,
617                                       &frame->cb_dmabuf_attachment,
618                                       &frame->cb_addr,
619                                       &frame->cb_sgt,
620                                       NULL, dma_dir);
621         if (err)
622                 goto err_release_y;
623
624         err = tegra_vde_attach_dmabuf(dev, src->cr_fd,
625                                       src->cr_offset, csize, SZ_256,
626                                       &frame->cr_dmabuf_attachment,
627                                       &frame->cr_addr,
628                                       &frame->cr_sgt,
629                                       NULL, dma_dir);
630         if (err)
631                 goto err_release_cb;
632
633         if (baseline_profile) {
634                 frame->aux_addr = 0x64DEAD00;
635                 return 0;
636         }
637
638         err = tegra_vde_attach_dmabuf(dev, src->aux_fd,
639                                       src->aux_offset, csize, SZ_256,
640                                       &frame->aux_dmabuf_attachment,
641                                       &frame->aux_addr,
642                                       &frame->aux_sgt,
643                                       NULL, dma_dir);
644         if (err)
645                 goto err_release_cr;
646
647         return 0;
648
649 err_release_cr:
650         tegra_vde_detach_and_put_dmabuf(frame->cr_dmabuf_attachment,
651                                         frame->cr_sgt, dma_dir);
652 err_release_cb:
653         tegra_vde_detach_and_put_dmabuf(frame->cb_dmabuf_attachment,
654                                         frame->cb_sgt, dma_dir);
655 err_release_y:
656         tegra_vde_detach_and_put_dmabuf(frame->y_dmabuf_attachment,
657                                         frame->y_sgt, dma_dir);
658
659         return err;
660 }
661
662 static void tegra_vde_release_frame_dmabufs(struct video_frame *frame,
663                                             enum dma_data_direction dma_dir,
664                                             bool baseline_profile)
665 {
666         if (!baseline_profile)
667                 tegra_vde_detach_and_put_dmabuf(frame->aux_dmabuf_attachment,
668                                                 frame->aux_sgt, dma_dir);
669
670         tegra_vde_detach_and_put_dmabuf(frame->cr_dmabuf_attachment,
671                                         frame->cr_sgt, dma_dir);
672
673         tegra_vde_detach_and_put_dmabuf(frame->cb_dmabuf_attachment,
674                                         frame->cb_sgt, dma_dir);
675
676         tegra_vde_detach_and_put_dmabuf(frame->y_dmabuf_attachment,
677                                         frame->y_sgt, dma_dir);
678 }
679
680 static int tegra_vde_validate_frame(struct device *dev,
681                                     struct tegra_vde_h264_frame *frame)
682 {
683         if (frame->frame_num > 0x7FFFFF) {
684                 dev_err(dev, "Bad frame_num %u\n", frame->frame_num);
685                 return -EINVAL;
686         }
687
688         return 0;
689 }
690
691 static int tegra_vde_validate_h264_ctx(struct device *dev,
692                                        struct tegra_vde_h264_decoder_ctx *ctx)
693 {
694         if (ctx->dpb_frames_nb == 0 || ctx->dpb_frames_nb > 17) {
695                 dev_err(dev, "Bad DPB size %u\n", ctx->dpb_frames_nb);
696                 return -EINVAL;
697         }
698
699         if (ctx->level_idc > 15) {
700                 dev_err(dev, "Bad level value %u\n", ctx->level_idc);
701                 return -EINVAL;
702         }
703
704         if (ctx->pic_init_qp > 52) {
705                 dev_err(dev, "Bad pic_init_qp value %u\n", ctx->pic_init_qp);
706                 return -EINVAL;
707         }
708
709         if (ctx->log2_max_pic_order_cnt_lsb > 16) {
710                 dev_err(dev, "Bad log2_max_pic_order_cnt_lsb value %u\n",
711                         ctx->log2_max_pic_order_cnt_lsb);
712                 return -EINVAL;
713         }
714
715         if (ctx->log2_max_frame_num > 16) {
716                 dev_err(dev, "Bad log2_max_frame_num value %u\n",
717                         ctx->log2_max_frame_num);
718                 return -EINVAL;
719         }
720
721         if (ctx->chroma_qp_index_offset > 31) {
722                 dev_err(dev, "Bad chroma_qp_index_offset value %u\n",
723                         ctx->chroma_qp_index_offset);
724                 return -EINVAL;
725         }
726
727         if (ctx->pic_order_cnt_type > 2) {
728                 dev_err(dev, "Bad pic_order_cnt_type value %u\n",
729                         ctx->pic_order_cnt_type);
730                 return -EINVAL;
731         }
732
733         if (ctx->num_ref_idx_l0_active_minus1 > 15) {
734                 dev_err(dev, "Bad num_ref_idx_l0_active_minus1 value %u\n",
735                         ctx->num_ref_idx_l0_active_minus1);
736                 return -EINVAL;
737         }
738
739         if (ctx->num_ref_idx_l1_active_minus1 > 15) {
740                 dev_err(dev, "Bad num_ref_idx_l1_active_minus1 value %u\n",
741                         ctx->num_ref_idx_l1_active_minus1);
742                 return -EINVAL;
743         }
744
745         if (!ctx->pic_width_in_mbs || ctx->pic_width_in_mbs > 127) {
746                 dev_err(dev, "Bad pic_width_in_mbs value %u\n",
747                         ctx->pic_width_in_mbs);
748                 return -EINVAL;
749         }
750
751         if (!ctx->pic_height_in_mbs || ctx->pic_height_in_mbs > 127) {
752                 dev_err(dev, "Bad pic_height_in_mbs value %u\n",
753                         ctx->pic_height_in_mbs);
754                 return -EINVAL;
755         }
756
757         return 0;
758 }
759
760 static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
761                                        unsigned long vaddr)
762 {
763         struct device *dev = vde->miscdev.parent;
764         struct tegra_vde_h264_decoder_ctx ctx;
765         struct tegra_vde_h264_frame frames[17];
766         struct tegra_vde_h264_frame __user *frames_user;
767         struct video_frame *dpb_frames;
768         struct dma_buf_attachment *bitstream_data_dmabuf_attachment;
769         struct sg_table *bitstream_sgt;
770         enum dma_data_direction dma_dir;
771         dma_addr_t bitstream_data_addr;
772         dma_addr_t bsev_ptr;
773         size_t bitstream_data_size;
774         unsigned int macroblocks_nb;
775         unsigned int read_bytes;
776         unsigned int i;
777         long timeout;
778         int ret, err;
779
780         if (copy_from_user(&ctx, (void __user *)vaddr, sizeof(ctx)))
781                 return -EFAULT;
782
783         ret = tegra_vde_validate_h264_ctx(dev, &ctx);
784         if (ret)
785                 return ret;
786
787         ret = tegra_vde_attach_dmabuf(dev, ctx.bitstream_data_fd,
788                                       ctx.bitstream_data_offset,
789                                       SZ_16K, SZ_16K,
790                                       &bitstream_data_dmabuf_attachment,
791                                       &bitstream_data_addr,
792                                       &bitstream_sgt,
793                                       &bitstream_data_size,
794                                       DMA_TO_DEVICE);
795         if (ret)
796                 return ret;
797
798         dpb_frames = kcalloc(ctx.dpb_frames_nb, sizeof(*dpb_frames),
799                              GFP_KERNEL);
800         if (!dpb_frames) {
801                 ret = -ENOMEM;
802                 goto release_bitstream_dmabuf;
803         }
804
805         macroblocks_nb = ctx.pic_width_in_mbs * ctx.pic_height_in_mbs;
806         frames_user = u64_to_user_ptr(ctx.dpb_frames_ptr);
807
808         if (copy_from_user(frames, frames_user,
809                            ctx.dpb_frames_nb * sizeof(*frames))) {
810                 ret = -EFAULT;
811                 goto free_dpb_frames;
812         }
813
814         for (i = 0; i < ctx.dpb_frames_nb; i++) {
815                 ret = tegra_vde_validate_frame(dev, &frames[i]);
816                 if (ret)
817                         goto release_dpb_frames;
818
819                 dpb_frames[i].flags = frames[i].flags;
820                 dpb_frames[i].frame_num = frames[i].frame_num;
821
822                 dma_dir = (i == 0) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
823
824                 ret = tegra_vde_attach_dmabufs_to_frame(dev, &dpb_frames[i],
825                                                         &frames[i], dma_dir,
826                                                         ctx.baseline_profile,
827                                                         macroblocks_nb * 64);
828                 if (ret)
829                         goto release_dpb_frames;
830         }
831
832         ret = mutex_lock_interruptible(&vde->lock);
833         if (ret)
834                 goto release_dpb_frames;
835
836         ret = pm_runtime_get_sync(dev);
837         if (ret < 0)
838                 goto unlock;
839
840         /*
841          * We rely on the VDE registers reset value, otherwise VDE
842          * causes bus lockup.
843          */
844         ret = reset_control_reset(vde->rst);
845         if (ret) {
846                 dev_err(dev, "Failed to reset HW: %d\n", ret);
847                 goto put_runtime_pm;
848         }
849
850         ret = tegra_vde_setup_hw_context(vde, &ctx, dpb_frames,
851                                          bitstream_data_addr,
852                                          bitstream_data_size,
853                                          macroblocks_nb);
854         if (ret)
855                 goto put_runtime_pm;
856
857         tegra_vde_decode_frame(vde, macroblocks_nb);
858
859         timeout = wait_for_completion_interruptible_timeout(
860                         &vde->decode_completion, msecs_to_jiffies(1000));
861         if (timeout == 0) {
862                 bsev_ptr = readl_relaxed(vde->bsev + 0x10);
863                 macroblocks_nb = readl_relaxed(vde->sxe + 0xC8) & 0x1FFF;
864                 read_bytes = bsev_ptr ? bsev_ptr - bitstream_data_addr : 0;
865
866                 dev_err(dev, "Decoding failed: "
867                                 "read 0x%X bytes, %u macroblocks parsed\n",
868                         read_bytes, macroblocks_nb);
869
870                 ret = -EIO;
871         } else if (timeout < 0) {
872                 ret = timeout;
873         }
874
875         err = reset_control_assert(vde->rst);
876         if (err)
877                 dev_err(dev, "Failed to assert HW reset: %d\n", err);
878
879 put_runtime_pm:
880         pm_runtime_mark_last_busy(dev);
881         pm_runtime_put_autosuspend(dev);
882
883 unlock:
884         mutex_unlock(&vde->lock);
885
886 release_dpb_frames:
887         while (i--) {
888                 dma_dir = (i == 0) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
889
890                 tegra_vde_release_frame_dmabufs(&dpb_frames[i], dma_dir,
891                                                 ctx.baseline_profile);
892         }
893
894 free_dpb_frames:
895         kfree(dpb_frames);
896
897 release_bitstream_dmabuf:
898         tegra_vde_detach_and_put_dmabuf(bitstream_data_dmabuf_attachment,
899                                         bitstream_sgt, DMA_TO_DEVICE);
900
901         return ret;
902 }
903
904 static long tegra_vde_unlocked_ioctl(struct file *filp,
905                                      unsigned int cmd, unsigned long arg)
906 {
907         struct miscdevice *miscdev = filp->private_data;
908         struct tegra_vde *vde = container_of(miscdev, struct tegra_vde,
909                                              miscdev);
910
911         switch (cmd) {
912         case TEGRA_VDE_IOCTL_DECODE_H264:
913                 return tegra_vde_ioctl_decode_h264(vde, arg);
914         }
915
916         dev_err(miscdev->parent, "Invalid IOCTL command %u\n", cmd);
917
918         return -ENOTTY;
919 }
920
921 static const struct file_operations tegra_vde_fops = {
922         .owner          = THIS_MODULE,
923         .unlocked_ioctl = tegra_vde_unlocked_ioctl,
924 };
925
926 static irqreturn_t tegra_vde_isr(int irq, void *data)
927 {
928         struct tegra_vde *vde = data;
929
930         tegra_vde_set_bits(vde, 0, vde->frameid + 0x208);
931         complete(&vde->decode_completion);
932
933         return IRQ_HANDLED;
934 }
935
936 static int tegra_vde_runtime_suspend(struct device *dev)
937 {
938         struct tegra_vde *vde = dev_get_drvdata(dev);
939         int err;
940
941         err = tegra_powergate_power_off(TEGRA_POWERGATE_VDEC);
942         if (err) {
943                 dev_err(dev, "Failed to power down HW: %d\n", err);
944                 return err;
945         }
946
947         clk_disable_unprepare(vde->clk);
948
949         return 0;
950 }
951
952 static int tegra_vde_runtime_resume(struct device *dev)
953 {
954         struct tegra_vde *vde = dev_get_drvdata(dev);
955         int err;
956
957         err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_VDEC,
958                                                 vde->clk, vde->rst);
959         if (err) {
960                 dev_err(dev, "Failed to power up HW : %d\n", err);
961                 return err;
962         }
963
964         return 0;
965 }
966
967 static int tegra_vde_probe(struct platform_device *pdev)
968 {
969         struct device *dev = &pdev->dev;
970         struct resource *regs;
971         struct tegra_vde *vde;
972         int irq, err;
973
974         vde = devm_kzalloc(dev, sizeof(*vde), GFP_KERNEL);
975         if (!vde)
976                 return -ENOMEM;
977
978         platform_set_drvdata(pdev, vde);
979
980         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sxe");
981         if (!regs)
982                 return -ENODEV;
983
984         vde->sxe = devm_ioremap_resource(dev, regs);
985         if (IS_ERR(vde->sxe))
986                 return PTR_ERR(vde->sxe);
987
988         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bsev");
989         if (!regs)
990                 return -ENODEV;
991
992         vde->bsev = devm_ioremap_resource(dev, regs);
993         if (IS_ERR(vde->bsev))
994                 return PTR_ERR(vde->bsev);
995
996         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mbe");
997         if (!regs)
998                 return -ENODEV;
999
1000         vde->mbe = devm_ioremap_resource(dev, regs);
1001         if (IS_ERR(vde->mbe))
1002                 return PTR_ERR(vde->mbe);
1003
1004         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ppe");
1005         if (!regs)
1006                 return -ENODEV;
1007
1008         vde->ppe = devm_ioremap_resource(dev, regs);
1009         if (IS_ERR(vde->ppe))
1010                 return PTR_ERR(vde->ppe);
1011
1012         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mce");
1013         if (!regs)
1014                 return -ENODEV;
1015
1016         vde->mce = devm_ioremap_resource(dev, regs);
1017         if (IS_ERR(vde->mce))
1018                 return PTR_ERR(vde->mce);
1019
1020         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "tfe");
1021         if (!regs)
1022                 return -ENODEV;
1023
1024         vde->tfe = devm_ioremap_resource(dev, regs);
1025         if (IS_ERR(vde->tfe))
1026                 return PTR_ERR(vde->tfe);
1027
1028         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ppb");
1029         if (!regs)
1030                 return -ENODEV;
1031
1032         vde->ppb = devm_ioremap_resource(dev, regs);
1033         if (IS_ERR(vde->ppb))
1034                 return PTR_ERR(vde->ppb);
1035
1036         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vdma");
1037         if (!regs)
1038                 return -ENODEV;
1039
1040         vde->vdma = devm_ioremap_resource(dev, regs);
1041         if (IS_ERR(vde->vdma))
1042                 return PTR_ERR(vde->vdma);
1043
1044         regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "frameid");
1045         if (!regs)
1046                 return -ENODEV;
1047
1048         vde->frameid = devm_ioremap_resource(dev, regs);
1049         if (IS_ERR(vde->frameid))
1050                 return PTR_ERR(vde->frameid);
1051
1052         vde->clk = devm_clk_get(dev, NULL);
1053         if (IS_ERR(vde->clk)) {
1054                 err = PTR_ERR(vde->clk);
1055                 dev_err(dev, "Could not get VDE clk %d\n", err);
1056                 return err;
1057         }
1058
1059         vde->rst = devm_reset_control_get(dev, NULL);
1060         if (IS_ERR(vde->rst)) {
1061                 err = PTR_ERR(vde->rst);
1062                 dev_err(dev, "Could not get VDE reset %d\n", err);
1063                 return err;
1064         }
1065
1066         irq = platform_get_irq_byname(pdev, "sync-token");
1067         if (irq < 0)
1068                 return irq;
1069
1070         err = devm_request_irq(dev, irq, tegra_vde_isr, 0,
1071                                dev_name(dev), vde);
1072         if (err) {
1073                 dev_err(dev, "Could not request IRQ %d\n", err);
1074                 return err;
1075         }
1076
1077         vde->iram_pool = of_gen_pool_get(dev->of_node, "iram", 0);
1078         if (!vde->iram_pool) {
1079                 dev_err(dev, "Could not get IRAM pool\n");
1080                 return -EPROBE_DEFER;
1081         }
1082
1083         vde->iram = gen_pool_dma_alloc(vde->iram_pool,
1084                                        gen_pool_size(vde->iram_pool),
1085                                        &vde->iram_lists_addr);
1086         if (!vde->iram) {
1087                 dev_err(dev, "Could not reserve IRAM\n");
1088                 return -ENOMEM;
1089         }
1090
1091         mutex_init(&vde->lock);
1092         init_completion(&vde->decode_completion);
1093
1094         vde->miscdev.minor = MISC_DYNAMIC_MINOR;
1095         vde->miscdev.name = "tegra_vde";
1096         vde->miscdev.fops = &tegra_vde_fops;
1097         vde->miscdev.parent = dev;
1098
1099         err = misc_register(&vde->miscdev);
1100         if (err) {
1101                 dev_err(dev, "Failed to register misc device: %d\n", err);
1102                 goto err_gen_free;
1103         }
1104
1105         pm_runtime_enable(dev);
1106         pm_runtime_use_autosuspend(dev);
1107         pm_runtime_set_autosuspend_delay(dev, 300);
1108
1109         if (!pm_runtime_enabled(dev)) {
1110                 err = tegra_vde_runtime_resume(dev);
1111                 if (err)
1112                         goto err_misc_unreg;
1113         }
1114
1115         return 0;
1116
1117 err_misc_unreg:
1118         misc_deregister(&vde->miscdev);
1119
1120 err_gen_free:
1121         gen_pool_free(vde->iram_pool, (unsigned long)vde->iram,
1122                       gen_pool_size(vde->iram_pool));
1123
1124         return err;
1125 }
1126
1127 static int tegra_vde_remove(struct platform_device *pdev)
1128 {
1129         struct tegra_vde *vde = platform_get_drvdata(pdev);
1130         struct device *dev = &pdev->dev;
1131         int err;
1132
1133         if (!pm_runtime_enabled(dev)) {
1134                 err = tegra_vde_runtime_suspend(dev);
1135                 if (err)
1136                         return err;
1137         }
1138
1139         pm_runtime_dont_use_autosuspend(dev);
1140         pm_runtime_disable(dev);
1141
1142         misc_deregister(&vde->miscdev);
1143
1144         gen_pool_free(vde->iram_pool, (unsigned long)vde->iram,
1145                       gen_pool_size(vde->iram_pool));
1146
1147         return 0;
1148 }
1149
1150 #ifdef CONFIG_PM_SLEEP
1151 static int tegra_vde_pm_suspend(struct device *dev)
1152 {
1153         struct tegra_vde *vde = dev_get_drvdata(dev);
1154         int err;
1155
1156         mutex_lock(&vde->lock);
1157
1158         err = pm_runtime_force_suspend(dev);
1159         if (err < 0)
1160                 return err;
1161
1162         return 0;
1163 }
1164
1165 static int tegra_vde_pm_resume(struct device *dev)
1166 {
1167         struct tegra_vde *vde = dev_get_drvdata(dev);
1168         int err;
1169
1170         err = pm_runtime_force_resume(dev);
1171         if (err < 0)
1172                 return err;
1173
1174         mutex_unlock(&vde->lock);
1175
1176         return 0;
1177 }
1178 #endif
1179
1180 static const struct dev_pm_ops tegra_vde_pm_ops = {
1181         SET_RUNTIME_PM_OPS(tegra_vde_runtime_suspend,
1182                            tegra_vde_runtime_resume,
1183                            NULL)
1184         SET_SYSTEM_SLEEP_PM_OPS(tegra_vde_pm_suspend,
1185                                 tegra_vde_pm_resume)
1186 };
1187
1188 static const struct of_device_id tegra_vde_of_match[] = {
1189         { .compatible = "nvidia,tegra20-vde", },
1190         { },
1191 };
1192 MODULE_DEVICE_TABLE(of, tegra_vde_of_match);
1193
1194 static struct platform_driver tegra_vde_driver = {
1195         .probe          = tegra_vde_probe,
1196         .remove         = tegra_vde_remove,
1197         .driver         = {
1198                 .name           = "tegra-vde",
1199                 .of_match_table = tegra_vde_of_match,
1200                 .pm             = &tegra_vde_pm_ops,
1201         },
1202 };
1203 module_platform_driver(tegra_vde_driver);
1204
1205 MODULE_DESCRIPTION("NVIDIA Tegra Video Decoder driver");
1206 MODULE_AUTHOR("Dmitry Osipenko <digetx@gmail.com>");
1207 MODULE_LICENSE("GPL");