]> asedeno.scripts.mit.edu Git - linux.git/blobdiff - drivers/gpu/ipu-v3/ipu-image-convert.c
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux.git] / drivers / gpu / ipu-v3 / ipu-image-convert.c
index 9d25db6924b3ac48a9499e0af4c66ad3be817713..eeca50d9a1ee4dbba4dade327acba96e55be85c6 100644 (file)
@@ -251,6 +251,12 @@ static const struct ipu_image_pixfmt image_convert_formats[] = {
        }, {
                .fourcc = V4L2_PIX_FMT_XBGR32,
                .bpp    = 32,
+       }, {
+               .fourcc = V4L2_PIX_FMT_BGRX32,
+               .bpp    = 32,
+       }, {
+               .fourcc = V4L2_PIX_FMT_RGBX32,
+               .bpp    = 32,
        }, {
                .fourcc = V4L2_PIX_FMT_YUYV,
                .bpp    = 16,
@@ -376,8 +382,11 @@ static inline int num_stripes(int dim)
 
 /*
  * Calculate downsizing coefficients, which are the same for all tiles,
- * and bilinear resizing coefficients, which are used to find the best
- * seam positions.
+ * and initial bilinear resizing coefficients, which are used to find the
+ * best seam positions.
+ * Also determine the number of tiles necessary to guarantee that no tile
+ * is larger than 1024 pixels in either dimension at the output and between
+ * IC downsizing and main processing sections.
  */
 static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
                                          struct ipu_image *in,
@@ -391,6 +400,8 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
        u32 resized_height = out->rect.height;
        u32 resize_coeff_h;
        u32 resize_coeff_v;
+       u32 cols;
+       u32 rows;
 
        if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
                resized_width = out->rect.height;
@@ -401,14 +412,12 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
        if (WARN_ON(resized_width == 0 || resized_height == 0))
                return -EINVAL;
 
-       while (downsized_width > 1024 ||
-              downsized_width >= resized_width * 2) {
+       while (downsized_width >= resized_width * 2) {
                downsized_width >>= 1;
                downsize_coeff_h++;
        }
 
-       while (downsized_height > 1024 ||
-              downsized_height >= resized_height * 2) {
+       while (downsized_height >= resized_height * 2) {
                downsized_height >>= 1;
                downsize_coeff_v++;
        }
@@ -422,10 +431,18 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
        resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1);
        resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1);
 
+       /*
+        * Both the output of the IC downsizing section before being passed to
+        * the IC main processing section and the final output of the IC main
+        * processing section must be <= 1024 pixels in both dimensions.
+        */
+       cols = num_stripes(max_t(u32, downsized_width, resized_width));
+       rows = num_stripes(max_t(u32, downsized_height, resized_height));
+
        dev_dbg(ctx->chan->priv->ipu->dev,
                "%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u tiles\n",
                __func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v,
-               resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows);
+               resize_coeff_v, cols, rows);
 
        if (downsize_coeff_h > 2 || downsize_coeff_v  > 2 ||
            resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff)
@@ -435,6 +452,8 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
        ctx->downsize_coeff_v = downsize_coeff_v;
        ctx->image_resize_coeff_h = resize_coeff_h;
        ctx->image_resize_coeff_v = resize_coeff_v;
+       ctx->in.num_cols = cols;
+       ctx->in.num_rows = rows;
 
        return 0;
 }
@@ -442,12 +461,10 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
 #define round_closest(x, y) round_down((x) + (y)/2, (y))
 
 /*
- * Find the best aligned seam position in the inverval [out_start, out_end].
+ * Find the best aligned seam position for the given column / row index.
  * Rotation and image offsets are out of scope.
  *
- * @out_start: start of inverval, must be within 1024 pixels / lines
- *             of out_end
- * @out_end: end of interval, smaller than or equal to out_edge
+ * @index: column / row index, used to calculate valid interval
  * @in_edge: input right / bottom edge
  * @out_edge: output right / bottom edge
  * @in_align: input alignment, either horizontal 8-byte line start address
@@ -463,8 +480,7 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
  * @_out_seam: aligned output seam position return value
  */
 static void find_best_seam(struct ipu_image_convert_ctx *ctx,
-                          unsigned int out_start,
-                          unsigned int out_end,
+                          unsigned int index,
                           unsigned int in_edge,
                           unsigned int out_edge,
                           unsigned int in_align,
@@ -482,6 +498,24 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
        unsigned int out_seam = 0;
        unsigned int in_seam = 0;
        unsigned int min_diff = UINT_MAX;
+       unsigned int out_start;
+       unsigned int out_end;
+       unsigned int in_start;
+       unsigned int in_end;
+
+       /* Start within 1024 pixels of the right / bottom edge */
+       out_start = max_t(int, index * out_align, out_edge - 1024);
+       /* End before having to add more columns to the left / rows above */
+       out_end = min_t(unsigned int, out_edge, index * 1024 + 1);
+
+       /*
+        * Limit input seam position to make sure that the downsized input tile
+        * to the right or bottom does not exceed 1024 pixels.
+        */
+       in_start = max_t(int, index * in_align,
+                        in_edge - (1024 << downsize_coeff));
+       in_end = min_t(unsigned int, in_edge,
+                      index * (1024 << downsize_coeff) + 1);
 
        /*
         * Output tiles must start at a multiple of 8 bytes horizontally and
@@ -492,6 +526,7 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
        for (out_pos = out_start; out_pos < out_end; out_pos += out_align) {
                unsigned int in_pos;
                unsigned int in_pos_aligned;
+               unsigned int in_pos_rounded;
                unsigned int abs_diff;
 
                /*
@@ -512,9 +547,16 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
                 * start the input tile at, 19.13 fixed point.
                 */
                in_pos_aligned = round_closest(in_pos, 8192U * in_align);
+               /* Convert 19.13 fixed point to integer */
+               in_pos_rounded = in_pos_aligned / 8192U;
+
+               if (in_pos_rounded < in_start)
+                       continue;
+               if (in_pos_rounded >= in_end)
+                       break;
 
                if ((in_burst > 1) &&
-                   (in_edge - in_pos_aligned / 8192U) % in_burst)
+                   (in_edge - in_pos_rounded) % in_burst)
                        continue;
 
                if (in_pos < in_pos_aligned)
@@ -523,19 +565,18 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
                        abs_diff = in_pos - in_pos_aligned;
 
                if (abs_diff < min_diff) {
-                       in_seam = in_pos_aligned;
+                       in_seam = in_pos_rounded;
                        out_seam = out_pos;
                        min_diff = abs_diff;
                }
        }
 
        *_out_seam = out_seam;
-       /* Convert 19.13 fixed point to integer seam position */
-       *_in_seam = DIV_ROUND_CLOSEST(in_seam, 8192U);
+       *_in_seam = in_seam;
 
-       dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) diff %u.%03u\n",
+       dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) in [%u, %u] diff %u.%03u\n",
                __func__, out_seam, out_align, out_start, out_end,
-               *_in_seam, in_align, min_diff / 8192,
+               in_seam, in_align, in_start, in_end, min_diff / 8192,
                DIV_ROUND_CLOSEST(min_diff % 8192 * 1000, 8192));
 }
 
@@ -712,8 +753,6 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
                                          !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
                bool allow_out_overshoot = (col < in->num_cols - 1) &&
                                           !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
-               unsigned int out_start;
-               unsigned int out_end;
                unsigned int in_left;
                unsigned int out_left;
 
@@ -722,12 +761,7 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
                 * horizontally.
                 */
 
-               /* Start within 1024 pixels of the right edge */
-               out_start = max_t(int, 0, out_right - 1024);
-               /* End before having to add more columns to the left */
-               out_end = min_t(unsigned int, out_right, col * 1024);
-
-               find_best_seam(ctx, out_start, out_end,
+               find_best_seam(ctx, col,
                               in_right, out_right,
                               in_left_align, out_left_align,
                               allow_in_overshoot ? 1 : 8 /* burst length */,
@@ -762,17 +796,10 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 
        for (row = in->num_rows - 1; row > 0; row--) {
                bool allow_overshoot = row < in->num_rows - 1;
-               unsigned int out_start;
-               unsigned int out_end;
                unsigned int in_top;
                unsigned int out_top;
 
-               /* Start within 1024 lines of the bottom edge */
-               out_start = max_t(int, 0, out_bottom - 1024);
-               /* End before having to add more rows above */
-               out_end = min_t(unsigned int, out_bottom, row * 1024);
-
-               find_best_seam(ctx, out_start, out_end,
+               find_best_seam(ctx, row,
                               in_bottom, out_bottom,
                               in_top_align, out_top_align,
                               1, allow_overshoot ? 1 : out_height_align,
@@ -809,13 +836,21 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
                in_bottom, flipped_out_top, out_bottom);
 }
 
-static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
-                                struct ipu_image_convert_image *image)
+static int calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
+                               struct ipu_image_convert_image *image)
 {
        struct ipu_image_convert_chan *chan = ctx->chan;
        struct ipu_image_convert_priv *priv = chan->priv;
+       unsigned int max_width = 1024;
+       unsigned int max_height = 1024;
        unsigned int i;
 
+       if (image->type == IMAGE_CONVERT_IN) {
+               /* Up to 4096x4096 input tile size */
+               max_width <<= ctx->downsize_coeff_h;
+               max_height <<= ctx->downsize_coeff_v;
+       }
+
        for (i = 0; i < ctx->num_tiles; i++) {
                struct ipu_image_tile *tile;
                const unsigned int row = i / image->num_cols;
@@ -845,7 +880,17 @@ static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
                        image->type == IMAGE_CONVERT_IN ? "Input" : "Output",
                        row, col,
                        tile->width, tile->height, tile->left, tile->top);
+
+               if (!tile->width || tile->width > max_width ||
+                   !tile->height || tile->height > max_height) {
+                       dev_err(priv->ipu->dev, "invalid %s tile size: %ux%u\n",
+                               image->type == IMAGE_CONVERT_IN ? "input" :
+                               "output", tile->width, tile->height);
+                       return -EINVAL;
+               }
        }
+
+       return 0;
 }
 
 /*
@@ -1076,6 +1121,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
                               !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
                u32 resized_width;
                u32 resize_coeff_h;
+               u32 in_width;
 
                tile_idx = col;
                in_tile = &ctx->in.tile[tile_idx];
@@ -1093,33 +1139,35 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
                dev_dbg(priv->ipu->dev, "%s: column %u hscale: *8192/%u\n",
                        __func__, col, resize_coeff_h);
 
+               /*
+                * With the horizontal scaling factor known, round up resized
+                * width (output width or height) to burst size.
+                */
+               resized_width = round_up(resized_width, 8);
+
+               /*
+                * Calculate input width from the last accessed input pixel
+                * given resized width and scaling coefficients. Round up to
+                * burst size.
+                */
+               last_output = resized_width - 1;
+               if (closest && ((last_output * resize_coeff_h) % 8192))
+                       last_output++;
+               in_width = round_up(
+                       (DIV_ROUND_UP(last_output * resize_coeff_h, 8192) + 1)
+                       << ctx->downsize_coeff_h, 8);
 
                for (row = 0; row < ctx->in.num_rows; row++) {
                        tile_idx = row * ctx->in.num_cols + col;
                        in_tile = &ctx->in.tile[tile_idx];
                        out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
 
-                       /*
-                        * With the horizontal scaling factor known, round up
-                        * resized width (output width or height) to burst size.
-                        */
                        if (ipu_rot_mode_is_irt(ctx->rot_mode))
-                               out_tile->height = round_up(resized_width, 8);
+                               out_tile->height = resized_width;
                        else
-                               out_tile->width = round_up(resized_width, 8);
-
-                       /*
-                        * Calculate input width from the last accessed input
-                        * pixel given resized width and scaling coefficients.
-                        * Round up to burst size.
-                        */
-                       last_output = round_up(resized_width, 8) - 1;
-                       if (closest)
-                               last_output++;
-                       in_tile->width = round_up(
-                               (DIV_ROUND_UP(last_output * resize_coeff_h,
-                                             8192) + 1)
-                               << ctx->downsize_coeff_h, 8);
+                               out_tile->width = resized_width;
+
+                       in_tile->width = in_width;
                }
 
                ctx->resize_coeffs_h[col] = resize_coeff_h;
@@ -1130,6 +1178,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
                               !(ctx->rot_mode & IPU_ROT_BIT_VFLIP);
                u32 resized_height;
                u32 resize_coeff_v;
+               u32 in_height;
 
                tile_idx = row * ctx->in.num_cols;
                in_tile = &ctx->in.tile[tile_idx];
@@ -1147,33 +1196,35 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
                dev_dbg(priv->ipu->dev, "%s: row %u vscale: *8192/%u\n",
                        __func__, row, resize_coeff_v);
 
+               /*
+                * With the vertical scaling factor known, round up resized
+                * height (output width or height) to IDMAC limitations.
+                */
+               resized_height = round_up(resized_height, 2);
+
+               /*
+                * Calculate input width from the last accessed input pixel
+                * given resized height and scaling coefficients. Align to
+                * IDMAC restrictions.
+                */
+               last_output = resized_height - 1;
+               if (closest && ((last_output * resize_coeff_v) % 8192))
+                       last_output++;
+               in_height = round_up(
+                       (DIV_ROUND_UP(last_output * resize_coeff_v, 8192) + 1)
+                       << ctx->downsize_coeff_v, 2);
+
                for (col = 0; col < ctx->in.num_cols; col++) {
                        tile_idx = row * ctx->in.num_cols + col;
                        in_tile = &ctx->in.tile[tile_idx];
                        out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
 
-                       /*
-                        * With the vertical scaling factor known, round up
-                        * resized height (output width or height) to IDMAC
-                        * limitations.
-                        */
                        if (ipu_rot_mode_is_irt(ctx->rot_mode))
-                               out_tile->width = round_up(resized_height, 2);
+                               out_tile->width = resized_height;
                        else
-                               out_tile->height = round_up(resized_height, 2);
-
-                       /*
-                        * Calculate input width from the last accessed input
-                        * pixel given resized height and scaling coefficients.
-                        * Align to IDMAC restrictions.
-                        */
-                       last_output = round_up(resized_height, 2) - 1;
-                       if (closest)
-                               last_output++;
-                       in_tile->height = round_up(
-                               (DIV_ROUND_UP(last_output * resize_coeff_v,
-                                             8192) + 1)
-                               << ctx->downsize_coeff_v, 2);
+                               out_tile->height = resized_height;
+
+                       in_tile->height = in_height;
                }
 
                ctx->resize_coeffs_v[row] = resize_coeff_v;
@@ -2024,22 +2075,26 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
        ctx->chan = chan;
        init_completion(&ctx->aborted);
 
+       ctx->rot_mode = rot_mode;
+
+       /* Sets ctx->in.num_rows/cols as well */
+       ret = calc_image_resize_coefficients(ctx, in, out);
+       if (ret)
+               goto out_free;
+
        s_image = &ctx->in;
        d_image = &ctx->out;
 
        /* set tiling and rotation */
-       d_image->num_rows = num_stripes(out->pix.height);
-       d_image->num_cols = num_stripes(out->pix.width);
        if (ipu_rot_mode_is_irt(rot_mode)) {
-               s_image->num_rows = d_image->num_cols;
-               s_image->num_cols = d_image->num_rows;
+               d_image->num_rows = s_image->num_cols;
+               d_image->num_cols = s_image->num_rows;
        } else {
-               s_image->num_rows = d_image->num_rows;
-               s_image->num_cols = d_image->num_cols;
+               d_image->num_rows = s_image->num_rows;
+               d_image->num_cols = s_image->num_cols;
        }
 
        ctx->num_tiles = d_image->num_cols * d_image->num_rows;
-       ctx->rot_mode = rot_mode;
 
        ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN);
        if (ret)
@@ -2048,15 +2103,14 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
        if (ret)
                goto out_free;
 
-       ret = calc_image_resize_coefficients(ctx, in, out);
-       if (ret)
-               goto out_free;
-
        calc_out_tile_map(ctx);
 
        find_seams(ctx, s_image, d_image);
 
-       calc_tile_dimensions(ctx, s_image);
+       ret = calc_tile_dimensions(ctx, s_image);
+       if (ret)
+               goto out_free;
+
        ret = calc_tile_offsets(ctx, s_image);
        if (ret)
                goto out_free;