// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

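/*
 * intel_emit_vma_fill_blt - build a batch that fills @vma with @value
 *
 * Emits one XY_COLOR_BLT per block of at most S16_MAX pages, treating the
 * vma as a sequence of PAGE_SIZE-wide rows. Returns a pinned batch vma with
 * its backing engine-pool node stashed in vma->private, or an ERR_PTR. On
 * success the caller also owns an engine-pm reference, dropped again by
 * intel_emit_vma_release().
 */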
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
                                         struct i915_vma *vma,
                                         u32 value)
{
        struct drm_i915_private *i915 = ce->vm->i915;
        const u32 block_size = S16_MAX * PAGE_SIZE;
        struct intel_engine_pool_node *pool;
        struct i915_vma *batch;
        u64 offset;
        u64 count;
        u64 rem;
        u32 size;
        u32 *cmd;
        int err;

        GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
        intel_engine_pm_get(ce->engine);

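        /*
         * Size the batch for the worst case: 8 dwords per block (the
         * 7-dword gen8+ XY_COLOR_BLT plus an MI_ARB_CHECK; older gens use
         * one dword less), plus the terminating MI_BATCH_BUFFER_END.
         * Round up so a trailing partial block also gets a slot.
         */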
        count = div_u64(round_up(vma->size, block_size), block_size);
        size = (1 + 8 * count) * sizeof(u32);
        size = round_up(size, PAGE_SIZE);
        pool = intel_engine_pool_get(&ce->engine->pool, size);
        if (IS_ERR(pool)) {
                err = PTR_ERR(pool);
                goto out_pm;
        }

        cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto out_put;
        }

        rem = vma->size;
        offset = vma->node.start;

        do {
                size = min_t(u64, rem, block_size);

                GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

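                /*
                 * Treat the block as rows of one page each: the pitch is
                 * PAGE_SIZE bytes, the width is PAGE_SIZE / 4 pixels at
                 * 32bpp, and the height is the number of pages in this
                 * block.
                 */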
                if (INTEL_GEN(i915) >= 8) {
                        *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(offset);
                        *cmd++ = upper_32_bits(offset);
                        *cmd++ = value;
                } else {
                        *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = offset;
                        *cmd++ = value;
                }

                /* Allow ourselves to be preempted in between blocks. */
                *cmd++ = MI_ARB_CHECK;

                offset += size;
                rem -= size;
        } while (rem);

        *cmd = MI_BATCH_BUFFER_END;
        intel_gt_chipset_flush(ce->vm->gt);

        i915_gem_object_unpin_map(pool->obj);

        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_put;
        }

        err = i915_vma_pin(batch, 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_put;

        batch->private = pool;
        return batch;

out_put:
        intel_engine_pool_put(pool);
out_pm:
        intel_engine_pm_put(ce->engine);
        return ERR_PTR(err);
}

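/*
 * intel_emit_vma_mark_active - track a batch vma and its pool node in @rq
 *
 * Orders @rq after any previous work on the batch object, then keeps both
 * the vma and its engine-pool node alive until the request is retired.
 */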
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
        int err;

        i915_vma_lock(vma);
        err = i915_request_await_object(rq, vma->obj, false);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, 0);
        i915_vma_unlock(vma);
        if (unlikely(err))
                return err;

        return intel_engine_pool_mark_active(vma->private, rq);
}

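/*
 * intel_emit_vma_release - undo intel_emit_vma_fill_blt/copy_blt
 *
 * Unpins the batch, returns its node to the engine pool and drops the
 * engine-pm reference taken when the batch was built.
 */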
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
        i915_vma_unpin(vma);
        intel_engine_pool_put(vma->private);
        intel_engine_pm_put(ce->engine);
}

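/*
 * i915_gem_object_fill_blt - fill @obj with @value using the blitter
 *
 * Binds @obj into @ce->vm, builds a fill batch and submits it as a request
 * on @ce. Returns 0 on success or a negative error code. The fill completes
 * asynchronously and must be waited upon before the CPU reads the result.
 *
 * A minimal usage sketch (error handling elided; assumes the caller holds a
 * reference on @obj and a blitter-capable @ce):
 *
 *	err = i915_gem_object_fill_blt(obj, ce, 0);
 *	if (err == 0)
 *		err = i915_gem_object_wait(obj, I915_WAIT_ALL,
 *					   MAX_SCHEDULE_TIMEOUT);
 */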
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
                             struct intel_context *ce,
                             u32 value)
{
        struct i915_request *rq;
        struct i915_vma *batch;
        struct i915_vma *vma;
        int err;

        vma = i915_vma_instance(obj, ce->vm, NULL);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (unlikely(err))
                return err;

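        /*
         * The blitter reads through the GTT, so flush any dirty CPU
         * cachelines the GPU cannot snoop before it samples the pages.
         */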
        if (obj->cache_dirty & ~obj->cache_coherent) {
                i915_gem_object_lock(obj);
                i915_gem_clflush_object(obj, 0);
                i915_gem_object_unlock(obj);
        }

        batch = intel_emit_vma_fill_blt(ce, vma, value);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_unpin;
        }

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_batch;
        }

        err = intel_emit_vma_mark_active(batch, rq);
        if (unlikely(err))
                goto out_request;

        err = i915_request_await_object(rq, obj, true);
        if (unlikely(err))
                goto out_request;

        if (ce->engine->emit_init_breadcrumb) {
                err = ce->engine->emit_init_breadcrumb(rq);
                if (unlikely(err))
                        goto out_request;
        }

        i915_vma_lock(vma);
        err = i915_request_await_object(rq, vma->obj, true);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
        i915_vma_unlock(vma);
        if (unlikely(err))
                goto out_request;

        err = ce->engine->emit_bb_start(rq,
                                        batch->node.start, batch->node.size,
                                        0);
out_request:
        if (unlikely(err))
                i915_request_skip(rq, err);

        i915_request_add(rq);
out_batch:
        intel_emit_vma_release(ce, batch);
out_unpin:
        i915_vma_unpin(vma);
        return err;
}

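/*
 * intel_emit_vma_copy_blt - build a batch that copies @src to @dst
 *
 * Like intel_emit_vma_fill_blt(), but emits one page-wide copy blit per
 * block of at most S16_MAX pages, choosing the command for the running gen.
 * Both vmas must be the same size and bound into @ce->vm.
 */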
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
                                         struct i915_vma *src,
                                         struct i915_vma *dst)
{
        struct drm_i915_private *i915 = ce->vm->i915;
        const u32 block_size = S16_MAX * PAGE_SIZE;
        struct intel_engine_pool_node *pool;
        struct i915_vma *batch;
        u64 src_offset, dst_offset;
        u64 count, rem;
        u32 size, *cmd;
        int err;

        GEM_BUG_ON(src->size != dst->size);

        GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
        intel_engine_pm_get(ce->engine);

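        /*
         * 11 dwords per block covers the worst case: the 10-dword gen8+
         * copy commands plus an MI_ARB_CHECK (older gens emit fewer), with
         * one extra dword for the terminating MI_BATCH_BUFFER_END. Round
         * up so a trailing partial block also gets a slot.
         */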
        count = div_u64(round_up(dst->size, block_size), block_size);
        size = (1 + 11 * count) * sizeof(u32);
        size = round_up(size, PAGE_SIZE);
        pool = intel_engine_pool_get(&ce->engine->pool, size);
        if (IS_ERR(pool)) {
                err = PTR_ERR(pool);
                goto out_pm;
        }

        cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto out_put;
        }

        rem = src->size;
        src_offset = src->node.start;
        dst_offset = dst->node.start;

        do {
                size = min_t(u64, rem, block_size);
                GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

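                /*
                 * Pick the copy command for this gen: the gen9+ fast copy,
                 * the gen8 XY_SRC_COPY with 64-bit addresses, or the legacy
                 * 32-bit SRC_COPY. All copy whole pages at a PAGE_SIZE
                 * pitch.
                 */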
                if (INTEL_GEN(i915) >= 9) {
                        *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
                        *cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(dst_offset);
                        *cmd++ = upper_32_bits(dst_offset);
                        *cmd++ = 0;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = lower_32_bits(src_offset);
                        *cmd++ = upper_32_bits(src_offset);
                } else if (INTEL_GEN(i915) >= 8) {
                        *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(dst_offset);
                        *cmd++ = upper_32_bits(dst_offset);
                        *cmd++ = 0;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = lower_32_bits(src_offset);
                        *cmd++ = upper_32_bits(src_offset);
                } else {
                        *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
                        *cmd++ = dst_offset;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = src_offset;
                }

                /* Allow ourselves to be preempted in between blocks. */
                *cmd++ = MI_ARB_CHECK;

                src_offset += size;
                dst_offset += size;
                rem -= size;
        } while (rem);

        *cmd = MI_BATCH_BUFFER_END;
        intel_gt_chipset_flush(ce->vm->gt);

        i915_gem_object_unpin_map(pool->obj);

        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_put;
        }

        err = i915_vma_pin(batch, 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_put;

        batch->private = pool;
        return batch;

out_put:
        intel_engine_pool_put(pool);
out_pm:
        intel_engine_pm_put(ce->engine);
        return ERR_PTR(err);
}

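/*
 * move_to_gpu - flush stale CPU cachelines and order @rq after all prior
 * users of @vma's object.
 */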
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
{
        struct drm_i915_gem_object *obj = vma->obj;

        if (obj->cache_dirty & ~obj->cache_coherent)
                i915_gem_clflush_object(obj, 0);

        return i915_request_await_object(rq, obj, write);
}

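/*
 * i915_gem_object_copy_blt - copy the contents of @src to @dst
 *
 * Binds both objects into @ce->vm, builds a copy batch and submits it as a
 * request on @ce. Returns 0 on success or a negative error code; the copy
 * completes asynchronously.
 */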
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
                             struct drm_i915_gem_object *dst,
                             struct intel_context *ce)
{
        struct drm_gem_object *objs[] = { &src->base, &dst->base };
        struct i915_address_space *vm = ce->vm;
        struct i915_vma *vma[2], *batch;
        struct ww_acquire_ctx acquire;
        struct i915_request *rq;
        int err, i;

        vma[0] = i915_vma_instance(src, vm, NULL);
        if (IS_ERR(vma[0]))
                return PTR_ERR(vma[0]);

        err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
        if (unlikely(err))
                return err;

        vma[1] = i915_vma_instance(dst, vm, NULL);
        if (IS_ERR(vma[1])) {
                err = PTR_ERR(vma[1]);
                goto out_unpin_src;
        }

        err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_unpin_src;

        batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_unpin_dst;
        }

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_batch;
        }

        err = intel_emit_vma_mark_active(batch, rq);
        if (unlikely(err))
                goto out_request;

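        /*
         * Lock both objects' reservations under a single ww acquire
         * context so the pair is taken atomically, without deadlocking
         * against a concurrent locker taking them in the other order.
         */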
        err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
        if (unlikely(err))
                goto out_request;

        for (i = 0; i < ARRAY_SIZE(vma); i++) {
                err = move_to_gpu(vma[i], rq, i);
                if (unlikely(err))
                        goto out_unlock;
        }

        for (i = 0; i < ARRAY_SIZE(vma); i++) {
                unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

                err = i915_vma_move_to_active(vma[i], rq, flags);
                if (unlikely(err))
                        goto out_unlock;
        }

        if (rq->engine->emit_init_breadcrumb) {
                err = rq->engine->emit_init_breadcrumb(rq);
                if (unlikely(err))
                        goto out_unlock;
        }

        err = rq->engine->emit_bb_start(rq,
                                        batch->node.start, batch->node.size,
                                        0);
out_unlock:
        drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
        if (unlikely(err))
                i915_request_skip(rq, err);

        i915_request_add(rq);
out_batch:
        intel_emit_vma_release(ce, batch);
out_unpin_dst:
        i915_vma_unpin(vma[1]);
out_unpin_src:
        i915_vma_unpin(vma[0]);
        return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif