// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

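/*
 * intel_emit_vma_fill_blt - build a batch that fills @vma with @value
 *
 * Emits one XY_COLOR_BLT per block of at most S16_MAX pages, treating the
 * vma as a sequence of PAGE_SIZE-wide rows. Returns a pinned batch vma with
 * its backing engine-pool node stashed in vma->private, or an ERR_PTR. On
 * success the caller also owns an engine-pm reference, dropped again by
 * intel_emit_vma_release().
 */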
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
                                         struct i915_vma *vma,
                                         u32 value)
{
        struct drm_i915_private *i915 = ce->vm->i915;
        const u32 block_size = S16_MAX * PAGE_SIZE;
        struct intel_engine_pool_node *pool;
        struct i915_vma *batch;
        u64 offset;
        u64 count;
        u64 rem;
        u32 size;
        u32 *cmd;
        int err;

        GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
        intel_engine_pm_get(ce->engine);

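        /*
         * Size the batch for the worst case: 8 dwords per block (the
         * 7-dword gen8+ XY_COLOR_BLT plus an MI_ARB_CHECK; older gens use
         * one dword less), plus the terminating MI_BATCH_BUFFER_END.
         * Round up so a trailing partial block also gets a slot.
         */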
        count = div_u64(round_up(vma->size, block_size), block_size);
        size = (1 + 8 * count) * sizeof(u32);
        size = round_up(size, PAGE_SIZE);
        pool = intel_engine_pool_get(&ce->engine->pool, size);
        if (IS_ERR(pool)) {
                err = PTR_ERR(pool);
                goto out_pm;
        }

        cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto out_put;
        }

        rem = vma->size;
        offset = vma->node.start;

        do {
                size = min_t(u64, rem, block_size);

                GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

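                /*
                 * Treat the block as rows of one page each: the pitch is
                 * PAGE_SIZE bytes, the width is PAGE_SIZE / 4 pixels at
                 * 32bpp, and the height is the number of pages in this
                 * block.
                 */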
                if (INTEL_GEN(i915) >= 8) {
                        *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(offset);
                        *cmd++ = upper_32_bits(offset);
                        *cmd++ = value;
                } else {
                        *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = offset;
                        *cmd++ = value;
                }

                /* Allow ourselves to be preempted in between blocks. */
                *cmd++ = MI_ARB_CHECK;

                offset += size;
                rem -= size;
        } while (rem);

        *cmd = MI_BATCH_BUFFER_END;
        intel_gt_chipset_flush(ce->vm->gt);

        i915_gem_object_unpin_map(pool->obj);

        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_put;
        }

        err = i915_vma_pin(batch, 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_put;

        batch->private = pool;
        return batch;

out_put:
        intel_engine_pool_put(pool);
out_pm:
        intel_engine_pm_put(ce->engine);
        return ERR_PTR(err);
}

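/*
 * intel_emit_vma_mark_active - track a batch vma and its pool node in @rq
 *
 * Orders @rq after any previous work on the batch object, then keeps both
 * the vma and its engine-pool node alive until the request is retired.
 */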
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
        int err;

        i915_vma_lock(vma);
        err = i915_request_await_object(rq, vma->obj, false);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, 0);
        i915_vma_unlock(vma);
        if (unlikely(err))
                return err;

        return intel_engine_pool_mark_active(vma->private, rq);
}

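/*
 * intel_emit_vma_release - undo intel_emit_vma_fill_blt/copy_blt
 *
 * Unpins the batch, returns its node to the engine pool and drops the
 * engine-pm reference taken when the batch was built.
 */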
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
        i915_vma_unpin(vma);
        intel_engine_pool_put(vma->private);
        intel_engine_pm_put(ce->engine);
}

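/*
 * i915_gem_object_fill_blt - fill @obj with @value using the blitter
 *
 * Binds @obj into @ce->vm, builds a fill batch and submits it as a request
 * on @ce. Returns 0 on success or a negative error code. The fill completes
 * asynchronously and must be waited upon before the CPU reads the result.
 *
 * A minimal usage sketch (error handling elided; assumes the caller holds a
 * reference on @obj and a blitter-capable @ce):
 *
 *	err = i915_gem_object_fill_blt(obj, ce, 0);
 *	if (err == 0)
 *		err = i915_gem_object_wait(obj, I915_WAIT_ALL,
 *					   MAX_SCHEDULE_TIMEOUT);
 */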
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
                             struct intel_context *ce,
                             u32 value)
{
        struct i915_request *rq;
        struct i915_vma *batch;
        struct i915_vma *vma;
        int err;

        vma = i915_vma_instance(obj, ce->vm, NULL);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (unlikely(err))
                return err;

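        /*
         * The blitter reads through the GTT, so flush any dirty CPU
         * cachelines the GPU cannot snoop before it samples the pages.
         */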
        if (obj->cache_dirty & ~obj->cache_coherent) {
                i915_gem_object_lock(obj);
                i915_gem_clflush_object(obj, 0);
                i915_gem_object_unlock(obj);
        }

        batch = intel_emit_vma_fill_blt(ce, vma, value);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_unpin;
        }

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_batch;
        }

        err = intel_emit_vma_mark_active(batch, rq);
        if (unlikely(err))
                goto out_request;

        err = i915_request_await_object(rq, obj, true);
        if (unlikely(err))
                goto out_request;

        if (ce->engine->emit_init_breadcrumb) {
                err = ce->engine->emit_init_breadcrumb(rq);
                if (unlikely(err))
                        goto out_request;
        }

        i915_vma_lock(vma);
        err = i915_request_await_object(rq, vma->obj, true);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
        i915_vma_unlock(vma);
        if (unlikely(err))
                goto out_request;

        err = ce->engine->emit_bb_start(rq,
                                        batch->node.start, batch->node.size,
                                        0);
out_request:
        if (unlikely(err))
                i915_request_skip(rq, err);

        i915_request_add(rq);
out_batch:
        intel_emit_vma_release(ce, batch);
out_unpin:
        i915_vma_unpin(vma);
        return err;
}

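/*
 * intel_emit_vma_copy_blt - build a batch that copies @src to @dst
 *
 * Like intel_emit_vma_fill_blt(), but emits one page-wide copy blit per
 * block of at most S16_MAX pages, choosing the command for the running gen.
 * Both vmas must be the same size and bound into @ce->vm.
 */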
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
                                         struct i915_vma *src,
                                         struct i915_vma *dst)
{
        struct drm_i915_private *i915 = ce->vm->i915;
        const u32 block_size = S16_MAX * PAGE_SIZE;
        struct intel_engine_pool_node *pool;
        struct i915_vma *batch;
        u64 src_offset, dst_offset;
        u64 count, rem;
        u32 size, *cmd;
        int err;

        GEM_BUG_ON(src->size != dst->size);

        GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
        intel_engine_pm_get(ce->engine);

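        /*
         * 11 dwords per block covers the worst case: the 10-dword gen8+
         * copy commands plus an MI_ARB_CHECK (older gens emit fewer), with
         * one extra dword for the terminating MI_BATCH_BUFFER_END. Round
         * up so a trailing partial block also gets a slot.
         */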
        count = div_u64(round_up(dst->size, block_size), block_size);
        size = (1 + 11 * count) * sizeof(u32);
        size = round_up(size, PAGE_SIZE);
        pool = intel_engine_pool_get(&ce->engine->pool, size);
        if (IS_ERR(pool)) {
                err = PTR_ERR(pool);
                goto out_pm;
        }

        cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto out_put;
        }

        rem = src->size;
        src_offset = src->node.start;
        dst_offset = dst->node.start;

        do {
                size = min_t(u64, rem, block_size);
                GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

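                /*
                 * Pick the copy command for this gen: the gen9+ fast copy,
                 * the gen8 XY_SRC_COPY with 64-bit addresses, or the legacy
                 * 32-bit SRC_COPY. All copy whole pages at a PAGE_SIZE
                 * pitch.
                 */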
                if (INTEL_GEN(i915) >= 9) {
                        *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
                        *cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(dst_offset);
                        *cmd++ = upper_32_bits(dst_offset);
                        *cmd++ = 0;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = lower_32_bits(src_offset);
                        *cmd++ = upper_32_bits(src_offset);
                } else if (INTEL_GEN(i915) >= 8) {
                        *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(dst_offset);
                        *cmd++ = upper_32_bits(dst_offset);
                        *cmd++ = 0;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = lower_32_bits(src_offset);
                        *cmd++ = upper_32_bits(src_offset);
                } else {
                        *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
                        *cmd++ = dst_offset;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = src_offset;
                }

                /* Allow ourselves to be preempted in between blocks. */
                *cmd++ = MI_ARB_CHECK;

                src_offset += size;
                dst_offset += size;
                rem -= size;
        } while (rem);

        *cmd = MI_BATCH_BUFFER_END;
        intel_gt_chipset_flush(ce->vm->gt);

        i915_gem_object_unpin_map(pool->obj);

        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_put;
        }

        err = i915_vma_pin(batch, 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_put;

        batch->private = pool;
        return batch;

out_put:
        intel_engine_pool_put(pool);
out_pm:
        intel_engine_pm_put(ce->engine);
        return ERR_PTR(err);
}

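/*
 * move_to_gpu - flush stale CPU cachelines and order @rq after all prior
 * users of @vma's object.
 */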
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
{
        struct drm_i915_gem_object *obj = vma->obj;

        if (obj->cache_dirty & ~obj->cache_coherent)
                i915_gem_clflush_object(obj, 0);

        return i915_request_await_object(rq, obj, write);
}

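/*
 * i915_gem_object_copy_blt - copy the contents of @src to @dst
 *
 * Binds both objects into @ce->vm, builds a copy batch and submits it as a
 * request on @ce. Returns 0 on success or a negative error code; the copy
 * completes asynchronously.
 */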
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
                             struct drm_i915_gem_object *dst,
                             struct intel_context *ce)
{
        struct drm_gem_object *objs[] = { &src->base, &dst->base };
        struct i915_address_space *vm = ce->vm;
        struct i915_vma *vma[2], *batch;
        struct ww_acquire_ctx acquire;
        struct i915_request *rq;
        int err, i;

        vma[0] = i915_vma_instance(src, vm, NULL);
        if (IS_ERR(vma[0]))
                return PTR_ERR(vma[0]);

        err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
        if (unlikely(err))
                return err;

        vma[1] = i915_vma_instance(dst, vm, NULL);
        if (IS_ERR(vma[1])) {
                err = PTR_ERR(vma[1]);
                goto out_unpin_src;
        }

        err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_unpin_src;

        batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_unpin_dst;
        }

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_batch;
        }

        err = intel_emit_vma_mark_active(batch, rq);
        if (unlikely(err))
                goto out_request;

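        /*
         * Lock both objects' reservations under a single ww acquire
         * context so the pair is taken atomically, without deadlocking
         * against a concurrent locker taking them in the other order.
         */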
        err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
        if (unlikely(err))
                goto out_request;

        for (i = 0; i < ARRAY_SIZE(vma); i++) {
                err = move_to_gpu(vma[i], rq, i);
                if (unlikely(err))
                        goto out_unlock;
        }

        for (i = 0; i < ARRAY_SIZE(vma); i++) {
                unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

                err = i915_vma_move_to_active(vma[i], rq, flags);
                if (unlikely(err))
                        goto out_unlock;
        }

        if (rq->engine->emit_init_breadcrumb) {
                err = rq->engine->emit_init_breadcrumb(rq);
                if (unlikely(err))
                        goto out_unlock;
        }

        err = rq->engine->emit_bb_start(rq,
                                        batch->node.start, batch->node.size,
                                        0);
out_unlock:
        drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
        if (unlikely(err))
                i915_request_skip(rq, err);

        i915_request_add(rq);
out_batch:
        intel_emit_vma_release(ce, batch);
out_unpin_dst:
        i915_vma_unpin(vma[1]);
out_unpin_src:
        i915_vma_unpin(vma[0]);
        return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif