/* SPDX-License-Identifier: MIT */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <drm/drm_util.h>

#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/random.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"
#include "i915_pmu.h"
#include "i915_reg.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_engine_types.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"

struct drm_printer;

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine)  I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qword-aligned offsets, simply pretend it's
 * 8 bytes.
 */
enum intel_engine_hangcheck_action {
        ENGINE_IDLE = 0,
        ENGINE_WAIT,
        ENGINE_ACTIVE_SEQNO,
        ENGINE_ACTIVE_HEAD,
        ENGINE_ACTIVE_SUBUNITS,
        ENGINE_WAIT_KICK,
        ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
        switch (a) {
        case ENGINE_IDLE:
                return "idle";
        case ENGINE_WAIT:
                return "wait";
        case ENGINE_ACTIVE_SEQNO:
                return "active seqno";
        case ENGINE_ACTIVE_HEAD:
                return "active head";
        case ENGINE_ACTIVE_SUBUNITS:
                return "active subunits";
        case ENGINE_WAIT_KICK:
                return "wait kick";
        case ENGINE_DEAD:
                return "dead";
        }

        return "unknown";
}

void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);

static inline bool __execlists_need_preempt(int prio, int last)
{
        /*
         * Allow preemption of low -> normal -> high, but we do
         * not allow low priority tasks to preempt other low priority
         * tasks under the impression that latency for low priority
         * tasks does not matter (as much as background throughput),
         * so keep it simple (KISS).
         *
         * More naturally we would write
         *      prio >= max(0, last);
         * except that we wish to prevent triggering preemption at the same
         * priority level: the task that is running should remain running
         * to preserve FIFO ordering of dependencies.
         */
        return prio > max(I915_PRIORITY_NORMAL - 1, last);
}

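/*
 * Illustrative behaviour of __execlists_need_preempt(), assuming the usual
 * I915_PRIORITY_NORMAL value of 0 (a worked example, not driver policy):
 *
 *      __execlists_need_preempt( 1,  0) -> true   (higher priority preempts)
 *      __execlists_need_preempt( 0, -1) -> true   (normal preempts low)
 *      __execlists_need_preempt( 0,  0) -> false  (equal priority, keep FIFO)
 *      __execlists_need_preempt(-1, -2) -> false  (low never preempts low)
 */
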
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
                     unsigned int bit)
{
        __set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
                          unsigned int bit)
{
        return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
                       unsigned int bit)
{
        __clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
        execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
                    unsigned int bit)
{
        return test_bit(bit, (unsigned long *)&execlists->active);
}

void execlists_user_begin(struct intel_engine_execlists *execlists,
                          const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
        return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
                        struct execlist_port * const port)
{
        const unsigned int m = execlists->port_mask;

        GEM_BUG_ON(port_index(port, execlists) != 0);
        GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

        memmove(port, port + 1, m * sizeof(struct execlist_port));
        memset(port + m, 0, sizeof(struct execlist_port));

        return port;
}

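/*
 * Port rotation sketch for execlists_port_complete() above (illustrative,
 * for a two-port configuration): completing port[0] slides port[1] down
 * into port[0] and clears the vacated last slot,
 *
 *      before:  port[0] = rq A, port[1] = rq B
 *      after:   port[0] = rq B, port[1] = empty
 */
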
static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
        /* Ensure that the compiler doesn't optimize away the load. */
        return READ_ONCE(engine->status_page.addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
        /* Writing into the status page should be done sparingly. Since
         * we do so only when we are uncertain of the device state, we take
         * a bit of extra paranoia to try and ensure that the HWS takes the
         * value we give and that it doesn't end up trapped inside the CPU!
         */
        if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
                mb();
                clflush(&engine->status_page.addr[reg]);
                engine->status_page.addr[reg] = value;
                clflush(&engine->status_page.addr[reg]);
                mb();
        } else {
                WRITE_ONCE(engine->status_page.addr[reg], value);
        }
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_PREEMPT            0x32
#define I915_GEM_HWS_PREEMPT_ADDR       (I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_HANGCHECK          0x34
#define I915_GEM_HWS_HANGCHECK_ADDR     (I915_GEM_HWS_HANGCHECK * sizeof(u32))
#define I915_GEM_HWS_SEQNO              0x40
#define I915_GEM_HWS_SEQNO_ADDR         (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH            0x80
#define I915_GEM_HWS_SCRATCH_ADDR       (I915_GEM_HWS_SCRATCH * sizeof(u32))

#define I915_HWS_CSB_BUF0_INDEX         0x10
#define I915_HWS_CSB_WRITE_INDEX        0x1f
#define CNL_HWS_CSB_WRITE_INDEX         0x2f
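/*
 * Illustrative use of the offsets above (a sketch, not a real call site):
 * the dword forms index the status page through the accessors,
 *
 *      u32 hc = intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
 *
 * while the *_ADDR forms give the byte offset within the HWSP for command
 * streamer writes, e.g. I915_GEM_HWS_SEQNO_ADDR == 0x40 * sizeof(u32) == 0x100.
 */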

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
                         struct i915_timeline *timeline,
                         int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct kref *ref);

static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
{
        kref_get(&ring->ref);
        return ring;
}

static inline void intel_ring_put(struct intel_ring *ring)
{
        kref_put(&ring->ref, intel_ring_free);
}

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
        /* Dummy function.
         *
         * This serves as a placeholder in the code so that the reader
         * can compare against the preceding intel_ring_begin() and
         * check that the number of dwords emitted matches the space
         * reserved for the command packet (i.e. the value passed to
         * intel_ring_begin()).
         */
        GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
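/*
 * Illustrative pairing (a sketch, not a real call site): reserve four dwords,
 * emit exactly four, and let intel_ring_advance() sanity-check the count,
 *
 *      cs = intel_ring_begin(rq, 4);
 *      if (IS_ERR(cs))
 *              return PTR_ERR(cs);
 *
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *      intel_ring_advance(rq, cs);
 */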

static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
        return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
                        unsigned int pos)
{
        if (pos & -ring->size) /* must be strictly within the ring */
                return false;

        if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
                return false;

        return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
        /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
        u32 offset = addr - rq->ring->vaddr;
        GEM_BUG_ON(offset > rq->ring->size);
        return intel_ring_wrap(rq->ring, offset);
}
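/*
 * Worked example (assuming a 4096-byte ring): intel_ring_wrap() masks the
 * position into [0, size), so a position of 0x1008 wraps to 0x008, and
 * intel_ring_offset() therefore reports an emission pointer that landed
 * exactly one-past-the-end as 0 rather than ring->size.
 */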

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
        GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

        /*
         * "Ring Buffer Use"
         *      Gen2 BSpec "1. Programming Environment" / 1.4.4.6
         *      Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
         *      Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
         * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
         * same cacheline, the Head Pointer must not be greater than the Tail
         * Pointer."
         *
         * We use ring->head as the last known location of the actual
         * RING_HEAD; it may have advanced since, but in the worst case it is
         * still equal to ring->head, so we must never program RING_TAIL to
         * advance into the same cacheline as ring->head.
         */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
        GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
                   tail < ring->head);
#undef cacheline
}
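/*
 * For example (a sketch with CACHELINE_BYTES == 64): if ring->head is 0x20,
 * a tail of 0x10 shares the head's cacheline while lying behind it and would
 * trip the assert above, whereas 0x30 (same cacheline, not behind) or 0x40
 * (next cacheline) would not.
 */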

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
        /* Whilst writes to the tail are strictly ordered, there is no
         * serialisation between readers and the writers. The tail may be
         * read by i915_request_retire() just as it is being updated
         * by execlists, as although the breadcrumb is complete, the context
         * switch hasn't been seen.
         */
        assert_ring_tail_valid(ring, tail);
        ring->tail = tail;
        return tail;
}

static inline unsigned int
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
        /*
         * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
         * same cacheline, the Head Pointer must not be greater than the Tail
         * Pointer."
         */
        GEM_BUG_ON(!is_power_of_2(size));
        return (head - tail - CACHELINE_BYTES) & (size - 1);
}
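/*
 * Worked example (assuming a 4096-byte ring): with head == 0x40 and
 * tail == 0x80, (0x40 - 0x80 - 64) & 4095 == 3968, i.e. the reported free
 * space always stops one cacheline short of the head.
 */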

int intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

void intel_engine_get_instdone(struct intel_engine_cs *engine,
                               struct intel_instdone *instdone);

void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

static inline void
intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
{
        irq_work_queue(&engine->breadcrumbs.irq_work);
}

bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
                                    struct drm_printer *p);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
        memset(batch, 0, 6 * sizeof(u32));

        batch[0] = GFX_OP_PIPE_CONTROL(6);
        batch[1] = flags;
        batch[2] = offset;

        return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
        /* We're using qword write, offset should be aligned to 8 bytes. */
        GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

        /* w/a: for post-sync ops following a GPGPU operation we need a
         * prior CS_STALL, which is emitted by the flush following the
         * batch.
         */
        *cs++ = GFX_OP_PIPE_CONTROL(6);
        *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
        *cs++ = gtt_offset;
        *cs++ = 0;
        *cs++ = value;
        /* We're clobbering one extra dword of the HWS (the upper half of the qword write). */
        *cs++ = 0;

        return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
        /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
        GEM_BUG_ON(gtt_offset & (1 << 5));
        /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
        GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

        *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
        *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
        *cs++ = 0;
        *cs++ = value;

        return cs;
}
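/*
 * Illustrative sketch (hypothetical hwsp_ggtt base address, not a real call
 * site): writing @value into the scratch slot of the HWSP,
 *
 *      cs = gen8_emit_ggtt_write(cs, value,
 *                                hwsp_ggtt + I915_GEM_HWS_SCRATCH_ADDR,
 *                                0);
 *
 * consumes exactly four dwords of ring space (command, address low,
 * address high, data).
 */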

static inline void intel_engine_reset(struct intel_engine_cs *engine,
                                      bool stalled)
{
        if (engine->reset.reset)
                engine->reset.reset(engine, stalled);
}

void intel_engines_sanitize(struct drm_i915_private *i915, bool force);

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
                       struct drm_printer *m,
                       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
        unsigned long flags;

        if (READ_ONCE(engine->stats.enabled) == 0)
                return;

        write_seqlock_irqsave(&engine->stats.lock, flags);

        if (engine->stats.enabled > 0) {
                if (engine->stats.active++ == 0)
                        engine->stats.start = ktime_get();
                GEM_BUG_ON(engine->stats.active == 0);
        }

        write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
        unsigned long flags;

        if (READ_ONCE(engine->stats.enabled) == 0)
                return;

        write_seqlock_irqsave(&engine->stats.lock, flags);

        if (engine->stats.enabled > 0) {
                ktime_t last;

                if (engine->stats.active && --engine->stats.active == 0) {
                        /*
                         * The last active context has gone; the engine is
                         * now idle, so add the elapsed busy time to the
                         * running total.
                         */
                        last = ktime_sub(ktime_get(), engine->stats.start);

                        engine->stats.total = ktime_add(engine->stats.total,
                                                        last);
                } else if (engine->stats.active == 0) {
                        /*
                         * After turning on engine stats, a context-out might
                         * be the first event, in which case we account from
                         * the time stats gathering was turned on.
                         */
                        last = ktime_sub(ktime_get(), engine->stats.enabled_at);

                        engine->stats.total = ktime_add(engine->stats.total,
                                                        last);
                }
        }

        write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
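/*
 * Illustrative sampling (a sketch, not driver code): with stats enabled via
 * intel_enable_engine_stats(), busyness over an interval is the delta of two
 * intel_engine_get_busy_time() samples,
 *
 *      ktime_t t0 = intel_engine_get_busy_time(engine);
 *      ...
 *      ktime_t busy = ktime_sub(intel_engine_get_busy_time(engine), t0);
 */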

struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine);

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
        if (!execlists->preempt_hang.inject_hang)
                return false;

        complete(&execlists->preempt_hang.completion);
        return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
        return false;
}

#endif

static inline u32
intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
{
        return engine->hangcheck.next_seqno =
                next_pseudo_random32(engine->hangcheck.next_seqno);
}

static inline u32
intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
{
        return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
}

#endif /* _INTEL_RINGBUFFER_H_ */