2 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Zhi Wang <zhi.a.wang@intel.com>
27 * Ping Gao <ping.a.gao@intel.com>
28 * Tina Zhang <tina.zhang@intel.com>
29 * Changbin Du <changbin.du@intel.com>
30 * Min He <min.he@intel.com>
31 * Bing Niu <bing.niu@intel.com>
32 * Zhenyu Wang <zhenyuw@linux.intel.com>
36 #include <linux/kthread.h>
41 #define RING_CTX_OFF(x) \
42 offsetof(struct execlist_ring_context, x)
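/*
 * Note on ordering: the ring context lays out the PPGTT root pointers as
 * mmio pairs from pdp3_UDW down to pdp0_LDW, while the pdp[] array the
 * callers pass in appears to be filled in the opposite order (see
 * read_guest_pdps() below), hence the reversed pdp[7 - i] indexing.
 */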
44 static void set_context_pdp_root_pointer(
45 struct execlist_ring_context *ring_context,
48 struct execlist_mmio_pair *pdp_pair = &ring_context->pdp3_UDW;
51 for (i = 0; i < 8; i++)
52 pdp_pair[i].val = pdp[7 - i];
56 * When populating the shadow ctx from the guest, we should not override
57 * OA-related registers, so that they are not clobbered by guest OA configs.
58 * This makes it possible to capture OA data from the host for both host and guests.
60 static void sr_oa_regs(struct intel_vgpu_workload *workload,
61 u32 *reg_state, bool save)
63 struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
64 u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
65 u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
68 i915_mmio_reg_offset(EU_PERF_CNTL0),
69 i915_mmio_reg_offset(EU_PERF_CNTL1),
70 i915_mmio_reg_offset(EU_PERF_CNTL2),
71 i915_mmio_reg_offset(EU_PERF_CNTL3),
72 i915_mmio_reg_offset(EU_PERF_CNTL4),
73 i915_mmio_reg_offset(EU_PERF_CNTL5),
74 i915_mmio_reg_offset(EU_PERF_CNTL6),
77 if (!workload || !reg_state || workload->ring_id != RCS)
81 workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
83 for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
84 u32 state_offset = ctx_flexeu0 + i * 2;
86 workload->flex_mmio[i] = reg_state[state_offset + 1];
89 reg_state[ctx_oactxctrl] =
90 i915_mmio_reg_offset(GEN8_OACTXCONTROL);
91 reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
93 for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
94 u32 state_offset = ctx_flexeu0 + i * 2;
95 u32 mmio = flex_mmio[i];
97 reg_state[state_offset] = mmio;
98 reg_state[state_offset + 1] = workload->flex_mmio[i];
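/*
 * populate_shadow_context() copies the guest ring context into the shadow
 * context object: each guest context page is read through the hypervisor
 * GPA interface into the matching shadow page, selected execlist registers
 * and the shadowed PDP root pointers are then patched in, and the host OA
 * state is preserved around the copy via sr_oa_regs().
 */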
103 static int populate_shadow_context(struct intel_vgpu_workload *workload)
105 struct intel_vgpu *vgpu = workload->vgpu;
106 struct intel_gvt *gvt = vgpu->gvt;
107 int ring_id = workload->ring_id;
108 struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
109 struct drm_i915_gem_object *ctx_obj =
110 shadow_ctx->engine[ring_id].state->obj;
111 struct execlist_ring_context *shadow_ring_context;
114 unsigned long context_gpa, context_page_num;
117 gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
118 workload->ctx_desc.lrca);
120 context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
122 context_page_num = context_page_num >> PAGE_SHIFT;
124 if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
125 context_page_num = 19;
129 while (i < context_page_num) {
130 context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
131 (u32)((workload->ctx_desc.lrca + i) <<
132 I915_GTT_PAGE_SHIFT));
133 if (context_gpa == INTEL_GVT_INVALID_ADDR) {
134 gvt_vgpu_err("Invalid guest context descriptor\n");
138 page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
140 intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
146 page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
147 shadow_ring_context = kmap(page);
149 sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
150 #define COPY_REG(name) \
151 intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
152 + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
155 COPY_REG(ctx_timestamp);
157 if (ring_id == RCS) {
158 COPY_REG(bb_per_ctx_ptr);
159 COPY_REG(rcs_indirect_ctx);
160 COPY_REG(rcs_indirect_ctx_offset);
164 set_context_pdp_root_pointer(shadow_ring_context,
165 workload->shadow_mm->shadow_page_table);
167 intel_gvt_hypervisor_read_gpa(vgpu,
168 workload->ring_context_gpa +
169 sizeof(*shadow_ring_context),
170 (void *)shadow_ring_context +
171 sizeof(*shadow_ring_context),
172 I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
174 sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
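/*
 * Requests created on GVT's shadow contexts carry the force-single-
 * submission flag, which is what distinguishes them here from requests
 * submitted by the host i915 itself.
 */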
179 static inline bool is_gvt_request(struct drm_i915_gem_request *req)
181 return i915_gem_context_force_single_submission(req->ctx);
184 static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id)
186 struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
187 u32 ring_base = dev_priv->engine[ring_id]->mmio_base;
190 reg = RING_INSTDONE(ring_base);
191 vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
192 reg = RING_ACTHD(ring_base);
193 vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
194 reg = RING_ACTHD_UDW(ring_base);
195 vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
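/*
 * Context status notifier, invoked by i915 on context schedule in/out and
 * preemption. It switches the render MMIO state between host and vGPU when
 * engine ownership changes, saves a few ring registers on schedule-out, and
 * tracks shadow context activity for the workload completion path.
 */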
198 static int shadow_context_status_change(struct notifier_block *nb,
199 unsigned long action, void *data)
201 struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data;
202 struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
203 shadow_ctx_notifier_block[req->engine->id]);
204 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
205 enum intel_engine_id ring_id = req->engine->id;
206 struct intel_vgpu_workload *workload;
209 if (!is_gvt_request(req)) {
210 spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
211 if (action == INTEL_CONTEXT_SCHEDULE_IN &&
212 scheduler->engine_owner[ring_id]) {
213 /* Switch ring from vGPU to host. */
214 intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
216 scheduler->engine_owner[ring_id] = NULL;
218 spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
223 workload = scheduler->current_workload[ring_id];
224 if (unlikely(!workload))
228 case INTEL_CONTEXT_SCHEDULE_IN:
229 spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
230 if (workload->vgpu != scheduler->engine_owner[ring_id]) {
231 /* Switch ring from host to vGPU or vGPU to vGPU. */
232 intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
233 workload->vgpu, ring_id);
234 scheduler->engine_owner[ring_id] = workload->vgpu;
236 gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
237 ring_id, workload->vgpu->id);
238 spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
239 atomic_set(&workload->shadow_ctx_active, 1);
241 case INTEL_CONTEXT_SCHEDULE_OUT:
242 save_ring_hw_state(workload->vgpu, ring_id);
243 atomic_set(&workload->shadow_ctx_active, 0);
245 case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
246 save_ring_hw_state(workload->vgpu, ring_id);
252 wake_up(&workload->shadow_ctx_status_wq);
256 static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
257 struct intel_engine_cs *engine)
259 struct intel_context *ce = &ctx->engine[engine->id];
264 /* Update bits 0-11 of the context descriptor, which include flags
265 * like GEN8_CTX_* cached in desc_template
267 desc &= U64_MAX << 12;
268 desc |= ctx->desc_template & ((1ULL << 12) - 1);
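/*
 * Copy the scanned guest ring buffer contents into the ring of the i915
 * request, so that what the hardware executes is the shadow copy rather
 * than the guest-writable original.
 */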
273 static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
275 struct intel_vgpu *vgpu = workload->vgpu;
276 void *shadow_ring_buffer_va;
279 /* allocate shadow ring buffer */
280 cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
282 gvt_vgpu_err("fail to alloc size=%ld shadow ring buffer\n",
287 shadow_ring_buffer_va = workload->shadow_ring_buffer_va;
289 /* get shadow ring buffer va */
290 workload->shadow_ring_buffer_va = cs;
292 memcpy(cs, shadow_ring_buffer_va,
295 cs += workload->rb_len / sizeof(u32);
296 intel_ring_advance(workload->req, cs);
301 static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
303 if (!wa_ctx->indirect_ctx.obj)
306 i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
307 i915_gem_object_put(wa_ctx->indirect_ctx.obj);
311 * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
312 * shadowing it as well, including the ring buffer, wa_ctx and context.
313 * @workload: an abstract entity for each execlist submission.
315 * This function is called before the workload is submitted to i915, to make
316 * sure the content of the workload is valid.
318 int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
320 struct intel_vgpu *vgpu = workload->vgpu;
321 struct intel_vgpu_submission *s = &vgpu->submission;
322 struct i915_gem_context *shadow_ctx = s->shadow_ctx;
323 struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
324 int ring_id = workload->ring_id;
325 struct intel_engine_cs *engine = dev_priv->engine[ring_id];
326 struct intel_ring *ring;
329 lockdep_assert_held(&dev_priv->drm.struct_mutex);
331 if (workload->shadowed)
334 shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
335 shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
336 GEN8_CTX_ADDRESSING_MODE_SHIFT;
338 if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated))
339 shadow_context_descriptor_update(shadow_ctx,
340 dev_priv->engine[ring_id]);
342 ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
346 if ((workload->ring_id == RCS) &&
347 (workload->wa_ctx.indirect_ctx.size != 0)) {
348 ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
353 /* pin the shadow context by GVT even though the shadow context will be
354 * pinned when i915 allocates the request. That is because GVT will update
355 * the guest context from the shadow context when the workload is completed,
356 * and at that moment i915 may already have unpinned the shadow context,
357 * making the shadow_ctx pages invalid. So GVT needs a pin of its own. After
358 * updating the guest context, GVT can unpin the shadow_ctx safely.
360 ring = engine->context_pin(engine, shadow_ctx);
363 gvt_vgpu_err("fail to pin shadow context\n");
367 ret = populate_shadow_context(workload);
370 workload->shadowed = true;
374 engine->context_unpin(engine, shadow_ctx);
376 release_shadow_wa_ctx(&workload->wa_ctx);
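/*
 * Allocate an i915 request on the shadow context for this workload and copy
 * the shadow ring buffer into it. On failure the pinned shadow context and
 * the wa_ctx shadow are released again.
 */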
381 static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
383 int ring_id = workload->ring_id;
384 struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
385 struct intel_engine_cs *engine = dev_priv->engine[ring_id];
386 struct drm_i915_gem_request *rq;
387 struct intel_vgpu *vgpu = workload->vgpu;
388 struct intel_vgpu_submission *s = &vgpu->submission;
389 struct i915_gem_context *shadow_ctx = s->shadow_ctx;
392 rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
394 gvt_vgpu_err("fail to allocate gem request\n");
399 gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
401 workload->req = i915_gem_request_get(rq);
402 ret = copy_workload_to_ring_buffer(workload);
408 engine->context_unpin(engine, shadow_ctx);
409 release_shadow_wa_ctx(&workload->wa_ctx);
413 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload);
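/*
 * Pin each shadow batch buffer object into the GGTT and relocate the
 * recorded batch-buffer start command (bb_start_cmd_va) so that it points
 * at the shadow copy rather than the guest's original batch buffer, then
 * tie the objects to the workload's request.
 */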
415 static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
417 struct intel_gvt *gvt = workload->vgpu->gvt;
418 const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd;
419 struct intel_vgpu_shadow_bb *bb;
422 list_for_each_entry(bb, &workload->shadow_bb, list) {
423 bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0);
424 if (IS_ERR(bb->vma)) {
425 ret = PTR_ERR(bb->vma);
429 /* relocate shadow batch buffer */
430 bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
431 if (gmadr_bytes == 8)
432 bb->bb_start_cmd_va[2] = 0;
434 /* No one is going to touch shadow bb from now on. */
435 if (bb->clflush & CLFLUSH_AFTER) {
436 drm_clflush_virt_range(bb->va, bb->obj->base.size);
437 bb->clflush &= ~CLFLUSH_AFTER;
440 ret = i915_gem_object_set_to_gtt_domain(bb->obj, false);
444 i915_gem_obj_finish_shmem_access(bb->obj);
445 bb->accessing = false;
447 i915_vma_move_to_active(bb->vma, workload->req, 0);
451 release_shadow_batch_buffer(workload);
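/*
 * Write the shadow GGTT addresses of the per-context and indirect context
 * workaround batch buffers into the shadow ring context, keeping the
 * non-address bits of those registers intact.
 */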
455 static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
457 struct intel_vgpu_workload *workload = container_of(wa_ctx,
458 struct intel_vgpu_workload,
460 int ring_id = workload->ring_id;
461 struct intel_vgpu_submission *s = &workload->vgpu->submission;
462 struct i915_gem_context *shadow_ctx = s->shadow_ctx;
463 struct drm_i915_gem_object *ctx_obj =
464 shadow_ctx->engine[ring_id].state->obj;
465 struct execlist_ring_context *shadow_ring_context;
468 page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
469 shadow_ring_context = kmap_atomic(page);
471 shadow_ring_context->bb_per_ctx_ptr.val =
472 (shadow_ring_context->bb_per_ctx_ptr.val &
473 (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
474 shadow_ring_context->rcs_indirect_ctx.val =
475 (shadow_ring_context->rcs_indirect_ctx.val &
476 (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;
478 kunmap_atomic(shadow_ring_context);
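/*
 * Pin the shadow indirect context object into the GGTT, pick up the
 * per-context wa batch address that was stored right after the indirect
 * context image, and propagate both shadow addresses into the shadow ring
 * context via update_wa_ctx_2_shadow_ctx().
 */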
482 static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
484 struct i915_vma *vma;
485 unsigned char *per_ctx_va =
486 (unsigned char *)wa_ctx->indirect_ctx.shadow_va +
487 wa_ctx->indirect_ctx.size;
489 if (wa_ctx->indirect_ctx.size == 0)
492 vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
493 0, CACHELINE_BYTES, 0);
497 /* FIXME: we are not tracking our pinned VMA, leaving it
498 * up to the core to fix up the stray pin_count upon free. */
502 wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma);
504 wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
505 memset(per_ctx_va, 0, CACHELINE_BYTES);
507 update_wa_ctx_2_shadow_ctx(wa_ctx);
511 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
513 struct intel_vgpu *vgpu = workload->vgpu;
514 struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
515 struct intel_vgpu_shadow_bb *bb, *pos;
517 if (list_empty(&workload->shadow_bb))
520 bb = list_first_entry(&workload->shadow_bb,
521 struct intel_vgpu_shadow_bb, list);
523 mutex_lock(&dev_priv->drm.struct_mutex);
525 list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
528 i915_gem_obj_finish_shmem_access(bb->obj);
530 if (bb->va && !IS_ERR(bb->va))
531 i915_gem_object_unpin_map(bb->obj);
533 if (bb->vma && !IS_ERR(bb->vma)) {
534 i915_vma_unpin(bb->vma);
535 i915_vma_close(bb->vma);
537 __i915_gem_object_release_unless_active(bb->obj);
543 mutex_unlock(&dev_priv->drm.struct_mutex);
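/*
 * Final preparation before dispatch: pin the shadow PPGTT, flush pending
 * guest page table updates, generate the i915 request, and set up the
 * shadow batch buffers and wa_ctx. Any failure unwinds the earlier steps
 * in reverse order.
 */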
546 static int prepare_workload(struct intel_vgpu_workload *workload)
548 struct intel_vgpu *vgpu = workload->vgpu;
551 ret = intel_vgpu_pin_mm(workload->shadow_mm);
553 gvt_vgpu_err("fail to vgpu pin mm\n");
557 ret = intel_vgpu_sync_oos_pages(workload->vgpu);
559 gvt_vgpu_err("fail to vgpu sync oos pages\n");
563 ret = intel_vgpu_flush_post_shadow(workload->vgpu);
565 gvt_vgpu_err("fail to flush post shadow\n");
569 ret = intel_gvt_generate_request(workload);
571 gvt_vgpu_err("fail to generate request\n");
575 ret = prepare_shadow_batch_buffer(workload);
577 gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n");
581 ret = prepare_shadow_wa_ctx(&workload->wa_ctx);
583 gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n");
584 goto err_shadow_batch;
587 if (workload->prepare) {
588 ret = workload->prepare(workload);
590 goto err_shadow_wa_ctx;
595 release_shadow_wa_ctx(&workload->wa_ctx);
597 release_shadow_batch_buffer(workload);
599 intel_vgpu_unpin_mm(workload->shadow_mm);
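/*
 * Dispatch a workload to i915: scan and shadow it, run the preparation
 * steps above and, if everything succeeded, add the request so the shadow
 * context is submitted to hardware. All of this runs under struct_mutex.
 */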
603 static int dispatch_workload(struct intel_vgpu_workload *workload)
605 struct intel_vgpu *vgpu = workload->vgpu;
606 struct intel_vgpu_submission *s = &vgpu->submission;
607 struct i915_gem_context *shadow_ctx = s->shadow_ctx;
608 struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
609 int ring_id = workload->ring_id;
610 struct intel_engine_cs *engine = dev_priv->engine[ring_id];
613 gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
616 mutex_lock(&dev_priv->drm.struct_mutex);
618 ret = intel_gvt_scan_and_shadow_workload(workload);
622 ret = prepare_workload(workload);
624 engine->context_unpin(engine, shadow_ctx);
630 workload->status = ret;
632 if (!IS_ERR_OR_NULL(workload->req)) {
633 gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
634 ring_id, workload->req);
635 i915_add_request(workload->req);
636 workload->dispatched = true;
639 mutex_unlock(&dev_priv->drm.struct_mutex);
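/*
 * Pick the next workload for @ring_id under gvt->lock. Nothing is picked
 * while there is no current vGPU, a reschedule is pending, or the queue is
 * empty; an already-current workload is returned again so that a failed
 * dispatch can be retried.
 */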
643 static struct intel_vgpu_workload *pick_next_workload(
644 struct intel_gvt *gvt, int ring_id)
646 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
647 struct intel_vgpu_workload *workload = NULL;
649 mutex_lock(&gvt->lock);
652 * no current vgpu / will be scheduled out / no workload
655 if (!scheduler->current_vgpu) {
656 gvt_dbg_sched("ring id %d stop - no current vgpu\n", ring_id);
660 if (scheduler->need_reschedule) {
661 gvt_dbg_sched("ring id %d stop - will reschedule\n", ring_id);
665 if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
669 * still have a current workload; maybe the workload dispatcher
670 * failed to submit it for some reason, so resubmit it.
672 if (scheduler->current_workload[ring_id]) {
673 workload = scheduler->current_workload[ring_id];
674 gvt_dbg_sched("ring id %d still have current workload %p\n",
680 * pick a workload as the current workload
681 * once the current workload is set, schedule policy routines
682 * will wait until the current workload is finished when trying to
683 * schedule out a vgpu.
685 scheduler->current_workload[ring_id] = container_of(
686 workload_q_head(scheduler->current_vgpu, ring_id)->next,
687 struct intel_vgpu_workload, list);
689 workload = scheduler->current_workload[ring_id];
691 gvt_dbg_sched("ring id %d pick new workload %p\n", ring_id, workload);
693 atomic_inc(&workload->vgpu->submission.running_workload_num);
695 mutex_unlock(&gvt->lock);
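/*
 * Mirror of populate_shadow_context(): once a workload has completed, write
 * the shadow context pages and the tracked execlist registers back into the
 * guest ring context through the hypervisor GPA interface, so the guest
 * sees the results of its submission.
 */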
699 static void update_guest_context(struct intel_vgpu_workload *workload)
701 struct intel_vgpu *vgpu = workload->vgpu;
702 struct intel_gvt *gvt = vgpu->gvt;
703 struct intel_vgpu_submission *s = &vgpu->submission;
704 struct i915_gem_context *shadow_ctx = s->shadow_ctx;
705 int ring_id = workload->ring_id;
706 struct drm_i915_gem_object *ctx_obj =
707 shadow_ctx->engine[ring_id].state->obj;
708 struct execlist_ring_context *shadow_ring_context;
711 unsigned long context_gpa, context_page_num;
714 gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
715 workload->ctx_desc.lrca);
717 context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
719 context_page_num = context_page_num >> PAGE_SHIFT;
721 if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
722 context_page_num = 19;
726 while (i < context_page_num) {
727 context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
728 (u32)((workload->ctx_desc.lrca + i) <<
729 I915_GTT_PAGE_SHIFT));
730 if (context_gpa == INTEL_GVT_INVALID_ADDR) {
731 gvt_vgpu_err("invalid guest context descriptor\n");
735 page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
737 intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
743 intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
744 RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
746 page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
747 shadow_ring_context = kmap(page);
749 #define COPY_REG(name) \
750 intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
751 RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
754 COPY_REG(ctx_timestamp);
758 intel_gvt_hypervisor_write_gpa(vgpu,
759 workload->ring_context_gpa +
760 sizeof(*shadow_ring_context),
761 (void *)shadow_ring_context +
762 sizeof(*shadow_ring_context),
763 I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
768 static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
770 struct intel_vgpu_submission *s = &vgpu->submission;
771 struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
772 struct intel_engine_cs *engine;
773 struct intel_vgpu_workload *pos, *n;
776 /* free the unsubmitted workloads in the queues. */
777 for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
778 list_for_each_entry_safe(pos, n,
779 &s->workload_q_head[engine->id], list) {
780 list_del_init(&pos->list);
781 intel_vgpu_destroy_workload(pos);
783 clear_bit(engine->id, s->shadow_ctx_desc_updated);
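/*
 * Completion path for the current workload on @ring_id: wait for the shadow
 * context to be scheduled out, derive the final status from the request,
 * copy the shadow context back to the guest, fire pending virtual events,
 * and hand the workload back to its submission interface via ->complete().
 */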
787 static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
789 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
790 struct intel_vgpu_workload *workload =
791 scheduler->current_workload[ring_id];
792 struct intel_vgpu *vgpu = workload->vgpu;
793 struct intel_vgpu_submission *s = &vgpu->submission;
796 mutex_lock(&gvt->lock);
798 /* For a workload with a request, we need to wait for the context
799 * switch to make sure the request is completed.
800 * For a workload without a request, complete the workload directly.
803 struct drm_i915_private *dev_priv =
804 workload->vgpu->gvt->dev_priv;
805 struct intel_engine_cs *engine =
806 dev_priv->engine[workload->ring_id];
807 wait_event(workload->shadow_ctx_status_wq,
808 !atomic_read(&workload->shadow_ctx_active));
810 /* If this request caused a GPU hang, req->fence.error will
811 * be set to -EIO. Use -EIO as the workload status so
812 * that, when this request caused a GPU hang, we don't trigger
813 * a context switch interrupt to the guest.
815 if (likely(workload->status == -EINPROGRESS)) {
816 if (workload->req->fence.error == -EIO)
817 workload->status = -EIO;
819 workload->status = 0;
822 i915_gem_request_put(fetch_and_zero(&workload->req));
824 if (!workload->status && !(vgpu->resetting_eng &
825 ENGINE_MASK(ring_id))) {
826 update_guest_context(workload);
828 for_each_set_bit(event, workload->pending_events,
830 intel_vgpu_trigger_virtual_event(vgpu, event);
832 mutex_lock(&dev_priv->drm.struct_mutex);
833 /* unpin shadow ctx as the shadow_ctx update is done */
834 engine->context_unpin(engine, s->shadow_ctx);
835 mutex_unlock(&dev_priv->drm.struct_mutex);
838 gvt_dbg_sched("ring id %d complete workload %p status %d\n",
839 ring_id, workload, workload->status);
841 scheduler->current_workload[ring_id] = NULL;
843 list_del_init(&workload->list);
845 if (!workload->status) {
846 release_shadow_batch_buffer(workload);
847 release_shadow_wa_ctx(&workload->wa_ctx);
850 if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
851 /* If workload->status is not successful, it means the HW GPU
852 * hit a hang or something went wrong with i915/GVT,
853 * and GVT won't inject a context switch interrupt to the guest.
854 * So this error is effectively a vGPU hang from the guest's point
855 * of view, and we should emulate a vGPU hang accordingly. If
856 * there are pending workloads which were already submitted
857 * from the guest, we should clean them up like the HW GPU does.
859 * If we are in the middle of an engine reset, the pending
860 * workloads won't be submitted to the HW GPU and will be
861 * cleaned up later during the reset process, so doing
862 * the workload cleanup here doesn't have any impact.
864 clean_workloads(vgpu, ENGINE_MASK(ring_id));
867 workload->complete(workload);
869 atomic_dec(&s->running_workload_num);
870 wake_up(&scheduler->workload_complete_wq);
872 if (gvt->scheduler.need_reschedule)
873 intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
875 mutex_unlock(&gvt->lock);
878 struct workload_thread_param {
879 struct intel_gvt *gvt;
883 static int workload_thread(void *priv)
885 struct workload_thread_param *p = (struct workload_thread_param *)priv;
886 struct intel_gvt *gvt = p->gvt;
887 int ring_id = p->ring_id;
888 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
889 struct intel_vgpu_workload *workload = NULL;
890 struct intel_vgpu *vgpu = NULL;
892 bool need_force_wake = IS_SKYLAKE(gvt->dev_priv)
893 || IS_KABYLAKE(gvt->dev_priv);
894 DEFINE_WAIT_FUNC(wait, woken_wake_function);
898 gvt_dbg_core("workload thread for ring %d started\n", ring_id);
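/*
 * Per-ring scheduler loop: sleep until a workload is queued, dispatch it
 * under struct_mutex, wait for its request to complete, then run the
 * completion path. Runtime PM (and forcewake on SKL/KBL) is held across the
 * dispatch and wait.
 */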
900 while (!kthread_should_stop()) {
901 add_wait_queue(&scheduler->waitq[ring_id], &wait);
903 workload = pick_next_workload(gvt, ring_id);
906 wait_woken(&wait, TASK_INTERRUPTIBLE,
907 MAX_SCHEDULE_TIMEOUT);
908 } while (!kthread_should_stop());
909 remove_wait_queue(&scheduler->waitq[ring_id], &wait);
914 gvt_dbg_sched("ring id %d next workload %p vgpu %d\n",
915 workload->ring_id, workload,
918 intel_runtime_pm_get(gvt->dev_priv);
920 gvt_dbg_sched("ring id %d will dispatch workload %p\n",
921 workload->ring_id, workload);
924 intel_uncore_forcewake_get(gvt->dev_priv,
927 mutex_lock(&gvt->lock);
928 ret = dispatch_workload(workload);
929 mutex_unlock(&gvt->lock);
932 vgpu = workload->vgpu;
933 gvt_vgpu_err("fail to dispatch workload, skip\n");
937 gvt_dbg_sched("ring id %d wait workload %p\n",
938 workload->ring_id, workload);
939 i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
942 gvt_dbg_sched("will complete workload %p, status: %d\n",
943 workload, workload->status);
945 complete_current_workload(gvt, ring_id);
948 intel_uncore_forcewake_put(gvt->dev_priv,
951 intel_runtime_pm_put(gvt->dev_priv);
952 if (ret && (vgpu_is_vm_unhealthy(ret)))
953 enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
958 void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
960 struct intel_vgpu_submission *s = &vgpu->submission;
961 struct intel_gvt *gvt = vgpu->gvt;
962 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
964 if (atomic_read(&s->running_workload_num)) {
965 gvt_dbg_sched("wait vgpu idle\n");
967 wait_event(scheduler->workload_complete_wq,
968 !atomic_read(&s->running_workload_num));
972 void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt)
974 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
975 struct intel_engine_cs *engine;
976 enum intel_engine_id i;
978 gvt_dbg_core("clean workload scheduler\n");
980 for_each_engine(engine, gvt->dev_priv, i) {
981 atomic_notifier_chain_unregister(
982 &engine->context_status_notifier,
983 &gvt->shadow_ctx_notifier_block[i]);
984 kthread_stop(scheduler->thread[i]);
988 int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
990 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
991 struct workload_thread_param *param = NULL;
992 struct intel_engine_cs *engine;
993 enum intel_engine_id i;
996 gvt_dbg_core("init workload scheduler\n");
998 init_waitqueue_head(&scheduler->workload_complete_wq);
1000 for_each_engine(engine, gvt->dev_priv, i) {
1001 init_waitqueue_head(&scheduler->waitq[i]);
1003 param = kzalloc(sizeof(*param), GFP_KERNEL);
1012 scheduler->thread[i] = kthread_run(workload_thread, param,
1013 "gvt workload %d", i);
1014 if (IS_ERR(scheduler->thread[i])) {
1015 gvt_err("fail to create workload thread\n");
1016 ret = PTR_ERR(scheduler->thread[i]);
1020 gvt->shadow_ctx_notifier_block[i].notifier_call =
1021 shadow_context_status_change;
1022 atomic_notifier_chain_register(&engine->context_status_notifier,
1023 &gvt->shadow_ctx_notifier_block[i]);
1027 intel_gvt_clean_workload_scheduler(gvt);
1034 * intel_vgpu_clean_submission - free submission-related resource for vGPU
1037 * This function is called when a vGPU is being destroyed.
1040 void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
1042 struct intel_vgpu_submission *s = &vgpu->submission;
1044 intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
1045 i915_gem_context_put(s->shadow_ctx);
1046 kmem_cache_destroy(s->workloads);
1051 * intel_vgpu_reset_submission - reset submission-related resource for vGPU
1053 * @engine_mask: engines expected to be reset
1055 * This function is called when a vGPU is being reset.
1058 void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
1059 unsigned long engine_mask)
1061 struct intel_vgpu_submission *s = &vgpu->submission;
1066 clean_workloads(vgpu, engine_mask);
1067 s->ops->reset(vgpu, engine_mask);
1071 * intel_vgpu_setup_submission - setup submission-related resource for vGPU
1074 * This function is called when a vGPU is being created.
1077 * Zero on success, negative error code if failed.
1080 int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
1082 struct intel_vgpu_submission *s = &vgpu->submission;
1083 enum intel_engine_id i;
1084 struct intel_engine_cs *engine;
1087 s->shadow_ctx = i915_gem_context_create_gvt(
1088 &vgpu->gvt->dev_priv->drm);
1089 if (IS_ERR(s->shadow_ctx))
1090 return PTR_ERR(s->shadow_ctx);
1092 if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv))
1093 s->shadow_ctx->priority = INT_MAX;
1095 bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
1097 s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
1098 sizeof(struct intel_vgpu_workload), 0,
1102 if (!s->workloads) {
1104 goto out_shadow_ctx;
1107 for_each_engine(engine, vgpu->gvt->dev_priv, i)
1108 INIT_LIST_HEAD(&s->workload_q_head[i]);
1110 atomic_set(&s->running_workload_num, 0);
1111 bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
1116 i915_gem_context_put(s->shadow_ctx);
1121 * intel_vgpu_select_submission_ops - select virtual submission interface
1123 * @interface: expected vGPU virtual submission interface
1125 * This function is called when guest configures submission interface.
1128 * Zero on success, negative error code if failed.
1131 int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
1132 unsigned long engine_mask,
1133 unsigned int interface)
1135 struct intel_vgpu_submission *s = &vgpu->submission;
1136 const struct intel_vgpu_submission_ops *ops[] = {
1137 [INTEL_VGPU_EXECLIST_SUBMISSION] =
1138 &intel_vgpu_execlist_submission_ops,
1142 if (WARN_ON(interface >= ARRAY_SIZE(ops)))
1145 if (WARN_ON(interface == 0 && engine_mask != ALL_ENGINES))
1149 s->ops->clean(vgpu, engine_mask);
1151 if (interface == 0) {
1153 s->virtual_submission_interface = 0;
1155 gvt_dbg_core("vgpu%d: remove submission ops\n", vgpu->id);
1159 ret = ops[interface]->init(vgpu, engine_mask);
1163 s->ops = ops[interface];
1164 s->virtual_submission_interface = interface;
1167 gvt_dbg_core("vgpu%d: activate ops [ %s ]\n",
1168 vgpu->id, s->ops->name);
1174 * intel_vgpu_destroy_workload - destroy a vGPU workload
1177 * This function is called when destroying a vGPU workload.
1180 void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
1182 struct intel_vgpu_submission *s = &workload->vgpu->submission;
1184 if (workload->shadow_mm)
1185 intel_gvt_mm_unreference(workload->shadow_mm);
1187 kmem_cache_free(s->workloads, workload);
1190 static struct intel_vgpu_workload *
1191 alloc_workload(struct intel_vgpu *vgpu)
1193 struct intel_vgpu_submission *s = &vgpu->submission;
1194 struct intel_vgpu_workload *workload;
1196 workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL);
1198 return ERR_PTR(-ENOMEM);
1200 INIT_LIST_HEAD(&workload->list);
1201 INIT_LIST_HEAD(&workload->shadow_bb);
1203 init_waitqueue_head(&workload->shadow_ctx_status_wq);
1204 atomic_set(&workload->shadow_ctx_active, 0);
1206 workload->status = -EINPROGRESS;
1207 workload->shadowed = false;
1208 workload->vgpu = vgpu;
1213 #define RING_CTX_OFF(x) \
1214 offsetof(struct execlist_ring_context, x)
1216 static void read_guest_pdps(struct intel_vgpu *vgpu,
1217 u64 ring_context_gpa, u32 pdp[8])
1222 gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);
1224 for (i = 0; i < 8; i++)
1225 intel_gvt_hypervisor_read_gpa(vgpu,
1226 gpa + i * 8, &pdp[7 - i], 4);
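/*
 * Read the guest PPGTT root pointers from the ring context, then look up or
 * create the matching shadow PPGTT mm that this workload will run on. Only
 * the legacy 32-bit and 64-bit addressing modes are handled; advanced (SVM)
 * contexts are rejected.
 */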
1229 static int prepare_mm(struct intel_vgpu_workload *workload)
1231 struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
1232 struct intel_vgpu_mm *mm;
1233 struct intel_vgpu *vgpu = workload->vgpu;
1234 int page_table_level;
1237 if (desc->addressing_mode == 1) { /* legacy 32-bit */
1238 page_table_level = 3;
1239 } else if (desc->addressing_mode == 3) { /* legacy 64-bit */
1240 page_table_level = 4;
1242 gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
1246 read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);
1248 mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
1250 intel_gvt_mm_reference(mm);
1253 mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
1254 pdp, page_table_level, 0);
1256 gvt_vgpu_err("fail to create mm object.\n");
1260 workload->shadow_mm = mm;
1264 #define same_context(a, b) (((a)->context_id == (b)->context_id) && \
1265 ((a)->lrca == (b)->lrca))
1267 #define get_last_workload(q) \
1268 (list_empty(q) ? NULL : container_of(q->prev, \
1269 struct intel_vgpu_workload, list))
1271 * intel_vgpu_create_workload - create a vGPU workload
1273 * @desc: a guest context descriptor
1275 * This function is called when creating a vGPU workload.
1278 * struct intel_vgpu_workload * on success, or an ERR_PTR-encoded negative
1279 * error code on failure.
1282 struct intel_vgpu_workload *
1283 intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
1284 struct execlist_ctx_descriptor_format *desc)
1286 struct intel_vgpu_submission *s = &vgpu->submission;
1287 struct list_head *q = workload_q_head(vgpu, ring_id);
1288 struct intel_vgpu_workload *last_workload = get_last_workload(q);
1289 struct intel_vgpu_workload *workload = NULL;
1290 struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
1291 u64 ring_context_gpa;
1292 u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
1295 ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
1296 (u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT));
1297 if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
1298 gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca);
1299 return ERR_PTR(-EINVAL);
1302 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1303 RING_CTX_OFF(ring_header.val), &head, 4);
1305 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1306 RING_CTX_OFF(ring_tail.val), &tail, 4);
1308 head &= RB_HEAD_OFF_MASK;
1309 tail &= RB_TAIL_OFF_MASK;
1311 if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
1312 gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
1313 gvt_dbg_el("ctx head %x real head %lx\n", head,
1314 last_workload->rb_tail);
1316 * cannot use guest context head pointer here,
1317 * as it might not be updated at this time
1319 head = last_workload->rb_tail;
1322 gvt_dbg_el("ring id %d begin a new workload\n", ring_id);
1324 /* record some ring buffer register values for scan and shadow */
1325 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1326 RING_CTX_OFF(rb_start.val), &start, 4);
1327 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1328 RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
1329 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1330 RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
1332 workload = alloc_workload(vgpu);
1333 if (IS_ERR(workload))
1336 workload->ring_id = ring_id;
1337 workload->ctx_desc = *desc;
1338 workload->ring_context_gpa = ring_context_gpa;
1339 workload->rb_head = head;
1340 workload->rb_tail = tail;
1341 workload->rb_start = start;
1342 workload->rb_ctl = ctl;
1344 if (ring_id == RCS) {
1345 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1346 RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
1347 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1348 RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);
1350 workload->wa_ctx.indirect_ctx.guest_gma =
1351 indirect_ctx & INDIRECT_CTX_ADDR_MASK;
1352 workload->wa_ctx.indirect_ctx.size =
1353 (indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
1355 workload->wa_ctx.per_ctx.guest_gma =
1356 per_ctx & PER_CTX_ADDR_MASK;
1357 workload->wa_ctx.per_ctx.valid = per_ctx & 1;
1360 gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
1361 workload, ring_id, head, tail, start, ctl);
1363 ret = prepare_mm(workload);
1365 kmem_cache_free(s->workloads, workload);
1366 return ERR_PTR(ret);
1369 /* Only scan and shadow the first workload in the queue
1370 * as there is only one pre-allocated buf-obj for shadow.
1372 if (list_empty(workload_q_head(vgpu, ring_id))) {
1373 intel_runtime_pm_get(dev_priv);
1374 mutex_lock(&dev_priv->drm.struct_mutex);
1375 ret = intel_gvt_scan_and_shadow_workload(workload);
1376 mutex_unlock(&dev_priv->drm.struct_mutex);
1377 intel_runtime_pm_put(dev_priv);
1380 if (ret && (vgpu_is_vm_unhealthy(ret))) {
1381 enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
1382 intel_vgpu_destroy_workload(workload);
1383 return ERR_PTR(ret);
1390 * intel_vgpu_queue_workload - Queue a vGPU workload
1391 * @workload: the workload to queue in
1393 void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload)
1395 list_add_tail(&workload->list,
1396 workload_q_head(workload->vgpu, workload->ring_id));
1397 intel_gvt_kick_schedule(workload->vgpu->gvt);
1398 wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->ring_id]);