/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

struct vfio_region {
        u32                             type;
        u32                             subtype;
        size_t                          size;
        u32                             flags;
};

struct kvmgt_pgfn {
        gfn_t gfn;
        struct hlist_node hnode;
};

struct kvmgt_guest_info {
        struct kvm *kvm;
        struct intel_vgpu *vgpu;
        struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
        struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

struct gvt_dma {
        struct rb_node node;
        gfn_t gfn;
        kvm_pfn_t pfn;
};

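/*
 * A valid vGPU handle is the address of a kvmgt_guest_info structure;
 * any value that fits entirely in the low byte (e.g. 0) cannot be a
 * real pointer and is treated as invalid.
 */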
static inline bool handle_valid(unsigned long handle)
{
        return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);

static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct rb_node *node = vgpu->vdev.cache.rb_node;
        struct gvt_dma *ret = NULL;

        while (node) {
                struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

                if (gfn < itr->gfn)
                        node = node->rb_left;
                else if (gfn > itr->gfn)
                        node = node->rb_right;
                else {
                        ret = itr;
                        goto out;
                }
        }

out:
        return ret;
}

static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct gvt_dma *entry;
        kvm_pfn_t pfn;

        mutex_lock(&vgpu->vdev.cache_lock);

        entry = __gvt_cache_find(vgpu, gfn);
        pfn = (entry == NULL) ? 0 : entry->pfn;

        mutex_unlock(&vgpu->vdev.cache_lock);
        return pfn;
}

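/*
 * Remember a gfn->pfn translation for a pinned page.  The cache is a
 * per-vGPU rb-tree keyed by gfn; duplicate insertions are silently
 * dropped, and allocation failure is tolerated since the page can
 * simply be pinned again on the next lookup.
 */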
static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
{
        struct gvt_dma *new, *itr;
        struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

        new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
        if (!new)
                return;

        new->gfn = gfn;
        new->pfn = pfn;

        mutex_lock(&vgpu->vdev.cache_lock);
        while (*link) {
                parent = *link;
                itr = rb_entry(parent, struct gvt_dma, node);

                if (gfn == itr->gfn)
                        goto out;
                else if (gfn < itr->gfn)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }

        rb_link_node(&new->node, parent, link);
        rb_insert_color(&new->node, &vgpu->vdev.cache);
        mutex_unlock(&vgpu->vdev.cache_lock);
        return;

out:
        mutex_unlock(&vgpu->vdev.cache_lock);
        kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
                                struct gvt_dma *entry)
{
        rb_erase(&entry->node, &vgpu->vdev.cache);
        kfree(entry);
}

static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct device *dev = mdev_dev(vgpu->vdev.mdev);
        struct gvt_dma *this;
        unsigned long g1;
        int rc;

        mutex_lock(&vgpu->vdev.cache_lock);
        this = __gvt_cache_find(vgpu, gfn);
        if (!this) {
                mutex_unlock(&vgpu->vdev.cache_lock);
                return;
        }

        g1 = gfn;
        rc = vfio_unpin_pages(dev, &g1, 1);
        WARN_ON(rc != 1);
        __gvt_cache_remove_entry(vgpu, this);
        mutex_unlock(&vgpu->vdev.cache_lock);
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
        vgpu->vdev.cache = RB_ROOT;
        mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
        struct gvt_dma *dma;
        struct rb_node *node = NULL;
        struct device *dev = mdev_dev(vgpu->vdev.mdev);
        unsigned long gfn;

        mutex_lock(&vgpu->vdev.cache_lock);
        while ((node = rb_first(&vgpu->vdev.cache))) {
                dma = rb_entry(node, struct gvt_dma, node);
                gfn = dma->gfn;

                vfio_unpin_pages(dev, &gfn, 1);
                __gvt_cache_remove_entry(vgpu, dma);
        }
        mutex_unlock(&vgpu->vdev.cache_lock);
}

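/*
 * mdev exposes each type to userspace as "<parent driver name>-<type
 * name>", so skip the driver-name prefix and the '-' separator before
 * matching against the GVT type table.
 */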
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
                const char *name)
{
        int i;
        struct intel_vgpu_type *t;
        const char *driver_name = dev_driver_string(
                        &gvt->dev_priv->drm.pdev->dev);

        for (i = 0; i < gvt->num_types; i++) {
                t = &gvt->types[i];
                if (!strncmp(t->name, name + strlen(driver_name) + 1,
                        sizeof(t->name)))
                        return t;
        }

        return NULL;
}

static ssize_t available_instances_show(struct kobject *kobj,
                                        struct device *dev, char *buf)
{
        struct intel_vgpu_type *type;
        unsigned int num = 0;
        void *gvt = kdev_to_i915(dev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type)
                num = 0;
        else
                num = type->avail_instance;

        return sprintf(buf, "%u\n", num);
}

static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
                char *buf)
{
        return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t description_show(struct kobject *kobj, struct device *dev,
                char *buf)
{
        struct intel_vgpu_type *type;
        void *gvt = kdev_to_i915(dev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type)
                return 0;

        return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
                                "fence: %d\n",
                                BYTES_TO_MB(type->low_gm_size),
                                BYTES_TO_MB(type->high_gm_size),
                                type->fence);
}

static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *type_attrs[] = {
        &mdev_type_attr_available_instances.attr,
        &mdev_type_attr_device_api.attr,
        &mdev_type_attr_description.attr,
        NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
        [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
        int i, j;
        struct intel_vgpu_type *type;
        struct attribute_group *group;

        for (i = 0; i < gvt->num_types; i++) {
                type = &gvt->types[i];

                group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
                if (WARN_ON(!group))
                        goto unwind;

                group->name = type->name;
                group->attrs = type_attrs;
                intel_vgpu_type_groups[i] = group;
        }

        return true;

unwind:
        for (j = 0; j < i; j++) {
                group = intel_vgpu_type_groups[j];
                kfree(group);
        }

        return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
        int i;
        struct attribute_group *group;

        for (i = 0; i < gvt->num_types; i++) {
                group = intel_vgpu_type_groups[i];
                kfree(group);
        }
}

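/*
 * The protect table records which guest page frames are currently
 * write-protected for this guest, hashed by gfn.  It mirrors the state
 * installed in KVM's page-track machinery so lookups stay local.
 */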
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
        hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
        struct kvmgt_pgfn *p;
        struct hlist_node *tmp;
        int i;

        hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
                hash_del(&p->hnode);
                kfree(p);
        }
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
        struct kvmgt_pgfn *p, *res = NULL;

        hash_for_each_possible(info->ptable, p, hnode, gfn) {
                if (gfn == p->gfn) {
                        res = p;
                        break;
                }
        }

        return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
                                gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        p = __kvmgt_protect_table_find(info, gfn);
        return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        if (kvmgt_gfn_is_write_protected(info, gfn))
                return;

        p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
        if (WARN(!p, "gfn: 0x%llx\n", gfn))
                return;

        p->gfn = gfn;
        hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
                                gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        p = __kvmgt_protect_table_find(info, gfn);
        if (p) {
                hash_del(&p->hnode);
                kfree(p);
        }
}

static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu;
        struct intel_vgpu_type *type;
        struct device *pdev;
        void *gvt;
        int ret;

        pdev = mdev_parent_dev(mdev);
        gvt = kdev_to_i915(pdev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type) {
                gvt_err("failed to find type %s to create\n",
                                                kobject_name(kobj));
                ret = -EINVAL;
                goto out;
        }

        vgpu = intel_gvt_ops->vgpu_create(gvt, type);
        if (IS_ERR_OR_NULL(vgpu)) {
                ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
                gvt_err("failed to create intel vgpu: %d\n", ret);
                goto out;
        }

        INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);

        vgpu->vdev.mdev = mdev;
        mdev_set_drvdata(mdev, vgpu);

        gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
                     dev_name(mdev_dev(mdev)));
        ret = 0;

out:
        return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        if (handle_valid(vgpu->handle))
                return -EBUSY;

        intel_gvt_ops->vgpu_destroy(vgpu);
        return 0;
}

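/*
 * When the VFIO IOMMU layer unmaps a range of guest IOVA space, any
 * pages we pinned in that range must be unpinned and dropped from the
 * gfn->pfn cache.
 */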
static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
{
        struct intel_vgpu *vgpu = container_of(nb,
                                        struct intel_vgpu,
                                        vdev.iommu_notifier);

        if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
                struct vfio_iommu_type1_dma_unmap *unmap = data;
                unsigned long gfn, end_gfn;

                gfn = unmap->iova >> PAGE_SHIFT;
                end_gfn = gfn + unmap->size / PAGE_SIZE;

                while (gfn < end_gfn)
                        gvt_cache_remove(vgpu, gfn++);
        }

        return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
{
        struct intel_vgpu *vgpu = container_of(nb,
                                        struct intel_vgpu,
                                        vdev.group_notifier);

        /* the only action we care about */
        if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
                vgpu->vdev.kvm = data;

                if (!data)
                        schedule_work(&vgpu->vdev.release_work);
        }

        return NOTIFY_OK;
}

static int intel_vgpu_open(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned long events;
        int ret;

        vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
        vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

        events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
        ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
                                &vgpu->vdev.iommu_notifier);
        if (ret != 0) {
                gvt_err("vfio_register_notifier for iommu failed: %d\n", ret);
                goto out;
        }

        events = VFIO_GROUP_NOTIFY_SET_KVM;
        ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
                                &vgpu->vdev.group_notifier);
        if (ret != 0) {
                gvt_err("vfio_register_notifier for group failed: %d\n", ret);
                goto undo_iommu;
        }

        ret = kvmgt_guest_init(mdev);
        if (ret)
                goto undo_group;

        atomic_set(&vgpu->vdev.released, 0);
        return ret;

undo_group:
        vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
                                        &vgpu->vdev.group_notifier);

undo_iommu:
        vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
                                        &vgpu->vdev.iommu_notifier);
out:
        return ret;
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
        struct kvmgt_guest_info *info;
        int ret;

        if (!handle_valid(vgpu->handle))
                return;

        if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
                return;

        ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
                                        &vgpu->vdev.iommu_notifier);
        WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

        ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
                                        &vgpu->vdev.group_notifier);
        WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

        info = (struct kvmgt_guest_info *)vgpu->handle;
        kvmgt_guest_exit(info);

        vgpu->vdev.kvm = NULL;
        vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        __intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
        struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
                                        vdev.release_work);

        __intel_vgpu_release(vgpu);
}

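/*
 * Read the guest-programmed BAR0 base out of the virtual PCI config
 * space.  For a 64-bit memory BAR the next DWORD holds the upper 32
 * bits; 1M and unknown memory types are treated as 32-bit BARs.
 */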
static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
{
        u32 start_lo, start_hi;
        u32 mem_type;
        int pos = PCI_BASE_ADDRESS_0;

        start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
                        PCI_BASE_ADDRESS_MEM_MASK;
        mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
                        PCI_BASE_ADDRESS_MEM_TYPE_MASK;

        switch (mem_type) {
        case PCI_BASE_ADDRESS_MEM_TYPE_64:
                start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
                                                + pos + 4));
                break;
        case PCI_BASE_ADDRESS_MEM_TYPE_32:
        case PCI_BASE_ADDRESS_MEM_TYPE_1M:
                /* 1M mem BAR treated as 32-bit BAR */
        default:
                /* unknown mem type treated as 32-bit BAR */
                start_hi = 0;
                break;
        }

        return ((u64)start_hi << 32) | start_lo;
}

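/*
 * Common backend for region reads and writes.  The VFIO region index
 * lives in the top bits of *ppos (see VFIO_PCI_OFFSET_SHIFT above) and
 * the offset within the region in the low bits; dispatch to config
 * space or MMIO emulation accordingly.
 */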
static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
                        size_t count, loff_t *ppos, bool is_write)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
        uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
        int ret = -EINVAL;

        if (index >= VFIO_PCI_NUM_REGIONS) {
                gvt_err("invalid index: %u\n", index);
                return -EINVAL;
        }

        switch (index) {
        case VFIO_PCI_CONFIG_REGION_INDEX:
                if (is_write)
                        ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
                                                buf, count);
                else
                        ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
                                                buf, count);
                break;
        case VFIO_PCI_BAR0_REGION_INDEX:
        case VFIO_PCI_BAR1_REGION_INDEX:
                if (is_write) {
                        uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

                        ret = intel_gvt_ops->emulate_mmio_write(vgpu,
                                                bar0_start + pos, buf, count);
                } else {
                        uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

                        ret = intel_gvt_ops->emulate_mmio_read(vgpu,
                                                bar0_start + pos, buf, count);
                }
                break;
        case VFIO_PCI_BAR2_REGION_INDEX:
        case VFIO_PCI_BAR3_REGION_INDEX:
        case VFIO_PCI_BAR4_REGION_INDEX:
        case VFIO_PCI_BAR5_REGION_INDEX:
        case VFIO_PCI_VGA_REGION_INDEX:
        case VFIO_PCI_ROM_REGION_INDEX:
        default:
                gvt_err("unsupported region: %u\n", index);
        }

        return ret == 0 ? count : ret;
}

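/*
 * Split a userspace read into naturally aligned 4/2/1-byte accesses so
 * the emulation layer always sees well-formed register-sized
 * transactions.  The write path below mirrors this logic.
 */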
static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
                        size_t count, loff_t *ppos)
{
        unsigned int done = 0;
        int ret;

        while (count) {
                size_t filled;

                if (count >= 4 && !(*ppos % 4)) {
                        u32 val;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 4;
                } else if (count >= 2 && !(*ppos % 2)) {
                        u16 val;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 2;
                } else {
                        u8 val;

                        ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
                                        false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 1;
                }

                count -= filled;
                done += filled;
                *ppos += filled;
                buf += filled;
        }

        return done;

read_err:
        return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
                                const char __user *buf,
                                size_t count, loff_t *ppos)
{
        unsigned int done = 0;
        int ret;

        while (count) {
                size_t filled;

                if (count >= 4 && !(*ppos % 4)) {
                        u32 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 4;
                } else if (count >= 2 && !(*ppos % 2)) {
                        u16 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, (char *)&val,
                                        sizeof(val), ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 2;
                } else {
                        u8 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, &val, sizeof(val),
                                        ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 1;
                }

                count -= filled;
                done += filled;
                *ppos += filled;
                buf += filled;
        }

        return done;
write_err:
        return -EFAULT;
}

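/*
 * Only BAR2 (the graphics memory aperture) may be mmap'ed; the mapping
 * is a direct remap of the vGPU's slice of the physical aperture into
 * the caller's address space.
 */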
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
        unsigned int index;
        u64 virtaddr;
        unsigned long req_size, pgoff = 0;
        pgprot_t pg_prot;
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
        if (index >= VFIO_PCI_ROM_REGION_INDEX)
                return -EINVAL;

        if (vma->vm_end < vma->vm_start)
                return -EINVAL;
        if ((vma->vm_flags & VM_SHARED) == 0)
                return -EINVAL;
        if (index != VFIO_PCI_BAR2_REGION_INDEX)
                return -EINVAL;

        pg_prot = vma->vm_page_prot;
        virtaddr = vma->vm_start;
        req_size = vma->vm_end - vma->vm_start;
        pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

        return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
        if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
                return 1;

        return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
                        unsigned int index, unsigned int start,
                        unsigned int count, uint32_t flags,
                        void *data)
{
        return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
                        unsigned int index, unsigned int start,
                        unsigned int count, uint32_t flags, void *data)
{
        return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
                unsigned int index, unsigned int start, unsigned int count,
                uint32_t flags, void *data)
{
        return 0;
}

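/*
 * Userspace hands us an eventfd for MSI delivery; keep the eventfd
 * context so kvmgt_inject_msi() can signal it when the vGPU raises an
 * interrupt.
 */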
static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
                unsigned int index, unsigned int start, unsigned int count,
                uint32_t flags, void *data)
{
        struct eventfd_ctx *trigger;

        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                int fd = *(int *)data;

                trigger = eventfd_ctx_fdget(fd);
                if (IS_ERR(trigger)) {
                        gvt_err("eventfd_ctx_fdget failed\n");
                        return PTR_ERR(trigger);
                }
                vgpu->vdev.msi_trigger = trigger;
        }

        return 0;
}

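/*
 * Dispatch a VFIO_DEVICE_SET_IRQS request to the handler matching the
 * IRQ index (INTx or MSI) and the requested action.
 */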
static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
                unsigned int index, unsigned int start, unsigned int count,
                void *data)
{
        int (*func)(struct intel_vgpu *vgpu, unsigned int index,
                        unsigned int start, unsigned int count, uint32_t flags,
                        void *data) = NULL;

        switch (index) {
        case VFIO_PCI_INTX_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                        func = intel_vgpu_set_intx_mask;
                        break;
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        func = intel_vgpu_set_intx_unmask;
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = intel_vgpu_set_intx_trigger;
                        break;
                }
                break;
        case VFIO_PCI_MSI_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        /* XXX Need masking support exported */
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = intel_vgpu_set_msi_trigger;
                        break;
                }
                break;
        }

        if (!func)
                return -ENOTTY;

        return func(vgpu, index, start, count, flags, data);
}

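/*
 * Device ioctl entry point.  Implements the standard VFIO device
 * ioctls: GET_INFO, GET_REGION_INFO (including the sparse-mmap
 * capability for the BAR2 aperture), GET_IRQ_INFO, SET_IRQS and RESET.
 */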
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
                             unsigned long arg)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned long minsz;

        gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

        if (cmd == VFIO_DEVICE_GET_INFO) {
                struct vfio_device_info info;

                minsz = offsetofend(struct vfio_device_info, num_irqs);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.flags = VFIO_DEVICE_FLAGS_PCI;
                info.flags |= VFIO_DEVICE_FLAGS_RESET;
                info.num_regions = VFIO_PCI_NUM_REGIONS;
                info.num_irqs = VFIO_PCI_NUM_IRQS;

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;

        } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
                struct vfio_region_info info;
                struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
                int i, ret;
                struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
                size_t size;
                int nr_areas = 1;
                int cap_type_id;

                minsz = offsetofend(struct vfio_region_info, offset);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                switch (info.index) {
                case VFIO_PCI_CONFIG_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
                        info.flags = VFIO_REGION_INFO_FLAG_READ |
                                     VFIO_REGION_INFO_FLAG_WRITE;
                        break;
                case VFIO_PCI_BAR0_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = vgpu->cfg_space.bar[info.index].size;
                        if (!info.size) {
                                info.flags = 0;
                                break;
                        }

                        info.flags = VFIO_REGION_INFO_FLAG_READ |
                                     VFIO_REGION_INFO_FLAG_WRITE;
                        break;
                case VFIO_PCI_BAR1_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = 0;
                        info.flags = 0;
                        break;
                case VFIO_PCI_BAR2_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.flags = VFIO_REGION_INFO_FLAG_CAPS |
                                        VFIO_REGION_INFO_FLAG_MMAP |
                                        VFIO_REGION_INFO_FLAG_READ |
                                        VFIO_REGION_INFO_FLAG_WRITE;
                        info.size = gvt_aperture_sz(vgpu->gvt);

                        size = sizeof(*sparse) +
                                        (nr_areas * sizeof(*sparse->areas));
                        sparse = kzalloc(size, GFP_KERNEL);
                        if (!sparse)
                                return -ENOMEM;

                        sparse->nr_areas = nr_areas;
                        cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
                        sparse->areas[0].offset =
                                        PAGE_ALIGN(vgpu_aperture_offset(vgpu));
                        sparse->areas[0].size = vgpu_aperture_sz(vgpu);
                        if (!caps.buf) {
                                kfree(caps.buf);
                                caps.buf = NULL;
                                caps.size = 0;
                        }
                        break;

                case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = 0;

                        info.flags = 0;
                        gvt_dbg_core("get region info bar:%d\n", info.index);
                        break;

                case VFIO_PCI_ROM_REGION_INDEX:
                case VFIO_PCI_VGA_REGION_INDEX:
                        gvt_dbg_core("get region info index:%d\n", info.index);
                        break;
                default:
                        {
                                struct vfio_region_info_cap_type cap_type;

                                if (info.index >= VFIO_PCI_NUM_REGIONS +
                                                vgpu->vdev.num_regions)
                                        return -EINVAL;

                                i = info.index - VFIO_PCI_NUM_REGIONS;

                                info.offset =
                                        VFIO_PCI_INDEX_TO_OFFSET(info.index);
                                info.size = vgpu->vdev.region[i].size;
                                info.flags = vgpu->vdev.region[i].flags;

                                cap_type.type = vgpu->vdev.region[i].type;
                                cap_type.subtype = vgpu->vdev.region[i].subtype;

                                ret = vfio_info_add_capability(&caps,
                                                VFIO_REGION_INFO_CAP_TYPE,
                                                &cap_type);
                                if (ret)
                                        return ret;
                        }
                }

                if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
                        switch (cap_type_id) {
                        case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
                                ret = vfio_info_add_capability(&caps,
                                        VFIO_REGION_INFO_CAP_SPARSE_MMAP,
                                        sparse);
                                kfree(sparse);
                                if (ret)
                                        return ret;
                                break;
                        default:
                                return -EINVAL;
                        }
                }

                if (caps.size) {
                        if (info.argsz < sizeof(info) + caps.size) {
                                info.argsz = sizeof(info) + caps.size;
                                info.cap_offset = 0;
                        } else {
                                vfio_info_cap_shift(&caps, sizeof(info));
                                if (copy_to_user((void __user *)arg +
                                                  sizeof(info), caps.buf,
                                                  caps.size)) {
                                        kfree(caps.buf);
                                        return -EFAULT;
                                }
                                info.cap_offset = sizeof(info);
                        }

                        kfree(caps.buf);
                }

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;
        } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
                struct vfio_irq_info info;

                minsz = offsetofend(struct vfio_irq_info, count);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
                        return -EINVAL;

                switch (info.index) {
                case VFIO_PCI_INTX_IRQ_INDEX:
                case VFIO_PCI_MSI_IRQ_INDEX:
                        break;
                default:
                        return -EINVAL;
                }

                info.flags = VFIO_IRQ_INFO_EVENTFD;

                info.count = intel_vgpu_get_irq_count(vgpu, info.index);

                if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
                        info.flags |= (VFIO_IRQ_INFO_MASKABLE |
                                       VFIO_IRQ_INFO_AUTOMASKED);
                else
                        info.flags |= VFIO_IRQ_INFO_NORESIZE;

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;
        } else if (cmd == VFIO_DEVICE_SET_IRQS) {
                struct vfio_irq_set hdr;
                u8 *data = NULL;
                int ret = 0;
                size_t data_size = 0;

                minsz = offsetofend(struct vfio_irq_set, count);

                if (copy_from_user(&hdr, (void __user *)arg, minsz))
                        return -EFAULT;

                if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
                        int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

                        ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
                                                VFIO_PCI_NUM_IRQS, &data_size);
                        if (ret) {
                                gvt_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
                                return -EINVAL;
                        }
                        if (data_size) {
                                data = memdup_user((void __user *)(arg + minsz),
                                                   data_size);
                                if (IS_ERR(data))
                                        return PTR_ERR(data);
                        }
                }

                ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
                                        hdr.start, hdr.count, data);
                kfree(data);

                return ret;
        } else if (cmd == VFIO_DEVICE_RESET) {
                intel_gvt_ops->vgpu_reset(vgpu);
                return 0;
        }

        return 0;
}

static const struct mdev_parent_ops intel_vgpu_ops = {
        .supported_type_groups  = intel_vgpu_type_groups,
        .create                 = intel_vgpu_create,
        .remove                 = intel_vgpu_remove,

        .open                   = intel_vgpu_open,
        .release                = intel_vgpu_release,

        .read                   = intel_vgpu_read,
        .write                  = intel_vgpu_write,
        .mmap                   = intel_vgpu_mmap,
        .ioctl                  = intel_vgpu_ioctl,
};

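/*
 * Register this device as an mdev parent so userspace can create vGPU
 * instances through the mdev sysfs interface.
 */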
static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
        if (!intel_gvt_init_vgpu_type_groups(gvt))
                return -EFAULT;

        intel_gvt_ops = ops;

        return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
        intel_gvt_cleanup_vgpu_type_groups(gvt);
        mdev_unregister_device(dev);
}

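/*
 * Write-protect a guest page so that guest updates to shadowed
 * structures (such as guest graphics page tables) trap into
 * kvmgt_page_track_write().  The gfn is registered with KVM's
 * page-track framework and mirrored in our local protect table.
 */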
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        struct kvm_memory_slot *slot;
        int idx;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        idx = srcu_read_lock(&kvm->srcu);
        slot = gfn_to_memslot(kvm, gfn);
        if (!slot) {
                srcu_read_unlock(&kvm->srcu, idx);
                return -EINVAL;
        }

        spin_lock(&kvm->mmu_lock);

        if (kvmgt_gfn_is_write_protected(info, gfn))
                goto out;

        kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
        kvmgt_protect_table_add(info, gfn);

out:
        spin_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, idx);
        return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        struct kvm_memory_slot *slot;
        int idx;

        if (!handle_valid(handle))
                return 0;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        idx = srcu_read_lock(&kvm->srcu);
        slot = gfn_to_memslot(kvm, gfn);
        if (!slot) {
                srcu_read_unlock(&kvm->srcu, idx);
                return -EINVAL;
        }

        spin_lock(&kvm->mmu_lock);

        if (!kvmgt_gfn_is_write_protected(info, gfn))
                goto out;

        kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
        kvmgt_protect_table_del(info, gfn);

out:
        spin_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, idx);
        return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                const u8 *val, int len,
                struct kvm_page_track_notifier_node *node)
{
        struct kvmgt_guest_info *info = container_of(node,
                                        struct kvmgt_guest_info, track_node);

        if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
                intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
                                        (void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
                struct kvm_memory_slot *slot,
                struct kvm_page_track_notifier_node *node)
{
        int i;
        gfn_t gfn;
        struct kvmgt_guest_info *info = container_of(node,
                                        struct kvmgt_guest_info, track_node);

        spin_lock(&kvm->mmu_lock);
        for (i = 0; i < slot->npages; i++) {
                gfn = slot->base_gfn + i;
                if (kvmgt_gfn_is_write_protected(info, gfn)) {
                        kvm_slot_page_track_remove_page(kvm, slot, gfn,
                                                KVM_PAGE_TRACK_WRITE);
                        kvmgt_protect_table_del(info, gfn);
                }
        }
        spin_unlock(&kvm->mmu_lock);
}

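/*
 * Issue the hypervisor-signature CPUID leaf; a "KVMKVMKVM" response in
 * ebx/ecx/edx means we are running inside a KVM guest, where KVMGT
 * cannot act as the host.
 */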
static bool kvmgt_check_guest(void)
{
        unsigned int eax, ebx, ecx, edx;
        char s[12];
        unsigned int *i;

        eax = KVM_CPUID_SIGNATURE;
        ebx = ecx = edx = 0;

        asm volatile ("cpuid"
                      : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                      :
                      : "cc", "memory");
        i = (unsigned int *)s;
        i[0] = ebx;
        i[1] = ecx;
        i[2] = edx;

        return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
}

/*
 * NOTE:
 * It's actually impossible to check if we are running in a KVM host,
 * since the "KVM host" is simply native. So we can only detect a guest
 * here.
 */
static int kvmgt_detect_host(void)
{
#ifdef CONFIG_INTEL_IOMMU
        if (intel_iommu_gfx_mapped) {
                gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
                return -ENODEV;
        }
#endif
        return kvmgt_check_guest() ? -ENODEV : 0;
}

1287
1288 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
1289 {
1290         struct intel_vgpu *itr;
1291         struct kvmgt_guest_info *info;
1292         int id;
1293         bool ret = false;
1294
1295         mutex_lock(&vgpu->gvt->lock);
1296         for_each_active_vgpu(vgpu->gvt, itr, id) {
1297                 if (!handle_valid(itr->handle))
1298                         continue;
1299
1300                 info = (struct kvmgt_guest_info *)itr->handle;
1301                 if (kvm && kvm == info->kvm) {
1302                         ret = true;
1303                         goto out;
1304                 }
1305         }
1306 out:
1307         mutex_unlock(&vgpu->gvt->lock);
1308         return ret;
1309 }
1310
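/*
 * Bind the vGPU to the VM that opened it: take the struct kvm that the
 * VFIO group notifier saved, make sure no other vGPU is already bound
 * to the same VM, and register the page-track notifier used for write
 * protection.
 */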
static int kvmgt_guest_init(struct mdev_device *mdev)
{
        struct kvmgt_guest_info *info;
        struct intel_vgpu *vgpu;
        struct kvm *kvm;

        vgpu = mdev_get_drvdata(mdev);
        if (handle_valid(vgpu->handle))
                return -EEXIST;

        kvm = vgpu->vdev.kvm;
        if (!kvm || kvm->mm != current->mm) {
                gvt_err("KVM is required to use Intel vGPU\n");
                return -ESRCH;
        }

        if (__kvmgt_vgpu_exist(vgpu, kvm))
                return -EEXIST;

        info = vzalloc(sizeof(struct kvmgt_guest_info));
        if (!info)
                return -ENOMEM;

        vgpu->handle = (unsigned long)info;
        info->vgpu = vgpu;
        info->kvm = kvm;

        kvmgt_protect_table_init(info);
        gvt_cache_init(vgpu);

        info->track_node.track_write = kvmgt_page_track_write;
        info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
        kvm_page_track_register_notifier(kvm, &info->track_node);

        return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
        if (!info) {
                gvt_err("kvmgt_guest_info invalid\n");
                return false;
        }

        kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
        kvmgt_protect_table_destroy(info);
        gvt_cache_destroy(info->vgpu);
        vfree(info);

        return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
        /* nothing to do here */
        return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
        /* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
        struct kvmgt_guest_info *info;
        struct intel_vgpu *vgpu;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        vgpu = info->vgpu;

        if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
                return 0;

        return -EFAULT;
}

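/*
 * Translate a guest page frame number to a host pfn, pinning the page
 * through VFIO on first use and caching the result so repeated
 * translations avoid the pin path.
 */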
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
        unsigned long pfn;
        struct kvmgt_guest_info *info;
        struct device *dev;
        int rc;

        if (!handle_valid(handle))
                return INTEL_GVT_INVALID_ADDR;

        info = (struct kvmgt_guest_info *)handle;
        pfn = gvt_cache_find(info->vgpu, gfn);
        if (pfn != 0)
                return pfn;

        pfn = INTEL_GVT_INVALID_ADDR;
        dev = mdev_dev(info->vgpu->vdev.mdev);
        rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
        if (rc != 1) {
                gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc);
                return INTEL_GVT_INVALID_ADDR;
        }

        gvt_cache_add(info->vgpu, gfn, pfn);
        return pfn;
}

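/*
 * Read or write guest physical memory through KVM.  When called from a
 * kernel thread (which has no mm of its own), temporarily adopt the
 * VM's mm so the guest access helpers can resolve user addresses.
 */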
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len, bool write)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        int ret;
        bool kthread = current->mm == NULL;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        if (kthread)
                use_mm(kvm->mm);

        ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
                      kvm_read_guest(kvm, gpa, buf, len);

        if (kthread)
                unuse_mm(kvm->mm);

        return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len)
{
        return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len)
{
        return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
        return PFN_DOWN(__pa(addr));
}

struct intel_gvt_mpt kvmgt_mpt = {
        .detect_host = kvmgt_detect_host,
        .host_init = kvmgt_host_init,
        .host_exit = kvmgt_host_exit,
        .attach_vgpu = kvmgt_attach_vgpu,
        .detach_vgpu = kvmgt_detach_vgpu,
        .inject_msi = kvmgt_inject_msi,
        .from_virt_to_mfn = kvmgt_virt_to_pfn,
        .set_wp_page = kvmgt_write_protect_add,
        .unset_wp_page = kvmgt_write_protect_remove,
        .read_gpa = kvmgt_read_gpa,
        .write_gpa = kvmgt_write_gpa,
        .gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
        return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");