1 // SPDX-License-Identifier: GPL-2.0
3 * hosting IBM Z kernel virtual machines (s390x)
5 * Copyright IBM Corp. 2008, 2018
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
14 #include <linux/compiler.h>
15 #include <linux/err.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
35 #include <asm/pgtable.h>
38 #include <asm/switch_to.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
46 #define KMSG_COMPONENT "kvm-s390"
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
50 #define CREATE_TRACE_POINTS
52 #include "trace-s390.h"
54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 (KVM_MAX_VCPUS + LOCAL_IRQS))
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 { "userspace_handled", VCPU_STAT(exit_userspace) },
64 { "exit_null", VCPU_STAT(exit_null) },
65 { "exit_validity", VCPU_STAT(exit_validity) },
66 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 { "exit_external_request", VCPU_STAT(exit_external_request) },
68 { "exit_io_request", VCPU_STAT(exit_io_request) },
69 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 { "exit_instruction", VCPU_STAT(exit_instruction) },
71 { "exit_pei", VCPU_STAT(exit_pei) },
72 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 { "deliver_ckc", VCPU_STAT(deliver_ckc) },
84 { "deliver_cputm", VCPU_STAT(deliver_cputm) },
85 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
87 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88 { "deliver_virtio", VCPU_STAT(deliver_virtio) },
89 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92 { "deliver_program", VCPU_STAT(deliver_program) },
93 { "deliver_io", VCPU_STAT(deliver_io) },
94 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
96 { "inject_ckc", VCPU_STAT(inject_ckc) },
97 { "inject_cputm", VCPU_STAT(inject_cputm) },
98 { "inject_external_call", VCPU_STAT(inject_external_call) },
99 { "inject_float_mchk", VM_STAT(inject_float_mchk) },
100 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101 { "inject_io", VM_STAT(inject_io) },
102 { "inject_mchk", VCPU_STAT(inject_mchk) },
103 { "inject_pfault_done", VM_STAT(inject_pfault_done) },
104 { "inject_program", VCPU_STAT(inject_program) },
105 { "inject_restart", VCPU_STAT(inject_restart) },
106 { "inject_service_signal", VM_STAT(inject_service_signal) },
107 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110 { "inject_virtio", VM_STAT(inject_virtio) },
111 { "instruction_epsw", VCPU_STAT(instruction_epsw) },
112 { "instruction_gs", VCPU_STAT(instruction_gs) },
113 { "instruction_io_other", VCPU_STAT(instruction_io_other) },
114 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117 { "instruction_ptff", VCPU_STAT(instruction_ptff) },
118 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
119 { "instruction_sck", VCPU_STAT(instruction_sck) },
120 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121 { "instruction_spx", VCPU_STAT(instruction_spx) },
122 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
123 { "instruction_stap", VCPU_STAT(instruction_stap) },
124 { "instruction_iske", VCPU_STAT(instruction_iske) },
125 { "instruction_ri", VCPU_STAT(instruction_ri) },
126 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127 { "instruction_sske", VCPU_STAT(instruction_sske) },
128 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129 { "instruction_essa", VCPU_STAT(instruction_essa) },
130 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
131 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
132 { "instruction_tb", VCPU_STAT(instruction_tb) },
133 { "instruction_tpi", VCPU_STAT(instruction_tpi) },
134 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
135 { "instruction_tsch", VCPU_STAT(instruction_tsch) },
136 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137 { "instruction_sie", VCPU_STAT(instruction_sie) },
138 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154 { "instruction_diag_10", VCPU_STAT(diagnose_10) },
155 { "instruction_diag_44", VCPU_STAT(diagnose_44) },
156 { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157 { "instruction_diag_258", VCPU_STAT(diagnose_258) },
158 { "instruction_diag_308", VCPU_STAT(diagnose_308) },
159 { "instruction_diag_500", VCPU_STAT(diagnose_500) },
160 { "instruction_diag_other", VCPU_STAT(diagnose_other) },
164 struct kvm_s390_tod_clock_ext {
170 /* allow nested virtualization in KVM (if enabled by user space) */
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
175 /* allow 1m huge page guest backing, if !nested */
177 module_param(hpage, int, 0444);
178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181 * For now we handle at most 16 double words as this is what the s390 base
182 * kernel handles and stores in the prefix page. If we ever need to go beyond
183 * this, this requires changes to code, but the external uapi can stay.
185 #define SIZE_INTERNAL 16
188 * Base feature mask that defines default mask for facilities. Consists of the
189 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
193 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
194 * and defines the facilities that can be enabled via a cpu model.
196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
/*
 * Return the number of facility words (unsigned long each) that KVM handles
 * internally, i.e. SIZE_INTERNAL.
 *
 * The BUILD_BUG_ON()s break the build if SIZE_INTERNAL exceeds the facility
 * mask or facility list sizes (S390_ARCH_FAC_*_SIZE_U64), or if that many
 * words would not fit into S390_lowcore.stfle_fac_list.
 */
198 static unsigned long kvm_s390_fac_size(void)
200 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
201 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
202 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
203 sizeof(S390_lowcore.stfle_fac_list));
205 return SIZE_INTERNAL;
208 /* available cpu features supported by kvm */
209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
210 /* available subfunctions indicated via query / "test bit" */
211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
213 static struct gmap_notifier gmap_notifier;
214 static struct gmap_notifier vsie_gmap_notifier;
215 debug_info_t *kvm_s390_dbf;
217 /* Section: not file related */
218 int kvm_arch_hardware_enable(void)
220 /* every s390 is virtualization enabled ;-) */
224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
/*
 * Compensate a host TOD jump of @delta in one SIE control block @scb.
 *
 * When ECD_MEF is set in scb->ecd, the epoch index (scb->epdx) is adjusted by
 * delta_idx as well, followed by a comparison of scb->epoch against delta.
 *
 * NOTE(review): the computation of delta_idx, the update of scb->epoch and
 * the statement guarded by the final comparison are on lines that are not
 * part of this listing - confirm the details against the full file.
 */
227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
232 * The TOD jumps by delta, we have to compensate this by adding
233 * -delta to the epoch.
237 /* sign-extension - we're adding to signed values below */
242 if (scb->ecd & ECD_MEF) {
243 scb->epdx += delta_idx;
244 if (scb->epoch < delta)
250 * This callback is executed during stop_machine(). All CPUs are therefore
251 * temporarily stopped. In order not to change guest behavior, we have to
252 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
253 * so a CPU won't be stopped while calculating with the epoch.
/*
 * Notifier callback (installed through kvm_clock_notifier) that propagates a
 * host TOD change to all guests.
 *
 * The opaque argument is read as a pointer to the unsigned long long delta.
 * For every VM on vm_list and every VCPU of that VM:
 *  - the VCPU SIE block is adjusted via kvm_clock_sync_scb(),
 *  - kvm->arch.epoch / kvm->arch.epdx are refreshed from a VCPU SIE block,
 *  - cputm_start is advanced by the delta if CPU timer accounting is enabled,
 *  - the vsie block, if one is set, is adjusted the same way.
 */
255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
259 struct kvm_vcpu *vcpu;
261 unsigned long long *delta = v;
263 list_for_each_entry(kvm, &vm_list, vm_list) {
264 kvm_for_each_vcpu(i, vcpu, kvm) {
265 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
/*
 * NOTE(review): listing line 266 is not shown; presumably a condition
 * restricts the two assignments below to one VCPU - confirm.
 */
267 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
268 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
270 if (vcpu->arch.cputm_enabled)
271 vcpu->arch.cputm_start += *delta;
272 if (vcpu->arch.vsie_block)
273 kvm_clock_sync_scb(vcpu->arch.vsie_block,
280 static struct notifier_block kvm_clock_notifier = {
281 .notifier_call = kvm_clock_sync,
/*
 * Arch setup hook: install the callbacks KVM needs from the rest of the
 * s390 code.
 *
 * Registers two gmap pte notifiers (kvm_gmap_notifier for regular guests and
 * kvm_s390_vsie_gmap_notifier for vsie) and hooks kvm_clock_notifier into the
 * s390_epoch_delta_notifier chain. kvm_arch_hardware_unsetup() removes the
 * same three registrations.
 */
284 int kvm_arch_hardware_setup(void)
286 gmap_notifier.notifier_call = kvm_gmap_notifier;
287 gmap_register_pte_notifier(&gmap_notifier);
288 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
289 gmap_register_pte_notifier(&vsie_gmap_notifier);
290 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
291 &kvm_clock_notifier);
/*
 * Arch teardown hook: unregister both gmap pte notifiers and remove
 * kvm_clock_notifier from the s390_epoch_delta_notifier chain, undoing the
 * registrations made in kvm_arch_hardware_setup().
 */
295 void kvm_arch_hardware_unsetup(void)
297 gmap_unregister_pte_notifier(&gmap_notifier);
298 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
299 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
300 &kvm_clock_notifier);
/*
 * Mark CPU feature @nr as available by setting its bit, via set_bit_inv(),
 * in the kvm_s390_available_cpu_feat bitmap.
 */
303 static void allow_cpu_feat(unsigned long nr)
305 set_bit_inv(nr, kvm_s390_available_cpu_feat);
308 static inline int plo_test_bit(unsigned char nr)
310 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
314 /* Parameter registers are ignored for "test bit" */
324 static void kvm_s390_cpu_feat_init(void)
328 for (i = 0; i < 256; ++i) {
330 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
333 if (test_facility(28)) /* TOD-clock steering */
334 ptff(kvm_s390_available_subfunc.ptff,
335 sizeof(kvm_s390_available_subfunc.ptff),
338 if (test_facility(17)) { /* MSA */
339 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
340 kvm_s390_available_subfunc.kmac);
341 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
342 kvm_s390_available_subfunc.kmc);
343 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
344 kvm_s390_available_subfunc.km);
345 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
346 kvm_s390_available_subfunc.kimd);
347 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
348 kvm_s390_available_subfunc.klmd);
350 if (test_facility(76)) /* MSA3 */
351 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
352 kvm_s390_available_subfunc.pckmo);
353 if (test_facility(77)) { /* MSA4 */
354 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
355 kvm_s390_available_subfunc.kmctr);
356 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
357 kvm_s390_available_subfunc.kmf);
358 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
359 kvm_s390_available_subfunc.kmo);
360 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
361 kvm_s390_available_subfunc.pcc);
363 if (test_facility(57)) /* MSA5 */
364 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
365 kvm_s390_available_subfunc.ppno);
367 if (test_facility(146)) /* MSA8 */
368 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
369 kvm_s390_available_subfunc.kma);
371 if (MACHINE_HAS_ESOP)
372 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
374 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
375 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
377 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
378 !test_facility(3) || !nested)
380 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
381 if (sclp.has_64bscao)
382 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
384 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
386 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
388 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
390 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
392 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
394 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
396 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
398 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
399 * all skey handling functions read/set the skey from the PGSTE
400 * instead of the real storage key.
402 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
403 * pages being detected as preserved although they are resident.
405 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
406 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
408 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
409 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
410 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
412 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
413 * cannot easily shadow the SCA because of the ipte lock.
/*
 * Arch init hook.
 *
 * Registers the "kvm-trace" debug feature in kvm_s390_dbf and attaches the
 * sprintf view to it; if attaching the view fails, the debug feature is
 * unregistered again. Afterwards the available CPU features are probed and
 * the floating interrupt controller (FLIC) device ops are registered.
 *
 * Returns the result of kvm_register_device_ops() on the path shown here.
 * NOTE(review): the error returns of the two debug_* checks are on lines
 * this listing omits.
 */
417 int kvm_arch_init(void *opaque)
419 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
423 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
424 debug_unregister(kvm_s390_dbf);
428 kvm_s390_cpu_feat_init();
430 /* Register floating interrupt controller interface. */
431 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
/* Arch exit hook: drop the debug feature registered by kvm_arch_init(). */
434 void kvm_arch_exit(void)
436 debug_unregister(kvm_s390_dbf);
439 /* Section: device related */
/*
 * Device-level ioctl handler. KVM_S390_ENABLE_SIE is forwarded to
 * s390_enable_sie() and its result is returned.
 * NOTE(review): the return value for every other ioctl number is on a line
 * this listing omits.
 */
440 long kvm_arch_dev_ioctl(struct file *filp,
441 unsigned int ioctl, unsigned long arg)
443 if (ioctl == KVM_S390_ENABLE_SIE)
444 return s390_enable_sie();
448 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
453 case KVM_CAP_S390_PSW:
454 case KVM_CAP_S390_GMAP:
455 case KVM_CAP_SYNC_MMU:
456 #ifdef CONFIG_KVM_S390_UCONTROL
457 case KVM_CAP_S390_UCONTROL:
459 case KVM_CAP_ASYNC_PF:
460 case KVM_CAP_SYNC_REGS:
461 case KVM_CAP_ONE_REG:
462 case KVM_CAP_ENABLE_CAP:
463 case KVM_CAP_S390_CSS_SUPPORT:
464 case KVM_CAP_IOEVENTFD:
465 case KVM_CAP_DEVICE_CTRL:
466 case KVM_CAP_ENABLE_CAP_VM:
467 case KVM_CAP_S390_IRQCHIP:
468 case KVM_CAP_VM_ATTRIBUTES:
469 case KVM_CAP_MP_STATE:
470 case KVM_CAP_IMMEDIATE_EXIT:
471 case KVM_CAP_S390_INJECT_IRQ:
472 case KVM_CAP_S390_USER_SIGP:
473 case KVM_CAP_S390_USER_STSI:
474 case KVM_CAP_S390_SKEYS:
475 case KVM_CAP_S390_IRQ_STATE:
476 case KVM_CAP_S390_USER_INSTR0:
477 case KVM_CAP_S390_CMMA_MIGRATION:
478 case KVM_CAP_S390_AIS:
479 case KVM_CAP_S390_AIS_MIGRATION:
482 case KVM_CAP_S390_HPAGE_1M:
487 case KVM_CAP_S390_MEM_OP:
490 case KVM_CAP_NR_VCPUS:
491 case KVM_CAP_MAX_VCPUS:
492 r = KVM_S390_BSCA_CPU_SLOTS;
493 if (!kvm_s390_use_sca_entries())
495 else if (sclp.has_esca && sclp.has_64bscao)
496 r = KVM_S390_ESCA_CPU_SLOTS;
498 case KVM_CAP_NR_MEMSLOTS:
499 r = KVM_USER_MEM_SLOTS;
501 case KVM_CAP_S390_COW:
502 r = MACHINE_HAS_ESOP;
504 case KVM_CAP_S390_VECTOR_REGISTERS:
507 case KVM_CAP_S390_RI:
508 r = test_facility(64);
510 case KVM_CAP_S390_GS:
511 r = test_facility(133);
513 case KVM_CAP_S390_BPB:
514 r = test_facility(82);
/*
 * Transfer dirty information for @memslot from the guest address space
 * (kvm->arch.gmap) into the KVM dirty log.
 *
 * The slot is walked in steps of _PAGE_ENTRIES guest frames. For each step
 * gmap_sync_dirty_log_pmd() fills a local bitmap with one bit per page, and
 * every set bit is reported through mark_page_dirty(). Host addresses that
 * kvm_is_error_hva() rejects and a pending fatal signal are both checked
 * inside the loop; the statements they trigger are on lines not shown here.
 */
522 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
523 struct kvm_memory_slot *memslot)
526 gfn_t cur_gfn, last_gfn;
527 unsigned long gaddr, vmaddr;
528 struct gmap *gmap = kvm->arch.gmap;
529 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
531 /* Loop over all guest segments */
532 cur_gfn = memslot->base_gfn;
/*
 * NOTE(review): last_gfn is base_gfn + npages, i.e. one past the final frame
 * of the slot, yet the loop bound below is inclusive. Confirm that the
 * iteration with cur_gfn == last_gfn is intended.
 */
533 last_gfn = memslot->base_gfn + memslot->npages;
534 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
535 gaddr = gfn_to_gpa(cur_gfn);
536 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
537 if (kvm_is_error_hva(vmaddr))
540 bitmap_zero(bitmap, _PAGE_ENTRIES);
541 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
542 for (i = 0; i < _PAGE_ENTRIES; i++) {
543 if (test_bit(i, bitmap))
544 mark_page_dirty(kvm, cur_gfn + i);
547 if (fatal_signal_pending(current))
553 /* Section: vm related */
554 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
557 * Get (and clear) the dirty memory log for a memory slot.
/*
 * KVM_GET_DIRTY_LOG handler for s390.
 *
 * ucontrol VMs are checked for first. Under kvm->slots_lock the slot id
 * is range-checked against KVM_USER_MEM_SLOTS and the memslot must have a
 * dirty bitmap. The bitmap is then refreshed from the gmap with
 * kvm_s390_sync_dirty_log(), handed to user space via kvm_get_dirty_log()
 * and finally zeroed.
 *
 * NOTE(review): the error codes of the individual checks and the condition
 * guarding the final memset are on lines this listing omits.
 */
559 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
560 struct kvm_dirty_log *log)
564 struct kvm_memslots *slots;
565 struct kvm_memory_slot *memslot;
568 if (kvm_is_ucontrol(kvm))
571 mutex_lock(&kvm->slots_lock);
574 if (log->slot >= KVM_USER_MEM_SLOTS)
577 slots = kvm_memslots(kvm);
578 memslot = id_to_memslot(slots, log->slot);
580 if (!memslot->dirty_bitmap)
583 kvm_s390_sync_dirty_log(kvm, memslot);
584 r = kvm_get_dirty_log(kvm, log, &is_dirty);
588 /* Clear the dirty log */
590 n = kvm_dirty_bitmap_bytes(memslot);
591 memset(memslot->dirty_bitmap, 0, n);
595 mutex_unlock(&kvm->slots_lock);
/*
 * Post a synchronous KVM_REQ_ICPT_OPEREXC request to every VCPU of @kvm.
 * Called when user space enables KVM_CAP_S390_USER_INSTR0.
 */
599 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
602 struct kvm_vcpu *vcpu;
604 kvm_for_each_vcpu(i, vcpu, kvm) {
605 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
609 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
617 case KVM_CAP_S390_IRQCHIP:
618 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
619 kvm->arch.use_irqchip = 1;
622 case KVM_CAP_S390_USER_SIGP:
623 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
624 kvm->arch.user_sigp = 1;
627 case KVM_CAP_S390_VECTOR_REGISTERS:
628 mutex_lock(&kvm->lock);
629 if (kvm->created_vcpus) {
631 } else if (MACHINE_HAS_VX) {
632 set_kvm_facility(kvm->arch.model.fac_mask, 129);
633 set_kvm_facility(kvm->arch.model.fac_list, 129);
634 if (test_facility(134)) {
635 set_kvm_facility(kvm->arch.model.fac_mask, 134);
636 set_kvm_facility(kvm->arch.model.fac_list, 134);
638 if (test_facility(135)) {
639 set_kvm_facility(kvm->arch.model.fac_mask, 135);
640 set_kvm_facility(kvm->arch.model.fac_list, 135);
645 mutex_unlock(&kvm->lock);
646 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
647 r ? "(not available)" : "(success)");
649 case KVM_CAP_S390_RI:
651 mutex_lock(&kvm->lock);
652 if (kvm->created_vcpus) {
654 } else if (test_facility(64)) {
655 set_kvm_facility(kvm->arch.model.fac_mask, 64);
656 set_kvm_facility(kvm->arch.model.fac_list, 64);
659 mutex_unlock(&kvm->lock);
660 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
661 r ? "(not available)" : "(success)");
663 case KVM_CAP_S390_AIS:
664 mutex_lock(&kvm->lock);
665 if (kvm->created_vcpus) {
668 set_kvm_facility(kvm->arch.model.fac_mask, 72);
669 set_kvm_facility(kvm->arch.model.fac_list, 72);
672 mutex_unlock(&kvm->lock);
673 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
674 r ? "(not available)" : "(success)");
676 case KVM_CAP_S390_GS:
678 mutex_lock(&kvm->lock);
679 if (kvm->created_vcpus) {
681 } else if (test_facility(133)) {
682 set_kvm_facility(kvm->arch.model.fac_mask, 133);
683 set_kvm_facility(kvm->arch.model.fac_list, 133);
686 mutex_unlock(&kvm->lock);
687 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
688 r ? "(not available)" : "(success)");
690 case KVM_CAP_S390_HPAGE_1M:
691 mutex_lock(&kvm->lock);
692 if (kvm->created_vcpus)
694 else if (!hpage || kvm->arch.use_cmma)
698 down_write(&kvm->mm->mmap_sem);
699 kvm->mm->context.allow_gmap_hpage_1m = 1;
700 up_write(&kvm->mm->mmap_sem);
702 * We might have to create fake 4k page
703 * tables. To avoid that the hardware works on
704 * stale PGSTEs, we emulate these instructions.
706 kvm->arch.use_skf = 0;
707 kvm->arch.use_pfmfi = 0;
709 mutex_unlock(&kvm->lock);
710 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
711 r ? "(not available)" : "(success)");
713 case KVM_CAP_S390_USER_STSI:
714 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
715 kvm->arch.user_stsi = 1;
718 case KVM_CAP_S390_USER_INSTR0:
719 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
720 kvm->arch.user_instr0 = 1;
721 icpt_operexc_on_all_vcpus(kvm);
/*
 * Read side of the memory control attribute group.
 *
 * For KVM_S390_VM_MEM_LIMIT_SIZE the current kvm->arch.mem_limit is traced
 * and written as a u64 to the user address in attr->addr.
 * NOTE(review): the result codes (put_user failure, unknown attribute) are
 * on lines this listing omits.
 */
731 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
735 switch (attr->attr) {
736 case KVM_S390_VM_MEM_LIMIT_SIZE:
738 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
739 kvm->arch.mem_limit);
740 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
750 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
754 switch (attr->attr) {
755 case KVM_S390_VM_MEM_ENABLE_CMMA:
760 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
761 mutex_lock(&kvm->lock);
762 if (kvm->created_vcpus)
764 else if (kvm->mm->context.allow_gmap_hpage_1m)
767 kvm->arch.use_cmma = 1;
768 /* Not compatible with cmma. */
769 kvm->arch.use_pfmfi = 0;
772 mutex_unlock(&kvm->lock);
774 case KVM_S390_VM_MEM_CLR_CMMA:
779 if (!kvm->arch.use_cmma)
782 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
783 mutex_lock(&kvm->lock);
784 idx = srcu_read_lock(&kvm->srcu);
785 s390_reset_cmma(kvm->arch.gmap->mm);
786 srcu_read_unlock(&kvm->srcu, idx);
787 mutex_unlock(&kvm->lock);
790 case KVM_S390_VM_MEM_LIMIT_SIZE: {
791 unsigned long new_limit;
793 if (kvm_is_ucontrol(kvm))
796 if (get_user(new_limit, (u64 __user *)attr->addr))
799 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
800 new_limit > kvm->arch.mem_limit)
806 /* gmap_create takes last usable address */
807 if (new_limit != KVM_S390_NO_MEM_LIMIT)
811 mutex_lock(&kvm->lock);
812 if (!kvm->created_vcpus) {
813 /* gmap_create will round the limit up */
814 struct gmap *new = gmap_create(current->mm, new_limit);
819 gmap_remove(kvm->arch.gmap);
821 kvm->arch.gmap = new;
825 mutex_unlock(&kvm->lock);
826 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
827 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
828 (void *) kvm->arch.gmap->asce);
838 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
/*
 * Re-run kvm_s390_vcpu_crypto_setup() on every VCPU of @kvm. All VCPUs are
 * blocked for the duration of the update and unblocked afterwards.
 */
840 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
842 struct kvm_vcpu *vcpu;
845 kvm_s390_vcpu_block_all(kvm);
847 kvm_for_each_vcpu(i, vcpu, kvm)
848 kvm_s390_vcpu_crypto_setup(vcpu);
850 kvm_s390_vcpu_unblock_all(kvm);
/*
 * Write side of the crypto attribute group: switch AES/DEA key wrapping on
 * or off for @kvm. The attribute group is gated on guest facility 76.
 *
 * Under kvm->lock:
 *  - ENABLE_{AES,DEA}_KW sets crypto.aes_kw / crypto.dea_kw to 1;
 *  - DISABLE_{AES,DEA}_KW clears the flag and zeroes the matching wrapping
 *    key mask in the crycb.
 * After a handled attribute kvm_s390_vcpu_crypto_reset_all() pushes the new
 * state to all VCPUs before the lock is dropped.
 *
 * NOTE(review): in the two ENABLE cases only the argument lines of a call
 * operating on the wrapping key mask are visible (listing lines 861 and 868
 * are missing); the first mutex_unlock() presumably belongs to the default
 * case of the switch - confirm both against the full file.
 */
853 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
855 if (!test_kvm_facility(kvm, 76))
858 mutex_lock(&kvm->lock);
859 switch (attr->attr) {
860 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
862 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
863 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
864 kvm->arch.crypto.aes_kw = 1;
865 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
867 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
869 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
870 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
871 kvm->arch.crypto.dea_kw = 1;
872 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
874 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
875 kvm->arch.crypto.aes_kw = 0;
876 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
877 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
878 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
880 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
881 kvm->arch.crypto.dea_kw = 0;
882 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
883 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
884 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
887 mutex_unlock(&kvm->lock);
891 kvm_s390_vcpu_crypto_reset_all(kvm);
892 mutex_unlock(&kvm->lock);
/* Post the synchronous request @req to every VCPU of @kvm. */
896 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
899 struct kvm_vcpu *vcpu;
901 kvm_for_each_vcpu(cx, vcpu, kvm)
902 kvm_s390_sync_request(req, vcpu);
906 * Must be called with kvm->srcu held to avoid races on memslots, and with
907 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
/*
 * Switch @kvm into migration mode.
 *
 * Nothing is changed if migration mode is already on; a VM without memslots
 * is checked for before anything is modified. Without CMMA
 * (kvm->arch.use_cmma clear) only the migration_mode flag is set. With CMMA,
 * the second dirty bitmap of every used memslot is filled with 0xff, the
 * total page count is stored in kvm->arch.cmma_dirty_pages, the flag is set
 * and KVM_REQ_START_MIGRATION is broadcast to all VCPUs.
 *
 * NOTE(review): the return statements of the early exits are on lines this
 * listing omits.
 */
909 static int kvm_s390_vm_start_migration(struct kvm *kvm)
911 struct kvm_memory_slot *ms;
912 struct kvm_memslots *slots;
913 unsigned long ram_pages = 0;
916 /* migration mode already enabled */
917 if (kvm->arch.migration_mode)
919 slots = kvm_memslots(kvm);
920 if (!slots || !slots->used_slots)
923 if (!kvm->arch.use_cmma) {
924 kvm->arch.migration_mode = 1;
927 /* mark all the pages in active slots as dirty */
928 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
929 ms = slots->memslots + slotnr;
931 * The second half of the bitmap is only used on x86,
932 * and would be wasted otherwise, so we put it to good
933 * use here to keep track of the state of the storage
936 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
937 ram_pages += ms->npages;
939 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
940 kvm->arch.migration_mode = 1;
941 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
946 * Must be called with kvm->slots_lock to avoid races with ourselves and
947 * kvm_s390_vm_start_migration.
/*
 * Leave migration mode: clear kvm->arch.migration_mode and, when CMMA is in
 * use, broadcast KVM_REQ_STOP_MIGRATION to all VCPUs. The flag is tested
 * first, so a VM that is not in migration mode is handled by an early exit.
 */
949 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
951 /* migration mode already disabled */
952 if (!kvm->arch.migration_mode)
954 kvm->arch.migration_mode = 0;
955 if (kvm->arch.use_cmma)
956 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
/*
 * Write side of the migration attribute group: dispatch
 * KVM_S390_VM_MIGRATION_START / _STOP to the matching helper while holding
 * kvm->slots_lock, which both helpers require.
 */
960 static int kvm_s390_vm_set_migration(struct kvm *kvm,
961 struct kvm_device_attr *attr)
965 mutex_lock(&kvm->slots_lock);
966 switch (attr->attr) {
967 case KVM_S390_VM_MIGRATION_START:
968 res = kvm_s390_vm_start_migration(kvm);
970 case KVM_S390_VM_MIGRATION_STOP:
971 res = kvm_s390_vm_stop_migration(kvm);
976 mutex_unlock(&kvm->slots_lock);
/*
 * Read side of the migration attribute group. Only
 * KVM_S390_VM_MIGRATION_STATUS is checked for; the current
 * kvm->arch.migration_mode is widened to a u64 and copied to attr->addr.
 */
981 static int kvm_s390_vm_get_migration(struct kvm *kvm,
982 struct kvm_device_attr *attr)
984 u64 mig = kvm->arch.migration_mode;
986 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
989 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
/*
 * KVM_S390_VM_TOD_EXT setter: read a struct kvm_s390_vm_tod_clock (TOD base
 * plus epoch index) from the user address in attr->addr and install it with
 * kvm_s390_set_tod_clock().
 *
 * A non-zero epoch index is checked against guest facility 139 before the
 * clock is changed.
 * NOTE(review): the statements executed when the copy or the facility check
 * fails are on lines this listing omits.
 */
994 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
996 struct kvm_s390_vm_tod_clock gtod;
998 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1001 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1003 kvm_s390_set_tod_clock(kvm, &gtod);
1005 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1006 gtod.epoch_idx, gtod.tod);
/*
 * KVM_S390_VM_TOD_HIGH setter: copy the value at the user address in
 * attr->addr into gtod_high and trace it.
 * NOTE(review): the declaration of gtod_high, the size argument of the copy
 * and whatever happens between the copy and the trace are on lines this
 * listing omits.
 */
1011 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1015 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1021 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
/*
 * KVM_S390_VM_TOD_LOW setter: read only the TOD base from user space and
 * install it with kvm_s390_set_tod_clock().
 *
 * gtod is zero-initialised, so the epoch index passed on is 0 and only
 * gtod.tod comes from the caller.
 */
1026 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1028 struct kvm_s390_vm_tod_clock gtod = { 0 };
1030 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1034 kvm_s390_set_tod_clock(kvm, &gtod);
1035 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
/*
 * Write side of the TOD attribute group: dispatch on attr->attr to the
 * EXT, HIGH or LOW setter and keep its result in ret.
 * NOTE(review): listing lines 1040-1045 (before the switch) and the default
 * case are not shown.
 */
1039 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1046 switch (attr->attr) {
1047 case KVM_S390_VM_TOD_EXT:
1048 ret = kvm_s390_set_tod_ext(kvm, attr);
1050 case KVM_S390_VM_TOD_HIGH:
1051 ret = kvm_s390_set_tod_high(kvm, attr);
1053 case KVM_S390_VM_TOD_LOW:
1054 ret = kvm_s390_set_tod_low(kvm, attr);
/*
 * Compute the current guest TOD clock of @kvm into @gtod.
 *
 * The host extended TOD is read with get_tod_clock_ext(); the guest TOD base
 * is the host value plus kvm->arch.epoch. The epoch index is reported as 0
 * unless guest facility 139 is available, in which case it is the host index
 * plus kvm->arch.epdx, incremented by one when the TOD sum ended up smaller
 * than the host TOD (treated as a carry out of the base addition).
 */
1063 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1064 struct kvm_s390_vm_tod_clock *gtod)
1066 struct kvm_s390_tod_clock_ext htod;
1070 get_tod_clock_ext((char *)&htod);
1072 gtod->tod = htod.tod + kvm->arch.epoch;
1073 gtod->epoch_idx = 0;
1074 if (test_kvm_facility(kvm, 139)) {
1075 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1076 if (gtod->tod < htod.tod)
1077 gtod->epoch_idx += 1;
/*
 * KVM_S390_VM_TOD_EXT getter: compute the guest TOD (base and epoch index)
 * with kvm_s390_get_tod_clock() and copy the whole structure to the user
 * address in attr->addr.
 */
1083 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1085 struct kvm_s390_vm_tod_clock gtod;
/* Zero the whole structure first: all sizeof(gtod) bytes go to user space. */
1087 memset(&gtod, 0, sizeof(gtod));
1088 kvm_s390_get_tod_clock(kvm, &gtod);
1089 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1092 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1093 gtod.epoch_idx, gtod.tod);
/*
 * KVM_S390_VM_TOD_HIGH getter: copy gtod_high to the user address in
 * attr->addr and trace it.
 * NOTE(review): the declaration and initial value of gtod_high and the size
 * argument of the copy are on lines this listing omits.
 */
1097 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1101 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1104 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
/*
 * KVM_S390_VM_TOD_LOW getter: read the guest TOD base with
 * kvm_s390_get_tod_clock_fast() and copy it to the user address in
 * attr->addr.
 */
1109 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1113 gtod = kvm_s390_get_tod_clock_fast(kvm);
1114 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1116 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
/*
 * Read side of the TOD attribute group: dispatch on attr->attr to the
 * EXT, HIGH or LOW getter and keep its result in ret.
 * NOTE(review): listing lines 1122-1127 (before the switch) and the default
 * case are not shown.
 */
1121 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1128 switch (attr->attr) {
1129 case KVM_S390_VM_TOD_EXT:
1130 ret = kvm_s390_get_tod_ext(kvm, attr);
1132 case KVM_S390_VM_TOD_HIGH:
1133 ret = kvm_s390_get_tod_high(kvm, attr);
1135 case KVM_S390_VM_TOD_LOW:
1136 ret = kvm_s390_get_tod_low(kvm, attr);
/*
 * KVM_S390_VM_CPU_PROCESSOR setter: install the guest CPU model (cpuid, IBC
 * and facility list) supplied by user space.
 *
 * Runs under kvm->lock and checks kvm->created_vcpus first. The model is read
 * into a temporary kzalloc()ed buffer. The requested IBC is only evaluated if
 * both the machine's lowest IBC and the requested value are non-zero; it is
 * then clamped into [lowest_ibc, unblocked_ibc], both taken from sclp.ibc.
 *
 * NOTE(review): the error codes, the release of the temporary buffer and
 * the else-branches are on lines this listing omits.
 */
1145 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1147 struct kvm_s390_vm_cpu_processor *proc;
1148 u16 lowest_ibc, unblocked_ibc;
1151 mutex_lock(&kvm->lock);
1152 if (kvm->created_vcpus) {
1156 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1161 if (!copy_from_user(proc, (void __user *)attr->addr,
1163 kvm->arch.model.cpuid = proc->cpuid;
/* sclp.ibc carries the lowest IBC in bits 16..27 and the unblocked IBC in bits 0..11 */
1164 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1165 unblocked_ibc = sclp.ibc & 0xfff;
1166 if (lowest_ibc && proc->ibc) {
1167 if (proc->ibc > unblocked_ibc)
1168 kvm->arch.model.ibc = unblocked_ibc;
1169 else if (proc->ibc < lowest_ibc)
1170 kvm->arch.model.ibc = lowest_ibc;
1172 kvm->arch.model.ibc = proc->ibc;
1174 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1175 S390_ARCH_FAC_LIST_SIZE_BYTE);
1176 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1177 kvm->arch.model.ibc,
1178 kvm->arch.model.cpuid);
1179 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1180 kvm->arch.model.fac_list[0],
1181 kvm->arch.model.fac_list[1],
1182 kvm->arch.model.fac_list[2]);
1187 mutex_unlock(&kvm->lock);
/*
 * KVM_S390_VM_CPU_PROCESSOR_FEAT setter: install the guest CPU feature
 * bitmap supplied by user space.
 *
 * The requested bits are first tested with bitmap_subset() against
 * kvm_s390_available_cpu_feat. The copy into kvm->arch.cpu_feat happens
 * under kvm->lock, after a check of kvm->created_vcpus that drops the lock
 * again on its own path.
 *
 * NOTE(review): the error codes of the individual checks and the arguments
 * of the final trace are on lines this listing omits.
 */
1191 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1192 struct kvm_device_attr *attr)
1194 struct kvm_s390_vm_cpu_feat data;
1196 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1198 if (!bitmap_subset((unsigned long *) data.feat,
1199 kvm_s390_available_cpu_feat,
1200 KVM_S390_VM_CPU_FEAT_NR_BITS))
1203 mutex_lock(&kvm->lock);
1204 if (kvm->created_vcpus) {
1205 mutex_unlock(&kvm->lock);
1208 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1209 KVM_S390_VM_CPU_FEAT_NR_BITS);
1210 mutex_unlock(&kvm->lock);
1211 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1218 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1219 struct kvm_device_attr *attr)
1222 * Once supported by kernel + hw, we have to store the subfunctions
1223 * in kvm->arch and remember that user space configured them.
/*
 * Write side of the CPU model attribute group: dispatch on attr->attr to the
 * processor, processor-feature or processor-subfunction setter and keep its
 * result in ret.
 * NOTE(review): the initial value of ret, used for unknown attributes, is
 * on a line this listing omits.
 */
1228 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1232 switch (attr->attr) {
1233 case KVM_S390_VM_CPU_PROCESSOR:
1234 ret = kvm_s390_set_processor(kvm, attr);
1236 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1237 ret = kvm_s390_set_processor_feat(kvm, attr);
1239 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1240 ret = kvm_s390_set_processor_subfunc(kvm, attr);
/*
 * Report the guest CPU model to user space.
 *
 * A zeroed struct kvm_s390_vm_cpu_processor is allocated and filled with
 * cpuid, ibc and the facility list taken from kvm->arch.model. The values
 * are traced and the structure is copied to the user buffer at attr->addr.
 * NOTE(review): allocation-failure handling, the release of proc and the
 * return codes are not visible in this excerpt.
 */
1246 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1248 struct kvm_s390_vm_cpu_processor *proc;
1251 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1256 proc->cpuid = kvm->arch.model.cpuid;
1257 proc->ibc = kvm->arch.model.ibc;
1258 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1259 S390_ARCH_FAC_LIST_SIZE_BYTE);
1260 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1261 kvm->arch.model.ibc,
1262 kvm->arch.model.cpuid);
1263 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1264 kvm->arch.model.fac_list[0],
1265 kvm->arch.model.fac_list[1],
1266 kvm->arch.model.fac_list[2]);
1267 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/*
 * Report the host ("machine") CPU model to user space.
 *
 * A zeroed struct kvm_s390_vm_cpu_machine is filled with the CPU id from
 * get_cpu_id(), sclp.ibc, the VM's facility mask and the host STFLE
 * facility list from the lowcore, and is then copied to attr->addr.
 * NOTE(review): allocation-failure handling, the release of mach and the
 * return codes are not visible in this excerpt.
 */
1274 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1276 struct kvm_s390_vm_cpu_machine *mach;
1279 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1284 get_cpu_id((struct cpuid *) &mach->cpuid);
1285 mach->ibc = sclp.ibc;
1286 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1287 S390_ARCH_FAC_LIST_SIZE_BYTE);
1288 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1289 sizeof(S390_lowcore.stfle_fac_list));
/*
 * NOTE(review): this "host" trace prints the guest model values
 * (kvm->arch.model.ibc / .cpuid), not mach->ibc / mach->cpuid that are
 * handed to user space - confirm whether that is intended.
 */
1290 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1291 kvm->arch.model.ibc,
1292 kvm->arch.model.cpuid);
1293 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1297 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1301 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
/*
 * Copy the VM's configured CPU feature bitmap (kvm->arch.cpu_feat) into a
 * struct kvm_s390_vm_cpu_feat and hand it to user space at attr->addr;
 * the result is traced afterwards.
 * NOTE(review): return codes are not visible in this excerpt.
 */
1308 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1309 struct kvm_device_attr *attr)
1311 struct kvm_s390_vm_cpu_feat data;
1313 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1314 KVM_S390_VM_CPU_FEAT_NR_BITS);
1315 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1317 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Copy the host-wide bitmap kvm_s390_available_cpu_feat into a
 * struct kvm_s390_vm_cpu_feat and hand it to user space at attr->addr;
 * the result is traced afterwards.
 * NOTE(review): return codes are not visible in this excerpt.
 */
1324 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1325 struct kvm_device_attr *attr)
1327 struct kvm_s390_vm_cpu_feat data;
1329 bitmap_copy((unsigned long *) data.feat,
1330 kvm_s390_available_cpu_feat,
1331 KVM_S390_VM_CPU_FEAT_NR_BITS);
1332 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1334 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Getter for the KVM_S390_VM_CPU_PROCESSOR_SUBFUNC attribute.
 *
 * No statement that reads or copies data is visible here; the in-body
 * note only describes future work (returning user-configured values from
 * kvm->arch).
 * NOTE(review): the return value is not visible in this excerpt.
 */
1341 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1342 struct kvm_device_attr *attr)
1345 * Once we can actually configure subfunctions (kernel + hw support),
1346 * we have to check if they were already set by user space, if so copy
1347 * them from kvm->arch.
/*
 * Copy the global kvm_s390_available_subfunc
 * (sizeof(struct kvm_s390_vm_cpu_subfunc) bytes) to the user buffer at
 * attr->addr. The kvm argument is not used by the visible statements.
 * NOTE(review): return codes are not visible in this excerpt.
 */
1352 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1353 struct kvm_device_attr *attr)
1355 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1356 sizeof(struct kvm_s390_vm_cpu_subfunc)))
/*
 * Dispatch a CPU-model "get" request on attr->attr to the matching getter
 * (processor, machine, their feature bitmaps, or their subfunctions) and
 * keep that getter's result in ret.
 * NOTE(review): the handling of unknown attributes is not visible here.
 */
1360 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1364 switch (attr->attr) {
1365 case KVM_S390_VM_CPU_PROCESSOR:
1366 ret = kvm_s390_get_processor(kvm, attr);
1368 case KVM_S390_VM_CPU_MACHINE:
1369 ret = kvm_s390_get_machine(kvm, attr);
1371 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1372 ret = kvm_s390_get_processor_feat(kvm, attr);
1374 case KVM_S390_VM_CPU_MACHINE_FEAT:
1375 ret = kvm_s390_get_machine_feat(kvm, attr);
1377 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1378 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1380 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1381 ret = kvm_s390_get_machine_subfunc(kvm, attr);
/*
 * Top-level "set" handler for VM device attributes: switch on attr->group
 * and forward to the memory-control, TOD, CPU-model, crypto or migration
 * setter, keeping its result in ret.
 * NOTE(review): the handling of unknown groups is not visible here.
 */
1387 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1391 switch (attr->group) {
1392 case KVM_S390_VM_MEM_CTRL:
1393 ret = kvm_s390_set_mem_control(kvm, attr);
1395 case KVM_S390_VM_TOD:
1396 ret = kvm_s390_set_tod(kvm, attr);
1398 case KVM_S390_VM_CPU_MODEL:
1399 ret = kvm_s390_set_cpu_model(kvm, attr);
1401 case KVM_S390_VM_CRYPTO:
1402 ret = kvm_s390_vm_set_crypto(kvm, attr);
1404 case KVM_S390_VM_MIGRATION:
1405 ret = kvm_s390_vm_set_migration(kvm, attr);
/*
 * Top-level "get" handler for VM device attributes: switch on attr->group
 * and forward to the memory-control, TOD, CPU-model or migration getter,
 * keeping its result in ret. No crypto getter is dispatched here.
 * NOTE(review): the handling of unknown groups is not visible here.
 */
1415 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1419 switch (attr->group) {
1420 case KVM_S390_VM_MEM_CTRL:
1421 ret = kvm_s390_get_mem_control(kvm, attr);
1423 case KVM_S390_VM_TOD:
1424 ret = kvm_s390_get_tod(kvm, attr);
1426 case KVM_S390_VM_CPU_MODEL:
1427 ret = kvm_s390_get_cpu_model(kvm, attr);
1429 case KVM_S390_VM_MIGRATION:
1430 ret = kvm_s390_vm_get_migration(kvm, attr);
/*
 * Tell user space whether a VM device attribute is known, switching first
 * on attr->group and then on attr->attr.
 *
 * The CMMA enable/clear attributes depend on the host: 0 when
 * sclp.has_cmma is set, -ENXIO otherwise.
 * NOTE(review): the results assigned for all other cases are not visible
 * in this excerpt.
 */
1440 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1444 switch (attr->group) {
1445 case KVM_S390_VM_MEM_CTRL:
1446 switch (attr->attr) {
1447 case KVM_S390_VM_MEM_ENABLE_CMMA:
1448 case KVM_S390_VM_MEM_CLR_CMMA:
1449 ret = sclp.has_cmma ? 0 : -ENXIO;
1451 case KVM_S390_VM_MEM_LIMIT_SIZE:
1459 case KVM_S390_VM_TOD:
1460 switch (attr->attr) {
1461 case KVM_S390_VM_TOD_LOW:
1462 case KVM_S390_VM_TOD_HIGH:
1470 case KVM_S390_VM_CPU_MODEL:
1471 switch (attr->attr) {
1472 case KVM_S390_VM_CPU_PROCESSOR:
1473 case KVM_S390_VM_CPU_MACHINE:
1474 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1475 case KVM_S390_VM_CPU_MACHINE_FEAT:
1476 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1479 /* configuring subfunctions is not supported yet */
1480 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1486 case KVM_S390_VM_CRYPTO:
1487 switch (attr->attr) {
1488 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1489 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1490 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1491 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1499 case KVM_S390_VM_MIGRATION:
/*
 * Read the guest storage keys of args->count pages starting at
 * args->start_gfn and copy them to the user buffer args->skeydata_addr.
 *
 * args->flags must be 0 and args->count must lie in
 * 1..KVM_S390_SKEYS_MAX; KVM_S390_GET_SKEYS_NONE is returned when the mm
 * does not use storage keys. Keys are gathered into a kvmalloc_array()
 * buffer, one byte per page, while mmap_sem is held for reading and the
 * kvm->srcu read lock is taken.
 * NOTE(review): the argument of down_read()/up_read() below looks like a
 * mis-decoded "&current->mm->mmap_sem" (HTML-entity damage) - restore it
 * from the pristine source. Error codes and the release of keys are not
 * visible in this excerpt.
 */
1510 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1514 int srcu_idx, i, r = 0;
1516 if (args->flags != 0)
1519 /* Is this guest using storage keys? */
1520 if (!mm_uses_skeys(current->mm))
1521 return KVM_S390_GET_SKEYS_NONE;
1523 /* Enforce sane limit on memory allocation */
1524 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1527 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1531 down_read(¤t->mm->mmap_sem);
1532 srcu_idx = srcu_read_lock(&kvm->srcu);
1533 for (i = 0; i < args->count; i++) {
1534 hva = gfn_to_hva(kvm, args->start_gfn + i);
1535 if (kvm_is_error_hva(hva)) {
1540 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1544 srcu_read_unlock(&kvm->srcu, srcu_idx);
1545 up_read(¤t->mm->mmap_sem);
1548 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1549 sizeof(uint8_t) * args->count);
/*
 * Set the guest storage keys of args->count pages starting at
 * args->start_gfn from the user buffer args->skeydata_addr.
 *
 * args->flags must be 0 and args->count must lie in
 * 1..KVM_S390_SKEYS_MAX. The keys are copied in from user space, storage
 * key handling is enabled via s390_enable_skey(), and each key is applied
 * with set_guest_storage_key() while mmap_sem is held for reading and the
 * kvm->srcu read lock is taken. A key with its lowest-order bit set is
 * treated specially (reserved bit). fixup_user_fault() is called with
 * FAULT_FLAG_WRITE after set_guest_storage_key().
 * NOTE(review): the condition guarding fixup_user_fault(), the loop
 * counter updates, error codes and the release of keys are not visible in
 * this excerpt. The argument of down_read()/up_read() looks like a
 * mis-decoded "&current->mm->mmap_sem" - restore it from the pristine
 * source.
 */
1558 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1562 int srcu_idx, i, r = 0;
1565 if (args->flags != 0)
1568 /* Enforce sane limit on memory allocation */
1569 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1572 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1576 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1577 sizeof(uint8_t) * args->count);
1583 /* Enable storage key handling for the guest */
1584 r = s390_enable_skey();
1589 down_read(¤t->mm->mmap_sem);
1590 srcu_idx = srcu_read_lock(&kvm->srcu);
1591 while (i < args->count) {
1593 hva = gfn_to_hva(kvm, args->start_gfn + i);
1594 if (kvm_is_error_hva(hva)) {
1599 /* Lowest order bit is reserved */
1600 if (keys[i] & 0x01) {
1605 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1607 r = fixup_user_fault(current, current->mm, hva,
1608 FAULT_FLAG_WRITE, &unlocked);
1615 srcu_read_unlock(&kvm->srcu, srcu_idx);
1616 up_read(¤t->mm->mmap_sem);
1623 * Base address and length must be sent at the start of each block, therefore
1624 * it's cheaper to send some clean data, as long as it's less than the size of
/*
 * Gap threshold used by kvm_s390_get_cmma(): once the next dirty page is
 * more than this many frames beyond the current one, collection stops.
 */
1627 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* Upper bound for the value buffers of the CMMA get/set handlers. */
1628 /* for consistency */
1629 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1632 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1633 * address falls in a hole. In that case the index of one of the memslots
1634 * bordering the hole is returned.
/*
 * Find a memslot index for gfn.
 *
 * The slot cached in slots->lru_slot is tested first. Otherwise the range
 * [0, slots->used_slots) is bisected by comparing gfn with each slot's
 * base_gfn. When the slot at "start" really contains gfn, it is stored
 * back into slots->lru_slot.
 * NOTE(review): the updates of start/end inside the loop and the return
 * statements are not visible in this excerpt.
 */
1636 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1638 int start = 0, end = slots->used_slots;
1639 int slot = atomic_read(&slots->lru_slot);
1640 struct kvm_memory_slot *memslots = slots->memslots;
1642 if (gfn >= memslots[slot].base_gfn &&
1643 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1646 while (start < end) {
1647 slot = start + (end - start) / 2;
1649 if (gfn >= memslots[slot].base_gfn)
1655 if (gfn >= memslots[start].base_gfn &&
1656 gfn < memslots[start].base_gfn + memslots[start].npages) {
1657 atomic_set(&slots->lru_slot, start);
/*
 * Read CMMA values for consecutive guest frames starting at
 * args->start_gfn, without touching any dirty tracking.
 *
 * @res:     output buffer, one byte per page.
 * @bufsize: number of entries res can hold.
 *
 * args->count is used as the running output index. For each page the
 * PGSTE is fetched with get_pgste() and (pgstev >> 24) & 0x43 is stored.
 * An invalid hva yields -EFAULT for the very first page and 0 once at
 * least one value has been stored.
 * NOTE(review): the initialisation of args->count, the advance of
 * cur_gfn and the get_pgste() failure path are not visible here.
 */
1663 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1664 u8 *res, unsigned long bufsize)
1666 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1669 while (args->count < bufsize) {
1670 hva = gfn_to_hva(kvm, cur_gfn);
1672 * We return an error if the first value was invalid, but we
1673 * return successfully if at least one value was copied.
1675 if (kvm_is_error_hva(hva))
1676 return args->count ? 0 : -EFAULT;
1677 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1679 res[args->count++] = (pgstev >> 24) & 0x43;
/*
 * Return the guest frame number of the next page at or after cur_gfn
 * whose bit is set in a memslot's second dirty bitmap.
 *
 * The starting slot comes from gfn_to_memslot_approx(). If cur_gfn lies
 * beyond that slot, another slot is selected (wrapping to index
 * used_slots - 1 in the "above the highest slot" case). The bitmap is
 * searched with find_next_bit(); while nothing is found and slotidx is
 * still positive, the search continues in a further slot from offset 0.
 * The result is ms->base_gfn + ofs of the slot where the search ended.
 * NOTE(review): the statements that change slotidx and reset ofs are not
 * visible in this excerpt.
 */
1686 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1687 unsigned long cur_gfn)
1689 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1690 struct kvm_memory_slot *ms = slots->memslots + slotidx;
1691 unsigned long ofs = cur_gfn - ms->base_gfn;
1693 if (ms->base_gfn + ms->npages <= cur_gfn) {
1695 /* If we are above the highest slot, wrap around */
1697 slotidx = slots->used_slots - 1;
1699 ms = slots->memslots + slotidx;
1702 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1703 while ((slotidx > 0) && (ofs >= ms->npages)) {
1705 ms = slots->memslots + slotidx;
1706 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1708 return ms->base_gfn + ofs;
/*
 * Collect CMMA values of dirty pages for migration.
 *
 * @res:     output buffer, one byte per page.
 * @bufsize: number of entries res can hold.
 *
 * args->start_gfn is moved to the first dirty page found from the
 * requested start. For every page visited the bit in the memslot's second
 * dirty bitmap is cleared, and kvm->arch.cmma_dirty_pages is decremented
 * only if the bit was actually set. The stored value is
 * (pgstev >> 24) & 0x43. Collection stops when the buffer is full, when
 * the next dirty page is more than KVM_S390_MAX_BIT_DISTANCE frames away,
 * or when the next dirty page lies at/after mem_end or outside the
 * buffer window. mem_end is derived from slots->memslots[0].
 * NOTE(review): the advance of cur_gfn, the reset of args->count and the
 * return statements are not visible in this excerpt.
 */
1711 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1712 u8 *res, unsigned long bufsize)
1714 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1715 struct kvm_memslots *slots = kvm_memslots(kvm);
1716 struct kvm_memory_slot *ms;
1718 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1719 ms = gfn_to_memslot(kvm, cur_gfn);
1721 args->start_gfn = cur_gfn;
1724 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1725 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1727 while (args->count < bufsize) {
1728 hva = gfn_to_hva(kvm, cur_gfn);
1729 if (kvm_is_error_hva(hva))
1731 /* Decrement only if we actually flipped the bit to 0 */
1732 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1733 atomic64_dec(&kvm->arch.cmma_dirty_pages);
1734 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1736 /* Save the value */
1737 res[args->count++] = (pgstev >> 24) & 0x43;
1738 /* If the next bit is too far away, stop. */
1739 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1741 /* If we reached the previous "next", find the next one */
1742 if (cur_gfn == next_gfn)
1743 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1744 /* Reached the end of memory or of the buffer, stop */
1745 if ((next_gfn >= mem_end) ||
1746 (next_gfn - args->start_gfn >= bufsize))
1749 /* Reached the end of the current memslot, take the next one. */
1750 if (cur_gfn - ms->base_gfn >= ms->npages) {
1751 ms = gfn_to_memslot(kvm, cur_gfn);
1760 * This function searches for the next page with dirty CMMA attributes, and
1761 * saves the attributes in the buffer up to either the end of the buffer or
1762 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1763 * no trailing clean bytes are saved.
1764 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1765 * output buffer will indicate 0 as length.
/*
 * Handler body for KVM_S390_GET_CMMA_BITS.
 *
 * Checks, in order: CMMA must be in use for the VM, only the
 * KVM_S390_CMMA_PEEK flag is accepted, and a non-peek query needs
 * migration mode. The buffer size is args->count capped at
 * KVM_S390_CMMA_SIZE_MAX. *args is zeroed when the buffer is empty, when
 * the mm never used CMMA, or when a non-peek query finds no dirty pages.
 * Otherwise a vmalloc() buffer is filled by kvm_s390_peek_cmma() or
 * kvm_s390_get_cmma() with mmap_sem held for reading and the kvm->srcu
 * read lock taken. args->remaining is set from
 * kvm->arch.cmma_dirty_pages in migration mode (0 otherwise), and
 * args->count bytes are copied to the user buffer args->values.
 * NOTE(review): return codes and the release of the buffer are not
 * visible in this excerpt.
 */
1767 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1768 struct kvm_s390_cmma_log *args)
1770 unsigned long bufsize;
1771 int srcu_idx, peek, ret;
1774 if (!kvm->arch.use_cmma)
1776 /* Invalid/unsupported flags were specified */
1777 if (args->flags & ~KVM_S390_CMMA_PEEK)
1779 /* Migration mode query, and we are not doing a migration */
1780 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1781 if (!peek && !kvm->arch.migration_mode)
1783 /* CMMA is disabled or was not used, or the buffer has length zero */
1784 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1785 if (!bufsize || !kvm->mm->context.uses_cmm) {
1786 memset(args, 0, sizeof(*args));
1789 /* We are not peeking, and there are no dirty pages */
1790 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1791 memset(args, 0, sizeof(*args));
1795 values = vmalloc(bufsize);
1799 down_read(&kvm->mm->mmap_sem);
1800 srcu_idx = srcu_read_lock(&kvm->srcu);
1802 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1804 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1805 srcu_read_unlock(&kvm->srcu, srcu_idx);
1806 up_read(&kvm->mm->mmap_sem);
1808 if (kvm->arch.migration_mode)
1809 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1811 args->remaining = 0;
1813 if (copy_to_user((void __user *)args->values, values, args->count))
1821 * This function sets the CMMA attributes for the given pages. If the input
1822 * buffer has zero length, no action is taken, otherwise the attributes are
1823 * set and the mm->context.uses_cmm flag is set.
/*
 * Handler body for KVM_S390_SET_CMMA_BITS.
 *
 * Requires CMMA to be in use for the VM, args->flags == 0 and
 * args->count <= KVM_S390_CMMA_SIZE_MAX; a count of 0 is checked
 * separately before any allocation. The values are copied from the user
 * buffer args->values into a vmalloc() buffer. For each page from
 * args->start_gfn the value is shifted into PGSTE position (<< 24), the
 * mask is limited to _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT and
 * set_pgste_bits() is applied, with mmap_sem held for reading and the
 * kvm->srcu read lock taken. Finally mm->context.uses_cmm is set under
 * mmap_sem held for writing if it was not set yet.
 * NOTE(review): the loads of mask/pgstev, return codes and the release
 * of the buffer are not visible in this excerpt.
 */
1825 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1826 const struct kvm_s390_cmma_log *args)
1828 unsigned long hva, mask, pgstev, i;
1830 int srcu_idx, r = 0;
1834 if (!kvm->arch.use_cmma)
1836 /* invalid/unsupported flags */
1837 if (args->flags != 0)
1839 /* Enforce sane limit on memory allocation */
1840 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1843 if (args->count == 0)
1846 bits = vmalloc(array_size(sizeof(*bits), args->count));
1850 r = copy_from_user(bits, (void __user *)args->values, args->count);
1856 down_read(&kvm->mm->mmap_sem);
1857 srcu_idx = srcu_read_lock(&kvm->srcu);
1858 for (i = 0; i < args->count; i++) {
1859 hva = gfn_to_hva(kvm, args->start_gfn + i);
1860 if (kvm_is_error_hva(hva)) {
1866 pgstev = pgstev << 24;
1867 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1868 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1870 srcu_read_unlock(&kvm->srcu, srcu_idx);
1871 up_read(&kvm->mm->mmap_sem);
1873 if (!kvm->mm->context.uses_cmm) {
1874 down_write(&kvm->mm->mmap_sem);
1875 kvm->mm->context.uses_cmm = 1;
1876 up_write(&kvm->mm->mmap_sem);
/*
 * Architecture-specific VM ioctl entry point.
 *
 * @filp:  file whose private_data is the struct kvm.
 * @ioctl: request code.
 * @arg:   user pointer to the request's argument structure.
 *
 * Each case copies its argument structure in from user space and calls
 * the matching handler, storing the result in r:
 *   KVM_S390_INTERRUPT            -> kvm_s390_inject_vm()
 *   KVM_ENABLE_CAP                -> kvm_vm_ioctl_enable_cap()
 *   KVM_CREATE_IRQCHIP            -> dummy routing via
 *                                    kvm_set_irq_routing(), only when
 *                                    kvm->arch.use_irqchip is set
 *   KVM_{SET,GET,HAS}_DEVICE_ATTR -> kvm_s390_vm_{set,get,has}_attr()
 *   KVM_S390_{GET,SET}_SKEYS      -> kvm_s390_{get,set}_skeys()
 *   KVM_S390_{GET,SET}_CMMA_BITS  -> kvm_s390_{get,set}_cmma_bits(),
 *                                    called with kvm->slots_lock held
 * For KVM_S390_GET_CMMA_BITS the updated args are copied back to user
 * space.
 * NOTE(review): the switch header, default case and return codes are not
 * visible in this excerpt.
 */
1883 long kvm_arch_vm_ioctl(struct file *filp,
1884 unsigned int ioctl, unsigned long arg)
1886 struct kvm *kvm = filp->private_data;
1887 void __user *argp = (void __user *)arg;
1888 struct kvm_device_attr attr;
1892 case KVM_S390_INTERRUPT: {
1893 struct kvm_s390_interrupt s390int;
1896 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1898 r = kvm_s390_inject_vm(kvm, &s390int);
1901 case KVM_ENABLE_CAP: {
1902 struct kvm_enable_cap cap;
1904 if (copy_from_user(&cap, argp, sizeof(cap)))
1906 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1909 case KVM_CREATE_IRQCHIP: {
1910 struct kvm_irq_routing_entry routing;
1913 if (kvm->arch.use_irqchip) {
1914 /* Set up dummy routing. */
1915 memset(&routing, 0, sizeof(routing));
1916 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1920 case KVM_SET_DEVICE_ATTR: {
1922 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1924 r = kvm_s390_vm_set_attr(kvm, &attr);
1927 case KVM_GET_DEVICE_ATTR: {
1929 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1931 r = kvm_s390_vm_get_attr(kvm, &attr);
1934 case KVM_HAS_DEVICE_ATTR: {
1936 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1938 r = kvm_s390_vm_has_attr(kvm, &attr);
1941 case KVM_S390_GET_SKEYS: {
1942 struct kvm_s390_skeys args;
1945 if (copy_from_user(&args, argp,
1946 sizeof(struct kvm_s390_skeys)))
1948 r = kvm_s390_get_skeys(kvm, &args);
1951 case KVM_S390_SET_SKEYS: {
1952 struct kvm_s390_skeys args;
1955 if (copy_from_user(&args, argp,
1956 sizeof(struct kvm_s390_skeys)))
1958 r = kvm_s390_set_skeys(kvm, &args);
1961 case KVM_S390_GET_CMMA_BITS: {
1962 struct kvm_s390_cmma_log args;
1965 if (copy_from_user(&args, argp, sizeof(args)))
1967 mutex_lock(&kvm->slots_lock);
1968 r = kvm_s390_get_cmma_bits(kvm, &args);
1969 mutex_unlock(&kvm->slots_lock);
1971 r = copy_to_user(argp, &args, sizeof(args));
1977 case KVM_S390_SET_CMMA_BITS: {
1978 struct kvm_s390_cmma_log args;
1981 if (copy_from_user(&args, argp, sizeof(args)))
1983 mutex_lock(&kvm->slots_lock);
1984 r = kvm_s390_set_cmma_bits(kvm, &args);
1985 mutex_unlock(&kvm->slots_lock);
/*
 * Issue the PQAP(QCI) instruction (emitted as raw opcode 0xb2af0000) with
 * function code 0x04000000 to query the AP configuration.
 *
 * @config: caller buffer; its first 128 bytes are cleared beforehand and
 *          it is passed to the instruction as an input operand.
 *
 * The inline assembly declares cc, registers 0 and 2 and memory as
 * clobbered.
 * NOTE(review): most of the assembly template and the returned value are
 * not visible in this excerpt.
 */
1995 static int kvm_s390_query_ap_config(u8 *config)
1997 u32 fcn_code = 0x04000000UL;
2000 memset(config, 0, 128);
2004 ".long 0xb2af0000\n" /* PQAP(QCI) */
2010 : "r" (fcn_code), "r" (config)
2011 : "cc", "0", "2", "memory"
/*
 * When facility 12 is available, query the AP configuration and return
 * bit 0x40 of its first byte (non-zero means set). A failing query is
 * reported with pr_err().
 * NOTE(review): the result for a missing facility 12 or a failed query is
 * not visible in this excerpt.
 */
2017 static int kvm_s390_apxa_installed(void)
2022 if (test_facility(12)) {
2023 cc = kvm_s390_query_ap_config(config);
2026 pr_err("PQAP(QCI) failed with cc=%d", cc);
2028 return config[0] & 0x40;
/*
 * Build the crypto control block designation: the crycb address truncated
 * to 32 bits, OR-ed with a format flag. CRYCB_FORMAT2 is chosen when
 * kvm_s390_apxa_installed() reports true; CRYCB_FORMAT1 is presumably
 * the alternative (the "else" line is not visible in this excerpt).
 */
2034 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2036 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2038 if (kvm_s390_apxa_installed())
2039 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2041 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
/*
 * Return the initial guest CPU id as a raw u64: a cpuid structure with
 * its version field forced to 0xff.
 * NOTE(review): how cpuid is obtained before the override is not visible
 * in this excerpt.
 */
2044 static u64 kvm_s390_get_initial_cpuid(void)
2049 cpuid.version = 0xff;
2050 return *((u64 *) &cpuid);
/*
 * Initialise the VM's crypto state; the setup is conditional on KVM
 * facility 76 (the statement under that test is not visible here).
 *
 * The crycb lives inside sie_page2, its designation/format is computed,
 * AES and DEA key wrapping are switched on, and both wrapping key masks
 * are filled with random bytes.
 */
2053 static void kvm_s390_crypto_init(struct kvm *kvm)
2055 if (!test_kvm_facility(kvm, 76))
2058 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2059 kvm_s390_set_crycb_format(kvm);
2061 /* Enable AES/DEA protected key functions by default */
2062 kvm->arch.crypto.aes_kw = 1;
2063 kvm->arch.crypto.dea_kw = 1;
2064 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2065 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2066 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2067 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
/*
 * Release the VM's SCA and clear the pointer. An extended SCA is freed
 * with free_pages_exact() using sizeof(struct esca_block); the other
 * visible path frees a single page.
 */
2070 static void sca_dispose(struct kvm *kvm)
2072 if (kvm->arch.use_esca)
2073 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2075 free_page((unsigned long)(kvm->arch.sca));
2076 kvm->arch.sca = NULL;
/*
 * Architecture-specific part of VM creation.
 *
 * @kvm:  VM being created.
 * @type: VM type flags; with CONFIG_KVM_S390_UCONTROL only
 *        KVM_VM_S390_UCONTROL is checked for, and it needs CAP_SYS_ADMIN.
 *
 * Visible steps, in order:
 *  - enable SIE for the current mm (s390_enable_sie());
 *  - allocate a zeroed page for the basic SCA (GFP_DMA unless
 *    sclp.has_64bscao) and place the SCA at a static, kvm_lock-protected
 *    offset inside that page;
 *  - register the "kvm-<pid>" debug feature;
 *  - allocate sie_page2 (zeroed, GFP_DMA) and point model.fac_list at it;
 *  - derive the facility mask/list from the host STFLE list and
 *    kvm_s390_fac_base / kvm_s390_fac_ext, then force facilities 138 and
 *    74, plus 147 when MACHINE_HAS_TLB_GUEST;
 *  - set the initial cpuid and ibc, initialise crypto, floating
 *    interrupt state and the ipte wait queue/mutex;
 *  - for ucontrol VMs leave gmap NULL with no memory limit, otherwise
 *    derive mem_limit (TASK_SIZE_MAX when sclp.hamax == U64_MAX) and
 *    create the gmap;
 *  - record pfmfi/skey support and initialise vsie and gisa.
 * The tail lines free sie_page2 and unregister the debug feature before
 * logging the failure code.
 * NOTE(review): goto labels, NULL checks and return statements are not
 * visible in this excerpt.
 */
2079 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2081 gfp_t alloc_flags = GFP_KERNEL;
2083 char debug_name[16];
2084 static unsigned long sca_offset;
2087 #ifdef CONFIG_KVM_S390_UCONTROL
2088 if (type & ~KVM_VM_S390_UCONTROL)
2090 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2097 rc = s390_enable_sie();
2103 if (!sclp.has_64bscao)
2104 alloc_flags |= GFP_DMA;
2105 rwlock_init(&kvm->arch.sca_lock);
2106 /* start with basic SCA */
2107 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2110 spin_lock(&kvm_lock);
2112 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2114 kvm->arch.sca = (struct bsca_block *)
2115 ((char *) kvm->arch.sca + sca_offset);
2116 spin_unlock(&kvm_lock);
2118 sprintf(debug_name, "kvm-%u", current->pid);
2120 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2124 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2125 kvm->arch.sie_page2 =
2126 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2127 if (!kvm->arch.sie_page2)
2130 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2132 for (i = 0; i < kvm_s390_fac_size(); i++) {
2133 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2134 (kvm_s390_fac_base[i] |
2135 kvm_s390_fac_ext[i]);
2136 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2137 kvm_s390_fac_base[i];
2140 /* we are always in czam mode - even on pre z14 machines */
2141 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2142 set_kvm_facility(kvm->arch.model.fac_list, 138);
2143 /* we emulate STHYI in kvm */
2144 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2145 set_kvm_facility(kvm->arch.model.fac_list, 74);
2146 if (MACHINE_HAS_TLB_GUEST) {
2147 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2148 set_kvm_facility(kvm->arch.model.fac_list, 147);
2151 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2152 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2154 kvm_s390_crypto_init(kvm);
2156 mutex_init(&kvm->arch.float_int.ais_lock);
2157 spin_lock_init(&kvm->arch.float_int.lock);
2158 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2159 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2160 init_waitqueue_head(&kvm->arch.ipte_wq);
2161 mutex_init(&kvm->arch.ipte_mutex);
2163 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2164 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2166 if (type & KVM_VM_S390_UCONTROL) {
2167 kvm->arch.gmap = NULL;
2168 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2170 if (sclp.hamax == U64_MAX)
2171 kvm->arch.mem_limit = TASK_SIZE_MAX;
2173 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2175 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2176 if (!kvm->arch.gmap)
2178 kvm->arch.gmap->private = kvm;
2179 kvm->arch.gmap->pfault_enabled = 0;
2182 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2183 kvm->arch.use_skf = sclp.has_skey;
2184 spin_lock_init(&kvm->arch.start_stop_lock);
2185 kvm_s390_vsie_init(kvm);
2186 kvm_s390_gisa_init(kvm);
2187 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2191 free_page((unsigned long)kvm->arch.sie_page2);
2192 debug_unregister(kvm->arch.dbf);
2194 KVM_EVENT(3, "creation of vm failed: %d", rc);
/*
 * NOTE(review): only the signature is visible in this excerpt. Judging by
 * the name this reports whether per-VCPU debugfs entries are provided -
 * confirm the returned value against the full source.
 */
2198 bool kvm_arch_has_vcpu_debugfs(void)
/*
 * NOTE(review): only the signature is visible in this excerpt. Judging by
 * the name this is the hook for creating per-VCPU debugfs entries -
 * confirm the body and return value against the full source.
 */
2203 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
/*
 * Tear down a VCPU: trace the event, clear its local interrupts and the
 * async page-fault completion queue, remove the per-VCPU gmap for
 * ucontrol VMs, release the CMMA state when the VM uses CMMA, free the
 * SIE block page, and finally uninit the vcpu and return it to
 * kvm_vcpu_cache.
 * NOTE(review): the action taken for non-ucontrol VMs is not visible in
 * this excerpt.
 */
2208 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2210 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2211 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2212 kvm_s390_clear_local_irqs(vcpu);
2213 kvm_clear_async_pf_completion_queue(vcpu);
2214 if (!kvm_is_ucontrol(vcpu->kvm))
2217 if (kvm_is_ucontrol(vcpu->kvm))
2218 gmap_remove(vcpu->arch.gmap);
2220 if (vcpu->kvm->arch.use_cmma)
2221 kvm_s390_vcpu_unsetup_cmma(vcpu);
2222 free_page((unsigned long)(vcpu->arch.sie_block));
2224 kvm_vcpu_uninit(vcpu);
2225 kmem_cache_free(kvm_vcpu_cache, vcpu);
/*
 * Destroy every VCPU of the VM, then, under kvm->lock, clear the
 * kvm->vcpus[] slots of all online VCPUs and reset online_vcpus to 0.
 */
2228 static void kvm_free_vcpus(struct kvm *kvm)
2231 struct kvm_vcpu *vcpu;
2233 kvm_for_each_vcpu(i, vcpu, kvm)
2234 kvm_arch_vcpu_destroy(vcpu);
2236 mutex_lock(&kvm->lock);
2237 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2238 kvm->vcpus[i] = NULL;
2240 atomic_set(&kvm->online_vcpus, 0);
2241 mutex_unlock(&kvm->lock);
/*
 * Architecture-specific part of VM destruction: free all VCPUs,
 * unregister the debug feature, destroy the gisa, free sie_page2, remove
 * the VM-wide gmap (non-ucontrol VMs only), and release adapters,
 * floating interrupts and vsie state.
 * NOTE(review): one statement after kvm_free_vcpus() is not visible in
 * this excerpt.
 */
2244 void kvm_arch_destroy_vm(struct kvm *kvm)
2246 kvm_free_vcpus(kvm);
2248 debug_unregister(kvm->arch.dbf);
2249 kvm_s390_gisa_destroy(kvm);
2250 free_page((unsigned long)kvm->arch.sie_page2);
2251 if (!kvm_is_ucontrol(kvm))
2252 gmap_remove(kvm->arch.gmap);
2253 kvm_s390_destroy_adapters(kvm);
2254 kvm_s390_clear_float_irqs(kvm);
2255 kvm_s390_vsie_destroy(kvm);
2256 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2259 /* Section: vcpu related */
/*
 * Give a ucontrol VCPU its own gmap covering the whole address range
 * (limit -1UL) of the current mm and link it back to the VM.
 * NOTE(review): the return values are not visible in this excerpt.
 */
2260 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2262 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2263 if (!vcpu->arch.gmap)
2265 vcpu->arch.gmap->private = vcpu->kvm;
/*
 * Remove a VCPU from the SCA: under the sca_lock read lock, clear the
 * VCPU's bit in the mcn mask and zero its sda entry, using the extended
 * or basic SCA layout depending on kvm->arch.use_esca. The function
 * first tests kvm_s390_use_sca_entries() (the statement under that test
 * is not visible here).
 */
2270 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2272 if (!kvm_s390_use_sca_entries())
2274 read_lock(&vcpu->kvm->arch.sca_lock);
2275 if (vcpu->kvm->arch.use_esca) {
2276 struct esca_block *sca = vcpu->kvm->arch.sca;
2278 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2279 sca->cpu[vcpu->vcpu_id].sda = 0;
2281 struct bsca_block *sca = vcpu->kvm->arch.sca;
2283 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2284 sca->cpu[vcpu->vcpu_id].sda = 0;
2286 read_unlock(&vcpu->kvm->arch.sca_lock);
/*
 * Register a VCPU in the SCA and store the SCA origin in its SIE block.
 *
 * Without SCA entries only scaoh/scaol are set (the basic SCA is still
 * needed for ipte control). Otherwise, under the sca_lock read lock, the
 * VCPU's sda entry is pointed at its SIE block, scaoh/scaol are set and
 * the VCPU's bit in the mcn mask is set. For the extended SCA the low
 * six bits of scaol are masked off and ECB2_ESCA is enabled.
 */
2289 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2291 if (!kvm_s390_use_sca_entries()) {
2292 struct bsca_block *sca = vcpu->kvm->arch.sca;
2294 /* we still need the basic sca for the ipte control */
2295 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2296 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2299 read_lock(&vcpu->kvm->arch.sca_lock);
2300 if (vcpu->kvm->arch.use_esca) {
2301 struct esca_block *sca = vcpu->kvm->arch.sca;
2303 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2304 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2305 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2306 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2307 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2309 struct bsca_block *sca = vcpu->kvm->arch.sca;
2311 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2312 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2313 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2314 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2316 read_unlock(&vcpu->kvm->arch.sca_lock);
2319 /* Basic SCA to Extended SCA data copy routines */
/*
 * Copy one basic SCA entry into an extended SCA entry; the visible lines
 * transfer the sigp_ctrl c and scn fields.
 * NOTE(review): one further statement is not visible in this excerpt.
 */
2320 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2323 d->sigp_ctrl.c = s->sigp_ctrl.c;
2324 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
/*
 * Copy a basic SCA block into an extended one: ipte_control plus all
 * KVM_S390_BSCA_CPU_SLOTS per-CPU entries via sca_copy_entry().
 * NOTE(review): one further statement before the ipte_control copy is
 * not visible in this excerpt.
 */
2327 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2331 d->ipte_control = s->ipte_control;
2333 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2334 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
/*
 * Replace the VM's basic SCA with a newly allocated, zeroed extended SCA.
 *
 * With all VCPUs blocked and the sca_lock held for writing, the old
 * contents are copied over, every VCPU's SIE block gets the new SCA
 * origin (scaoh/scaol, low six bits masked) and ECB2_ESCA, and
 * kvm->arch.sca / use_esca are switched. Afterwards the VCPUs are
 * unblocked and the old SCA page is freed.
 * NOTE(review): allocation-failure handling and the return values are
 * not visible in this excerpt.
 */
2337 static int sca_switch_to_extended(struct kvm *kvm)
2339 struct bsca_block *old_sca = kvm->arch.sca;
2340 struct esca_block *new_sca;
2341 struct kvm_vcpu *vcpu;
2342 unsigned int vcpu_idx;
2345 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2349 scaoh = (u32)((u64)(new_sca) >> 32);
2350 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2352 kvm_s390_vcpu_block_all(kvm);
2353 write_lock(&kvm->arch.sca_lock);
2355 sca_copy_b_to_e(new_sca, old_sca);
2357 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2358 vcpu->arch.sie_block->scaoh = scaoh;
2359 vcpu->arch.sie_block->scaol = scaol;
2360 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2362 kvm->arch.sca = new_sca;
2363 kvm->arch.use_esca = 1;
2365 write_unlock(&kvm->arch.sca_lock);
2366 kvm_s390_vcpu_unblock_all(kvm);
2368 free_page((unsigned long)old_sca);
2370 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2371 old_sca, kvm->arch.sca);
/*
 * Decide whether a VCPU with the given id fits into the SCA.
 *
 * Without SCA entries the id is compared with KVM_MAX_VCPUS. Otherwise
 * ids below KVM_S390_BSCA_CPU_SLOTS are tested first; larger ids need
 * sclp.has_esca and sclp.has_64bscao, and the SCA is switched to the
 * extended format under kvm->lock if that has not happened yet. The final
 * result is non-zero only when the switch succeeded and
 * id < KVM_S390_ESCA_CPU_SLOTS.
 * NOTE(review): the early return values are not visible in this excerpt.
 */
2375 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2379 if (!kvm_s390_use_sca_entries()) {
2380 if (id < KVM_MAX_VCPUS)
2384 if (id < KVM_S390_BSCA_CPU_SLOTS)
2386 if (!sclp.has_esca || !sclp.has_64bscao)
2389 mutex_lock(&kvm->lock);
2390 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2391 mutex_unlock(&kvm->lock);
2393 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
/*
 * Initialise architecture state of a new VCPU: invalidate the pfault
 * token, clear the async page-fault queue, set prefix 0 and build
 * run->kvm_valid_regs. Optional sync-register bits are added per KVM
 * facility: 64 -> RICCB, 82 -> BPBC, 133 -> GSCB, 156 -> ETOKEN. Either
 * KVM_SYNC_VRS or KVM_SYNC_FPRS is added as well (the selecting condition
 * is not visible here). ucontrol VMs finish via
 * __kvm_ucontrol_vcpu_init().
 * NOTE(review): the base kvm_valid_regs bits after KVM_SYNC_PREFIX and
 * the final return are not visible in this excerpt.
 */
2396 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2398 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2399 kvm_clear_async_pf_completion_queue(vcpu);
2400 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2406 kvm_s390_set_prefix(vcpu, 0);
2407 if (test_kvm_facility(vcpu->kvm, 64))
2408 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2409 if (test_kvm_facility(vcpu->kvm, 82))
2410 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2411 if (test_kvm_facility(vcpu->kvm, 133))
2412 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2413 if (test_kvm_facility(vcpu->kvm, 156))
2414 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2415 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2416 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2419 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2421 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2423 if (kvm_is_ucontrol(vcpu->kvm))
2424 return __kvm_ucontrol_vcpu_init(vcpu);
2429 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Record the current TOD clock in cputm_start inside a cputm_seqcount
 * write section. Warns once if accounting was already started
 * (cputm_start != 0).
 */
2430 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2432 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2433 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2434 vcpu->arch.cputm_start = get_tod_clock_fast();
2435 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2438 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Subtract the TOD time elapsed since cputm_start from the SIE block's
 * cputm and reset cputm_start to 0, inside a cputm_seqcount write
 * section. Warns once if accounting was not started.
 */
2439 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2441 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2442 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2443 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2444 vcpu->arch.cputm_start = 0;
2445 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2448 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Mark CPU timer accounting as enabled for the VCPU and start it. Warns
 * once if it was already enabled.
 */
2449 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2451 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2452 vcpu->arch.cputm_enabled = true;
2453 __start_cpu_timer_accounting(vcpu);
2456 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Stop CPU timer accounting for the VCPU and mark it as disabled. Warns
 * once if it was not enabled.
 */
2457 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2459 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2460 __stop_cpu_timer_accounting(vcpu);
2461 vcpu->arch.cputm_enabled = false;
/*
 * Preemption-safe wrapper around __enable_cpu_timer_accounting().
 * NOTE(review): the matching preempt_enable() is not visible in this
 * excerpt.
 */
2464 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2466 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2467 __enable_cpu_timer_accounting(vcpu);
/*
 * Preemption-safe wrapper around __disable_cpu_timer_accounting().
 * NOTE(review): the matching preempt_enable() is not visible in this
 * excerpt.
 */
2471 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2473 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2474 __disable_cpu_timer_accounting(vcpu);
2478 /* set the cpu timer - may only be called from the VCPU thread itself */
/*
 * Store a new CPU timer value in the SIE block inside a cputm_seqcount
 * write section. When accounting is enabled, cputm_start is re-based to
 * the current TOD clock so elapsed time is measured from the new value.
 * NOTE(review): the matching preempt_enable() is not visible in this
 * excerpt.
 */
2479 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2481 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2482 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2483 if (vcpu->arch.cputm_enabled)
2484 vcpu->arch.cputm_start = get_tod_clock_fast();
2485 vcpu->arch.sie_block->cputm = cputm;
2486 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2490 /* update and get the cpu timer - can also be called from other VCPU threads */
/*
 * Return the VCPU's current CPU timer value.
 *
 * With accounting disabled the SIE block value is returned directly.
 * Otherwise the value is sampled in a cputm_seqcount retry loop: the
 * stored cputm minus the TOD time elapsed since cputm_start (skipped
 * while cputm_start is 0). The retry check uses seq with its lowest bit
 * cleared. A warning fires once if an odd sequence count is observed on
 * the CPU the VCPU is loaded on.
 * NOTE(review): the loop header, preempt_enable() and the final return
 * are not visible in this excerpt.
 */
2491 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2496 if (unlikely(!vcpu->arch.cputm_enabled))
2497 return vcpu->arch.sie_block->cputm;
2499 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2501 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2503 * If the writer would ever execute a read in the critical
2504 * section, e.g. in irq context, we have a deadlock.
2506 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2507 value = vcpu->arch.sie_block->cputm;
2508 /* if cputm_start is 0, accounting is being started/stopped */
2509 if (likely(vcpu->arch.cputm_start))
2510 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2511 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
/*
 * Called when the VCPU is loaded onto a host CPU: enable the gmap saved
 * in enabled_gmap, set CPUSTAT_RUNNING, and resume CPU timer accounting
 * if it is enabled and the VCPU is not idle.
 * NOTE(review): further statements of this function are not visible in
 * this excerpt.
 */
2516 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2519 gmap_enable(vcpu->arch.enabled_gmap);
2520 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2521 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2522 __start_cpu_timer_accounting(vcpu);
/*
 * Called when the VCPU is taken off a host CPU: pause CPU timer
 * accounting if it is enabled and the VCPU is not idle, clear
 * CPUSTAT_RUNNING, then remember the currently enabled gmap in
 * enabled_gmap and disable it.
 * NOTE(review): further statements of this function are not visible in
 * this excerpt.
 */
2526 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2529 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2530 __stop_cpu_timer_accounting(vcpu);
2531 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2532 vcpu->arch.enabled_gmap = gmap_get_enabled();
2533 gmap_disable(vcpu->arch.enabled_gmap);
/*
 * Put the VCPU into its initial-reset state: clear the PSW, prefix, CPU
 * timer, clock comparator and TOD programmable register; zero all 16
 * control registers and then load the reset values of CR0 and CR14;
 * reset the fpc of the current thread, set gbea to 1, clear pp and the
 * FPF_BPBC flag; invalidate the pfault token and clear the async
 * page-fault queue. The VCPU is stopped unless user space controls the
 * CPU state, and local interrupts are cleared.
 * NOTE(review): one CR14 term and one statement before the fpc reset are
 * not visible in this excerpt.
 */
2537 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2539 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2540 vcpu->arch.sie_block->gpsw.mask = 0UL;
2541 vcpu->arch.sie_block->gpsw.addr = 0UL;
2542 kvm_s390_set_prefix(vcpu, 0);
2543 kvm_s390_set_cpu_timer(vcpu, 0);
2544 vcpu->arch.sie_block->ckc = 0UL;
2545 vcpu->arch.sie_block->todpr = 0;
2546 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2547 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
2548 CR0_INTERRUPT_KEY_SUBMASK |
2549 CR0_MEASUREMENT_ALERT_SUBMASK;
2550 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2552 CR14_EXTERNAL_DAMAGE_SUBMASK;
2553 /* make sure the new fpc will be lazily loaded */
2555 current->thread.fpu.fpc = 0;
2556 vcpu->arch.sie_block->gbea = 1;
2557 vcpu->arch.sie_block->pp = 0;
2558 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2559 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2560 kvm_clear_async_pf_completion_queue(vcpu);
2561 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2562 kvm_s390_vcpu_stop(vcpu);
2563 kvm_s390_clear_local_irqs(vcpu);
/*
 * Finish VCPU creation: copy the VM-wide epoch and epdx into the SIE
 * block under kvm->lock, share the VM's gmap with the VCPU for
 * non-ucontrol VMs, enable ICTL_OPEREXC when facility 74 is set or
 * user_instr0 is on, and preset enabled_gmap for the first vcpu_load.
 * NOTE(review): some statements (around the lock and inside the
 * non-ucontrol branch) are not visible in this excerpt.
 */
2566 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2568 mutex_lock(&vcpu->kvm->lock);
2570 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2571 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2573 mutex_unlock(&vcpu->kvm->lock);
2574 if (!kvm_is_ucontrol(vcpu->kvm)) {
2575 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2578 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2579 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2580 /* make vcpu_load load the right gmap on the first trigger */
2581 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
/*
 * Configure the crypto-related SIE controls of a vCPU. Facility 76 is
 * tested first (the statement guarded by that test is not visible in this
 * listing). The ECB3_AES/ECB3_DEA bits are cleared and then set again
 * according to the VM-wide aes_kw/dea_kw flags, and the VM's crycbd value
 * is copied into the SIE block.
 */
2584 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2586 if (!test_kvm_facility(vcpu->kvm, 76))
2589 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2591 if (vcpu->kvm->arch.crypto.aes_kw)
2592 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2593 if (vcpu->kvm->arch.crypto.dea_kw)
2594 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2596 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
/*
 * Release the page referenced by the SIE block's cbrlo field and reset the
 * field to 0 so no stale address remains.
 */
2599 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2601 free_page(vcpu->arch.sie_block->cbrlo);
2602 vcpu->arch.sie_block->cbrlo = 0;
/*
 * Allocate a zeroed page (GFP_KERNEL) and store its address in the SIE
 * block's cbrlo field. The allocation result is checked; the statements
 * producing the return value are not visible in this listing.
 */
2605 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2607 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2608 if (!vcpu->arch.sie_block->cbrlo)
/*
 * Copy CPU model data from the VM into the vCPU's SIE block: the ibc value
 * always, and - when facility 7 is available - the address of the model's
 * facility list.
 */
2613 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2615 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2617 vcpu->arch.sie_block->ibc = model->ibc;
2618 if (test_kvm_facility(vcpu->kvm, 7))
/* the fac field receives the facility list address truncated to 32 bits */
2619 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/*
 * Populate the SIE control block of a vCPU: initial cpuflags, CPU model
 * data, the ecb/ecb2/eca/ecd execution controls selected by host
 * capabilities and guest facilities, the sdnxo and riccbd addresses, the
 * ISKE/SSKE/RRBE intercept controls, optional CMMA setup, the ckc hrtimer
 * and the crypto controls.
 *
 * NOTE(review): this listing lacks a number of short lines, so several
 * statements below (for example the ECA_CEI/ECA_IB/ECA_SII lines and the
 * CPUSTAT_KSS / ICTL_ISKE lines) appear without the condition that
 * presumably guards them - confirm against the complete file before
 * relying on the control flow shown here.
 */
2622 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2626 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
/* facility 78 selects CPUSTAT_GED2, otherwise facility 8 selects CPUSTAT_GED */
2630 if (test_kvm_facility(vcpu->kvm, 78))
2631 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2632 else if (test_kvm_facility(vcpu->kvm, 8))
2633 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2635 kvm_s390_vcpu_setup_model(vcpu);
2637 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2638 if (MACHINE_HAS_ESOP)
2639 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2640 if (test_kvm_facility(vcpu->kvm, 9))
2641 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2642 if (test_kvm_facility(vcpu->kvm, 73))
2643 vcpu->arch.sie_block->ecb |= ECB_TE;
2645 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2646 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2647 if (test_kvm_facility(vcpu->kvm, 130))
2648 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
/* eca starts from a fixed base value; further bits are OR-ed in below */
2649 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2651 vcpu->arch.sie_block->eca |= ECA_CEI;
2653 vcpu->arch.sie_block->eca |= ECA_IB;
2655 vcpu->arch.sie_block->eca |= ECA_SII;
2656 if (sclp.has_sigpif)
2657 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2658 if (test_kvm_facility(vcpu->kvm, 129)) {
2659 vcpu->arch.sie_block->eca |= ECA_VX;
2660 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2662 if (test_kvm_facility(vcpu->kvm, 139))
2663 vcpu->arch.sie_block->ecd |= ECD_MEF;
2664 if (test_kvm_facility(vcpu->kvm, 156))
2665 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
/* a non-zero gd enables ECA_AIV; the low two bits of gd are logged as the gisa format */
2666 if (vcpu->arch.sie_block->gd) {
2667 vcpu->arch.sie_block->eca |= ECA_AIV;
2668 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2669 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
/* sdnxo and riccbd are derived from the addresses of fields inside kvm_run */
2671 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2673 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2676 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2678 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2680 if (vcpu->kvm->arch.use_cmma) {
2681 rc = kvm_s390_vcpu_setup_cmma(vcpu);
/* ckc_timer is a relative CLOCK_MONOTONIC hrtimer that fires kvm_s390_idle_wakeup */
2685 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2686 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2688 kvm_s390_vcpu_crypto_setup(vcpu);
/*
 * Allocate and initialise a vCPU with the given id.
 *
 * For non-ucontrol VMs sca_can_add_vcpu() is consulted first. The vcpu
 * structure comes zeroed from kvm_vcpu_cache and the SIE page is one zeroed
 * page; its sie_block and itdb members are wired into the vCPU. After the
 * basic SIE fields are set, kvm_vcpu_init() is called. The cleanup lines at
 * the end free the SIE page and the vcpu structure.
 *
 * NOTE(review): the error checks, labels and return statements are not
 * visible in this listing.
 */
2693 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2696 struct kvm_vcpu *vcpu;
2697 struct sie_page *sie_page;
2700 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2705 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
/* struct sie_page must be exactly 4096 bytes, since it is backed by a single page */
2709 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2710 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2714 vcpu->arch.sie_block = &sie_page->sie_block;
2715 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2717 /* the real guest size will always be smaller than msl */
2718 vcpu->arch.sie_block->mso = 0;
2719 vcpu->arch.sie_block->msl = sclp.hamax;
2721 vcpu->arch.sie_block->icpua = id;
2722 spin_lock_init(&vcpu->arch.local_int.lock);
/* gd holds the gisa address truncated to 32 bits, tagged with GISA_FORMAT1 if sclp.has_gisaf */
2723 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2724 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2725 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2726 seqcount_init(&vcpu->arch.cputm_seqcount);
2728 rc = kvm_vcpu_init(vcpu, kvm, id);
2730 goto out_free_sie_block;
2731 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2732 vcpu->arch.sie_block);
2733 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
/* cleanup: release the SIE page, then the vcpu structure */
2737 free_page((unsigned long)(vcpu->arch.sie_block));
2739 kmem_cache_free(kvm_vcpu_cache, vcpu);
/*
 * Report whether the vCPU is runnable; the answer is the result of
 * kvm_s390_vcpu_has_irq() called with 0 as second argument.
 */
2744 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2746 return kvm_s390_vcpu_has_irq(vcpu, 0);
/*
 * Return true when the PSW_MASK_PSTATE bit is clear in the guest PSW mask,
 * false when it is set.
 */
2749 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2751 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
/*
 * Atomically set PROG_BLOCK_SIE in the SIE block's prog20 field. Any
 * follow-up statement of this function is not visible in this listing.
 */
2754 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2756 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Atomically clear PROG_BLOCK_SIE in the SIE block's prog20 field. */
2760 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2762 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/*
 * Atomically set PROG_REQUEST in the SIE block's prog20 field. Any
 * follow-up statement of this function is not visible in this listing.
 */
2765 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2767 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/* Atomically clear PROG_REQUEST in the SIE block's prog20 field. */
2771 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2773 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2777 * Kick a guest cpu out of SIE and wait until SIE is not running.
2778 * If the CPU is not running (e.g. waiting as idle) the function will
2779 * return immediately. */
2780 void exit_sie(struct kvm_vcpu *vcpu)
/* raise CPUSTAT_STOP_INT, then loop for as long as PROG_IN_SIE is set in prog0c */
2782 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2783 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2787 /* Kick a guest cpu out of SIE to process a request synchronously */
2788 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
/* queue the request first, then flag it via PROG_REQUEST */
2790 kvm_make_request(req, vcpu);
2791 kvm_s390_vcpu_request(vcpu);
/*
 * gmap notifier callback. Shadow gmaps are tested for first, as are ranges
 * starting at or above 2 GiB (1UL << 31). For the remaining ranges every
 * vCPU whose two prefix pages overlap [start, end] gets a synchronous
 * KVM_REQ_MMU_RELOAD request.
 */
2794 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2797 struct kvm *kvm = gmap->private;
2798 struct kvm_vcpu *vcpu;
2799 unsigned long prefix;
2802 if (gmap_is_shadow(gmap))
2804 if (start >= 1UL << 31)
2805 /* We are only interested in prefix pages */
2807 kvm_for_each_vcpu(i, vcpu, kvm) {
2808 /* match against both prefix pages */
2809 prefix = kvm_s390_get_prefix(vcpu);
/* overlap test between [start, end] and [prefix, prefix + 2 pages - 1] */
2810 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2811 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2813 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/*
 * Arch hook that exists only because common KVM code references the symbol
 * (see the comment below); its body is not visible in this listing.
 */
2818 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2820 /* kvm common code refers to this, but never calls it */
/*
 * Read a single s390 register on behalf of user space.
 *
 * Each visible case copies one value to the user-space address held in
 * reg->addr with put_user() and stores put_user()'s status in r. TODPR is
 * transferred as a u32; every other register here is transferred as a u64.
 * The CPU timer is obtained through kvm_s390_get_cpu_timer() rather than
 * read from a field directly.
 */
2825 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2826 struct kvm_one_reg *reg)
2831 case KVM_REG_S390_TODPR:
2832 r = put_user(vcpu->arch.sie_block->todpr,
2833 (u32 __user *)reg->addr);
2835 case KVM_REG_S390_EPOCHDIFF:
2836 r = put_user(vcpu->arch.sie_block->epoch,
2837 (u64 __user *)reg->addr);
2839 case KVM_REG_S390_CPU_TIMER:
2840 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2841 (u64 __user *)reg->addr);
2843 case KVM_REG_S390_CLOCK_COMP:
2844 r = put_user(vcpu->arch.sie_block->ckc,
2845 (u64 __user *)reg->addr);
2847 case KVM_REG_S390_PFTOKEN:
2848 r = put_user(vcpu->arch.pfault_token,
2849 (u64 __user *)reg->addr);
2851 case KVM_REG_S390_PFCOMPARE:
2852 r = put_user(vcpu->arch.pfault_compare,
2853 (u64 __user *)reg->addr);
2855 case KVM_REG_S390_PFSELECT:
2856 r = put_user(vcpu->arch.pfault_select,
2857 (u64 __user *)reg->addr);
2859 case KVM_REG_S390_PP:
2860 r = put_user(vcpu->arch.sie_block->pp,
2861 (u64 __user *)reg->addr);
2863 case KVM_REG_S390_GBEA:
2864 r = put_user(vcpu->arch.sie_block->gbea,
2865 (u64 __user *)reg->addr);
/*
 * Write a single s390 register on behalf of user space.
 *
 * Each visible case fetches one value from the user-space address held in
 * reg->addr with get_user() and stores get_user()'s status in r. TODPR is
 * transferred as a u32; every other register here is transferred as a u64.
 */
2874 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2875 struct kvm_one_reg *reg)
2881 case KVM_REG_S390_TODPR:
2882 r = get_user(vcpu->arch.sie_block->todpr,
2883 (u32 __user *)reg->addr);
2885 case KVM_REG_S390_EPOCHDIFF:
2886 r = get_user(vcpu->arch.sie_block->epoch,
2887 (u64 __user *)reg->addr);
2889 case KVM_REG_S390_CPU_TIMER:
/* the CPU timer goes through a local and kvm_s390_set_cpu_timer() instead of a direct store */
2890 r = get_user(val, (u64 __user *)reg->addr);
2892 kvm_s390_set_cpu_timer(vcpu, val);
2894 case KVM_REG_S390_CLOCK_COMP:
2895 r = get_user(vcpu->arch.sie_block->ckc,
2896 (u64 __user *)reg->addr);
2898 case KVM_REG_S390_PFTOKEN:
2899 r = get_user(vcpu->arch.pfault_token,
2900 (u64 __user *)reg->addr);
/* an invalid token also drops queued async-pf completions */
2901 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2902 kvm_clear_async_pf_completion_queue(vcpu);
2904 case KVM_REG_S390_PFCOMPARE:
2905 r = get_user(vcpu->arch.pfault_compare,
2906 (u64 __user *)reg->addr);
2908 case KVM_REG_S390_PFSELECT:
2909 r = get_user(vcpu->arch.pfault_select,
2910 (u64 __user *)reg->addr);
2912 case KVM_REG_S390_PP:
2913 r = get_user(vcpu->arch.sie_block->pp,
2914 (u64 __user *)reg->addr);
2916 case KVM_REG_S390_GBEA:
2917 r = get_user(vcpu->arch.sie_block->gbea,
2918 (u64 __user *)reg->addr);
/*
 * ioctl wrapper that performs an initial reset by delegating to
 * kvm_s390_vcpu_initial_reset().
 */
2927 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2929 kvm_s390_vcpu_initial_reset(vcpu);
/*
 * Copy the general purpose registers supplied in @regs into the gprs area
 * of the vCPU's kvm_run structure. The copy length is sizeof(regs->gprs).
 */
2933 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2936 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * Copy the general purpose registers from the gprs area of the vCPU's
 * kvm_run structure into @regs. The copy length is sizeof(regs->gprs).
 */
2941 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2944 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/*
 * Install the special registers supplied in @sregs: access registers go
 * into kvm_run (s.regs.acrs), control registers into the SIE block (gcr).
 */
2949 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2950 struct kvm_sregs *sregs)
2954 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2955 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Fill @sregs with the vCPU's special registers: access registers are read
 * from kvm_run (s.regs.acrs), control registers from the SIE block (gcr).
 */
2961 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2962 struct kvm_sregs *sregs)
2966 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2967 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Install floating point state from @fpu. The supplied fpc is checked with
 * test_fp_ctl() before it is stored in kvm_run. The floating point
 * registers are either converted into the vector register area with
 * convert_fp_to_vx() or copied into s.regs.fprs.
 *
 * NOTE(review): the condition choosing between the two copy variants and
 * the handling of a failed fpc check are not visible in this listing.
 */
2973 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2979 if (test_fp_ctl(fpu->fpc)) {
2983 vcpu->run->s.regs.fpc = fpu->fpc;
2985 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2986 (freg_t *) fpu->fprs);
2988 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/*
 * Fill @fpu with the vCPU's floating point state. The registers are either
 * converted from the vector register area with convert_vx_to_fp() or
 * copied from s.regs.fprs; fpc is taken from kvm_run.
 *
 * NOTE(review): the condition choosing between the two copy variants is
 * not visible in this listing.
 */
2995 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2999 /* make sure we have the latest values */
3002 convert_vx_to_fp((freg_t *) fpu->fprs,
3003 (__vector128 *) vcpu->run->s.regs.vrs);
3005 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3006 fpu->fpc = vcpu->run->s.regs.fpc;
/*
 * Store @psw (mask and address) in kvm_run as the vCPU's PSW. The vCPU's
 * stopped state is tested first; the statement taken when the vCPU is not
 * stopped is not visible in this listing.
 */
3012 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3016 if (!is_vcpu_stopped(vcpu))
3019 vcpu->run->psw_mask = psw.mask;
3020 vcpu->run->psw_addr = psw.addr;
/*
 * Translation ioctl hook. No translation is performed; the function always
 * returns -EINVAL and never touches @tr.
 */
3025 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3026 struct kvm_translation *tr)
3028 return -EINVAL; /* not implemented yet */
/* Mask of the guest-debug control bits accepted by the set_guest_debug ioctl. */
3031 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3032 KVM_GUESTDBG_USE_HW_BP | \
3033 KVM_GUESTDBG_ENABLE)
/*
 * Configure guest debugging from @dbg.
 *
 * Existing debug state is discarded first. Control bits outside
 * VALID_GUESTDBG_FLAGS and a host without sclp.has_gpere are both tested
 * for. With KVM_GUESTDBG_ENABLE the control word is stored, CPUSTAT_P is
 * set and, if requested, hardware breakpoint data is imported; the
 * remaining lines clear CPUSTAT_P and reset the debug state again.
 *
 * NOTE(review): the else branches, error codes and return statements are
 * not visible in this listing.
 */
3035 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3036 struct kvm_guest_debug *dbg)
/* start from a clean state */
3042 vcpu->guest_debug = 0;
3043 kvm_s390_clear_bp_data(vcpu);
3045 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3049 if (!sclp.has_gpere) {
3054 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3055 vcpu->guest_debug = dbg->control;
3056 /* enforce guest PER */
3057 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3059 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3060 rc = kvm_s390_import_bp_data(vcpu, dbg);
3062 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3063 vcpu->arch.guestdbg.last_bp = 0;
/* full reset of the debug state */
3067 vcpu->guest_debug = 0;
3068 kvm_s390_clear_bp_data(vcpu);
3069 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
/*
 * Compute the vCPU's multiprocessing state: KVM_MP_STATE_STOPPED when the
 * vCPU is stopped, KVM_MP_STATE_OPERATING otherwise. The value is placed in
 * the local ret; how it is handed back is not visible in this listing.
 */
3077 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3078 struct kvm_mp_state *mp_state)
3084 /* CHECK_STOP and LOAD are not supported yet */
3085 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3086 KVM_MP_STATE_OPERATING;
/*
 * Set the vCPU's multiprocessing state from @mp_state. Calling this ioctl
 * also switches the VM to user-controlled CPU state
 * (user_cpu_state_ctrl = 1). STOPPED stops the vCPU and OPERATING starts
 * it; per the comment below, LOAD and CHECK_STOP are not supported.
 */
3092 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3093 struct kvm_mp_state *mp_state)
3099 /* user space knows about this interface - let it control the state */
3100 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3102 switch (mp_state->mp_state) {
3103 case KVM_MP_STATE_STOPPED:
3104 kvm_s390_vcpu_stop(vcpu);
3106 case KVM_MP_STATE_OPERATING:
3107 kvm_s390_vcpu_start(vcpu);
3109 case KVM_MP_STATE_LOAD:
3110 case KVM_MP_STATE_CHECK_STOP:
3111 /* fall through - CHECK_STOP and LOAD are not supported yet */
/* Return whether CPUSTAT_IBS is currently set in the vCPU's cpuflags. */
3120 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3122 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
/*
 * Process the requests pending for a vCPU. PROG_REQUEST is cleared first;
 * the individual request bits are then consumed one by one with
 * kvm_check_request(): MMU_RELOAD, TLB_FLUSH, ENABLE_IBS, DISABLE_IBS,
 * ICPT_OPEREXC, START_MIGRATION, STOP_MIGRATION. KVM_REQ_UNHALT is simply
 * cleared.
 *
 * NOTE(review): the retry jumps and return statements are not visible in
 * this listing.
 */
3125 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3128 kvm_s390_vcpu_request_handled(vcpu);
3129 if (!kvm_request_pending(vcpu))
3132 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3133 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3134 * This ensures that the ipte instruction for this request has
3135 * already finished. We might race against a second unmapper that
3136 * wants to set the blocking bit. Lets just retry the request loop.
3138 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3140 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3141 kvm_s390_get_prefix(vcpu),
3142 PAGE_SIZE * 2, PROT_WRITE);
3144 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3150 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3151 vcpu->arch.sie_block->ihcpu = 0xffff;
/* IBS is only switched (and traced) when its current state differs from the request */
3155 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3156 if (!ibs_enabled(vcpu)) {
3157 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3158 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3163 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3164 if (ibs_enabled(vcpu)) {
3165 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3166 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3171 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3172 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3176 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3178 * Disable CMM virtualization; we will emulate the ESSA
3179 * instruction manually, in order to provide additional
3180 * functionalities needed for live migration.
3182 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3186 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3188 * Re-enable CMM virtualization if CMMA is available and
3189 * CMM has been used.
3191 if ((vcpu->kvm->arch.use_cmma) &&
3192 (vcpu->kvm->mm->context.uses_cmm))
3193 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3197 /* nothing to do, just clear the request */
3198 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/*
 * Set the guest TOD clock for the whole VM from @gtod.
 *
 * Under kvm->lock the host's extended TOD clock is read and the VM epoch is
 * computed as guest TOD minus host TOD. With facility 139 the epoch index
 * difference is computed as well. All vCPUs are blocked while the new
 * epoch/epdx values are copied into every SIE block, then unblocked.
 */
3203 void kvm_s390_set_tod_clock(struct kvm *kvm,
3204 const struct kvm_s390_vm_tod_clock *gtod)
3206 struct kvm_vcpu *vcpu;
3207 struct kvm_s390_tod_clock_ext htod;
3210 mutex_lock(&kvm->lock);
3213 get_tod_clock_ext((char *)&htod);
3215 kvm->arch.epoch = gtod->tod - htod.tod;
3217 if (test_kvm_facility(kvm, 139)) {
3218 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
/* presumably detects wrap-around of the subtraction above and borrows from the index - confirm */
3219 if (kvm->arch.epoch > gtod->tod)
3220 kvm->arch.epdx -= 1;
3223 kvm_s390_vcpu_block_all(kvm);
3224 kvm_for_each_vcpu(i, vcpu, kvm) {
3225 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3226 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3229 kvm_s390_vcpu_unblock_all(kvm);
3231 mutex_unlock(&kvm->lock);
3235 * kvm_arch_fault_in_page - fault-in guest page if necessary
3236 * @vcpu: The corresponding virtual cpu
3237 * @gpa: Guest physical address
3238 * @writable: Whether the page should be writable or not
3240 * Make sure that a guest page has been faulted-in on the host.
3242 * Return: Zero on success, negative error code otherwise.
3244 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
/* FAULT_FLAG_WRITE is passed to gmap_fault() only when @writable is non-zero */
3246 return gmap_fault(vcpu->arch.gmap, gpa,
3247 writable ? FAULT_FLAG_WRITE : 0);
/*
 * Inject a pfault interrupt carrying @token. Two injections are visible:
 * KVM_S390_INT_PFAULT_INIT is injected into the vCPU itself, while
 * KVM_S390_INT_PFAULT_DONE is injected at VM level. A failing injection
 * triggers WARN_ON_ONCE.
 *
 * NOTE(review): the branch on @start_token is not visible in this listing;
 * the two injections are presumably its two arms - confirm.
 */
3250 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3251 unsigned long token)
3253 struct kvm_s390_interrupt inti;
3254 struct kvm_s390_irq irq;
3257 irq.u.ext.ext_params2 = token;
3258 irq.type = KVM_S390_INT_PFAULT_INIT;
3259 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3261 inti.type = KVM_S390_INT_PFAULT_DONE;
3262 inti.parm64 = token;
3263 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/*
 * Async-pf hook for a page that is not present: trace the event and inject
 * the pfault token stored in @work with start_token = true.
 */
3267 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3268 struct kvm_async_pf *work)
3270 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3271 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/*
 * Async-pf hook for a page that has become present: trace the event and
 * inject the pfault token stored in @work with start_token = false.
 */
3274 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3275 struct kvm_async_pf *work)
3277 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3278 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/*
 * Async-pf "page ready" hook. No statement is visible in its body; the
 * comment below gives the reason.
 */
3281 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3282 struct kvm_async_pf *work)
3284 /* s390 will always inject the page directly */
/*
 * Tell common code whether a "page present" notification can be injected.
 * The return statement is not visible in this listing; the comment below
 * describes the intent.
 */
3287 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3290 * s390 will always inject the page directly,
3291 * but we still want check_async_completion to cleanup
/*
 * Try to set up an asynchronous page fault for the address in
 * current->thread.gmap_addr.
 *
 * A series of preconditions is tested first: a valid pfault token, the
 * guest PSW mask matching pfault_compare under pfault_select, the result of
 * psw_extint_disabled(), whether an interrupt is already pending,
 * CR0_SERVICE_SIGNAL_SUBMASK being set in guest CR0 and pfault being
 * enabled on the gmap. The host virtual address of the faulting guest
 * address is then computed, 8 bytes are read from guest real storage at the
 * address held in vcpu->arch.pfault_token into arch.pfault_token, and
 * kvm_setup_async_pf() is called.
 *
 * NOTE(review): the statements taken when a precondition fails, and the
 * return statements, are not visible in this listing.
 */
3296 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3299 struct kvm_arch_async_pf arch;
3302 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3304 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3305 vcpu->arch.pfault_compare)
3307 if (psw_extint_disabled(vcpu))
3309 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3311 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3313 if (!vcpu->arch.gmap->pfault_enabled)
/* hva of the page frame plus the offset of the faulting address within the page */
3316 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3317 hva += current->thread.gmap_addr & ~PAGE_MASK;
3318 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3321 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/*
 * Prepare a vCPU for entering SIE: finish completed async page faults,
 * copy gprs 14 and 15 from kvm_run into the SIE block, test for a pending
 * host machine check, deliver pending interrupts (non-ucontrol VMs only),
 * handle vCPU requests, patch the guest PER registers when guest debugging
 * is enabled, reset icptcode and trace the entry.
 *
 * NOTE(review): the error checks after the rc assignments and the return
 * statements are not visible in this listing.
 */
3325 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3330 * On s390 notifications for arriving pages will be delivered directly
3331 * to the guest but the house keeping for completed pfaults is
3332 * handled outside the worker.
3334 kvm_check_async_pf_completion(vcpu);
/* gprs 14 and 15 are handed to SIE through the control block fields gg14/gg15 */
3336 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3337 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3342 if (test_cpu_flag(CIF_MCCK_PENDING))
3345 if (!kvm_is_ucontrol(vcpu->kvm)) {
3346 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3351 rc = kvm_s390_handle_requests(vcpu);
/* the guest's own PER registers are backed up before being patched */
3355 if (guestdbg_enabled(vcpu)) {
3356 kvm_s390_backup_guest_per_regs(vcpu);
3357 kvm_s390_patch_guest_per_regs(vcpu);
3360 vcpu->arch.sie_block->icptcode = 0;
3361 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3362 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3363 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/*
 * Handle a fault that occurred while executing SIE by injecting a program
 * interrupt into the guest. The default interrupt is PGM_ADDRESSING. The
 * opcode at the guest PSW address is read to determine the instruction
 * length, the PSW is forwarded by that length and the interrupt is
 * injected with the ILC marked as valid.
 *
 * NOTE(review): the conditions around the read_guest_instr() result (where
 * pgm_info is replaced by vcpu->arch.pgm) are not visible in this listing.
 */
3368 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3370 struct kvm_s390_pgm_info pgm_info = {
3371 .code = PGM_ADDRESSING,
3376 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3377 trace_kvm_s390_sie_fault(vcpu);
3380 * We want to inject an addressing exception, which is defined as a
3381 * suppressing or terminating exception. However, since we came here
3382 * by a DAT access exception, the PSW still points to the faulting
3383 * instruction since DAT exceptions are nullifying. So we've got
3384 * to look up the current opcode to get the length of the instruction
3385 * to be able to forward the PSW.
/* only the first opcode byte is read; insn_length() derives the length from it */
3387 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3388 ilen = insn_length(opcode);
3392 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3393 * Forward by arbitrary ilc, injection will take care of
3394 * nullification if necessary.
3396 pgm_info = vcpu->arch.pgm;
3399 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3400 kvm_s390_forward_psw(vcpu, ilen);
3401 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/*
 * Post-process an exit from SIE.
 *
 * After tracing, the guest PER registers are restored (guest debugging
 * only) and gprs 14/15 are copied back from the SIE block. The exit is then
 * classified:
 *  - exit_reason == -EINTR: machine check, reinjected from the mcck_info
 *    stored in the enclosing sie_page;
 *  - icptcode > 0: intercept, handled by kvm_handle_sie_intercept(); a
 *    result of -EOPNOTSUPP is forwarded to user space as
 *    KVM_EXIT_S390_SIEIC;
 *  - exit_reason != -EFAULT: counted as exit_null;
 *  - ucontrol VM: reported as KVM_EXIT_S390_UCONTROL;
 *  - current->thread.gmap_pfault set: async page fault is attempted, else
 *    the page is faulted in synchronously;
 *  - otherwise: vcpu_post_run_fault_in_sie().
 *
 * NOTE(review): most return statements are not visible in this listing.
 */
3404 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3406 struct mcck_volatile_info *mcck_info;
3407 struct sie_page *sie_page;
3409 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3410 vcpu->arch.sie_block->icptcode);
3411 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3413 if (guestdbg_enabled(vcpu))
3414 kvm_s390_restore_guest_per_regs(vcpu);
3416 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3417 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3419 if (exit_reason == -EINTR) {
3420 VCPU_EVENT(vcpu, 3, "%s", "machine check");
/* the sie_block is embedded in a sie_page, which also carries mcck_info */
3421 sie_page = container_of(vcpu->arch.sie_block,
3422 struct sie_page, sie_block);
3423 mcck_info = &sie_page->mcck_info;
3424 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3428 if (vcpu->arch.sie_block->icptcode > 0) {
3429 int rc = kvm_handle_sie_intercept(vcpu);
3431 if (rc != -EOPNOTSUPP)
/* hand the intercept data (icptcode, ipa, ipb) to user space */
3433 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3434 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3435 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3436 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3438 } else if (exit_reason != -EFAULT) {
3439 vcpu->stat.exit_null++;
3441 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3442 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3443 vcpu->run->s390_ucontrol.trans_exc_code =
3444 current->thread.gmap_addr;
3445 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3447 } else if (current->thread.gmap_pfault) {
3448 trace_kvm_s390_major_guest_pfault(vcpu);
3449 current->thread.gmap_pfault = 0;
3450 if (kvm_arch_setup_async_pf(vcpu))
/* synchronous fallback: fault the page in with write access requested */
3452 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3454 return vcpu_post_run_fault_in_sie(vcpu);
/*
 * Inner run loop of a vCPU: repeat vcpu_pre_run(), SIE entry via sie64a()
 * and vcpu_post_run() until a signal is pending, a guest-debug exit is
 * pending or rc becomes non-zero. kvm->srcu is held in read mode except
 * around the SIE entry itself.
 *
 * NOTE(review): the loop header, the check of the vcpu_pre_run() result
 * and the interrupt re-enabling statements are not visible in this listing.
 */
3457 static int __vcpu_run(struct kvm_vcpu *vcpu)
3459 int rc, exit_reason;
3462 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3463 * ning the guest), so that memslots (and other stuff) are protected
3465 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3468 rc = vcpu_pre_run(vcpu);
/* drop srcu for the time spent in the guest */
3472 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3474 * As PF_VCPU will be used in fault handler, between
3475 * guest_enter and guest_exit should be no uaccess.
3477 local_irq_disable();
3478 guest_enter_irqoff();
3479 __disable_cpu_timer_accounting(vcpu);
3481 exit_reason = sie64a(vcpu->arch.sie_block,
3482 vcpu->run->s.regs.gprs);
3483 local_irq_disable();
3484 __enable_cpu_timer_accounting(vcpu);
3485 guest_exit_irqoff();
/* re-acquire srcu before post-processing the exit */
3487 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3489 rc = vcpu_post_run(vcpu, exit_reason);
3490 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3492 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * Transfer register state from kvm_run into the vCPU before running the
 * guest. The PSW is always copied. The other register groups are copied
 * only when their KVM_SYNC_* bit is set in kvm_dirty_regs. After that the
 * host access registers, floating point state and guarded-storage control
 * block are saved and replaced by the guest's. kvm_dirty_regs is cleared at
 * the end.
 *
 * NOTE(review): some conditions are incomplete in this listing (parts of
 * the RICCB and GSCB tests, and the selection between vrs and fprs) -
 * confirm against the complete file.
 */
3496 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3498 struct runtime_instr_cb *riccb;
3501 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3502 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3503 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3504 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3505 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3506 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3507 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
/* 128 bytes are copied into gcr */
3508 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3509 /* some control register changes require a tlb flush */
3510 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3512 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3513 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3514 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3515 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3516 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3517 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3519 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3520 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3521 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3522 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3523 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3524 kvm_clear_async_pf_completion_queue(vcpu);
3527 * If userspace sets the riccb (e.g. after migration) to a valid state,
3528 * we should enable RI here instead of doing the lazy enablement.
3530 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3531 test_kvm_facility(vcpu->kvm, 64) &&
3533 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3534 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3535 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3538 * If userspace sets the gscb (e.g. after migration) to non-zero,
3539 * we should enable GS here instead of doing the lazy enablement.
3541 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3542 test_kvm_facility(vcpu->kvm, 133) &&
3544 !vcpu->arch.gs_enabled) {
3545 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3546 vcpu->arch.sie_block->ecb |= ECB_GS;
3547 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3548 vcpu->arch.gs_enabled = 1;
3550 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3551 test_kvm_facility(vcpu->kvm, 82)) {
/* clear FPF_BPBC, then set it again only if the requested bpbc value is non-zero */
3552 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3553 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
/* swap access registers: host values are saved, guest values loaded */
3555 save_access_regs(vcpu->arch.host_acrs);
3556 restore_access_regs(vcpu->run->s.regs.acrs);
3557 /* save host (userspace) fprs/vrs */
3559 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3560 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
/* point the thread's fpu register area at the guest registers held in kvm_run */
3562 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3564 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3565 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3566 if (test_fp_ctl(current->thread.fpu.fpc))
3567 /* User space provided an invalid FPC, let's clear it */
3568 current->thread.fpu.fpc = 0;
3569 if (MACHINE_HAS_GS) {
3571 __ctl_set_bit(2, 4);
/* keep the host's guarded-storage control block so it can be restored later */
3572 if (current->thread.gs_cb) {
3573 vcpu->arch.host_gscb = current->thread.gs_cb;
3574 save_gs_cb(vcpu->arch.host_gscb);
3576 if (vcpu->arch.gs_enabled) {
3577 current->thread.gs_cb = (struct gs_cb *)
3578 &vcpu->run->s.regs.gscb;
3579 restore_gs_cb(current->thread.gs_cb);
3583 /* SIE will load etoken directly from SDNX and therefore kvm_run */
3585 kvm_run->kvm_dirty_regs = 0;
/*
 * Copy the guest register state back into kvm_run after running the guest
 * (PSW, prefix, control registers, timers, pfault settings, bpbc), then
 * switch access registers, floating point state and the guarded-storage
 * control block back to the host values saved earlier in
 * vcpu->arch.host_*.
 */
3588 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3590 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3591 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3592 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3593 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3594 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3595 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3596 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3597 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3598 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3599 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3600 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3601 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
/* bpbc is reported as 1 or 0 depending on whether FPF_BPBC is set in fpf */
3602 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
/* swap access registers back: guest values are saved, host values loaded */
3603 save_access_regs(vcpu->run->s.regs.acrs);
3604 restore_access_regs(vcpu->arch.host_acrs);
3605 /* Save guest register state */
3607 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3608 /* Restore will be done lazily at return */
3609 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3610 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3611 if (MACHINE_HAS_GS) {
3612 __ctl_set_bit(2, 4);
3613 if (vcpu->arch.gs_enabled)
3614 save_gs_cb(current->thread.gs_cb);
3616 current->thread.gs_cb = vcpu->arch.host_gscb;
3617 restore_gs_cb(vcpu->arch.host_gscb);
/* bit 4 of control register 2 is cleared again when there was no host gs_cb */
3619 if (!vcpu->arch.host_gscb)
3620 __ctl_clear_bit(2, 4);
3621 vcpu->arch.host_gscb = NULL;
3623 /* SIE will save etoken directly into SDNX and therefore kvm_run */
/*
 * Run ioctl entry point for a vCPU.
 *
 * immediate_exit and a pending guest-debug exit are tested before anything
 * else. When user space does not control the CPU state the vCPU is started
 * implicitly; otherwise a stopped vCPU is reported with a rate-limited
 * error message. Registers are synced in, CPU timer accounting is enabled
 * and __vcpu_run() executes the guest. The result is then mapped: a pending
 * signal yields KVM_EXIT_INTR, a pending debug exit is prepared, and
 * -EREMOTE means kvm_run is already prepared for user space. Finally
 * accounting is disabled, registers are stored back and the exit_userspace
 * counter is incremented.
 *
 * NOTE(review): the rc assignments and return statements are not visible
 * in this listing.
 */
3626 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3630 if (kvm_run->immediate_exit)
3635 if (guestdbg_exit_pending(vcpu)) {
3636 kvm_s390_prepare_debug_exit(vcpu);
3641 kvm_sigset_activate(vcpu);
3643 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3644 kvm_s390_vcpu_start(vcpu);
3645 } else if (is_vcpu_stopped(vcpu)) {
3646 pr_err_ratelimited("can't run stopped vcpu %d\n",
3652 sync_regs(vcpu, kvm_run);
3653 enable_cpu_timer_accounting(vcpu);
3656 rc = __vcpu_run(vcpu);
3658 if (signal_pending(current) && !rc) {
3659 kvm_run->exit_reason = KVM_EXIT_INTR;
3663 if (guestdbg_exit_pending(vcpu) && !rc) {
3664 kvm_s390_prepare_debug_exit(vcpu);
3668 if (rc == -EREMOTE) {
3669 /* userspace support is needed, kvm_run has been prepared */
3673 disable_cpu_timer_accounting(vcpu);
3674 store_regs(vcpu, kvm_run);
3676 kvm_sigset_deactivate(vcpu);
3678 vcpu->stat.exit_userspace++;
3685 * store status at address
3686 * we have two special cases:
3687 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3688 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the vCPU's status (FPRs, GPRs, PSW, prefix, FP control, TOD
 * programmable register, CPU timer, clock comparator, access and control
 * registers) into guest absolute storage relative to @gpa, taking the
 * values from kvm_run and the SIE block.
 *
 * For the NOADDR and PREFIXED special cases the byte archmode (value 1) is
 * first written to guest address 163, using absolute and real addressing
 * respectively. All save-area writes are OR-ed into rc.
 *
 * Return: 0 on success, -EFAULT if any save-area write reported an error.
 */
3690 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3692 unsigned char archmode = 1;
3693 freg_t fprs[NUM_FPRS];
3698 px = kvm_s390_get_prefix(vcpu);
3699 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3700 if (write_guest_abs(vcpu, 163, &archmode, 1))
3703 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3704 if (write_guest_real(vcpu, 163, &archmode, 1))
/* rebase gpa so that the __LC_* save-area offsets can be added to it below */
3708 gpa -= __LC_FPREGS_SAVE_AREA;
3710 /* manually convert vector registers if necessary */
3711 if (MACHINE_HAS_VX) {
3712 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3713 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3716 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3717 vcpu->run->s.regs.fprs, 128);
3719 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3720 vcpu->run->s.regs.gprs, 128);
3721 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3722 &vcpu->arch.sie_block->gpsw, 16);
3723 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3725 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3726 &vcpu->run->s.regs.fpc, 4);
3727 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3728 &vcpu->arch.sie_block->todpr, 4);
3729 cputm = kvm_s390_get_cpu_timer(vcpu);
3730 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
/* the clock comparator is stored shifted right by 8 bits */
3732 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3733 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3735 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3736 &vcpu->run->s.regs.acrs, 64);
3737 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3738 &vcpu->arch.sie_block->gcr, 128);
3739 return rc ? -EFAULT : 0;
/*
 * Store status for a vCPU whose registers may currently live in the host
 * registers: refresh the fpc and access register copies in kvm_run, then
 * delegate to kvm_s390_store_status_unloaded() with the same address.
 */
3742 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3745 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3746 * switch in the run ioctl. Let's update our copies before we save
3747 * it into the save area
3750 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3751 save_access_regs(vcpu->run->s.regs.acrs);
3753 return kvm_s390_store_status_unloaded(vcpu, addr);
/*
 * Request IBS to be disabled on one vCPU. A pending ENABLE_IBS request is
 * consumed first (the result of kvm_check_request() is ignored), then
 * DISABLE_IBS is issued as a synchronous request.
 */
3756 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3758 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3759 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Apply __disable_ibs_on_vcpu() to every vCPU of @kvm. */
3762 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3765 struct kvm_vcpu *vcpu;
3767 kvm_for_each_vcpu(i, vcpu, kvm) {
3768 __disable_ibs_on_vcpu(vcpu);
/*
 * Request IBS to be enabled on one vCPU. A pending DISABLE_IBS request is
 * consumed first (the result of kvm_check_request() is ignored), then
 * ENABLE_IBS is issued as a synchronous request.
 *
 * NOTE(review): a statement between the signature and the first call
 * appears to be missing from this listing.
 */
3772 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3776 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3777 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/*
 * Move a vCPU out of the STOPPED state.
 *
 * Under start_stop_lock the online vCPUs are scanned for ones that are not
 * stopped. If none is running, IBS is enabled for this vCPU; if exactly one
 * is running, IBS is disabled on all vCPUs. CPUSTAT_STOPPED is then cleared
 * and a TLB flush is requested for this vCPU.
 *
 * NOTE(review): the early return for an already running vCPU and the
 * counter increment inside the loop are not visible in this listing.
 */
3780 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3782 int i, online_vcpus, started_vcpus = 0;
3784 if (!is_vcpu_stopped(vcpu))
3787 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3788 /* Only one cpu at a time may enter/leave the STOPPED state. */
3789 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3790 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3792 for (i = 0; i < online_vcpus; i++) {
3793 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3797 if (started_vcpus == 0) {
3798 /* we're the only active VCPU -> speed it up */
3799 __enable_ibs_on_vcpu(vcpu);
3800 } else if (started_vcpus == 1) {
3802 * As we are starting a second VCPU, we have to disable
3803 * the IBS facility on all VCPUs to remove potentially
3804 * oustanding ENABLE requests.
3806 __disable_ibs_on_all_vcpus(vcpu->kvm);
3809 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3811 * Another VCPU might have used IBS while we were offline.
3812 * Let's play safe and flush the VCPU at startup.
3814 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3815 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Move a vCPU into the STOPPED state.
 *
 * Under start_stop_lock the pending stop interrupt is cleared,
 * CPUSTAT_STOPPED is set and IBS is disabled for this vCPU. The online
 * vCPUs are then scanned for ones that are not stopped; if exactly one is
 * left running, IBS is enabled for it.
 *
 * NOTE(review): the early return for an already stopped vCPU and the
 * counter increment inside the loop are not visible in this listing.
 */
3819 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3821 int i, online_vcpus, started_vcpus = 0;
3822 struct kvm_vcpu *started_vcpu = NULL;
3824 if (is_vcpu_stopped(vcpu))
3827 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3828 /* Only one cpu at a time may enter/leave the STOPPED state. */
3829 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3830 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3832 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3833 kvm_s390_clear_stop_irq(vcpu);
3835 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3836 __disable_ibs_on_vcpu(vcpu);
/* remember a vCPU that is still running in case it turns out to be the only one */
3838 for (i = 0; i < online_vcpus; i++) {
3839 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3841 started_vcpu = vcpu->kvm->vcpus[i];
3845 if (started_vcpus == 1) {
3847 * As we only have one VCPU left, we want to enable the
3848 * IBS facility for that VCPU to speed it up.
3850 __enable_ibs_on_vcpu(started_vcpu);
3853 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Handle a KVM_ENABLE_CAP request issued on a VCPU.
 *
 * The only capability visible here is KVM_CAP_S390_CSS_SUPPORT: if
 * kvm->arch.css_support is not set yet, it is set to 1 and the change is
 * logged (VM_EVENT) and traced. A repeated request skips that block.
 *
 * NOTE(review): the switch header, the result handling and any default
 * case are not visible in this listing.
 */
3857 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3858 struct kvm_enable_cap *cap)
3866 case KVM_CAP_S390_CSS_SUPPORT:
3867 if (!vcpu->kvm->arch.css_support) {
3868 vcpu->kvm->arch.css_support = 1;
3869 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3870 trace_kvm_s390_enable_css(vcpu->kvm);
/*
 * kvm_s390_guest_mem_op - carry out a KVM_S390_MEM_OP request for @vcpu.
 * @vcpu: VCPU whose guest memory is accessed
 * @mop:  request description (gaddr, ar, size, flags, buf)
 *
 * Flags other than INJECT_EXCEPTION and CHECK_ONLY, and sizes above
 * MEM_OP_MAX_SIZE, are tested for up front. Unless CHECK_ONLY is set, a
 * bounce buffer of mop->size bytes is obtained with vmalloc(). Then,
 * inside an SRCU read section on kvm->srcu:
 *   LOGICAL_READ:  check_gva_range(GACC_FETCH) for CHECK_ONLY, otherwise
 *                  read_guest() into the bounce buffer and copy_to_user().
 *   LOGICAL_WRITE: check_gva_range(GACC_STORE) for CHECK_ONLY, otherwise
 *                  copy_from_user() into the bounce buffer and
 *                  write_guest().
 * A positive result combined with INJECT_EXCEPTION leads to injecting
 * vcpu->arch.pgm as a program interrupt.
 *
 * NOTE(review): only an upper bound on mop->size is tested in the lines
 * shown and no range check on mop->ar is visible - confirm whether a
 * zero size or an out-of-range access register number can reach
 * vmalloc()/read_guest()/write_guest().
 * NOTE(review): the error returns and the release of tmpbuf are not
 * visible in this listing.
 */
3881 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3882 struct kvm_s390_mem_op *mop)
3884 void __user *uaddr = (void __user *)mop->buf;
3885 void *tmpbuf = NULL;
3887 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3888 | KVM_S390_MEMOP_F_CHECK_ONLY;
3890 if (mop->flags & ~supported_flags)
3893 if (mop->size > MEM_OP_MAX_SIZE)
3896 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3897 tmpbuf = vmalloc(mop->size);
3902 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3905 case KVM_S390_MEMOP_LOGICAL_READ:
3906 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3907 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3908 mop->size, GACC_FETCH);
3911 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3913 if (copy_to_user(uaddr, tmpbuf, mop->size))
3917 case KVM_S390_MEMOP_LOGICAL_WRITE:
3918 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3919 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3920 mop->size, GACC_STORE);
3923 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3927 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3933 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3935 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3936 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/*
 * kvm_arch_vcpu_async_ioctl - VCPU ioctls that inject interrupts.
 *
 * KVM_S390_IRQ:       copy a struct kvm_s390_irq from userspace and hand
 *                     it to kvm_s390_inject_vcpu().
 * KVM_S390_INTERRUPT: copy a struct kvm_s390_interrupt, convert it with
 *                     s390int_to_s390irq() and inject the result.
 * The return after the two cases yields -ENOIOCTLCMD.
 *
 * NOTE(review): the values returned when copy_from_user() or the
 * conversion fails are not visible in this listing.
 */
3942 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3943 unsigned int ioctl, unsigned long arg)
3945 struct kvm_vcpu *vcpu = filp->private_data;
3946 void __user *argp = (void __user *)arg;
3949 case KVM_S390_IRQ: {
3950 struct kvm_s390_irq s390irq;
3952 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3954 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3956 case KVM_S390_INTERRUPT: {
3957 struct kvm_s390_interrupt s390int;
3958 struct kvm_s390_irq s390irq;
3960 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3962 if (s390int_to_s390irq(&s390int, &s390irq))
3964 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3967 return -ENOIOCTLCMD;
/*
 * kvm_arch_vcpu_ioctl - s390 handler for VCPU ioctls.
 *
 * Commands visible in this listing:
 *   KVM_S390_STORE_STATUS    - kvm_s390_vcpu_store_status() inside an
 *                              SRCU read section on kvm->srcu
 *   KVM_S390_SET_INITIAL_PSW - copies a psw from userspace and applies it
 *   KVM_S390_INITIAL_RESET   - kvm_arch_vcpu_ioctl_initial_reset()
 *   KVM_SET_ONE_REG / KVM_GET_ONE_REG - share one copy of struct
 *                              kvm_one_reg, then dispatch on the command
 *   KVM_S390_UCAS_MAP / KVM_S390_UCAS_UNMAP (CONFIG_KVM_S390_UCONTROL) -
 *                              test kvm_is_ucontrol(), then map/unmap a
 *                              gmap segment
 *   KVM_S390_VCPU_FAULT      - gmap_fault() on @arg
 *   KVM_ENABLE_CAP           - kvm_vcpu_ioctl_enable_cap()
 *   KVM_S390_MEM_OP          - kvm_s390_guest_mem_op()
 *   KVM_S390_SET_IRQ_STATE   - irq_state.len is tested against 0,
 *                              VCPU_IRQS_MAX_BUF and the size of
 *                              struct kvm_s390_irq before the state is set
 *   KVM_S390_GET_IRQ_STATE   - irq_state.len is tested against 0 before
 *                              the state is fetched
 *
 * @arg is used as a userspace pointer (argp) by the commands that copy a
 * structure in, and as a plain value by STORE_STATUS and VCPU_FAULT.
 *
 * The ONE_REG case passes the address of the local 'reg'; the argument
 * text had been corrupted into a single non-ASCII character and is
 * spelled out as "&reg" again below.
 *
 * NOTE(review): the switch header, break statements, error codes and the
 * final return are not visible in this listing.
 */
3970 long kvm_arch_vcpu_ioctl(struct file *filp,
3971 unsigned int ioctl, unsigned long arg)
3973 struct kvm_vcpu *vcpu = filp->private_data;
3974 void __user *argp = (void __user *)arg;
3981 case KVM_S390_STORE_STATUS:
3982 idx = srcu_read_lock(&vcpu->kvm->srcu);
3983 r = kvm_s390_vcpu_store_status(vcpu, arg);
3984 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3986 case KVM_S390_SET_INITIAL_PSW: {
3990 if (copy_from_user(&psw, argp, sizeof(psw)))
3992 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3995 case KVM_S390_INITIAL_RESET:
3996 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3998 case KVM_SET_ONE_REG:
3999 case KVM_GET_ONE_REG: {
4000 struct kvm_one_reg reg;
4002 if (copy_from_user(&reg, argp, sizeof(reg)))
4004 if (ioctl == KVM_SET_ONE_REG)
4005 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4007 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4010 #ifdef CONFIG_KVM_S390_UCONTROL
4011 case KVM_S390_UCAS_MAP: {
4012 struct kvm_s390_ucas_mapping ucasmap;
4014 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4019 if (!kvm_is_ucontrol(vcpu->kvm)) {
4024 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4025 ucasmap.vcpu_addr, ucasmap.length);
4028 case KVM_S390_UCAS_UNMAP: {
4029 struct kvm_s390_ucas_mapping ucasmap;
4031 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4036 if (!kvm_is_ucontrol(vcpu->kvm)) {
4041 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4046 case KVM_S390_VCPU_FAULT: {
4047 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4050 case KVM_ENABLE_CAP:
4052 struct kvm_enable_cap cap;
4054 if (copy_from_user(&cap, argp, sizeof(cap)))
4056 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4059 case KVM_S390_MEM_OP: {
4060 struct kvm_s390_mem_op mem_op;
4062 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4063 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4068 case KVM_S390_SET_IRQ_STATE: {
4069 struct kvm_s390_irq_state irq_state;
4072 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4074 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4075 irq_state.len == 0 ||
4076 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4080 /* do not use irq_state.flags, it will break old QEMUs */
4081 r = kvm_s390_set_irq_state(vcpu,
4082 (void __user *) irq_state.buf,
4086 case KVM_S390_GET_IRQ_STATE: {
4087 struct kvm_s390_irq_state irq_state;
4090 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4092 if (irq_state.len == 0) {
4096 /* do not use irq_state.flags, it will break old QEMUs */
4097 r = kvm_s390_get_irq_state(vcpu,
4098 (__u8 __user *) irq_state.buf,
/*
 * kvm_arch_vcpu_fault - resolve the fault described by @vmf for @vcpu.
 *
 * With CONFIG_KVM_S390_UCONTROL, a fault at page offset
 * KVM_S390_SIE_PAGE_OFFSET on a VM for which kvm_is_ucontrol() holds is
 * served with the page backing vcpu->arch.sie_block; a reference on that
 * page is taken with get_page(). The fallback visible here is
 * VM_FAULT_SIGBUS.
 *
 * NOTE(review): the return taken on the ucontrol path is not visible in
 * this listing.
 */
4110 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4112 #ifdef CONFIG_KVM_S390_UCONTROL
4113 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4114 && (kvm_is_ucontrol(vcpu->kvm))) {
4115 vmf->page = virt_to_page(vcpu->arch.sie_block);
4116 get_page(vmf->page);
4120 return VM_FAULT_SIGBUS;
/*
 * NOTE(review): only the prototype of kvm_arch_create_memslot() is
 * visible in this listing; what it does with @kvm, @slot and @npages has
 * to be checked in the full source.
 */
4123 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4124 unsigned long npages)
4129 /* Section: memory related */
/*
 * kvm_arch_prepare_memory_region - sanity-check a requested memslot.
 *
 * Tests that mem->userspace_addr and mem->memory_size have their low 20
 * bits clear (the 1MB segment boundary named in the comment below) and
 * that guest_phys_addr + memory_size does not exceed
 * kvm->arch.mem_limit. @memslot and @change are not referenced in the
 * lines shown.
 *
 * NOTE(review): the return statements of the three checks and the final
 * return are not visible in this listing.
 */
4130 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4131 struct kvm_memory_slot *memslot,
4132 const struct kvm_userspace_memory_region *mem,
4133 enum kvm_mr_change change)
4135 /* A few sanity checks. We can have memory slots which have to be
4136 located/ended at a segment boundary (1MB). The memory in userland is
4137 ok to be fragmented into various different vmas. It is okay to mmap()
4138 and munmap() stuff in this slot after doing this call at any time */
4140 if (mem->userspace_addr & 0xffffful)
4143 if (mem->memory_size & 0xffffful)
4146 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
/*
 * kvm_arch_commit_memory_region - bring the gmap in line with a memslot.
 *
 * Compares userspace address, guest physical base and size of @old with
 * the values in @mem; per the comment in the body, an unchanged slot is
 * not supposed to trigger a gmap update. Otherwise the range is mapped
 * with gmap_map_segment() on kvm->arch.gmap, and pr_warn() reports a
 * failed commit. @new and @change are not referenced in the lines shown.
 *
 * NOTE(review): the statement guarded by the comparison and the test of
 * rc ahead of pr_warn() are not visible in this listing.
 */
4152 void kvm_arch_commit_memory_region(struct kvm *kvm,
4153 const struct kvm_userspace_memory_region *mem,
4154 const struct kvm_memory_slot *old,
4155 const struct kvm_memory_slot *new,
4156 enum kvm_mr_change change)
4160 /* If the basics of the memslot do not change, we do not want
4161 * to update the gmap. Every update causes several unnecessary
4162 * segment translation exceptions. This is usually handled just
4163 * fine by the normal fault handler + gmap, but it will also
4164 * cause faults on the prefix page of running guest CPUs.
4166 if (old->userspace_addr == mem->userspace_addr &&
4167 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4168 old->npages * PAGE_SIZE == mem->memory_size)
4171 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4172 mem->guest_phys_addr, mem->memory_size);
4174 pr_warn("failed to commit memory region\n");
/*
 * nonhyp_mask - mask that kvm_s390_init() ANDs with stfle_fac_list[@i].
 *
 * A field is extracted from sclp.hmfai by shifting it left by 2 * @i and
 * then right by 30; the constant 0x0000ffffffffffff is shifted right by
 * 16 times that field and returned.
 *
 * NOTE(review): assumes sclp.hmfai is a 32-bit unsigned value, which
 * would make the field the @i-th 2-bit group counted from the top and
 * the shift distance 0, 16, 32 or 48 - confirm against the sclp
 * definition.
 */
4178 static inline unsigned long nonhyp_mask(int i)
4180 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4182 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/* Reset @vcpu's valid_wakeup flag to false. */
4185 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4187 vcpu->valid_wakeup = false;
/*
 * kvm_s390_init - module initialisation.
 *
 * Checks sclp.has_sief2 (logging "SIE not available" when it is unset)
 * and the unsupported combination of the 'nested' and 'hpage' settings,
 * ORs the host facility words from S390_lowcore.stfle_fac_list[],
 * filtered through nonhyp_mask(), into the 16 entries of
 * kvm_s390_fac_base[], and finally returns the result of kvm_init().
 *
 * Both log messages end in a newline so that each is emitted as a
 * complete line of its own.
 *
 * NOTE(review): the error returns of the two checks are not visible in
 * this listing.
 */
4190 static int __init kvm_s390_init(void)
4194 if (!sclp.has_sief2) {
4195 pr_info("SIE not available\n");
4199 if (nested && hpage) {
4200 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
4204 for (i = 0; i < 16; i++)
4205 kvm_s390_fac_base[i] |=
4206 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4208 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/*
 * Module exit handler (registered through module_exit() below).
 * NOTE(review): its body is not visible in this listing.
 */
4211 static void __exit kvm_s390_exit(void)
/* Register kvm_s390_init()/kvm_s390_exit() as the module's init and exit handlers. */
4216 module_init(kvm_s390_init);
4217 module_exit(kvm_s390_exit);
4220 * Enable autoloading of the kvm module.
4221 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4222 * since x86 takes a different approach.
4224 #include <linux/miscdevice.h>
4225 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4226 MODULE_ALIAS("devname:kvm");