/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
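/*
 * Each entry below pairs a debugfs file name with the offset of a counter
 * in struct kvm_vcpu, so the generic KVM code can expose it read-only.
 * Illustrative read from userspace (the path assumes debugfs is mounted
 * in its usual location):
 *
 *   $ cat /sys/kernel/debug/kvm/exit_instruction
 */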
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = {
	0xffe6000000000000UL,
	0x005e000000000000UL,

unsigned long kvm_s390_fac_list_mask_size(void)
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
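/*
 * A minimal sketch of the facility-bit convention used by the mask above,
 * assuming the usual MSB-0 numbering of the STFLE result on this
 * big-endian machine: facility nr lives in doubleword nr / 64, at bit
 * position 63 - (nr & 63):
 *
 *   fac_mask[nr >> 6] |= 1UL << (63 - (nr & 63));
 *
 * e.g. facility 76 (MSA3) maps to 0x0008000000000000UL in the second
 * mask word above.
 */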
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
	/* every s390 is virtualization enabled ;-) */

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
	struct kvm_vcpu *vcpu;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,

int kvm_arch_hardware_setup(void)
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);

void kvm_arch_hardware_unsetup(void)
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);

static void allow_cpu_feat(unsigned long nr)
	set_bit_inv(nr, kvm_s390_available_cpu_feat);

static inline int plo_test_bit(unsigned char nr)
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	/* Parameter registers are ignored for "test bit" */

static void kvm_s390_cpu_feat_init(void)
	for (i = 0; i < 256; ++i) {
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
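/*
 * The loop above records PLO subfunction i MSB-first, i.e. as bit
 * 0x80 >> (i & 7) of byte i >> 3, matching the layout of the CPACF query
 * blocks filled in below; plo_test_bit() presumably relies on the
 * instruction's "test bit" mode (function code | 0x100), which is why no
 * parameter block is needed.
 */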
	if (test_facility(28)) /* TOD-clock steering */
		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);

int kvm_arch_init(void *opaque)
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);

void kvm_arch_exit(void)
	debug_unregister(kvm_s390_dbf);

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_MEM_OP:
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
	case KVM_CAP_S390_VECTOR_REGISTERS:
	case KVM_CAP_S390_RI:
		r = test_facility(64);
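/*
 * Illustrative userspace probe for one of the capabilities handled above
 * (vm_fd is an assumption: any open KVM VM file descriptor):
 *
 *   int has_ri = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_RI);
 *
 * A return value > 0 means the capability is available.
 */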
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	mutex_lock(&kvm->slots_lock);
	if (log->slot >= KVM_USER_MEM_SLOTS)

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	if (!memslot->dirty_bitmap)

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);

	/* Clear the dirty log */
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	mutex_unlock(&kvm->slots_lock);

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_RI:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
		mutex_unlock(&kvm->lock);
	case KVM_S390_VM_MEM_CLR_CMMA:
		if (!kvm->arch.use_cmma)

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
		if (get_user(new_limit, (u64 __user *)attr->addr))
		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)

		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

				gmap_remove(kvm->arch.gmap);
				kvm->arch.gmap = new;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_vcpu *vcpu;

	if (!test_kvm_facility(kvm, 76))

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		mutex_unlock(&kvm->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
	mutex_unlock(&kvm->lock);

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_from_user(&gtod_high, (void __user *)attr->addr,

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_to_user((void __user *)attr->addr, &gtod_high,
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
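/*
 * The guest TOD is kept as an offset, defined so that
 * guest_tod = host_tod + kvm->arch.epoch. Reading the clock above thus
 * reduces to one addition, and setting it (see kvm_s390_set_tod_clock())
 * to one subtraction.
 */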
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!copy_from_user(proc, (void __user *)attr->addr,
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
				kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	mutex_unlock(&kvm->lock);
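/*
 * The IBC chosen above is clamped to the machine's supported range:
 * sclp.ibc packs the lowest supported IBC in bits 16-27 and the highest
 * unblocked IBC in the low 12 bits, so a userspace request is raised or
 * lowered to fit [lowest_ibc, unblocked_ibc].
 */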
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_processor *proc;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_machine *mach;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
	if (args->flags != 0)

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
		keys = vmalloc(sizeof(uint8_t) * args->count);

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
	up_read(&current->mm->mmap_sem);

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
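/*
 * Illustrative KVM_S390_GET_SKEYS call from userspace; vm_fd and buf are
 * assumptions, and buf must provide room for args.count key bytes:
 *
 *   struct kvm_s390_skeys args = {
 *           .start_gfn = 0,
 *           .count = 128,
 *           .skeydata_addr = (__u64)(unsigned long)buf,
 *   };
 *   ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A KVM_S390_GET_SKEYS_NONE return means the guest does not use storage
 * keys and nothing was copied.
 */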
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
	if (args->flags != 0)

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
		keys = vmalloc(sizeof(uint8_t) * args->count);

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
	up_read(&current->mm->mmap_sem);

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;

	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
		r = kvm_s390_inject_vm(kvm, &s390int);
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		if (copy_from_user(&cap, argp, sizeof(cap)))
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
	case KVM_SET_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_set_attr(kvm, &attr);
	case KVM_GET_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_get_attr(kvm, &attr);
	case KVM_HAS_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_has_attr(kvm, &attr);
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
		r = kvm_s390_get_skeys(kvm, &args);
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
		r = kvm_s390_set_skeys(kvm, &args);

static int kvm_s390_query_ap_config(u8 *config)
	u32 fcn_code = 0x04000000UL;

	memset(config, 0, 128);
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"

static int kvm_s390_apxa_installed(void)
	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

			pr_err("PQAP(QCI) failed with cc=%d", cc);
			return config[0] & 0x40;

static void kvm_s390_set_crycb_format(struct kvm *kvm)
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;

static u64 kvm_s390_get_initial_cpuid(void)
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);

static void kvm_s390_crypto_init(struct kvm *kvm)
	if (!test_kvm_facility(kvm, 76))

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

static void sca_dispose(struct kvm *kvm)
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	gfp_t alloc_flags = GFP_KERNEL;
	char debug_name[16];
	static unsigned long sca_offset;

#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))

	rc = s390_enable_sie();

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);

	spin_lock(&kvm_lock);
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);
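/*
 * The sca_offset dance above staggers each new VM's basic SCA within its
 * page in 16-byte steps, wrapping before the block would cross the page
 * boundary; presumably this spreads the hot SCA fields of different VMs
 * over distinct cache lines. Each VM still owns its whole page.
 */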
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
			kvm->arch.model.fac_mask[i] = 0UL;

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	KVM_EVENT(3, "creation of vm failed: %d", rc);

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);

static void kvm_free_vcpus(struct kvm *kvm)
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);

void kvm_arch_destroy_vm(struct kvm *kvm)
	kvm_free_vcpus(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
	vcpu->arch.gmap->private = vcpu->kvm;

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	read_unlock(&vcpu->kvm->arch.sca_lock);

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	read_unlock(&vcpu->kvm->arch.sca_lock);

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
	d->ipte_control = s->ipte_control;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);

static int sca_switch_to_extended(struct kvm *kvm)
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
	if (id < KVM_S390_BSCA_CPU_SLOTS)
	if (!sclp.has_esca || !sclp.has_64bscao)

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
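/*
 * In other words: vcpu ids below KVM_S390_BSCA_CPU_SLOTS (64) always
 * fit; larger ids (up to KVM_S390_ESCA_CPU_SLOTS, 248) require the
 * extended SCA, so the VM is converted on demand via
 * sca_switch_to_extended(), provided the machine has ESCA and the
 * 64-bit-SCAO facility.
 */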
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
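/*
 * Note the seq & ~1 in the retry condition above: if the VCPU thread
 * reads its own timer from inside its own write-side critical section,
 * the sequence count is odd, and clearing the low bit keeps the loop
 * from spinning forever while still catching updates by other writers.
 */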
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	/* Save host register state */
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

	/* Save guest register state */
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
	mutex_lock(&vcpu->kvm->lock);
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
	if (!test_kvm_facility(vcpu->kvm, 76))

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
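	/*
	 * The two ecb2 bits below are magic numbers here; going by the names
	 * later kernels use (an assumption): 0x80 is ECB2_CMMA, interpretation
	 * of ESSA, set so the guest can drive CMMA itself, and 0x08 is
	 * ECB2_PFMFI, PFMF interpretation, cleared so PFMF stays intercepted
	 * while CMMA state is tracked.
	 */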
	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0x1002000U;
		vcpu->arch.sie_block->eca |= 0x80000000U;
		vcpu->arch.sie_block->eca |= 0x40000000U;
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	if (test_kvm_facility(vcpu->kvm, 74))
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	free_page((unsigned long)(vcpu->arch.sie_block));
	kmem_cache_free(kvm_vcpu_cache, vcpu);

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
	return kvm_s390_vcpu_has_irq(vcpu, 0);

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
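/*
 * CPUSTAT_STOP_INT above forces an exit from the SIE instruction as soon
 * as the guest CPU notices it; the loop then waits for that exit to
 * complete, relying on the SIE entry/exit path to maintain PROG_IN_SIE
 * in prog0c. The elided loop body is presumably just cpu_relax().
 */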
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;

	if (gmap_is_shadow(gmap))
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
	/* kvm common code refers to this, but never calls it */

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
			kvm_s390_set_cpu_timer(vcpu, val);
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
	kvm_s390_vcpu_initial_reset(vcpu);

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	/* make sure the new values will be lazily loaded */
	if (test_fp_ctl(fpu->fpc))
	current->thread.fpu.fpc = fpu->fpc;
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	/* make sure we have the latest values */
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
	if (!is_vcpu_stopped(vcpu))
	vcpu->run->psw_mask = psw.mask;
	vcpu->run->psw_addr = psw.addr;

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
	return -EINVAL; /* not implemented yet */

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
	if (!sclp.has_gpere)

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;

		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */

static bool ibs_enabled(struct kvm_vcpu *vcpu)
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
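/*
 * Editor's note on the arithmetic above: the SIE epoch is a signed offset
 * that the hardware adds to the host TOD clock whenever the guest reads
 * its clock, i.e. guest_tod = host_tod + epoch. Setting
 * epoch = tod - get_tod_clock() therefore makes the guest observe exactly
 * the requested value "tod" at this instant; afterwards the guest clock
 * keeps ticking at host speed.
 */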
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
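/*
 * Editor's note: the bail-out checks above implement the pfault handshake
 * contract - a token must be registered, the current PSW must match the
 * programmed select/compare mask, external interrupts must be enabled, no
 * interrupt may already be pending, the 0x200ul bit in CR0 (the
 * external-interruption subclass used for pfault notifications) must be
 * set, and the gmap must have pfault enabled. Only then is it safe to let
 * the guest run on while the host faults the page in asynchronously.
 */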
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
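/*
 * Example (editor's sketch): the canonical userspace loop that drives the
 * handler above, assuming "vcpu_fd" and a struct kvm_run "run" mmap()ed
 * from the vcpu fd; "handle_sie_intercept" is a hypothetical helper:
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
 *			if (errno == EINTR)	// interrupted by a signal
 *				continue;
 *			break;			// real error
 *		}
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sie_intercept(run);
 *	}
 */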
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
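/*
 * Example (editor's sketch): userspace triggers the store above via the
 * KVM_S390_STORE_STATUS vcpu ioctl; the argument is a guest absolute
 * address or one of the two special values handled at the top:
 *
 *	// store into the absolute lowcore save area at 0x1200
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 *	// store relative to the vcpu's prefix area
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */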
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}
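/*
 * Editor's note on the masking above: the additional-status area is
 * 1K-aligned, so the low 10 address bits are ignored ("only bits 0-53 are
 * used for address formation"). For example, gpa = 0x12345 stores at
 * 0x12345 & ~0x3ff = 0x12000, while any gpa below 0x400 masks to zero and
 * the store is silently skipped.
 */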
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
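/*
 * Example (editor's sketch): reading 256 bytes of guest logical memory
 * through the handler above, assuming "vcpu_fd" and a local buffer "buf":
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,		// guest logical address
 *		.size = 256,
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf = (__u64)(unsigned long)buf,
 *		.ar = 0,			// access register number
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY in .flags the access is only verified
 * and nothing is copied.
 */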
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
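/*
 * Editor's note on the checks above: 0xfffff is the offset mask within a
 * 1MB segment, so "x & 0xffffful" is non-zero exactly when x is not
 * 1MB-aligned. For example, userspace_addr = 0x20100000 passes (low
 * 20 bits are zero) while 0x20180000 is rejected with -EINVAL.
 */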
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
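/*
 * Editor's note, worked example (reading of the SCLP field is the
 * editor's): each 2-bit field of sclp.hmfai selects how many leading
 * 16-bit blocks of the corresponding facility doubleword remain under
 * hypervisor control. For i = 0 and hmfai = 0x40000000, nonhyp_fai =
 * (0x40000000 << 0) >> 30 = 1, so the result is
 * 0x0000ffffffffffffUL >> (1 << 4) = 0x00000000ffffffffUL.
 */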
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");