// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
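/*
 * Clarifying example: assume delta = 1, i.e. the host TOD moved forward
 * by one unit. We then add -1 = 0xffffffffffffffff to scb->epoch and
 * delta_idx becomes -1. If the 64-bit addition wrapped around
 * (scb->epoch < delta after the add), the final "epdx += 1" folds the
 * carry back in, so that epdx:epoch behaves like one signed 128-bit
 * epoch value.
 */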
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
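/*
 * Clarifying note: the 0x100 bit in r0 selects the PLO "test bit" query
 * form, which is why no parameter registers need to be set up above;
 * condition code 0 then means that function code "nr" is installed.
 */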
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
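/*
 * Illustrative user-space sketch (hypothetical usage, not part of this
 * file): the capabilities above are queried through the generic KVM
 * ioctl interface, e.g.
 *
 *	int r = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * which would return MEM_OP_MAX_SIZE (65536) here, or 0 for an
 * unsupported capability.
 */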
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages; last_gfn is the first gfn past the slot */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
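/*
 * Illustrative user-space sketch (hypothetical usage, not part of this
 * file): enabling one of the VM capabilities handled above, e.g. user
 * space SIGP handling:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */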
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
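/*
 * Illustrative user-space sketch (hypothetical usage, not part of this
 * file): the memory limit above is set through the VM device attribute
 * interface:
 *
 *	__u64 limit = 1ULL << 31;	(2 GB, for example)
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64) &limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */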
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_s390_vcpu_crypto_setup(vcpu);

	kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
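/*
 * Illustrative user-space sketch (hypothetical usage, not part of this
 * file): migration mode is toggled through the same attribute
 * interface, no payload is needed:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */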
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
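/*
 * Clarifying note: with 8-byte pointers KVM_S390_MAX_BIT_DISTANCE is 16,
 * i.e. a run of up to 16 clean pages is still sent inline, because
 * starting a new block (base address plus length) would cost at least
 * as much as the clean data itself.
 */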
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
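/*
 * Clarifying note: the binary search above relies on the memslot array
 * being sorted by descending base_gfn (memslots[0] has the highest base
 * address), which is why "gfn >= base_gfn" moves the upper bound down.
 */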
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
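/*
 * Illustrative user-space sketch (hypothetical usage, not part of this
 * file): peeking at CMMA values through the ioctl backed by this
 * function:
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64) buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 */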
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
1829 unsigned int ioctl, unsigned long arg)
1831 struct kvm *kvm = filp->private_data;
1832 void __user *argp = (void __user *)arg;
1833 struct kvm_device_attr attr;
1837 case KVM_S390_INTERRUPT: {
1838 struct kvm_s390_interrupt s390int;
1841 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1843 r = kvm_s390_inject_vm(kvm, &s390int);
1846 case KVM_ENABLE_CAP: {
1847 struct kvm_enable_cap cap;
1849 if (copy_from_user(&cap, argp, sizeof(cap)))
1851 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1854 case KVM_CREATE_IRQCHIP: {
1855 struct kvm_irq_routing_entry routing;
1858 if (kvm->arch.use_irqchip) {
1859 /* Set up dummy routing. */
1860 memset(&routing, 0, sizeof(routing));
1861 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1865 case KVM_SET_DEVICE_ATTR: {
1867 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1869 r = kvm_s390_vm_set_attr(kvm, &attr);
1872 case KVM_GET_DEVICE_ATTR: {
1874 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1876 r = kvm_s390_vm_get_attr(kvm, &attr);
1879 case KVM_HAS_DEVICE_ATTR: {
1881 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1883 r = kvm_s390_vm_has_attr(kvm, &attr);
1886 case KVM_S390_GET_SKEYS: {
1887 struct kvm_s390_skeys args;
1890 if (copy_from_user(&args, argp,
1891 sizeof(struct kvm_s390_skeys)))
1893 r = kvm_s390_get_skeys(kvm, &args);
1896 case KVM_S390_SET_SKEYS: {
1897 struct kvm_s390_skeys args;
1900 if (copy_from_user(&args, argp,
1901 sizeof(struct kvm_s390_skeys)))
1903 r = kvm_s390_set_skeys(kvm, &args);
1906 case KVM_S390_GET_CMMA_BITS: {
1907 struct kvm_s390_cmma_log args;
1910 if (copy_from_user(&args, argp, sizeof(args)))
1912 mutex_lock(&kvm->slots_lock);
1913 r = kvm_s390_get_cmma_bits(kvm, &args);
1914 mutex_unlock(&kvm->slots_lock);
1916 r = copy_to_user(argp, &args, sizeof(args));
1922 case KVM_S390_SET_CMMA_BITS: {
1923 struct kvm_s390_cmma_log args;
1926 if (copy_from_user(&args, argp, sizeof(args)))
1928 mutex_lock(&kvm->slots_lock);
1929 r = kvm_s390_set_cmma_bits(kvm, &args);
1930 mutex_unlock(&kvm->slots_lock);
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
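/*
 * Illustrative user-space sketch (hypothetical usage, not part of this
 * file): the "type" argument above originates from VM creation:
 *
 *	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 *
 * or, for a user-controlled VM (CAP_SYS_ADMIN required):
 *
 *	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
 */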
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
2341 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2343 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2344 kvm_clear_async_pf_completion_queue(vcpu);
2345 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2351 kvm_s390_set_prefix(vcpu, 0);
2352 if (test_kvm_facility(vcpu->kvm, 64))
2353 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2354 if (test_kvm_facility(vcpu->kvm, 82))
2355 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2356 if (test_kvm_facility(vcpu->kvm, 133))
2357 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2358 if (test_kvm_facility(vcpu->kvm, 156))
2359 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2360 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2361 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2364 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2366 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2368 if (kvm_is_ucontrol(vcpu->kvm))
2369 return __kvm_ucontrol_vcpu_init(vcpu);
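/*
 * CPU timer accounting: cputm_start records the host TOD value at which
 * accounting was started. The externally visible CPU timer is the value
 * in the SIE block minus the host time that elapsed since cputm_start.
 * Writers bracket their updates with cputm_seqcount so that lockless
 * readers in kvm_s390_get_cpu_timer() can detect and retry racing updates.
 */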
2374 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2375 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2377 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2378 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2379 vcpu->arch.cputm_start = get_tod_clock_fast();
2380 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2383 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2384 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2386 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2387 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2388 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2389 vcpu->arch.cputm_start = 0;
2390 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2393 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2394 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2396 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2397 vcpu->arch.cputm_enabled = true;
2398 __start_cpu_timer_accounting(vcpu);
2401 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2402 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2404 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2405 __stop_cpu_timer_accounting(vcpu);
2406 vcpu->arch.cputm_enabled = false;
2409 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2411 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2412 __enable_cpu_timer_accounting(vcpu);
2416 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2418 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2419 __disable_cpu_timer_accounting(vcpu);
2423 /* set the cpu timer - may only be called from the VCPU thread itself */
2424 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2426 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2427 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2428 if (vcpu->arch.cputm_enabled)
2429 vcpu->arch.cputm_start = get_tod_clock_fast();
2430 vcpu->arch.sie_block->cputm = cputm;
2431 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2435 /* update and get the cpu timer - can also be called from other VCPU threads */
2436 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2441 if (unlikely(!vcpu->arch.cputm_enabled))
2442 return vcpu->arch.sie_block->cputm;
2444 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2446 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2448 * If the writer would ever execute a read in the critical
2449 * section, e.g. in irq context, we have a deadlock.
2451 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2452 value = vcpu->arch.sie_block->cputm;
2453 /* if cputm_start is 0, accounting is being started/stopped */
2454 if (likely(vcpu->arch.cputm_start))
2455 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2456 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2461 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2464 gmap_enable(vcpu->arch.enabled_gmap);
2465 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2466 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2467 __start_cpu_timer_accounting(vcpu);
2471 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2474 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2475 __stop_cpu_timer_accounting(vcpu);
2476 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2477 vcpu->arch.enabled_gmap = gmap_get_enabled();
2478 gmap_disable(vcpu->arch.enabled_gmap);
2482 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2484 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2485 vcpu->arch.sie_block->gpsw.mask = 0UL;
2486 vcpu->arch.sie_block->gpsw.addr = 0UL;
2487 kvm_s390_set_prefix(vcpu, 0);
2488 kvm_s390_set_cpu_timer(vcpu, 0);
2489 vcpu->arch.sie_block->ckc = 0UL;
2490 vcpu->arch.sie_block->todpr = 0;
2491 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2492 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
2493 CR0_INTERRUPT_KEY_SUBMASK |
2494 CR0_MEASUREMENT_ALERT_SUBMASK;
2495 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2497 CR14_EXTERNAL_DAMAGE_SUBMASK;
2498 /* make sure the new fpc will be lazily loaded */
2500 current->thread.fpu.fpc = 0;
2501 vcpu->arch.sie_block->gbea = 1;
2502 vcpu->arch.sie_block->pp = 0;
2503 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2504 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2505 kvm_clear_async_pf_completion_queue(vcpu);
2506 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2507 kvm_s390_vcpu_stop(vcpu);
2508 kvm_s390_clear_local_irqs(vcpu);
2511 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2513 mutex_lock(&vcpu->kvm->lock);
2515 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2516 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2518 mutex_unlock(&vcpu->kvm->lock);
2519 if (!kvm_is_ucontrol(vcpu->kvm)) {
2520 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2523 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2524 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2525 /* make vcpu_load load the right gmap on the first trigger */
2526 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2529 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2531 if (!test_kvm_facility(vcpu->kvm, 76))
2534 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2536 if (vcpu->kvm->arch.crypto.aes_kw)
2537 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2538 if (vcpu->kvm->arch.crypto.dea_kw)
2539 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2541 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2544 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2546 free_page(vcpu->arch.sie_block->cbrlo);
2547 vcpu->arch.sie_block->cbrlo = 0;
2550 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2552 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2553 if (!vcpu->arch.sie_block->cbrlo)
2558 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2560 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2562 vcpu->arch.sie_block->ibc = model->ibc;
2563 if (test_kvm_facility(vcpu->kvm, 7))
2564 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2567 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2571 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2575 if (test_kvm_facility(vcpu->kvm, 78))
2576 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2577 else if (test_kvm_facility(vcpu->kvm, 8))
2578 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2580 kvm_s390_vcpu_setup_model(vcpu);
2582 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2583 if (MACHINE_HAS_ESOP)
2584 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2585 if (test_kvm_facility(vcpu->kvm, 9))
2586 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2587 if (test_kvm_facility(vcpu->kvm, 73))
2588 vcpu->arch.sie_block->ecb |= ECB_TE;
2590 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2591 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2592 if (test_kvm_facility(vcpu->kvm, 130))
2593 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2594 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2596 vcpu->arch.sie_block->eca |= ECA_CEI;
2598 vcpu->arch.sie_block->eca |= ECA_IB;
2600 vcpu->arch.sie_block->eca |= ECA_SII;
2601 if (sclp.has_sigpif)
2602 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2603 if (test_kvm_facility(vcpu->kvm, 129)) {
2604 vcpu->arch.sie_block->eca |= ECA_VX;
2605 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2607 if (test_kvm_facility(vcpu->kvm, 139))
2608 vcpu->arch.sie_block->ecd |= ECD_MEF;
2609 if (test_kvm_facility(vcpu->kvm, 156))
2610 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2611 if (vcpu->arch.sie_block->gd) {
2612 vcpu->arch.sie_block->eca |= ECA_AIV;
2613 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2614 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2616 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2618 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2621 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2623 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2625 if (vcpu->kvm->arch.use_cmma) {
2626 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2630 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2631 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2633 kvm_s390_vcpu_crypto_setup(vcpu);
2638 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2641 struct kvm_vcpu *vcpu;
2642 struct sie_page *sie_page;
2645 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2650 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2654 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2655 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2659 vcpu->arch.sie_block = &sie_page->sie_block;
2660 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2662 /* the real guest size will always be smaller than msl */
2663 vcpu->arch.sie_block->mso = 0;
2664 vcpu->arch.sie_block->msl = sclp.hamax;
2666 vcpu->arch.sie_block->icpua = id;
2667 spin_lock_init(&vcpu->arch.local_int.lock);
2668 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2669 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2670 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2671 seqcount_init(&vcpu->arch.cputm_seqcount);
2673 rc = kvm_vcpu_init(vcpu, kvm, id);
2675 goto out_free_sie_block;
2676 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2677 vcpu->arch.sie_block);
2678 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2682 free_page((unsigned long)(vcpu->arch.sie_block));
2684 kmem_cache_free(kvm_vcpu_cache, vcpu);
2689 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2691 return kvm_s390_vcpu_has_irq(vcpu, 0);
2694 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2696 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
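/*
 * PROG_BLOCK_SIE and PROG_REQUEST are bits in the SIE block's prog20
 * field. While one of them is set the VCPU will not (re-)enter SIE, so
 * blocking a VCPU or posting a synchronous request keeps it in host
 * context until the bit is cleared again.
 */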
2699 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2701 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2705 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2707 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2710 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2712 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2716 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2718 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2722 * Kick a guest cpu out of SIE and wait until SIE is not running.
2723 * If the CPU is not running (e.g. waiting as idle) the function will
2724 * return immediately. */
2725 void exit_sie(struct kvm_vcpu *vcpu)
2727 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2728 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2732 /* Kick a guest cpu out of SIE to process a request synchronously */
2733 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2735 kvm_make_request(req, vcpu);
2736 kvm_s390_vcpu_request(vcpu);
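/*
 * gmap notifier callback, invoked when the host protection of notified
 * guest pages changes. Only the two pages backing a VCPU's prefix area
 * are of interest here; if the invalidated range overlaps a prefix, a
 * KVM_REQ_MMU_RELOAD request makes sure the prefix is mapped and
 * re-protected before that VCPU enters SIE again.
 */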
2739 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2742 struct kvm *kvm = gmap->private;
2743 struct kvm_vcpu *vcpu;
2744 unsigned long prefix;
2747 if (gmap_is_shadow(gmap))
2749 if (start >= 1UL << 31)
2750 /* We are only interested in prefix pages */
2752 kvm_for_each_vcpu(i, vcpu, kvm) {
2753 /* match against both prefix pages */
2754 prefix = kvm_s390_get_prefix(vcpu);
2755 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2756 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2758 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2763 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2765 /* kvm common code refers to this, but never calls it */
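/*
 * ONE_REG accessors for the s390 specific register ids (TOD programmable
 * register, epoch difference, CPU timer, clock comparator, pfault state,
 * program parameter and breaking event address).
 *
 * A minimal userspace sketch (illustrative only, error handling and the
 * vcpu_fd setup omitted):
 *
 *	__u64 val;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&val,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */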
2770 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2771 struct kvm_one_reg *reg)
2776 case KVM_REG_S390_TODPR:
2777 r = put_user(vcpu->arch.sie_block->todpr,
2778 (u32 __user *)reg->addr);
2780 case KVM_REG_S390_EPOCHDIFF:
2781 r = put_user(vcpu->arch.sie_block->epoch,
2782 (u64 __user *)reg->addr);
2784 case KVM_REG_S390_CPU_TIMER:
2785 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2786 (u64 __user *)reg->addr);
2788 case KVM_REG_S390_CLOCK_COMP:
2789 r = put_user(vcpu->arch.sie_block->ckc,
2790 (u64 __user *)reg->addr);
2792 case KVM_REG_S390_PFTOKEN:
2793 r = put_user(vcpu->arch.pfault_token,
2794 (u64 __user *)reg->addr);
2796 case KVM_REG_S390_PFCOMPARE:
2797 r = put_user(vcpu->arch.pfault_compare,
2798 (u64 __user *)reg->addr);
2800 case KVM_REG_S390_PFSELECT:
2801 r = put_user(vcpu->arch.pfault_select,
2802 (u64 __user *)reg->addr);
2804 case KVM_REG_S390_PP:
2805 r = put_user(vcpu->arch.sie_block->pp,
2806 (u64 __user *)reg->addr);
2808 case KVM_REG_S390_GBEA:
2809 r = put_user(vcpu->arch.sie_block->gbea,
2810 (u64 __user *)reg->addr);
2819 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2820 struct kvm_one_reg *reg)
2826 case KVM_REG_S390_TODPR:
2827 r = get_user(vcpu->arch.sie_block->todpr,
2828 (u32 __user *)reg->addr);
2830 case KVM_REG_S390_EPOCHDIFF:
2831 r = get_user(vcpu->arch.sie_block->epoch,
2832 (u64 __user *)reg->addr);
2834 case KVM_REG_S390_CPU_TIMER:
2835 r = get_user(val, (u64 __user *)reg->addr);
2837 kvm_s390_set_cpu_timer(vcpu, val);
2839 case KVM_REG_S390_CLOCK_COMP:
2840 r = get_user(vcpu->arch.sie_block->ckc,
2841 (u64 __user *)reg->addr);
2843 case KVM_REG_S390_PFTOKEN:
2844 r = get_user(vcpu->arch.pfault_token,
2845 (u64 __user *)reg->addr);
2846 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2847 kvm_clear_async_pf_completion_queue(vcpu);
2849 case KVM_REG_S390_PFCOMPARE:
2850 r = get_user(vcpu->arch.pfault_compare,
2851 (u64 __user *)reg->addr);
2853 case KVM_REG_S390_PFSELECT:
2854 r = get_user(vcpu->arch.pfault_select,
2855 (u64 __user *)reg->addr);
2857 case KVM_REG_S390_PP:
2858 r = get_user(vcpu->arch.sie_block->pp,
2859 (u64 __user *)reg->addr);
2861 case KVM_REG_S390_GBEA:
2862 r = get_user(vcpu->arch.sie_block->gbea,
2863 (u64 __user *)reg->addr);
2872 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2874 kvm_s390_vcpu_initial_reset(vcpu);
2878 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2881 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2886 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2889 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2894 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2895 struct kvm_sregs *sregs)
2899 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2900 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2906 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2907 struct kvm_sregs *sregs)
2911 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2912 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2918 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2924 if (test_fp_ctl(fpu->fpc)) {
2928 vcpu->run->s.regs.fpc = fpu->fpc;
2930 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2931 (freg_t *) fpu->fprs);
2933 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2940 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2944 /* make sure we have the latest values */
2947 convert_vx_to_fp((freg_t *) fpu->fprs,
2948 (__vector128 *) vcpu->run->s.regs.vrs);
2950 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2951 fpu->fpc = vcpu->run->s.regs.fpc;
2957 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2961 if (!is_vcpu_stopped(vcpu))
2964 vcpu->run->psw_mask = psw.mask;
2965 vcpu->run->psw_addr = psw.addr;
2970 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2971 struct kvm_translation *tr)
2973 return -EINVAL; /* not implemented yet */
2976 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2977 KVM_GUESTDBG_USE_HW_BP | \
2978 KVM_GUESTDBG_ENABLE)
2980 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2981 struct kvm_guest_debug *dbg)
2987 vcpu->guest_debug = 0;
2988 kvm_s390_clear_bp_data(vcpu);
2990 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2994 if (!sclp.has_gpere) {
2999 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3000 vcpu->guest_debug = dbg->control;
3001 /* enforce guest PER */
3002 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3004 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3005 rc = kvm_s390_import_bp_data(vcpu, dbg);
3007 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3008 vcpu->arch.guestdbg.last_bp = 0;
3012 vcpu->guest_debug = 0;
3013 kvm_s390_clear_bp_data(vcpu);
3014 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3022 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3023 struct kvm_mp_state *mp_state)
3029 /* CHECK_STOP and LOAD are not supported yet */
3030 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3031 KVM_MP_STATE_OPERATING;
3037 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3038 struct kvm_mp_state *mp_state)
3044 /* user space knows about this interface - let it control the state */
3045 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3047 switch (mp_state->mp_state) {
3048 case KVM_MP_STATE_STOPPED:
3049 kvm_s390_vcpu_stop(vcpu);
3051 case KVM_MP_STATE_OPERATING:
3052 kvm_s390_vcpu_start(vcpu);
3054 case KVM_MP_STATE_LOAD:
3055 case KVM_MP_STATE_CHECK_STOP:
3056 /* fall through - CHECK_STOP and LOAD are not supported yet */
3065 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3067 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
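/*
 * Process requests that were posted while the VCPU was outside SIE:
 * re-protect the prefix pages (MMU_RELOAD), flush the guest TLB,
 * enable/disable IBS, force operation exception intercepts and toggle
 * CMMA interpretation when migration starts or stops.
 */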
3070 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3073 kvm_s390_vcpu_request_handled(vcpu);
3074 if (!kvm_request_pending(vcpu))
3077 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3078 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3079 * This ensures that the ipte instruction for this request has
3080 * already finished. We might race against a second unmapper that
3081 wants to set the blocking bit. Let's just retry the request loop.
3083 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3085 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3086 kvm_s390_get_prefix(vcpu),
3087 PAGE_SIZE * 2, PROT_WRITE);
3089 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3095 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3096 vcpu->arch.sie_block->ihcpu = 0xffff;
3100 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3101 if (!ibs_enabled(vcpu)) {
3102 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3103 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3108 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3109 if (ibs_enabled(vcpu)) {
3110 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3111 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3116 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3117 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3121 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3123 * Disable CMM virtualization; we will emulate the ESSA
3124 * instruction manually, in order to provide additional
3125 * functionality needed for live migration.
3127 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3131 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3133 * Re-enable CMM virtualization if CMMA is available and
3134 * CMM has been used.
3136 if ((vcpu->kvm->arch.use_cmma) &&
3137 (vcpu->kvm->mm->context.uses_cmm))
3138 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3142 /* nothing to do, just clear the request */
3143 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
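/*
 * The guest TOD clock is expressed as an epoch, i.e. the difference
 * between the requested guest TOD and the current host TOD. With the
 * multiple-epoch facility (139) the epoch index difference is kept in
 * epdx, with a borrow when the epoch subtraction wrapped. All VCPUs are
 * blocked while their SIE blocks are updated.
 */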
3148 void kvm_s390_set_tod_clock(struct kvm *kvm,
3149 const struct kvm_s390_vm_tod_clock *gtod)
3151 struct kvm_vcpu *vcpu;
3152 struct kvm_s390_tod_clock_ext htod;
3155 mutex_lock(&kvm->lock);
3158 get_tod_clock_ext((char *)&htod);
3160 kvm->arch.epoch = gtod->tod - htod.tod;
3162 if (test_kvm_facility(kvm, 139)) {
3163 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3164 if (kvm->arch.epoch > gtod->tod)
3165 kvm->arch.epdx -= 1;
3168 kvm_s390_vcpu_block_all(kvm);
3169 kvm_for_each_vcpu(i, vcpu, kvm) {
3170 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3171 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3174 kvm_s390_vcpu_unblock_all(kvm);
3176 mutex_unlock(&kvm->lock);
3180 * kvm_arch_fault_in_page - fault-in guest page if necessary
3181 * @vcpu: The corresponding virtual cpu
3182 * @gpa: Guest physical address
3183 * @writable: Whether the page should be writable or not
3185 * Make sure that a guest page has been faulted-in on the host.
3187 * Return: Zero on success, negative error code otherwise.
3189 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3191 return gmap_fault(vcpu->arch.gmap, gpa,
3192 writable ? FAULT_FLAG_WRITE : 0);
3195 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3196 unsigned long token)
3198 struct kvm_s390_interrupt inti;
3199 struct kvm_s390_irq irq;
3202 irq.u.ext.ext_params2 = token;
3203 irq.type = KVM_S390_INT_PFAULT_INIT;
3204 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3206 inti.type = KVM_S390_INT_PFAULT_DONE;
3207 inti.parm64 = token;
3208 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3212 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3213 struct kvm_async_pf *work)
3215 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3216 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3219 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3220 struct kvm_async_pf *work)
3222 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3223 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3226 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3227 struct kvm_async_pf *work)
3229 /* s390 will always inject the page directly */
3232 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3235 * s390 will always inject the page directly,
3236 * but we still want check_async_completion to clean up
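/*
 * A pfault (async page fault) handshake is only set up if the guest has
 * enabled it (valid pfault token), the current PSW matches the configured
 * select/compare masks, external interrupts and the service-signal
 * subclass are enabled, no interrupt is pending and pfault handling is
 * enabled on the gmap.
 */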
3241 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3244 struct kvm_arch_async_pf arch;
3247 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3249 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3250 vcpu->arch.pfault_compare)
3252 if (psw_extint_disabled(vcpu))
3254 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3256 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3258 if (!vcpu->arch.gmap->pfault_enabled)
3261 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3262 hva += current->thread.gmap_addr & ~PAGE_MASK;
3263 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3266 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
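/*
 * Preparation before every SIE entry: run the async pfault completion
 * housekeeping, shadow gprs 14/15 into the SIE block, deliver pending
 * interrupts, process posted requests and, with guest debugging active,
 * patch the guest PER control registers.
 */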
3270 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3275 * On s390 notifications for arriving pages will be delivered directly
3276 * to the guest but the housekeeping for completed pfaults is
3277 * handled outside the worker.
3279 kvm_check_async_pf_completion(vcpu);
3281 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3282 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3287 if (test_cpu_flag(CIF_MCCK_PENDING))
3290 if (!kvm_is_ucontrol(vcpu->kvm)) {
3291 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3296 rc = kvm_s390_handle_requests(vcpu);
3300 if (guestdbg_enabled(vcpu)) {
3301 kvm_s390_backup_guest_per_regs(vcpu);
3302 kvm_s390_patch_guest_per_regs(vcpu);
3305 vcpu->arch.sie_block->icptcode = 0;
3306 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3307 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3308 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3313 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3315 struct kvm_s390_pgm_info pgm_info = {
3316 .code = PGM_ADDRESSING,
3321 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3322 trace_kvm_s390_sie_fault(vcpu);
3325 * We want to inject an addressing exception, which is defined as a
3326 * suppressing or terminating exception. However, since we came here
3327 * by a DAT access exception, the PSW still points to the faulting
3328 * instruction since DAT exceptions are nullifying. So we've got
3329 * to look up the current opcode to get the length of the instruction
3330 * to be able to forward the PSW.
3332 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3333 ilen = insn_length(opcode);
3337 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3338 * Forward by arbitrary ilc, injection will take care of
3339 * nullification if necessary.
3341 pgm_info = vcpu->arch.pgm;
3344 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3345 kvm_s390_forward_psw(vcpu, ilen);
3346 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
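/*
 * Post processing after SIE exits: -EINTR means a host machine check
 * happened while the guest was running, so the information stashed in
 * the sie_page is reinjected into the guest. A non-zero icptcode is
 * handed to the intercept handlers; unresolved intercepts are reported
 * to userspace as KVM_EXIT_S390_SIEIC. Host page faults either start a
 * pfault handshake or are resolved synchronously via fault-in.
 */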
3349 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3351 struct mcck_volatile_info *mcck_info;
3352 struct sie_page *sie_page;
3354 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3355 vcpu->arch.sie_block->icptcode);
3356 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3358 if (guestdbg_enabled(vcpu))
3359 kvm_s390_restore_guest_per_regs(vcpu);
3361 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3362 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3364 if (exit_reason == -EINTR) {
3365 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3366 sie_page = container_of(vcpu->arch.sie_block,
3367 struct sie_page, sie_block);
3368 mcck_info = &sie_page->mcck_info;
3369 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3373 if (vcpu->arch.sie_block->icptcode > 0) {
3374 int rc = kvm_handle_sie_intercept(vcpu);
3376 if (rc != -EOPNOTSUPP)
3378 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3379 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3380 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3381 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3383 } else if (exit_reason != -EFAULT) {
3384 vcpu->stat.exit_null++;
3386 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3387 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3388 vcpu->run->s390_ucontrol.trans_exc_code =
3389 current->thread.gmap_addr;
3390 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3392 } else if (current->thread.gmap_pfault) {
3393 trace_kvm_s390_major_guest_pfault(vcpu);
3394 current->thread.gmap_pfault = 0;
3395 if (kvm_arch_setup_async_pf(vcpu))
3397 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3399 return vcpu_post_run_fault_in_sie(vcpu);
3402 static int __vcpu_run(struct kvm_vcpu *vcpu)
3404 int rc, exit_reason;
3407 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3408 * ning the guest), so that memslots (and other stuff) are protected
3410 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3413 rc = vcpu_pre_run(vcpu);
3417 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3419 * As PF_VCPU will be used in the fault handler, there must be no
3420 * uaccess between guest_enter and guest_exit.
3422 local_irq_disable();
3423 guest_enter_irqoff();
3424 __disable_cpu_timer_accounting(vcpu);
3426 exit_reason = sie64a(vcpu->arch.sie_block,
3427 vcpu->run->s.regs.gprs);
3428 local_irq_disable();
3429 __enable_cpu_timer_accounting(vcpu);
3430 guest_exit_irqoff();
3432 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3434 rc = vcpu_post_run(vcpu, exit_reason);
3435 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3437 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
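/*
 * Transfer register state that userspace marked dirty in kvm_run into
 * the SIE block and vcpu before entering the guest, including enabling
 * runtime instrumentation and guarded storage when valid control blocks
 * are handed in, and load the guest FPU/vector, access and guarded
 * storage registers into the host thread context. store_regs() below
 * performs the reverse copy.
 */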
3441 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3443 struct runtime_instr_cb *riccb;
3446 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3447 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3448 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3449 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3450 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3451 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3452 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3453 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3454 /* some control register changes require a tlb flush */
3455 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3457 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3458 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3459 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3460 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3461 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3462 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3464 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3465 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3466 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3467 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3468 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3469 kvm_clear_async_pf_completion_queue(vcpu);
3472 * If userspace sets the riccb (e.g. after migration) to a valid state,
3473 * we should enable RI here instead of doing the lazy enablement.
3475 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3476 test_kvm_facility(vcpu->kvm, 64) &&
3478 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3479 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3480 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3483 * If userspace sets the gscb (e.g. after migration) to non-zero,
3484 * we should enable GS here instead of doing the lazy enablement.
3486 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3487 test_kvm_facility(vcpu->kvm, 133) &&
3489 !vcpu->arch.gs_enabled) {
3490 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3491 vcpu->arch.sie_block->ecb |= ECB_GS;
3492 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3493 vcpu->arch.gs_enabled = 1;
3495 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3496 test_kvm_facility(vcpu->kvm, 82)) {
3497 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3498 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3500 save_access_regs(vcpu->arch.host_acrs);
3501 restore_access_regs(vcpu->run->s.regs.acrs);
3502 /* save host (userspace) fprs/vrs */
3504 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3505 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3507 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3509 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3510 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3511 if (test_fp_ctl(current->thread.fpu.fpc))
3512 /* User space provided an invalid FPC, let's clear it */
3513 current->thread.fpu.fpc = 0;
3514 if (MACHINE_HAS_GS) {
3516 __ctl_set_bit(2, 4);
3517 if (current->thread.gs_cb) {
3518 vcpu->arch.host_gscb = current->thread.gs_cb;
3519 save_gs_cb(vcpu->arch.host_gscb);
3521 if (vcpu->arch.gs_enabled) {
3522 current->thread.gs_cb = (struct gs_cb *)
3523 &vcpu->run->s.regs.gscb;
3524 restore_gs_cb(current->thread.gs_cb);
3528 /* SIE will load etoken directly from SDNX and therefore kvm_run */
3530 kvm_run->kvm_dirty_regs = 0;
3533 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3535 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3536 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3537 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3538 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3539 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3540 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3541 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3542 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3543 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3544 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3545 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3546 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3547 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3548 save_access_regs(vcpu->run->s.regs.acrs);
3549 restore_access_regs(vcpu->arch.host_acrs);
3550 /* Save guest register state */
3552 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3553 /* Restore will be done lazily at return */
3554 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3555 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3556 if (MACHINE_HAS_GS) {
3557 __ctl_set_bit(2, 4);
3558 if (vcpu->arch.gs_enabled)
3559 save_gs_cb(current->thread.gs_cb);
3561 current->thread.gs_cb = vcpu->arch.host_gscb;
3562 restore_gs_cb(vcpu->arch.host_gscb);
3564 if (!vcpu->arch.host_gscb)
3565 __ctl_clear_bit(2, 4);
3566 vcpu->arch.host_gscb = NULL;
3568 /* SIE will save etoken directly into SDNX and therefore kvm_run */
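/*
 * Main entry point for the KVM_RUN ioctl. A minimal userspace sketch of
 * the run loop (illustrative only, setup and error handling omitted;
 * mmap_size comes from KVM_GET_VCPU_MMAP_SIZE):
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, NULL);
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;	/* let the VMM handle the intercept */
 *	}
 */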
3571 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3575 if (kvm_run->immediate_exit)
3580 if (guestdbg_exit_pending(vcpu)) {
3581 kvm_s390_prepare_debug_exit(vcpu);
3586 kvm_sigset_activate(vcpu);
3588 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3589 kvm_s390_vcpu_start(vcpu);
3590 } else if (is_vcpu_stopped(vcpu)) {
3591 pr_err_ratelimited("can't run stopped vcpu %d\n",
3597 sync_regs(vcpu, kvm_run);
3598 enable_cpu_timer_accounting(vcpu);
3601 rc = __vcpu_run(vcpu);
3603 if (signal_pending(current) && !rc) {
3604 kvm_run->exit_reason = KVM_EXIT_INTR;
3608 if (guestdbg_exit_pending(vcpu) && !rc) {
3609 kvm_s390_prepare_debug_exit(vcpu);
3613 if (rc == -EREMOTE) {
3614 /* userspace support is needed, kvm_run has been prepared */
3618 disable_cpu_timer_accounting(vcpu);
3619 store_regs(vcpu, kvm_run);
3621 kvm_sigset_deactivate(vcpu);
3623 vcpu->stat.exit_userspace++;
3630 * store status at address
3631 * we have two special cases:
3632 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3633 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3635 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3637 unsigned char archmode = 1;
3638 freg_t fprs[NUM_FPRS];
3643 px = kvm_s390_get_prefix(vcpu);
3644 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3645 if (write_guest_abs(vcpu, 163, &archmode, 1))
3648 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3649 if (write_guest_real(vcpu, 163, &archmode, 1))
3653 gpa -= __LC_FPREGS_SAVE_AREA;
3655 /* manually convert vector registers if necessary */
3656 if (MACHINE_HAS_VX) {
3657 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3658 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3661 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3662 vcpu->run->s.regs.fprs, 128);
3664 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3665 vcpu->run->s.regs.gprs, 128);
3666 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3667 &vcpu->arch.sie_block->gpsw, 16);
3668 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3670 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3671 &vcpu->run->s.regs.fpc, 4);
3672 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3673 &vcpu->arch.sie_block->todpr, 4);
3674 cputm = kvm_s390_get_cpu_timer(vcpu);
3675 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3677 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3678 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3680 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3681 &vcpu->run->s.regs.acrs, 64);
3682 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3683 &vcpu->arch.sie_block->gcr, 128);
3684 return rc ? -EFAULT : 0;
3687 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3690 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3691 * switch in the run ioctl. Let's update our copies before we save
3692 * them into the save area
3695 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3696 save_access_regs(vcpu->run->s.regs.acrs);
3698 return kvm_s390_store_status_unloaded(vcpu, addr);
3701 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3703 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3704 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3707 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3710 struct kvm_vcpu *vcpu;
3712 kvm_for_each_vcpu(i, vcpu, kvm) {
3713 __disable_ibs_on_vcpu(vcpu);
3717 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3721 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3722 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
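/*
 * IBS is only worth having while exactly one VCPU is running: starting
 * the first VCPU enables it for that CPU, starting a second one disables
 * it on all VCPUs again, and stopping the next-to-last VCPU re-enables
 * it for the remaining one.
 */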
3725 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3727 int i, online_vcpus, started_vcpus = 0;
3729 if (!is_vcpu_stopped(vcpu))
3732 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3733 /* Only one cpu at a time may enter/leave the STOPPED state. */
3734 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3735 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3737 for (i = 0; i < online_vcpus; i++) {
3738 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3742 if (started_vcpus == 0) {
3743 /* we're the only active VCPU -> speed it up */
3744 __enable_ibs_on_vcpu(vcpu);
3745 } else if (started_vcpus == 1) {
3747 * As we are starting a second VCPU, we have to disable
3748 * the IBS facility on all VCPUs to remove potentially
3749 * outstanding ENABLE requests.
3751 __disable_ibs_on_all_vcpus(vcpu->kvm);
3754 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3756 * Another VCPU might have used IBS while we were offline.
3757 * Let's play safe and flush the VCPU at startup.
3759 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3760 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3764 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3766 int i, online_vcpus, started_vcpus = 0;
3767 struct kvm_vcpu *started_vcpu = NULL;
3769 if (is_vcpu_stopped(vcpu))
3772 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3773 /* Only one cpu at a time may enter/leave the STOPPED state. */
3774 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3775 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3777 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3778 kvm_s390_clear_stop_irq(vcpu);
3780 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3781 __disable_ibs_on_vcpu(vcpu);
3783 for (i = 0; i < online_vcpus; i++) {
3784 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3786 started_vcpu = vcpu->kvm->vcpus[i];
3790 if (started_vcpus == 1) {
3792 * As we only have one VCPU left, we want to enable the
3793 * IBS facility for that VCPU to speed it up.
3795 __enable_ibs_on_vcpu(started_vcpu);
3798 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3802 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3803 struct kvm_enable_cap *cap)
3811 case KVM_CAP_S390_CSS_SUPPORT:
3812 if (!vcpu->kvm->arch.css_support) {
3813 vcpu->kvm->arch.css_support = 1;
3814 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3815 trace_kvm_s390_enable_css(vcpu->kvm);
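/*
 * KVM_S390_MEM_OP: read or write guest memory through the logical
 * (DAT translated) address space of a VCPU, optionally only checking
 * accessibility. A minimal userspace sketch (illustrative only, error
 * handling omitted):
 *
 *	char buf[512];
 *	struct kvm_s390_mem_op ksmo = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &ksmo);
 */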
3826 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3827 struct kvm_s390_mem_op *mop)
3829 void __user *uaddr = (void __user *)mop->buf;
3830 void *tmpbuf = NULL;
3832 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3833 | KVM_S390_MEMOP_F_CHECK_ONLY;
3835 if (mop->flags & ~supported_flags)
3838 if (mop->size > MEM_OP_MAX_SIZE)
3841 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3842 tmpbuf = vmalloc(mop->size);
3847 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3850 case KVM_S390_MEMOP_LOGICAL_READ:
3851 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3852 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3853 mop->size, GACC_FETCH);
3856 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3858 if (copy_to_user(uaddr, tmpbuf, mop->size))
3862 case KVM_S390_MEMOP_LOGICAL_WRITE:
3863 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3864 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3865 mop->size, GACC_STORE);
3868 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3872 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3878 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3880 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3881 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
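/*
 * The async ioctl path is entered by common code without taking the vcpu
 * mutex, so interrupt injection does not have to wait for a running VCPU.
 */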
3887 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3888 unsigned int ioctl, unsigned long arg)
3890 struct kvm_vcpu *vcpu = filp->private_data;
3891 void __user *argp = (void __user *)arg;
3894 case KVM_S390_IRQ: {
3895 struct kvm_s390_irq s390irq;
3897 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3899 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3901 case KVM_S390_INTERRUPT: {
3902 struct kvm_s390_interrupt s390int;
3903 struct kvm_s390_irq s390irq;
3905 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3907 if (s390int_to_s390irq(&s390int, &s390irq))
3909 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3912 return -ENOIOCTLCMD;
3915 long kvm_arch_vcpu_ioctl(struct file *filp,
3916 unsigned int ioctl, unsigned long arg)
3918 struct kvm_vcpu *vcpu = filp->private_data;
3919 void __user *argp = (void __user *)arg;
3926 case KVM_S390_STORE_STATUS:
3927 idx = srcu_read_lock(&vcpu->kvm->srcu);
3928 r = kvm_s390_vcpu_store_status(vcpu, arg);
3929 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3931 case KVM_S390_SET_INITIAL_PSW: {
3935 if (copy_from_user(&psw, argp, sizeof(psw)))
3937 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3940 case KVM_S390_INITIAL_RESET:
3941 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3943 case KVM_SET_ONE_REG:
3944 case KVM_GET_ONE_REG: {
3945 struct kvm_one_reg reg;
3947 if (copy_from_user(&reg, argp, sizeof(reg)))
3949 if (ioctl == KVM_SET_ONE_REG)
3950 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3952 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3955 #ifdef CONFIG_KVM_S390_UCONTROL
3956 case KVM_S390_UCAS_MAP: {
3957 struct kvm_s390_ucas_mapping ucasmap;
3959 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3964 if (!kvm_is_ucontrol(vcpu->kvm)) {
3969 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3970 ucasmap.vcpu_addr, ucasmap.length);
3973 case KVM_S390_UCAS_UNMAP: {
3974 struct kvm_s390_ucas_mapping ucasmap;
3976 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3981 if (!kvm_is_ucontrol(vcpu->kvm)) {
3986 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3991 case KVM_S390_VCPU_FAULT: {
3992 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3995 case KVM_ENABLE_CAP:
3997 struct kvm_enable_cap cap;
3999 if (copy_from_user(&cap, argp, sizeof(cap)))
4001 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4004 case KVM_S390_MEM_OP: {
4005 struct kvm_s390_mem_op mem_op;
4007 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4008 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4013 case KVM_S390_SET_IRQ_STATE: {
4014 struct kvm_s390_irq_state irq_state;
4017 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4019 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4020 irq_state.len == 0 ||
4021 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4025 /* do not use irq_state.flags, it will break old QEMUs */
4026 r = kvm_s390_set_irq_state(vcpu,
4027 (void __user *) irq_state.buf,
4031 case KVM_S390_GET_IRQ_STATE: {
4032 struct kvm_s390_irq_state irq_state;
4035 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4037 if (irq_state.len == 0) {
4041 /* do not use irq_state.flags, it will break old QEMUs */
4042 r = kvm_s390_get_irq_state(vcpu,
4043 (__u8 __user *) irq_state.buf,
4055 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4057 #ifdef CONFIG_KVM_S390_UCONTROL
4058 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4059 && (kvm_is_ucontrol(vcpu->kvm))) {
4060 vmf->page = virt_to_page(vcpu->arch.sie_block);
4061 get_page(vmf->page);
4065 return VM_FAULT_SIGBUS;
4068 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4069 unsigned long npages)
4074 /* Section: memory related */
4075 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4076 struct kvm_memory_slot *memslot,
4077 const struct kvm_userspace_memory_region *mem,
4078 enum kvm_mr_change change)
4080 /* A few sanity checks. Memory slots have to start and end at a
4081 segment boundary (1 MB). The memory backing them in userspace may be
4082 fragmented across different vmas, and it is fine to mmap() and
4083 munmap() within this slot at any time after this call. */
4085 if (mem->userspace_addr & 0xffffful)
4088 if (mem->memory_size & 0xffffful)
4091 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4097 void kvm_arch_commit_memory_region(struct kvm *kvm,
4098 const struct kvm_userspace_memory_region *mem,
4099 const struct kvm_memory_slot *old,
4100 const struct kvm_memory_slot *new,
4101 enum kvm_mr_change change)
4105 /* If the basics of the memslot do not change, we do not want
4106 * to update the gmap. Every update causes several unnecessary
4107 * segment translation exceptions. This is usually handled just
4108 * fine by the normal fault handler + gmap, but it will also
4109 * cause faults on the prefix page of running guest CPUs.
4111 if (old->userspace_addr == mem->userspace_addr &&
4112 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4113 old->npages * PAGE_SIZE == mem->memory_size)
4116 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4117 mem->guest_phys_addr, mem->memory_size);
4119 pr_warn("failed to commit memory region\n");
4123 static inline unsigned long nonhyp_mask(int i)
4125 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4127 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4130 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4132 vcpu->valid_wakeup = false;
4135 static int __init kvm_s390_init(void)
4139 if (!sclp.has_sief2) {
4140 pr_info("SIE not available\n");
4144 for (i = 0; i < 16; i++)
4145 kvm_s390_fac_base[i] |=
4146 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4148 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4151 static void __exit kvm_s390_exit(void)
4156 module_init(kvm_s390_init);
4157 module_exit(kvm_s390_exit);
4160 * Enable autoloading of the kvm module.
4161 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4162 * since x86 takes a different approach.
4164 #include <linux/miscdevice.h>
4165 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4166 MODULE_ALIAS("devname:kvm");