// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2017
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
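/*
 * Illustrative note (not in the original source): VCPU_STAT(exit_null)
 * expands to "offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU",
 * i.e. each debugfs entry below records where the counter lives inside
 * struct kvm_vcpu and that it is accumulated per vcpu.
 */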
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
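/*
 * Illustrative sketch (not in the original source): the guest view of
 * the TOD clock is "host TOD + epoch".  When STP steers the host TOD
 * forward by delta, keeping the guest view stable requires
 * epoch' = epoch - delta, since
 * (host_tod + delta) + (epoch - delta) == host_tod + epoch.
 * cputm_start is a raw TOD timestamp, so it must move forward with the
 * clock (cputm_start += delta) to keep "now - cputm_start" unchanged.
 */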
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

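	/*
	 * Illustrative note (not in the original source): PLO "test bit"
	 * probes each of the 256 possible function codes; supported codes
	 * are recorded MSB first, so function code 0 lands in bit 0x80 of
	 * plo[0], mirroring the facility-bit convention used elsewhere.
	 */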
	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}
	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);
	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);
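	/*
	 * Illustrative note (not in the original source): each
	 * __cpacf_query() above fills a 16-byte (128-bit) mask of the
	 * subfunctions the hardware supports for that CPACF instruction;
	 * the collected masks are later handed to user space via
	 * kvm_s390_get_machine_subfunc().
	 */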
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages be detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
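/*
 * Example for the shadowing discussion above (illustrative, not from the
 * original source): with FEAT_SKEY mis-shadowed, an ISKE issued by a
 * nested guest would return the PGSTE copy of the key while the real
 * storage key already differs, so these features are simply not offered
 * when nested (vSIE) guests are possible.
 */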
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
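		/*
		 * Worked example (illustrative, not from the original
		 * source): a 4 GiB guest has 0x100000 4 KiB pages, so
		 * ram_pages / 8 below allocates a 128 KiB bitmap, one
		 * bit per guest page.
		 */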
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
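/*
 * Worked example (illustrative, not from the original source): with
 * 8-byte pointers, KVM_S390_MAX_BIT_DISTANCE is 16 bytes.  Starting a
 * new block costs a fresh base-address/length header of the same size,
 * so padding a gap of up to 16 clean one-byte pgste values never costs
 * more than cutting the block and starting a new one.
 */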
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
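/*
 * Minimal user-space sketch (illustrative, not from the original source;
 * error handling omitted):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = 0,			/- or KVM_S390_CMMA_PEEK -/
 *		.values = (__u64) buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *	on return: log.start_gfn holds the gfn of the first value,
 *	log.count the number of bytes stored and log.remaining the
 *	number of dirty pages left to transfer.
 */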
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
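/*
 * Illustrative note (not in the original source): the basic SCA only has
 * KVM_S390_BSCA_CPU_SLOTS entries; creating a vcpu with an id beyond that
 * triggers the one-way switch above, after which up to
 * KVM_S390_ESCA_CPU_SLOTS vcpus fit, see sca_can_add_vcpu() below.
 */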
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
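/*
 * Illustrative note on the loop above (not in the original source):
 * raw_read_seqcount() may return an odd value while the writer side is
 * active; masking with ~1 makes read_seqcount_retry() fail for such a
 * snapshot, so the reader keeps retrying until it observes a stable,
 * even sequence count.
 */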
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
2336 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2338 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2339 vcpu->arch.sie_block->gpsw.mask = 0UL;
2340 vcpu->arch.sie_block->gpsw.addr = 0UL;
2341 kvm_s390_set_prefix(vcpu, 0);
2342 kvm_s390_set_cpu_timer(vcpu, 0);
2343 vcpu->arch.sie_block->ckc = 0UL;
2344 vcpu->arch.sie_block->todpr = 0;
2345 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2346 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2347 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2348 /* make sure the new fpc will be lazily loaded */
2349 save_fpu_regs();
2350 current->thread.fpu.fpc = 0;
2351 vcpu->arch.sie_block->gbea = 1;
2352 vcpu->arch.sie_block->pp = 0;
2353 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2354 kvm_clear_async_pf_completion_queue(vcpu);
2355 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2356 kvm_s390_vcpu_stop(vcpu);
2357 kvm_s390_clear_local_irqs(vcpu);
2360 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2362 mutex_lock(&vcpu->kvm->lock);
2363 preempt_disable();
2364 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2365 preempt_enable();
2366 mutex_unlock(&vcpu->kvm->lock);
2367 if (!kvm_is_ucontrol(vcpu->kvm)) {
2368 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2369 sca_add_vcpu(vcpu);
2371 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2372 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2373 /* make vcpu_load load the right gmap on the first trigger */
2374 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2377 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2379 if (!test_kvm_facility(vcpu->kvm, 76))
2380 return;
2382 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2384 if (vcpu->kvm->arch.crypto.aes_kw)
2385 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2386 if (vcpu->kvm->arch.crypto.dea_kw)
2387 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2389 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2392 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2394 free_page(vcpu->arch.sie_block->cbrlo);
2395 vcpu->arch.sie_block->cbrlo = 0;
2398 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2400 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2401 if (!vcpu->arch.sie_block->cbrlo)
2402 return -ENOMEM;
2404 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2405 return 0;
2408 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2410 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2412 vcpu->arch.sie_block->ibc = model->ibc;
2413 if (test_kvm_facility(vcpu->kvm, 7))
2414 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2417 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2419 int rc = 0;
2421 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2422 CPUSTAT_SM |
2423 CPUSTAT_STOPPED);
2425 if (test_kvm_facility(vcpu->kvm, 78))
2426 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2427 else if (test_kvm_facility(vcpu->kvm, 8))
2428 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2430 kvm_s390_vcpu_setup_model(vcpu);
2432 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2433 if (MACHINE_HAS_ESOP)
2434 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2435 if (test_kvm_facility(vcpu->kvm, 9))
2436 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2437 if (test_kvm_facility(vcpu->kvm, 73))
2438 vcpu->arch.sie_block->ecb |= ECB_TE;
2440 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2441 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2442 if (test_kvm_facility(vcpu->kvm, 130))
2443 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2444 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2446 vcpu->arch.sie_block->eca |= ECA_CEI;
2448 vcpu->arch.sie_block->eca |= ECA_IB;
2450 vcpu->arch.sie_block->eca |= ECA_SII;
2451 if (sclp.has_sigpif)
2452 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2453 if (test_kvm_facility(vcpu->kvm, 129)) {
2454 vcpu->arch.sie_block->eca |= ECA_VX;
2455 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2457 if (test_kvm_facility(vcpu->kvm, 139))
2458 vcpu->arch.sie_block->ecd |= ECD_MEF;
2460 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2461 | SDNXC;
2462 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2464 if (sclp.has_kss)
2465 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2466 else
2467 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2469 if (vcpu->kvm->arch.use_cmma) {
2470 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2471 if (rc)
2472 return rc;
2474 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2475 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2477 kvm_s390_vcpu_crypto_setup(vcpu);
2479 return rc;
2482 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2485 struct kvm_vcpu *vcpu;
2486 struct sie_page *sie_page;
2487 int rc = -EINVAL;
2489 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2490 goto out;
2494 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2495 if (!vcpu)
2496 goto out;
2498 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2499 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2500 if (!sie_page)
2501 goto out_free_cpu;
2503 vcpu->arch.sie_block = &sie_page->sie_block;
2504 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2506 /* the real guest size will always be smaller than msl */
2507 vcpu->arch.sie_block->mso = 0;
2508 vcpu->arch.sie_block->msl = sclp.hamax;
2510 vcpu->arch.sie_block->icpua = id;
2511 spin_lock_init(&vcpu->arch.local_int.lock);
2512 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2513 vcpu->arch.local_int.wq = &vcpu->wq;
2514 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2515 seqcount_init(&vcpu->arch.cputm_seqcount);
2517 rc = kvm_vcpu_init(vcpu, kvm, id);
2518 if (rc)
2519 goto out_free_sie_block;
2520 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2521 vcpu->arch.sie_block);
2522 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2524 return vcpu;
2525 out_free_sie_block:
2526 free_page((unsigned long)(vcpu->arch.sie_block));
2527 out_free_cpu:
2528 kmem_cache_free(kvm_vcpu_cache, vcpu);
2529 out:
2530 return ERR_PTR(rc);
2533 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2535 return kvm_s390_vcpu_has_irq(vcpu, 0);
2538 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2540 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2543 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2545 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2546 exit_sie(vcpu);
2549 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2551 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2554 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2556 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2560 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2562 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2566 * Kick a guest cpu out of SIE and wait until SIE is not running.
2567 * If the CPU is not running (e.g. waiting as idle) the function will
2568 * return immediately. */
2569 void exit_sie(struct kvm_vcpu *vcpu)
2571 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2572 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2573 cpu_relax();
2576 /* Kick a guest cpu out of SIE to process a request synchronously */
2577 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2579 kvm_make_request(req, vcpu);
2580 kvm_s390_vcpu_request(vcpu);
2581 exit_sie(vcpu);
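/*
 * Illustrative use (sketch): force a running VCPU out of SIE so a request
 * is handled before the guest runs again, e.g.
 *
 *	kvm_s390_sync_request(KVM_REQ_TLB_FLUSH, vcpu);
 *
 * PROG_REQUEST keeps the VCPU from re-entering SIE until
 * kvm_s390_vcpu_request_handled() has been called from the request loop.
 */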
2583 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2586 struct kvm *kvm = gmap->private;
2587 struct kvm_vcpu *vcpu;
2588 unsigned long prefix;
2591 if (gmap_is_shadow(gmap))
2592 return;
2593 if (start >= 1UL << 31)
2594 /* We are only interested in prefix pages */
2595 return;
2596 kvm_for_each_vcpu(i, vcpu, kvm) {
2597 /* match against both prefix pages */
2598 prefix = kvm_s390_get_prefix(vcpu);
2599 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2600 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2601 start, end);
2602 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2607 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2609 /* kvm common code refers to this, but never calls it */
2610 BUG();
2611 return 0;
2614 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2615 struct kvm_one_reg *reg)
2620 case KVM_REG_S390_TODPR:
2621 r = put_user(vcpu->arch.sie_block->todpr,
2622 (u32 __user *)reg->addr);
2624 case KVM_REG_S390_EPOCHDIFF:
2625 r = put_user(vcpu->arch.sie_block->epoch,
2626 (u64 __user *)reg->addr);
2628 case KVM_REG_S390_CPU_TIMER:
2629 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2630 (u64 __user *)reg->addr);
2632 case KVM_REG_S390_CLOCK_COMP:
2633 r = put_user(vcpu->arch.sie_block->ckc,
2634 (u64 __user *)reg->addr);
2636 case KVM_REG_S390_PFTOKEN:
2637 r = put_user(vcpu->arch.pfault_token,
2638 (u64 __user *)reg->addr);
2640 case KVM_REG_S390_PFCOMPARE:
2641 r = put_user(vcpu->arch.pfault_compare,
2642 (u64 __user *)reg->addr);
2644 case KVM_REG_S390_PFSELECT:
2645 r = put_user(vcpu->arch.pfault_select,
2646 (u64 __user *)reg->addr);
2648 case KVM_REG_S390_PP:
2649 r = put_user(vcpu->arch.sie_block->pp,
2650 (u64 __user *)reg->addr);
2652 case KVM_REG_S390_GBEA:
2653 r = put_user(vcpu->arch.sie_block->gbea,
2654 (u64 __user *)reg->addr);
2663 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2664 struct kvm_one_reg *reg)
2670 case KVM_REG_S390_TODPR:
2671 r = get_user(vcpu->arch.sie_block->todpr,
2672 (u32 __user *)reg->addr);
2674 case KVM_REG_S390_EPOCHDIFF:
2675 r = get_user(vcpu->arch.sie_block->epoch,
2676 (u64 __user *)reg->addr);
2678 case KVM_REG_S390_CPU_TIMER:
2679 r = get_user(val, (u64 __user *)reg->addr);
2680 if (!r)
2681 kvm_s390_set_cpu_timer(vcpu, val);
2683 case KVM_REG_S390_CLOCK_COMP:
2684 r = get_user(vcpu->arch.sie_block->ckc,
2685 (u64 __user *)reg->addr);
2687 case KVM_REG_S390_PFTOKEN:
2688 r = get_user(vcpu->arch.pfault_token,
2689 (u64 __user *)reg->addr);
2690 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2691 kvm_clear_async_pf_completion_queue(vcpu);
2693 case KVM_REG_S390_PFCOMPARE:
2694 r = get_user(vcpu->arch.pfault_compare,
2695 (u64 __user *)reg->addr);
2697 case KVM_REG_S390_PFSELECT:
2698 r = get_user(vcpu->arch.pfault_select,
2699 (u64 __user *)reg->addr);
2701 case KVM_REG_S390_PP:
2702 r = get_user(vcpu->arch.sie_block->pp,
2703 (u64 __user *)reg->addr);
2705 case KVM_REG_S390_GBEA:
2706 r = get_user(vcpu->arch.sie_block->gbea,
2707 (u64 __user *)reg->addr);
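/*
 * Illustrative userspace sketch (hypothetical snippet, not part of this
 * file): the registers above are accessed through the generic ONE_REG
 * interface, e.g. reading the CPU timer:
 *
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */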
2716 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2718 kvm_s390_vcpu_initial_reset(vcpu);
2722 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2724 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2725 return 0;
2728 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2730 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2731 return 0;
2734 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2735 struct kvm_sregs *sregs)
2737 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2738 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2739 return 0;
2742 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2743 struct kvm_sregs *sregs)
2745 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2746 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2747 return 0;
2750 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2752 if (test_fp_ctl(fpu->fpc))
2753 return -EINVAL;
2754 vcpu->run->s.regs.fpc = fpu->fpc;
2755 if (MACHINE_HAS_VX)
2756 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2757 (freg_t *) fpu->fprs);
2758 else
2759 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2760 return 0;
2763 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2765 /* make sure we have the latest values */
2766 save_fpu_regs();
2767 if (MACHINE_HAS_VX)
2768 convert_vx_to_fp((freg_t *) fpu->fprs,
2769 (__vector128 *) vcpu->run->s.regs.vrs);
2770 else
2771 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2772 fpu->fpc = vcpu->run->s.regs.fpc;
2773 return 0;
2776 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2778 int rc = 0;
2780 if (!is_vcpu_stopped(vcpu))
2781 rc = -EBUSY;
2782 else {
2783 vcpu->run->psw_mask = psw.mask;
2784 vcpu->run->psw_addr = psw.addr;
2786 return rc;
2789 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2790 struct kvm_translation *tr)
2792 return -EINVAL; /* not implemented yet */
2795 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2796 KVM_GUESTDBG_USE_HW_BP | \
2797 KVM_GUESTDBG_ENABLE)
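/*
 * Illustrative userspace sketch (hypothetical snippet, not part of this
 * file): single-stepping a VCPU combines the ENABLE and SINGLESTEP flags:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *
 * Flags outside VALID_GUESTDBG_FLAGS are rejected with -EINVAL below.
 */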
2799 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2800 struct kvm_guest_debug *dbg)
2802 int rc = 0;
2804 vcpu->guest_debug = 0;
2805 kvm_s390_clear_bp_data(vcpu);
2807 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2808 return -EINVAL;
2809 if (!sclp.has_gpere)
2810 return -EINVAL;
2812 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2813 vcpu->guest_debug = dbg->control;
2814 /* enforce guest PER */
2815 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2817 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2818 rc = kvm_s390_import_bp_data(vcpu, dbg);
2819 } else {
2820 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2821 vcpu->arch.guestdbg.last_bp = 0;
2824 if (rc) {
2825 vcpu->guest_debug = 0;
2826 kvm_s390_clear_bp_data(vcpu);
2827 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2829 return rc;
2833 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2834 struct kvm_mp_state *mp_state)
2836 /* CHECK_STOP and LOAD are not supported yet */
2837 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2838 KVM_MP_STATE_OPERATING;
2841 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2842 struct kvm_mp_state *mp_state)
2844 int rc = 0;
2846 /* user space knows about this interface - let it control the state */
2847 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2849 switch (mp_state->mp_state) {
2850 case KVM_MP_STATE_STOPPED:
2851 kvm_s390_vcpu_stop(vcpu);
2852 break;
2853 case KVM_MP_STATE_OPERATING:
2854 kvm_s390_vcpu_start(vcpu);
2855 break;
2856 case KVM_MP_STATE_LOAD:
2857 case KVM_MP_STATE_CHECK_STOP:
2858 /* fall through - CHECK_STOP and LOAD are not supported yet */
2859 default:
2860 rc = -ENXIO;
2863 return rc;
2866 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2868 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2871 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2873 retry:
2874 kvm_s390_vcpu_request_handled(vcpu);
2875 if (!kvm_request_pending(vcpu))
2876 return 0;
2878 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2879 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2880 * This ensures that the ipte instruction for this request has
2881 * already finished. We might race against a second unmapper that
2882 * wants to set the blocking bit. Let's just retry the request loop.
2884 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2886 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2887 kvm_s390_get_prefix(vcpu),
2888 PAGE_SIZE * 2, PROT_WRITE);
2889 if (rc) {
2890 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2891 return rc;
2893 goto retry;
2896 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2897 vcpu->arch.sie_block->ihcpu = 0xffff;
2898 goto retry;
2901 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2902 if (!ibs_enabled(vcpu)) {
2903 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2904 atomic_or(CPUSTAT_IBS,
2905 &vcpu->arch.sie_block->cpuflags);
2910 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2911 if (ibs_enabled(vcpu)) {
2912 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2913 atomic_andnot(CPUSTAT_IBS,
2914 &vcpu->arch.sie_block->cpuflags);
2919 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2920 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2924 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2926 * Disable CMMA virtualization; we will emulate the ESSA
2927 * instruction manually, in order to provide additional
2928 * functionalities needed for live migration.
2930 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2934 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2936 * Re-enable CMMA virtualization if CMMA is available and
2939 if ((vcpu->kvm->arch.use_cmma) &&
2940 (vcpu->kvm->mm->context.use_cmma))
2941 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2945 /* nothing to do, just clear the request */
2946 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2947 return 0;
2951 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2952 const struct kvm_s390_vm_tod_clock *gtod)
2954 struct kvm_vcpu *vcpu;
2955 struct kvm_s390_tod_clock_ext htod;
2958 mutex_lock(&kvm->lock);
2961 get_tod_clock_ext((char *)&htod);
2963 kvm->arch.epoch = gtod->tod - htod.tod;
2964 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2966 if (kvm->arch.epoch > gtod->tod)
2967 kvm->arch.epdx -= 1;
2969 kvm_s390_vcpu_block_all(kvm);
2970 kvm_for_each_vcpu(i, vcpu, kvm) {
2971 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2972 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
2975 kvm_s390_vcpu_unblock_all(kvm);
2977 mutex_unlock(&kvm->lock);
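/*
 * Worked example for the borrow above: epoch is computed modulo 2^64 as
 * guest_tod - host_tod. If the requested guest TOD lies in the host's
 * past, the subtraction wraps and the result becomes numerically larger
 * than gtod->tod; in that case one must be borrowed from the epoch index
 * (epdx) so that the combined <epdx, epoch> value stays consistent.
 */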
2980 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2982 struct kvm_vcpu *vcpu;
2985 mutex_lock(&kvm->lock);
2987 kvm->arch.epoch = tod - get_tod_clock();
2988 kvm_s390_vcpu_block_all(kvm);
2989 kvm_for_each_vcpu(i, vcpu, kvm)
2990 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2991 kvm_s390_vcpu_unblock_all(kvm);
2993 mutex_unlock(&kvm->lock);
2997 * kvm_arch_fault_in_page - fault-in guest page if necessary
2998 * @vcpu: The corresponding virtual cpu
2999 * @gpa: Guest physical address
3000 * @writable: Whether the page should be writable or not
3002 * Make sure that a guest page has been faulted-in on the host.
3004 * Return: Zero on success, negative error code otherwise.
3006 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3008 return gmap_fault(vcpu->arch.gmap, gpa,
3009 writable ? FAULT_FLAG_WRITE : 0);
3012 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3013 unsigned long token)
3015 struct kvm_s390_interrupt inti;
3016 struct kvm_s390_irq irq;
3018 if (start_token) {
3019 irq.u.ext.ext_params2 = token;
3020 irq.type = KVM_S390_INT_PFAULT_INIT;
3021 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3022 } else {
3023 inti.type = KVM_S390_INT_PFAULT_DONE;
3024 inti.parm64 = token;
3025 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3029 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3030 struct kvm_async_pf *work)
3032 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3033 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3036 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3037 struct kvm_async_pf *work)
3039 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3040 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3043 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3044 struct kvm_async_pf *work)
3046 /* s390 will always inject the page directly */
3049 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3052 * s390 will always inject the page directly,
3053 * but we still want check_async_completion to clean up
3055 return true;
3058 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3061 struct kvm_arch_async_pf arch;
3064 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3065 return 0;
3066 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3067 vcpu->arch.pfault_compare)
3068 return 0;
3069 if (psw_extint_disabled(vcpu))
3070 return 0;
3071 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3072 return 0;
3073 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3074 return 0;
3075 if (!vcpu->arch.gmap->pfault_enabled)
3076 return 0;
3078 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3079 hva += current->thread.gmap_addr & ~PAGE_MASK;
3080 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3081 return 0;
3083 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3084 return rc;
3087 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3092 * On s390 notifications for arriving pages will be delivered directly
3093 * to the guest, but the housekeeping for completed pfaults is
3094 * handled outside the worker.
3096 kvm_check_async_pf_completion(vcpu);
3098 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3099 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3104 if (test_cpu_flag(CIF_MCCK_PENDING))
3105 s390_handle_mcck();
3107 if (!kvm_is_ucontrol(vcpu->kvm)) {
3108 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3109 if (rc)
3110 return rc;
3113 rc = kvm_s390_handle_requests(vcpu);
3114 if (rc)
3115 return rc;
3117 if (guestdbg_enabled(vcpu)) {
3118 kvm_s390_backup_guest_per_regs(vcpu);
3119 kvm_s390_patch_guest_per_regs(vcpu);
3122 vcpu->arch.sie_block->icptcode = 0;
3123 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3124 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3125 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3127 return 0;
3130 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3132 struct kvm_s390_pgm_info pgm_info = {
3133 .code = PGM_ADDRESSING,
3134 };
3135 u8 opcode, ilen;
3136 int rc;
3138 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3139 trace_kvm_s390_sie_fault(vcpu);
3142 * We want to inject an addressing exception, which is defined as a
3143 * suppressing or terminating exception. However, since we came here
3144 * by a DAT access exception, the PSW still points to the faulting
3145 * instruction since DAT exceptions are nullifying. So we've got
3146 * to look up the current opcode to get the length of the instruction
3147 * to be able to forward the PSW.
3149 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3150 ilen = insn_length(opcode);
3151 if (rc < 0) {
3152 return rc;
3153 } else if (rc) {
3154 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3155 * Forward by arbitrary ilc, injection will take care of
3156 * nullification if necessary.
3158 pgm_info = vcpu->arch.pgm;
3159 ilen = 4;
3161 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3162 kvm_s390_forward_psw(vcpu, ilen);
3163 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3166 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3168 struct mcck_volatile_info *mcck_info;
3169 struct sie_page *sie_page;
3171 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3172 vcpu->arch.sie_block->icptcode);
3173 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3175 if (guestdbg_enabled(vcpu))
3176 kvm_s390_restore_guest_per_regs(vcpu);
3178 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3179 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3181 if (exit_reason == -EINTR) {
3182 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3183 sie_page = container_of(vcpu->arch.sie_block,
3184 struct sie_page, sie_block);
3185 mcck_info = &sie_page->mcck_info;
3186 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3187 return 0;
3190 if (vcpu->arch.sie_block->icptcode > 0) {
3191 int rc = kvm_handle_sie_intercept(vcpu);
3193 if (rc != -EOPNOTSUPP)
3194 return rc;
3195 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3196 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3197 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3198 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3199 return -EREMOTE;
3200 } else if (exit_reason != -EFAULT) {
3201 vcpu->stat.exit_null++;
3202 return 0;
3203 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3204 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3205 vcpu->run->s390_ucontrol.trans_exc_code =
3206 current->thread.gmap_addr;
3207 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3208 return -EREMOTE;
3209 } else if (current->thread.gmap_pfault) {
3210 trace_kvm_s390_major_guest_pfault(vcpu);
3211 current->thread.gmap_pfault = 0;
3212 if (kvm_arch_setup_async_pf(vcpu))
3213 return 0;
3214 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3216 return vcpu_post_run_fault_in_sie(vcpu);
3219 static int __vcpu_run(struct kvm_vcpu *vcpu)
3221 int rc, exit_reason;
3224 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3225 * ning the guest), so that memslots (and other stuff) are protected
3227 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3229 do {
3230 rc = vcpu_pre_run(vcpu);
3231 if (rc)
3232 break;
3234 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3236 * As PF_VCPU will be used in fault handler, there should be
3237 * no uaccess between guest_enter and guest_exit.
3239 local_irq_disable();
3240 guest_enter_irqoff();
3241 __disable_cpu_timer_accounting(vcpu);
3242 local_irq_enable();
3243 exit_reason = sie64a(vcpu->arch.sie_block,
3244 vcpu->run->s.regs.gprs);
3245 local_irq_disable();
3246 __enable_cpu_timer_accounting(vcpu);
3247 guest_exit_irqoff();
3248 local_irq_enable();
3249 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3251 rc = vcpu_post_run(vcpu, exit_reason);
3252 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3254 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3255 return rc;
3258 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3260 struct runtime_instr_cb *riccb;
3263 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3264 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3265 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3266 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3267 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3268 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3269 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3270 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3271 /* some control register changes require a tlb flush */
3272 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3274 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3275 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3276 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3277 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3278 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3279 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3281 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3282 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3283 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3284 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3285 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3286 kvm_clear_async_pf_completion_queue(vcpu);
3289 * If userspace sets the riccb (e.g. after migration) to a valid state,
3290 * we should enable RI here instead of doing the lazy enablement.
3292 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3293 test_kvm_facility(vcpu->kvm, 64) &&
3294 riccb->valid &&
3295 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3296 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3297 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3300 * If userspace sets the gscb (e.g. after migration) to non-zero,
3301 * we should enable GS here instead of doing the lazy enablement.
3303 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3304 test_kvm_facility(vcpu->kvm, 133) &&
3305 gscb->gssm &&
3306 !vcpu->arch.gs_enabled) {
3307 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3308 vcpu->arch.sie_block->ecb |= ECB_GS;
3309 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3310 vcpu->arch.gs_enabled = 1;
3312 save_access_regs(vcpu->arch.host_acrs);
3313 restore_access_regs(vcpu->run->s.regs.acrs);
3314 /* save host (userspace) fprs/vrs */
3316 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3317 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3318 if (MACHINE_HAS_VX)
3319 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3320 else
3321 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3322 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3323 if (test_fp_ctl(current->thread.fpu.fpc))
3324 /* User space provided an invalid FPC, let's clear it */
3325 current->thread.fpu.fpc = 0;
3326 if (MACHINE_HAS_GS) {
3327 preempt_disable();
3328 __ctl_set_bit(2, 4);
3329 if (current->thread.gs_cb) {
3330 vcpu->arch.host_gscb = current->thread.gs_cb;
3331 save_gs_cb(vcpu->arch.host_gscb);
3333 if (vcpu->arch.gs_enabled) {
3334 current->thread.gs_cb = (struct gs_cb *)
3335 &vcpu->run->s.regs.gscb;
3336 restore_gs_cb(current->thread.gs_cb);
3338 preempt_enable();
3341 kvm_run->kvm_dirty_regs = 0;
3344 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3346 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3347 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3348 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3349 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3350 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3351 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3352 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3353 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3354 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3355 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3356 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3357 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3358 save_access_regs(vcpu->run->s.regs.acrs);
3359 restore_access_regs(vcpu->arch.host_acrs);
3360 /* Save guest register state */
3361 save_fpu_regs();
3362 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3363 /* Restore will be done lazily at return */
3364 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3365 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3366 if (MACHINE_HAS_GS) {
3367 __ctl_set_bit(2, 4);
3368 if (vcpu->arch.gs_enabled)
3369 save_gs_cb(current->thread.gs_cb);
3370 preempt_disable();
3371 current->thread.gs_cb = vcpu->arch.host_gscb;
3372 restore_gs_cb(vcpu->arch.host_gscb);
3373 preempt_enable();
3374 if (!vcpu->arch.host_gscb)
3375 __ctl_clear_bit(2, 4);
3376 vcpu->arch.host_gscb = NULL;
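/*
 * Note: store_regs() is the mirror image of sync_regs() above. Guest
 * access registers, fprs/vrs and the guarded-storage control block live
 * in the host register file while SIE runs (lazy switching), so whatever
 * sync_regs() copied in or redirected has to be saved back and the host
 * state restored here before returning to userspace.
 */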
3381 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3383 int rc;
3385 if (kvm_run->immediate_exit)
3386 return -EINTR;
3388 if (guestdbg_exit_pending(vcpu)) {
3389 kvm_s390_prepare_debug_exit(vcpu);
3393 kvm_sigset_activate(vcpu);
3395 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3396 kvm_s390_vcpu_start(vcpu);
3397 } else if (is_vcpu_stopped(vcpu)) {
3398 pr_err_ratelimited("can't run stopped vcpu %d\n",
3399 vcpu->vcpu_id);
3400 rc = -EINVAL;
3401 goto out;
3403 sync_regs(vcpu, kvm_run);
3404 enable_cpu_timer_accounting(vcpu);
3407 rc = __vcpu_run(vcpu);
3409 if (signal_pending(current) && !rc) {
3410 kvm_run->exit_reason = KVM_EXIT_INTR;
3411 rc = -EINTR;
3414 if (guestdbg_exit_pending(vcpu) && !rc) {
3415 kvm_s390_prepare_debug_exit(vcpu);
3419 if (rc == -EREMOTE) {
3420 /* userspace support is needed, kvm_run has been prepared */
3421 rc = 0;
3424 disable_cpu_timer_accounting(vcpu);
3425 store_regs(vcpu, kvm_run);
3426 out:
3427 kvm_sigset_deactivate(vcpu);
3429 vcpu->stat.exit_userspace++;
3430 return rc;
3434 * store status at address
3435 * we have two special cases:
3436 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3437 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3439 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3441 unsigned char archmode = 1;
3442 freg_t fprs[NUM_FPRS];
3443 unsigned int px;
3444 u64 clkcomp, cputm;
3445 int rc;
3447 px = kvm_s390_get_prefix(vcpu);
3448 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3449 if (write_guest_abs(vcpu, 163, &archmode, 1))
3450 return -EFAULT;
3451 gpa = 0;
3452 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3453 if (write_guest_real(vcpu, 163, &archmode, 1))
3454 return -EFAULT;
3455 gpa = px;
3457 gpa -= __LC_FPREGS_SAVE_AREA;
3459 /* manually convert vector registers if necessary */
3460 if (MACHINE_HAS_VX) {
3461 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3462 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3463 fprs, 128);
3464 } else {
3465 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3466 vcpu->run->s.regs.fprs, 128);
3468 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3469 vcpu->run->s.regs.gprs, 128);
3470 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3471 &vcpu->arch.sie_block->gpsw, 16);
3472 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3473 &px, 4);
3474 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3475 &vcpu->run->s.regs.fpc, 4);
3476 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3477 &vcpu->arch.sie_block->todpr, 4);
3478 cputm = kvm_s390_get_cpu_timer(vcpu);
3479 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3480 &cputm, 8);
3481 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3482 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3483 &clkcomp, 8);
3484 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3485 &vcpu->run->s.regs.acrs, 64);
3486 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3487 &vcpu->arch.sie_block->gcr, 128);
3488 return rc ? -EFAULT : 0;
3491 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3494 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3495 * switch in the run ioctl. Let's update our copies before we save
3496 * it into the save area
3498 save_fpu_regs();
3499 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3500 save_access_regs(vcpu->run->s.regs.acrs);
3502 return kvm_s390_store_status_unloaded(vcpu, addr);
3505 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3507 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3508 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3511 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3514 struct kvm_vcpu *vcpu;
3516 kvm_for_each_vcpu(i, vcpu, kvm) {
3517 __disable_ibs_on_vcpu(vcpu);
3521 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3525 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3526 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3529 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3531 int i, online_vcpus, started_vcpus = 0;
3533 if (!is_vcpu_stopped(vcpu))
3534 return;
3536 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3537 /* Only one cpu at a time may enter/leave the STOPPED state. */
3538 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3539 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3541 for (i = 0; i < online_vcpus; i++) {
3542 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3543 started_vcpus++;
3546 if (started_vcpus == 0) {
3547 /* we're the only active VCPU -> speed it up */
3548 __enable_ibs_on_vcpu(vcpu);
3549 } else if (started_vcpus == 1) {
3551 * As we are starting a second VCPU, we have to disable
3552 * the IBS facility on all VCPUs to remove potentially
3553 * outstanding ENABLE requests.
3555 __disable_ibs_on_all_vcpus(vcpu->kvm);
3558 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3560 * Another VCPU might have used IBS while we were offline.
3561 * Let's play safe and flush the VCPU at startup.
3563 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3564 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3568 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3570 int i, online_vcpus, started_vcpus = 0;
3571 struct kvm_vcpu *started_vcpu = NULL;
3573 if (is_vcpu_stopped(vcpu))
3574 return;
3576 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3577 /* Only one cpu at a time may enter/leave the STOPPED state. */
3578 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3579 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3581 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3582 kvm_s390_clear_stop_irq(vcpu);
3584 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3585 __disable_ibs_on_vcpu(vcpu);
3587 for (i = 0; i < online_vcpus; i++) {
3588 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3589 started_vcpus++;
3590 started_vcpu = vcpu->kvm->vcpus[i];
3594 if (started_vcpus == 1) {
3596 * As we only have one VCPU left, we want to enable the
3597 * IBS facility for that VCPU to speed it up.
3599 __enable_ibs_on_vcpu(started_vcpu);
3602 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3606 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3607 struct kvm_enable_cap *cap)
3615 case KVM_CAP_S390_CSS_SUPPORT:
3616 if (!vcpu->kvm->arch.css_support) {
3617 vcpu->kvm->arch.css_support = 1;
3618 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3619 trace_kvm_s390_enable_css(vcpu->kvm);
3630 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3631 struct kvm_s390_mem_op *mop)
3633 void __user *uaddr = (void __user *)mop->buf;
3634 void *tmpbuf = NULL;
3636 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3637 | KVM_S390_MEMOP_F_CHECK_ONLY;
3639 if (mop->flags & ~supported_flags)
3640 return -EINVAL;
3642 if (mop->size > MEM_OP_MAX_SIZE)
3643 return -E2BIG;
3645 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3646 tmpbuf = vmalloc(mop->size);
3647 if (!tmpbuf)
3648 return -ENOMEM;
3651 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3654 case KVM_S390_MEMOP_LOGICAL_READ:
3655 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3656 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3657 mop->size, GACC_FETCH);
3658 break;
3660 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3661 if (r == 0) {
3662 if (copy_to_user(uaddr, tmpbuf, mop->size))
3663 r = -EFAULT;
3665 break;
3666 case KVM_S390_MEMOP_LOGICAL_WRITE:
3667 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3668 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3669 mop->size, GACC_STORE);
3670 break;
3672 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3673 r = -EFAULT;
3674 break;
3676 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3677 break;
3678 default:
3679 r = -EINVAL;
3682 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3684 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3685 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3687 vfree(tmpbuf);
3688 return r;
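/*
 * Illustrative userspace sketch (hypothetical snippet, not part of this
 * file): reading guest memory through the logical (per-AR, DAT-on)
 * address space:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = length,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY set, only the access check is
 * performed and no data is copied.
 */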
3691 long kvm_arch_vcpu_ioctl(struct file *filp,
3692 unsigned int ioctl, unsigned long arg)
3694 struct kvm_vcpu *vcpu = filp->private_data;
3695 void __user *argp = (void __user *)arg;
3700 case KVM_S390_IRQ: {
3701 struct kvm_s390_irq s390irq;
3703 r = -EFAULT;
3704 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3705 break;
3706 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3709 case KVM_S390_INTERRUPT: {
3710 struct kvm_s390_interrupt s390int;
3711 struct kvm_s390_irq s390irq;
3713 r = -EFAULT;
3714 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3715 break;
3716 if (s390int_to_s390irq(&s390int, &s390irq))
3717 return -EINVAL;
3718 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3721 case KVM_S390_STORE_STATUS:
3722 idx = srcu_read_lock(&vcpu->kvm->srcu);
3723 r = kvm_s390_vcpu_store_status(vcpu, arg);
3724 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3725 break;
3726 case KVM_S390_SET_INITIAL_PSW: {
3729 r = -EFAULT;
3730 if (copy_from_user(&psw, argp, sizeof(psw)))
3731 break;
3732 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3735 case KVM_S390_INITIAL_RESET:
3736 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3738 case KVM_SET_ONE_REG:
3739 case KVM_GET_ONE_REG: {
3740 struct kvm_one_reg reg;
3741 r = -EFAULT;
3742 if (copy_from_user(&reg, argp, sizeof(reg)))
3743 break;
3744 if (ioctl == KVM_SET_ONE_REG)
3745 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3746 else
3747 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3750 #ifdef CONFIG_KVM_S390_UCONTROL
3751 case KVM_S390_UCAS_MAP: {
3752 struct kvm_s390_ucas_mapping ucasmap;
3754 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3755 r = -EFAULT;
3756 break;
3759 if (!kvm_is_ucontrol(vcpu->kvm)) {
3760 r = -EINVAL;
3761 break;
3764 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3765 ucasmap.vcpu_addr, ucasmap.length);
3768 case KVM_S390_UCAS_UNMAP: {
3769 struct kvm_s390_ucas_mapping ucasmap;
3771 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3772 r = -EFAULT;
3773 break;
3776 if (!kvm_is_ucontrol(vcpu->kvm)) {
3777 r = -EINVAL;
3778 break;
3781 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3786 case KVM_S390_VCPU_FAULT: {
3787 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3788 break;
3789 #endif
3790 case KVM_ENABLE_CAP:
3792 struct kvm_enable_cap cap;
3793 r = -EFAULT;
3794 if (copy_from_user(&cap, argp, sizeof(cap)))
3795 break;
3796 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3799 case KVM_S390_MEM_OP: {
3800 struct kvm_s390_mem_op mem_op;
3802 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3803 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3804 else
3805 r = -EFAULT;
3806 break;
3808 case KVM_S390_SET_IRQ_STATE: {
3809 struct kvm_s390_irq_state irq_state;
3811 r = -EFAULT;
3812 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3813 break;
3814 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3815 irq_state.len == 0 ||
3816 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3817 r = -EINVAL;
3818 break;
3820 /* do not use irq_state.flags, it will break old QEMUs */
3821 r = kvm_s390_set_irq_state(vcpu,
3822 (void __user *) irq_state.buf,
3823 irq_state.len);
3824 break;
3826 case KVM_S390_GET_IRQ_STATE: {
3827 struct kvm_s390_irq_state irq_state;
3829 r = -EFAULT;
3830 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3831 break;
3832 if (irq_state.len == 0) {
3833 r = -EINVAL;
3834 break;
3836 /* do not use irq_state.flags, it will break old QEMUs */
3837 r = kvm_s390_get_irq_state(vcpu,
3838 (__u8 __user *) irq_state.buf,
3839 irq_state.len);
3840 break;
3842 default:
3843 r = -ENOTTY;
3846 return r;
3848 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3850 #ifdef CONFIG_KVM_S390_UCONTROL
3851 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3852 && (kvm_is_ucontrol(vcpu->kvm))) {
3853 vmf->page = virt_to_page(vcpu->arch.sie_block);
3854 get_page(vmf->page);
3855 return 0;
3857 #endif
3858 return VM_FAULT_SIGBUS;
3861 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3862 unsigned long npages)
3864 return 0;
3867 /* Section: memory related */
3868 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3869 struct kvm_memory_slot *memslot,
3870 const struct kvm_userspace_memory_region *mem,
3871 enum kvm_mr_change change)
3873 /* A few sanity checks. We can have memory slots which have to start and
3874 end on a segment boundary (1MB). The memory in userland may be
3875 fragmented into various different vmas. It is okay to mmap() and
3876 munmap() stuff in this slot after doing this call at any time */
3878 if (mem->userspace_addr & 0xffffful)
3879 return -EINVAL;
3881 if (mem->memory_size & 0xffffful)
3882 return -EINVAL;
3884 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3885 return -EINVAL;
3887 return 0;
3890 void kvm_arch_commit_memory_region(struct kvm *kvm,
3891 const struct kvm_userspace_memory_region *mem,
3892 const struct kvm_memory_slot *old,
3893 const struct kvm_memory_slot *new,
3894 enum kvm_mr_change change)
3898 /* If the basics of the memslot do not change, we do not want
3899 * to update the gmap. Every update causes several unnecessary
3900 * segment translation exceptions. This is usually handled just
3901 * fine by the normal fault handler + gmap, but it will also
3902 * cause faults on the prefix page of running guest CPUs.
3904 if (old->userspace_addr == mem->userspace_addr &&
3905 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3906 old->npages * PAGE_SIZE == mem->memory_size)
3907 return;
3909 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3910 mem->guest_phys_addr, mem->memory_size);
3911 if (rc)
3912 pr_warn("failed to commit memory region\n");
3916 static inline unsigned long nonhyp_mask(int i)
3918 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3920 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
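/*
 * Reading of the arithmetic above (explanatory sketch): hmfai carries one
 * 2-bit count per facility-list word, most significant field first;
 * (sclp.hmfai << i * 2) >> 30 isolates the count for word i. The base
 * mask 0x0000ffffffffffff already hides the 16 most significant facility
 * bits of the word, and each increment of the count shifts the mask right
 * to hide 16 more, e.g. a count of 3 masks the whole word.
 */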
3923 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3925 vcpu->valid_wakeup = false;
3928 static int __init kvm_s390_init(void)
3932 if (!sclp.has_sief2) {
3933 pr_info("SIE not available\n");
3934 return -ENODEV;
3937 for (i = 0; i < 16; i++)
3938 kvm_s390_fac_list_mask[i] |=
3939 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3941 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3944 static void __exit kvm_s390_exit(void)
3946 kvm_exit();
3949 module_init(kvm_s390_init);
3950 module_exit(kvm_s390_exit);
3953 * Enable autoloading of the kvm module.
3954 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3955 * since x86 takes a different approach.
3957 #include <linux/miscdevice.h>
3958 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3959 MODULE_ALIAS("devname:kvm");