/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
38 #include <asm/pgtable.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
49 #define KMSG_COMPONENT "kvm-s390"
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
53 #define CREATE_TRACE_POINTS
55 #include "trace-s390.h"
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
133 /* allow nested virtualization in KVM (if enabled by user space) */
135 module_param(nested, int, S_IRUGO);
136 MODULE_PARM_DESC(nested, "Nested virtualization support");
138 /* upper facilities limit for kvm */
139 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
141 unsigned long kvm_s390_fac_list_mask_size(void)
143 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
144 return ARRAY_SIZE(kvm_s390_fac_list_mask);
147 /* available cpu features supported by kvm */
148 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
149 /* available subfunctions indicated via query / "test bit" */
150 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
152 static struct gmap_notifier gmap_notifier;
153 static struct gmap_notifier vsie_gmap_notifier;
154 debug_info_t *kvm_s390_dbf;
156 /* Section: not file related */
/* Nothing to do: SIE is always available on s390. Always succeeds. */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
/* forward declaration; the definition follows later in this file */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
167 * This callback is executed during stop_machine(). All CPUs are therefore
168 * temporarily stopped. In order not to change guest behavior, we have to
169 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
170 * so a CPU won't be stopped while calculating with the epoch.
172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
176 struct kvm_vcpu *vcpu;
178 unsigned long long *delta = v;
180 list_for_each_entry(kvm, &vm_list, vm_list) {
181 kvm->arch.epoch -= *delta;
182 kvm_for_each_vcpu(i, vcpu, kvm) {
183 vcpu->arch.sie_block->epoch -= *delta;
184 if (vcpu->arch.cputm_enabled)
185 vcpu->arch.cputm_start += *delta;
186 if (vcpu->arch.vsie_block)
187 vcpu->arch.vsie_block->epoch -= *delta;
193 static struct notifier_block kvm_clock_notifier = {
194 .notifier_call = kvm_clock_sync,
197 int kvm_arch_hardware_setup(void)
199 gmap_notifier.notifier_call = kvm_gmap_notifier;
200 gmap_register_pte_notifier(&gmap_notifier);
201 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
202 gmap_register_pte_notifier(&vsie_gmap_notifier);
203 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
204 &kvm_clock_notifier);
208 void kvm_arch_hardware_unsetup(void)
210 gmap_unregister_pte_notifier(&gmap_notifier);
211 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
212 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
213 &kvm_clock_notifier);
216 static void allow_cpu_feat(unsigned long nr)
218 set_bit_inv(nr, kvm_s390_available_cpu_feat);
221 static inline int plo_test_bit(unsigned char nr)
223 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
227 /* Parameter registers are ignored for "test bit" */
237 static void kvm_s390_cpu_feat_init(void)
241 for (i = 0; i < 256; ++i) {
243 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
246 if (test_facility(28)) /* TOD-clock steering */
247 ptff(kvm_s390_available_subfunc.ptff,
248 sizeof(kvm_s390_available_subfunc.ptff),
251 if (test_facility(17)) { /* MSA */
252 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.kmac);
254 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kmc);
256 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.km);
258 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
259 kvm_s390_available_subfunc.kimd);
260 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.klmd);
263 if (test_facility(76)) /* MSA3 */
264 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.pckmo);
266 if (test_facility(77)) { /* MSA4 */
267 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmctr);
269 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.kmf);
271 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
272 kvm_s390_available_subfunc.kmo);
273 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.pcc);
276 if (test_facility(57)) /* MSA5 */
277 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
278 kvm_s390_available_subfunc.ppno);
280 if (test_facility(146)) /* MSA8 */
281 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
282 kvm_s390_available_subfunc.kma);
284 if (MACHINE_HAS_ESOP)
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
287 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
288 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
290 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
291 !test_facility(3) || !nested)
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
294 if (sclp.has_64bscao)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
301 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
303 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
305 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
307 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
309 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
311 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
312 * all skey handling functions read/set the skey from the PGSTE
313 * instead of the real storage key.
315 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
316 * pages being detected as preserved although they are resident.
318 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
319 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
321 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
322 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
323 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
325 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
326 * cannot easily shadow the SCA because of the ipte lock.
330 int kvm_arch_init(void *opaque)
332 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
336 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
337 debug_unregister(kvm_s390_dbf);
341 kvm_s390_cpu_feat_init();
343 /* Register floating interrupt controller interface. */
344 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
347 void kvm_arch_exit(void)
349 debug_unregister(kvm_s390_dbf);
352 /* Section: device related */
353 long kvm_arch_dev_ioctl(struct file *filp,
354 unsigned int ioctl, unsigned long arg)
356 if (ioctl == KVM_S390_ENABLE_SIE)
357 return s390_enable_sie();
361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
366 case KVM_CAP_S390_PSW:
367 case KVM_CAP_S390_GMAP:
368 case KVM_CAP_SYNC_MMU:
369 #ifdef CONFIG_KVM_S390_UCONTROL
370 case KVM_CAP_S390_UCONTROL:
372 case KVM_CAP_ASYNC_PF:
373 case KVM_CAP_SYNC_REGS:
374 case KVM_CAP_ONE_REG:
375 case KVM_CAP_ENABLE_CAP:
376 case KVM_CAP_S390_CSS_SUPPORT:
377 case KVM_CAP_IOEVENTFD:
378 case KVM_CAP_DEVICE_CTRL:
379 case KVM_CAP_ENABLE_CAP_VM:
380 case KVM_CAP_S390_IRQCHIP:
381 case KVM_CAP_VM_ATTRIBUTES:
382 case KVM_CAP_MP_STATE:
383 case KVM_CAP_IMMEDIATE_EXIT:
384 case KVM_CAP_S390_INJECT_IRQ:
385 case KVM_CAP_S390_USER_SIGP:
386 case KVM_CAP_S390_USER_STSI:
387 case KVM_CAP_S390_SKEYS:
388 case KVM_CAP_S390_IRQ_STATE:
389 case KVM_CAP_S390_USER_INSTR0:
390 case KVM_CAP_S390_CMMA_MIGRATION:
391 case KVM_CAP_S390_AIS:
394 case KVM_CAP_S390_MEM_OP:
397 case KVM_CAP_NR_VCPUS:
398 case KVM_CAP_MAX_VCPUS:
399 r = KVM_S390_BSCA_CPU_SLOTS;
400 if (!kvm_s390_use_sca_entries())
402 else if (sclp.has_esca && sclp.has_64bscao)
403 r = KVM_S390_ESCA_CPU_SLOTS;
405 case KVM_CAP_NR_MEMSLOTS:
406 r = KVM_USER_MEM_SLOTS;
408 case KVM_CAP_S390_COW:
409 r = MACHINE_HAS_ESOP;
411 case KVM_CAP_S390_VECTOR_REGISTERS:
414 case KVM_CAP_S390_RI:
415 r = test_facility(64);
417 case KVM_CAP_S390_GS:
418 r = test_facility(133);
426 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
427 struct kvm_memory_slot *memslot)
429 gfn_t cur_gfn, last_gfn;
430 unsigned long address;
431 struct gmap *gmap = kvm->arch.gmap;
433 /* Loop over all guest pages */
434 last_gfn = memslot->base_gfn + memslot->npages;
435 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
436 address = gfn_to_hva_memslot(memslot, cur_gfn);
438 if (test_and_clear_guest_dirty(gmap->mm, address))
439 mark_page_dirty(kvm, cur_gfn);
440 if (fatal_signal_pending(current))
446 /* Section: vm related */
447 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
450 * Get (and clear) the dirty memory log for a memory slot.
452 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
453 struct kvm_dirty_log *log)
457 struct kvm_memslots *slots;
458 struct kvm_memory_slot *memslot;
461 if (kvm_is_ucontrol(kvm))
464 mutex_lock(&kvm->slots_lock);
467 if (log->slot >= KVM_USER_MEM_SLOTS)
470 slots = kvm_memslots(kvm);
471 memslot = id_to_memslot(slots, log->slot);
473 if (!memslot->dirty_bitmap)
476 kvm_s390_sync_dirty_log(kvm, memslot);
477 r = kvm_get_dirty_log(kvm, log, &is_dirty);
481 /* Clear the dirty log */
483 n = kvm_dirty_bitmap_bytes(memslot);
484 memset(memslot->dirty_bitmap, 0, n);
488 mutex_unlock(&kvm->slots_lock);
492 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
495 struct kvm_vcpu *vcpu;
497 kvm_for_each_vcpu(i, vcpu, kvm) {
498 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
502 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
510 case KVM_CAP_S390_IRQCHIP:
511 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
512 kvm->arch.use_irqchip = 1;
515 case KVM_CAP_S390_USER_SIGP:
516 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
517 kvm->arch.user_sigp = 1;
520 case KVM_CAP_S390_VECTOR_REGISTERS:
521 mutex_lock(&kvm->lock);
522 if (kvm->created_vcpus) {
524 } else if (MACHINE_HAS_VX) {
525 set_kvm_facility(kvm->arch.model.fac_mask, 129);
526 set_kvm_facility(kvm->arch.model.fac_list, 129);
527 if (test_facility(134)) {
528 set_kvm_facility(kvm->arch.model.fac_mask, 134);
529 set_kvm_facility(kvm->arch.model.fac_list, 134);
531 if (test_facility(135)) {
532 set_kvm_facility(kvm->arch.model.fac_mask, 135);
533 set_kvm_facility(kvm->arch.model.fac_list, 135);
538 mutex_unlock(&kvm->lock);
539 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
540 r ? "(not available)" : "(success)");
542 case KVM_CAP_S390_RI:
544 mutex_lock(&kvm->lock);
545 if (kvm->created_vcpus) {
547 } else if (test_facility(64)) {
548 set_kvm_facility(kvm->arch.model.fac_mask, 64);
549 set_kvm_facility(kvm->arch.model.fac_list, 64);
552 mutex_unlock(&kvm->lock);
553 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
554 r ? "(not available)" : "(success)");
556 case KVM_CAP_S390_AIS:
557 mutex_lock(&kvm->lock);
558 if (kvm->created_vcpus) {
561 set_kvm_facility(kvm->arch.model.fac_mask, 72);
562 set_kvm_facility(kvm->arch.model.fac_list, 72);
565 mutex_unlock(&kvm->lock);
566 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
567 r ? "(not available)" : "(success)");
569 case KVM_CAP_S390_GS:
571 mutex_lock(&kvm->lock);
572 if (atomic_read(&kvm->online_vcpus)) {
574 } else if (test_facility(133)) {
575 set_kvm_facility(kvm->arch.model.fac_mask, 133);
576 set_kvm_facility(kvm->arch.model.fac_list, 133);
579 mutex_unlock(&kvm->lock);
580 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
581 r ? "(not available)" : "(success)");
583 case KVM_CAP_S390_USER_STSI:
584 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
585 kvm->arch.user_stsi = 1;
588 case KVM_CAP_S390_USER_INSTR0:
589 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
590 kvm->arch.user_instr0 = 1;
591 icpt_operexc_on_all_vcpus(kvm);
601 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
605 switch (attr->attr) {
606 case KVM_S390_VM_MEM_LIMIT_SIZE:
608 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
609 kvm->arch.mem_limit);
610 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
620 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
624 switch (attr->attr) {
625 case KVM_S390_VM_MEM_ENABLE_CMMA:
631 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
632 mutex_lock(&kvm->lock);
633 if (!kvm->created_vcpus) {
634 kvm->arch.use_cmma = 1;
637 mutex_unlock(&kvm->lock);
639 case KVM_S390_VM_MEM_CLR_CMMA:
644 if (!kvm->arch.use_cmma)
647 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
648 mutex_lock(&kvm->lock);
649 idx = srcu_read_lock(&kvm->srcu);
650 s390_reset_cmma(kvm->arch.gmap->mm);
651 srcu_read_unlock(&kvm->srcu, idx);
652 mutex_unlock(&kvm->lock);
655 case KVM_S390_VM_MEM_LIMIT_SIZE: {
656 unsigned long new_limit;
658 if (kvm_is_ucontrol(kvm))
661 if (get_user(new_limit, (u64 __user *)attr->addr))
664 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
665 new_limit > kvm->arch.mem_limit)
671 /* gmap_create takes last usable address */
672 if (new_limit != KVM_S390_NO_MEM_LIMIT)
676 mutex_lock(&kvm->lock);
677 if (!kvm->created_vcpus) {
678 /* gmap_create will round the limit up */
679 struct gmap *new = gmap_create(current->mm, new_limit);
684 gmap_remove(kvm->arch.gmap);
686 kvm->arch.gmap = new;
690 mutex_unlock(&kvm->lock);
691 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
692 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
693 (void *) kvm->arch.gmap->asce);
703 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
705 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
707 struct kvm_vcpu *vcpu;
710 if (!test_kvm_facility(kvm, 76))
713 mutex_lock(&kvm->lock);
714 switch (attr->attr) {
715 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
717 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
718 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
719 kvm->arch.crypto.aes_kw = 1;
720 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
722 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
724 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
725 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
726 kvm->arch.crypto.dea_kw = 1;
727 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
729 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
730 kvm->arch.crypto.aes_kw = 0;
731 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
732 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
733 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
735 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
736 kvm->arch.crypto.dea_kw = 0;
737 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
738 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
739 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
742 mutex_unlock(&kvm->lock);
746 kvm_for_each_vcpu(i, vcpu, kvm) {
747 kvm_s390_vcpu_crypto_setup(vcpu);
750 mutex_unlock(&kvm->lock);
754 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
757 struct kvm_vcpu *vcpu;
759 kvm_for_each_vcpu(cx, vcpu, kvm)
760 kvm_s390_sync_request(req, vcpu);
764 * Must be called with kvm->srcu held to avoid races on memslots, and with
765 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
767 static int kvm_s390_vm_start_migration(struct kvm *kvm)
769 struct kvm_s390_migration_state *mgs;
770 struct kvm_memory_slot *ms;
771 /* should be the only one */
772 struct kvm_memslots *slots;
773 unsigned long ram_pages;
776 /* migration mode already enabled */
777 if (kvm->arch.migration_state)
780 slots = kvm_memslots(kvm);
781 if (!slots || !slots->used_slots)
784 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
787 kvm->arch.migration_state = mgs;
789 if (kvm->arch.use_cmma) {
791 * Get the last slot. They should be sorted by base_gfn, so the
792 * last slot is also the one at the end of the address space.
793 * We have verified above that at least one slot is present.
795 ms = slots->memslots + slots->used_slots - 1;
796 /* round up so we only use full longs */
797 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
798 /* allocate enough bytes to store all the bits */
799 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
800 if (!mgs->pgste_bitmap) {
802 kvm->arch.migration_state = NULL;
806 mgs->bitmap_size = ram_pages;
807 atomic64_set(&mgs->dirty_pages, ram_pages);
808 /* mark all the pages in active slots as dirty */
809 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
810 ms = slots->memslots + slotnr;
811 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
814 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
820 * Must be called with kvm->lock to avoid races with ourselves and
821 * kvm_s390_vm_start_migration.
823 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
825 struct kvm_s390_migration_state *mgs;
827 /* migration mode already disabled */
828 if (!kvm->arch.migration_state)
830 mgs = kvm->arch.migration_state;
831 kvm->arch.migration_state = NULL;
833 if (kvm->arch.use_cmma) {
834 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
835 vfree(mgs->pgste_bitmap);
841 static int kvm_s390_vm_set_migration(struct kvm *kvm,
842 struct kvm_device_attr *attr)
844 int idx, res = -ENXIO;
846 mutex_lock(&kvm->lock);
847 switch (attr->attr) {
848 case KVM_S390_VM_MIGRATION_START:
849 idx = srcu_read_lock(&kvm->srcu);
850 res = kvm_s390_vm_start_migration(kvm);
851 srcu_read_unlock(&kvm->srcu, idx);
853 case KVM_S390_VM_MIGRATION_STOP:
854 res = kvm_s390_vm_stop_migration(kvm);
859 mutex_unlock(&kvm->lock);
864 static int kvm_s390_vm_get_migration(struct kvm *kvm,
865 struct kvm_device_attr *attr)
867 u64 mig = (kvm->arch.migration_state != NULL);
869 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
872 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
877 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
881 if (copy_from_user(>od_high, (void __user *)attr->addr,
887 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
892 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
896 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
899 kvm_s390_set_tod_clock(kvm, gtod);
900 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
904 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
911 switch (attr->attr) {
912 case KVM_S390_VM_TOD_HIGH:
913 ret = kvm_s390_set_tod_high(kvm, attr);
915 case KVM_S390_VM_TOD_LOW:
916 ret = kvm_s390_set_tod_low(kvm, attr);
925 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
929 if (copy_to_user((void __user *)attr->addr, >od_high,
932 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
937 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
941 gtod = kvm_s390_get_tod_clock_fast(kvm);
942 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
944 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
949 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
956 switch (attr->attr) {
957 case KVM_S390_VM_TOD_HIGH:
958 ret = kvm_s390_get_tod_high(kvm, attr);
960 case KVM_S390_VM_TOD_LOW:
961 ret = kvm_s390_get_tod_low(kvm, attr);
970 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
972 struct kvm_s390_vm_cpu_processor *proc;
973 u16 lowest_ibc, unblocked_ibc;
976 mutex_lock(&kvm->lock);
977 if (kvm->created_vcpus) {
981 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
986 if (!copy_from_user(proc, (void __user *)attr->addr,
988 kvm->arch.model.cpuid = proc->cpuid;
989 lowest_ibc = sclp.ibc >> 16 & 0xfff;
990 unblocked_ibc = sclp.ibc & 0xfff;
991 if (lowest_ibc && proc->ibc) {
992 if (proc->ibc > unblocked_ibc)
993 kvm->arch.model.ibc = unblocked_ibc;
994 else if (proc->ibc < lowest_ibc)
995 kvm->arch.model.ibc = lowest_ibc;
997 kvm->arch.model.ibc = proc->ibc;
999 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1000 S390_ARCH_FAC_LIST_SIZE_BYTE);
1001 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1002 kvm->arch.model.ibc,
1003 kvm->arch.model.cpuid);
1004 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1005 kvm->arch.model.fac_list[0],
1006 kvm->arch.model.fac_list[1],
1007 kvm->arch.model.fac_list[2]);
1012 mutex_unlock(&kvm->lock);
1016 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1017 struct kvm_device_attr *attr)
1019 struct kvm_s390_vm_cpu_feat data;
1022 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1024 if (!bitmap_subset((unsigned long *) data.feat,
1025 kvm_s390_available_cpu_feat,
1026 KVM_S390_VM_CPU_FEAT_NR_BITS))
1029 mutex_lock(&kvm->lock);
1030 if (!atomic_read(&kvm->online_vcpus)) {
1031 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1032 KVM_S390_VM_CPU_FEAT_NR_BITS);
1035 mutex_unlock(&kvm->lock);
1039 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1040 struct kvm_device_attr *attr)
1043 * Once supported by kernel + hw, we have to store the subfunctions
1044 * in kvm->arch and remember that user space configured them.
1049 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1053 switch (attr->attr) {
1054 case KVM_S390_VM_CPU_PROCESSOR:
1055 ret = kvm_s390_set_processor(kvm, attr);
1057 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1058 ret = kvm_s390_set_processor_feat(kvm, attr);
1060 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1061 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1067 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1069 struct kvm_s390_vm_cpu_processor *proc;
1072 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1077 proc->cpuid = kvm->arch.model.cpuid;
1078 proc->ibc = kvm->arch.model.ibc;
1079 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1080 S390_ARCH_FAC_LIST_SIZE_BYTE);
1081 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082 kvm->arch.model.ibc,
1083 kvm->arch.model.cpuid);
1084 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085 kvm->arch.model.fac_list[0],
1086 kvm->arch.model.fac_list[1],
1087 kvm->arch.model.fac_list[2]);
1088 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1095 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1097 struct kvm_s390_vm_cpu_machine *mach;
1100 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1105 get_cpu_id((struct cpuid *) &mach->cpuid);
1106 mach->ibc = sclp.ibc;
1107 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1108 S390_ARCH_FAC_LIST_SIZE_BYTE);
1109 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1110 sizeof(S390_lowcore.stfle_fac_list));
1111 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1112 kvm->arch.model.ibc,
1113 kvm->arch.model.cpuid);
1114 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1118 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1122 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1129 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1130 struct kvm_device_attr *attr)
1132 struct kvm_s390_vm_cpu_feat data;
1134 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1135 KVM_S390_VM_CPU_FEAT_NR_BITS);
1136 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1141 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1142 struct kvm_device_attr *attr)
1144 struct kvm_s390_vm_cpu_feat data;
1146 bitmap_copy((unsigned long *) data.feat,
1147 kvm_s390_available_cpu_feat,
1148 KVM_S390_VM_CPU_FEAT_NR_BITS);
1149 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1154 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1155 struct kvm_device_attr *attr)
1158 * Once we can actually configure subfunctions (kernel + hw support),
1159 * we have to check if they were already set by user space, if so copy
1160 * them from kvm->arch.
1165 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1166 struct kvm_device_attr *attr)
1168 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1169 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1173 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1177 switch (attr->attr) {
1178 case KVM_S390_VM_CPU_PROCESSOR:
1179 ret = kvm_s390_get_processor(kvm, attr);
1181 case KVM_S390_VM_CPU_MACHINE:
1182 ret = kvm_s390_get_machine(kvm, attr);
1184 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1185 ret = kvm_s390_get_processor_feat(kvm, attr);
1187 case KVM_S390_VM_CPU_MACHINE_FEAT:
1188 ret = kvm_s390_get_machine_feat(kvm, attr);
1190 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1191 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1193 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1194 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1200 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1204 switch (attr->group) {
1205 case KVM_S390_VM_MEM_CTRL:
1206 ret = kvm_s390_set_mem_control(kvm, attr);
1208 case KVM_S390_VM_TOD:
1209 ret = kvm_s390_set_tod(kvm, attr);
1211 case KVM_S390_VM_CPU_MODEL:
1212 ret = kvm_s390_set_cpu_model(kvm, attr);
1214 case KVM_S390_VM_CRYPTO:
1215 ret = kvm_s390_vm_set_crypto(kvm, attr);
1217 case KVM_S390_VM_MIGRATION:
1218 ret = kvm_s390_vm_set_migration(kvm, attr);
1228 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1232 switch (attr->group) {
1233 case KVM_S390_VM_MEM_CTRL:
1234 ret = kvm_s390_get_mem_control(kvm, attr);
1236 case KVM_S390_VM_TOD:
1237 ret = kvm_s390_get_tod(kvm, attr);
1239 case KVM_S390_VM_CPU_MODEL:
1240 ret = kvm_s390_get_cpu_model(kvm, attr);
1242 case KVM_S390_VM_MIGRATION:
1243 ret = kvm_s390_vm_get_migration(kvm, attr);
1253 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1257 switch (attr->group) {
1258 case KVM_S390_VM_MEM_CTRL:
1259 switch (attr->attr) {
1260 case KVM_S390_VM_MEM_ENABLE_CMMA:
1261 case KVM_S390_VM_MEM_CLR_CMMA:
1262 ret = sclp.has_cmma ? 0 : -ENXIO;
1264 case KVM_S390_VM_MEM_LIMIT_SIZE:
1272 case KVM_S390_VM_TOD:
1273 switch (attr->attr) {
1274 case KVM_S390_VM_TOD_LOW:
1275 case KVM_S390_VM_TOD_HIGH:
1283 case KVM_S390_VM_CPU_MODEL:
1284 switch (attr->attr) {
1285 case KVM_S390_VM_CPU_PROCESSOR:
1286 case KVM_S390_VM_CPU_MACHINE:
1287 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1288 case KVM_S390_VM_CPU_MACHINE_FEAT:
1289 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1292 /* configuring subfunctions is not supported yet */
1293 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1299 case KVM_S390_VM_CRYPTO:
1300 switch (attr->attr) {
1301 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1302 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1303 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1304 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1312 case KVM_S390_VM_MIGRATION:
1323 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1329 if (args->flags != 0)
1332 /* Is this guest using storage keys? */
1333 if (!mm_use_skey(current->mm))
1334 return KVM_S390_GET_SKEYS_NONE;
1336 /* Enforce sane limit on memory allocation */
1337 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1340 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1344 down_read(¤t->mm->mmap_sem);
1345 for (i = 0; i < args->count; i++) {
1346 hva = gfn_to_hva(kvm, args->start_gfn + i);
1347 if (kvm_is_error_hva(hva)) {
1352 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1356 up_read(¤t->mm->mmap_sem);
1359 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1360 sizeof(uint8_t) * args->count);
1369 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1375 if (args->flags != 0)
1378 /* Enforce sane limit on memory allocation */
1379 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1382 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1386 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1387 sizeof(uint8_t) * args->count);
1393 /* Enable storage key handling for the guest */
1394 r = s390_enable_skey();
1398 down_read(¤t->mm->mmap_sem);
1399 for (i = 0; i < args->count; i++) {
1400 hva = gfn_to_hva(kvm, args->start_gfn + i);
1401 if (kvm_is_error_hva(hva)) {
1406 /* Lowest order bit is reserved */
1407 if (keys[i] & 0x01) {
1412 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1416 up_read(¤t->mm->mmap_sem);
1423 * Base address and length must be sent at the start of each block, therefore
1424 * it's cheaper to send some clean data, as long as it's less than the size of
1427 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1428 /* for consistency */
1429 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1432 * This function searches for the next page with dirty CMMA attributes, and
1433 * saves the attributes in the buffer up to either the end of the buffer or
1434 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1435 * no trailing clean bytes are saved.
1436 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1437 * output buffer will indicate 0 as length.
/*
 * NOTE(review): this fragment is extraction-damaged -- braces, "break"/
 * "return" lines, declarations (e.g. "res") and blank lines are missing.
 * Compare against upstream arch/s390/kvm/kvm-s390.c before editing.
 *
 * Walks the migration pgste bitmap and copies the CMMA page-usage values
 * of dirty guest pages to userspace, stopping at the end of the buffer or
 * when a gap of at least KVM_S390_MAX_BIT_DISTANCE clean pages is found
 * (no trailing clean bytes are saved). With KVM_S390_CMMA_PEEK the bitmap
 * is read without clearing bits.
 */
1439 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1440 struct kvm_s390_cmma_log *args)
1442 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1443 unsigned long bufsize, hva, pgstev, i, next, cur;
1444 int srcu_idx, peek, r = 0, rr;
1447 cur = args->start_gfn;
1448 i = next = pgstev = 0;
1450 if (unlikely(!kvm->arch.use_cmma))
1452 /* Invalid/unsupported flags were specified */
1453 if (args->flags & ~KVM_S390_CMMA_PEEK)
1455 /* Migration mode query, and we are not doing a migration */
1456 peek = !!(args->flags & KVM_S390_CMMA_PEEK)
1459 /* CMMA is disabled or was not used, or the buffer has length zero */
1460 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1461 if (!bufsize || !kvm->mm->context.use_cmma) {
1462 memset(args, 0, sizeof(*args));
1467 /* We are not peeking, and there are no dirty pages */
1468 if (!atomic64_read(&s->dirty_pages)) {
1469 memset(args, 0, sizeof(*args));
/* find the first dirty bit at or after start_gfn, wrapping around once */
1472 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1474 if (cur >= s->bitmap_size) /* nothing found, loop back */
1475 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1476 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1477 memset(args, 0, sizeof(*args));
1480 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1483 res = vmalloc(bufsize);
1487 args->start_gfn = cur;
/* walk guest pages under mmap_sem + kvm->srcu */
1489 down_read(&kvm->mm->mmap_sem);
1490 srcu_idx = srcu_read_lock(&kvm->srcu);
1491 while (i < bufsize) {
1492 hva = gfn_to_hva(kvm, cur);
1493 if (kvm_is_error_hva(hva)) {
1497 /* decrement only if we actually flipped the bit to 0 */
1498 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1499 atomic64_dec(&s->dirty_pages);
1500 r = get_pgste(kvm->mm, hva, &pgstev);
1503 /* save the value */
1504 res[i++] = (pgstev >> 24) & 0x3;
1506 * if the next bit is too far away, stop.
1507 * if we reached the previous "next", find the next one
1510 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1513 next = find_next_bit(s->pgste_bitmap,
1514 s->bitmap_size, cur + 1);
1515 /* reached the end of the bitmap or of the buffer, stop */
1516 if ((next >= s->bitmap_size) ||
1517 (next >= args->start_gfn + bufsize))
1522 srcu_read_unlock(&kvm->srcu, srcu_idx);
1523 up_read(&kvm->mm->mmap_sem);
/* report the number of still-dirty pages back to userspace */
1525 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1527 rr = copy_to_user((void __user *)args->values, res, args->count);
1536 * This function sets the CMMA attributes for the given pages. If the input
1537 * buffer has zero length, no action is taken, otherwise the attributes are
1538 * set and the mm->context.use_cmma flag is set.
/*
 * NOTE(review): extraction-damaged fragment -- braces, declarations
 * (e.g. "bits"), error returns and the pgste read around line 1581 are
 * missing; compare against upstream kvm-s390.c.
 *
 * Sets the CMMA attributes for args->count pages starting at
 * args->start_gfn from a userspace buffer. A zero-length buffer is a
 * no-op; otherwise mm->context.use_cmma is enabled afterwards.
 */
1540 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1541 const struct kvm_s390_cmma_log *args)
1543 unsigned long hva, mask, pgstev, i;
1545 int srcu_idx, r = 0;
1549 if (!kvm->arch.use_cmma)
1551 /* invalid/unsupported flags */
1552 if (args->flags != 0)
1554 /* Enforce sane limit on memory allocation */
1555 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1558 if (args->count == 0)
1561 bits = vmalloc(sizeof(*bits) * args->count);
1565 r = copy_from_user(bits, (void __user *)args->values, args->count);
/* apply each value under mmap_sem + kvm->srcu */
1571 down_read(&kvm->mm->mmap_sem);
1572 srcu_idx = srcu_read_lock(&kvm->srcu);
1573 for (i = 0; i < args->count; i++) {
1574 hva = gfn_to_hva(kvm, args->start_gfn + i);
1575 if (kvm_is_error_hva(hva)) {
/* usage state lives in bits 24-25 of the pgste */
1581 pgstev = pgstev << 24;
1582 mask &= _PGSTE_GPS_USAGE_MASK;
1583 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1585 srcu_read_unlock(&kvm->srcu, srcu_idx);
1586 up_read(&kvm->mm->mmap_sem);
/* first successful set turns on CMMA for the mm (needs write lock) */
1588 if (!kvm->mm->context.use_cmma) {
1589 down_write(&kvm->mm->mmap_sem);
1590 kvm->mm->context.use_cmma = 1;
1591 up_write(&kvm->mm->mmap_sem);
/*
 * NOTE(review): extraction-damaged fragment -- the "switch (ioctl)" line,
 * "r = -EFAULT" setups, "break" statements, mutex locking around the CMMA
 * calls and the default case are missing; compare against upstream.
 *
 * Top-level dispatcher for VM-scoped ioctls (interrupt injection,
 * capability enabling, irqchip setup, device attributes, storage keys and
 * CMMA migration bits). Returns 0 or a negative error code in r.
 */
1598 long kvm_arch_vm_ioctl(struct file *filp,
1599 unsigned int ioctl, unsigned long arg)
1601 struct kvm *kvm = filp->private_data;
1602 void __user *argp = (void __user *)arg;
1603 struct kvm_device_attr attr;
1607 case KVM_S390_INTERRUPT: {
1608 struct kvm_s390_interrupt s390int;
1611 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1613 r = kvm_s390_inject_vm(kvm, &s390int);
1616 case KVM_ENABLE_CAP: {
1617 struct kvm_enable_cap cap;
1619 if (copy_from_user(&cap, argp, sizeof(cap)))
1621 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1624 case KVM_CREATE_IRQCHIP: {
1625 struct kvm_irq_routing_entry routing;
1628 if (kvm->arch.use_irqchip) {
1629 /* Set up dummy routing. */
1630 memset(&routing, 0, sizeof(routing));
1631 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1635 case KVM_SET_DEVICE_ATTR: {
1637 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1639 r = kvm_s390_vm_set_attr(kvm, &attr);
1642 case KVM_GET_DEVICE_ATTR: {
1644 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1646 r = kvm_s390_vm_get_attr(kvm, &attr);
1649 case KVM_HAS_DEVICE_ATTR: {
1651 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1653 r = kvm_s390_vm_has_attr(kvm, &attr);
1656 case KVM_S390_GET_SKEYS: {
1657 struct kvm_s390_skeys args;
1660 if (copy_from_user(&args, argp,
1661 sizeof(struct kvm_s390_skeys)))
1663 r = kvm_s390_get_skeys(kvm, &args);
1666 case KVM_S390_SET_SKEYS: {
1667 struct kvm_s390_skeys args;
1670 if (copy_from_user(&args, argp,
1671 sizeof(struct kvm_s390_skeys)))
1673 r = kvm_s390_set_skeys(kvm, &args);
1676 case KVM_S390_GET_CMMA_BITS: {
1677 struct kvm_s390_cmma_log args;
1680 if (copy_from_user(&args, argp, sizeof(args)))
1682 r = kvm_s390_get_cmma_bits(kvm, &args);
/* updated start_gfn/count/remaining are written back to userspace */
1684 r = copy_to_user(argp, &args, sizeof(args));
1690 case KVM_S390_SET_CMMA_BITS: {
1691 struct kvm_s390_cmma_log args;
1694 if (copy_from_user(&args, argp, sizeof(args)))
1696 r = kvm_s390_set_cmma_bits(kvm, &args);
/*
 * NOTE(review): extraction-damaged fragment -- the asm statement's opening
 * lines, register setup and return of the condition code are missing.
 *
 * Issues PQAP(QCI) to query the AP (crypto adapter) configuration into the
 * 128-byte buffer at config. The 0xb2af opcode is hand-assembled because
 * the instruction may not be known to the assembler.
 */
1706 static int kvm_s390_query_ap_config(u8 *config)
1708 u32 fcn_code = 0x04000000UL;
1711 memset(config, 0, 128);
1715 ".long 0xb2af0000\n" /* PQAP(QCI) */
1721 : "r" (fcn_code), "r" (config)
1722 : "cc", "0", "2", "memory"
1728 static int kvm_s390_apxa_installed(void)
1733 if (test_facility(12)) {
1734 cc = kvm_s390_query_ap_config(config);
1737 pr_err("PQAP(QCI) failed with cc=%d", cc);
1739 return config[0] & 0x40;
1745 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1747 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1749 if (kvm_s390_apxa_installed())
1750 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1752 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1755 static u64 kvm_s390_get_initial_cpuid(void)
1760 cpuid.version = 0xff;
1761 return *((u64 *) &cpuid);
1764 static void kvm_s390_crypto_init(struct kvm *kvm)
1766 if (!test_kvm_facility(kvm, 76))
1769 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1770 kvm_s390_set_crycb_format(kvm);
1772 /* Enable AES/DEA protected key functions by default */
1773 kvm->arch.crypto.aes_kw = 1;
1774 kvm->arch.crypto.dea_kw = 1;
1775 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1776 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1777 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1778 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1781 static void sca_dispose(struct kvm *kvm)
1783 if (kvm->arch.use_esca)
1784 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1786 free_page((unsigned long)(kvm->arch.sca));
1787 kvm->arch.sca = NULL;
/*
 * NOTE(review): extraction-damaged fragment -- declarations (rc, i),
 * error-path labels/gotos, sca_offset advancing and several NULL checks
 * are missing; compare against upstream kvm-s390.c before editing.
 *
 * Architecture part of VM creation: validates the (ucontrol) type,
 * enables SIE, allocates the basic SCA and sie_page2, seeds the facility
 * mask/list, initializes crypto, floating interrupts and the guest
 * address space (gmap), and registers the debug feature.
 */
1790 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1792 gfp_t alloc_flags = GFP_KERNEL;
1794 char debug_name[16];
1795 static unsigned long sca_offset;
1798 #ifdef CONFIG_KVM_S390_UCONTROL
1799 if (type & ~KVM_VM_S390_UCONTROL)
1801 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1808 rc = s390_enable_sie();
1814 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1816 kvm->arch.use_esca = 0; /* start with basic SCA */
1817 if (!sclp.has_64bscao)
1818 alloc_flags |= GFP_DMA;
1819 rwlock_init(&kvm->arch.sca_lock);
1820 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags)
/* stagger SCAs within the page to spread cache-line usage */
1823 spin_lock(&kvm_lock);
1825 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1827 kvm->arch.sca = (struct bsca_block *)
1828 ((char *) kvm->arch.sca + sca_offset);
1829 spin_unlock(&kvm_lock);
1831 sprintf(debug_name, "kvm-%u", current->pid);
1833 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1837 kvm->arch.sie_page2 =
1838 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1839 if (!kvm->arch.sie_page2)
1842 /* Populate the facility mask initially. */
1843 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1844 sizeof(S390_lowcore.stfle_fac_list));
1845 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1846 if (i < kvm_s390_fac_list_mask_size())
1847 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1849 kvm->arch.model.fac_mask[i] = 0UL;
1852 /* Populate the facility list initially. */
1853 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1854 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1855 S390_ARCH_FAC_LIST_SIZE_BYTE);
/* facility 74 (STHYI) is always emulated */
1857 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1858 set_kvm_facility(kvm->arch.model.fac_list, 74);
1860 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1861 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1863 kvm_s390_crypto_init(kvm);
1865 mutex_init(&kvm->arch.float_int.ais_lock);
1866 kvm->arch.float_int.simm = 0;
1867 kvm->arch.float_int.nimm = 0;
1868 spin_lock_init(&kvm->arch.float_int.lock);
1869 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1870 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1871 init_waitqueue_head(&kvm->arch.ipte_wq);
1872 mutex_init(&kvm->arch.ipte_mutex);
1874 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1875 VM_EVENT(kvm, 3, "vm created with type %lu", type);
/* ucontrol VMs manage their own gmap; others get one up to mem_limit */
1877 if (type & KVM_VM_S390_UCONTROL) {
1878 kvm->arch.gmap = NULL;
1879 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1881 if (sclp.hamax == U64_MAX)
1882 kvm->arch.mem_limit = TASK_SIZE_MAX;
1884 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1886 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1887 if (!kvm->arch.gmap)
1889 kvm->arch.gmap->private = kvm;
1890 kvm->arch.gmap->pfault_enabled = 0;
1893 kvm->arch.css_support = 0;
1894 kvm->arch.use_irqchip = 0;
1895 kvm->arch.epoch = 0;
1897 spin_lock_init(&kvm->arch.start_stop_lock);
1898 kvm_s390_vsie_init(kvm);
1899 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
/* error unwind path */
1903 free_page((unsigned long)kvm->arch.sie_page2);
1904 debug_unregister(kvm->arch.dbf);
1906 KVM_EVENT(3, "creation of vm failed: %d", rc);
1910 bool kvm_arch_has_vcpu_debugfs(void)
1915 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1920 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1922 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1923 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1924 kvm_s390_clear_local_irqs(vcpu);
1925 kvm_clear_async_pf_completion_queue(vcpu);
1926 if (!kvm_is_ucontrol(vcpu->kvm))
1929 if (kvm_is_ucontrol(vcpu->kvm))
1930 gmap_remove(vcpu->arch.gmap);
1932 if (vcpu->kvm->arch.use_cmma)
1933 kvm_s390_vcpu_unsetup_cmma(vcpu);
1934 free_page((unsigned long)(vcpu->arch.sie_block));
1936 kvm_vcpu_uninit(vcpu);
1937 kmem_cache_free(kvm_vcpu_cache, vcpu);
1940 static void kvm_free_vcpus(struct kvm *kvm)
1943 struct kvm_vcpu *vcpu;
1945 kvm_for_each_vcpu(i, vcpu, kvm)
1946 kvm_arch_vcpu_destroy(vcpu);
1948 mutex_lock(&kvm->lock);
1949 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1950 kvm->vcpus[i] = NULL;
1952 atomic_set(&kvm->online_vcpus, 0);
1953 mutex_unlock(&kvm->lock);
1956 void kvm_arch_destroy_vm(struct kvm *kvm)
1958 kvm_free_vcpus(kvm);
1960 debug_unregister(kvm->arch.dbf);
1961 free_page((unsigned long)kvm->arch.sie_page2);
1962 if (!kvm_is_ucontrol(kvm))
1963 gmap_remove(kvm->arch.gmap);
1964 kvm_s390_destroy_adapters(kvm);
1965 kvm_s390_clear_float_irqs(kvm);
1966 kvm_s390_vsie_destroy(kvm);
1967 if (kvm->arch.migration_state) {
1968 vfree(kvm->arch.migration_state->pgste_bitmap);
1969 kfree(kvm->arch.migration_state);
1971 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1974 /* Section: vcpu related */
1975 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1977 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1978 if (!vcpu->arch.gmap)
1980 vcpu->arch.gmap->private = vcpu->kvm;
1985 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1987 if (!kvm_s390_use_sca_entries())
1989 read_lock(&vcpu->kvm->arch.sca_lock);
1990 if (vcpu->kvm->arch.use_esca) {
1991 struct esca_block *sca = vcpu->kvm->arch.sca;
1993 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1994 sca->cpu[vcpu->vcpu_id].sda = 0;
1996 struct bsca_block *sca = vcpu->kvm->arch.sca;
1998 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1999 sca->cpu[vcpu->vcpu_id].sda = 0;
2001 read_unlock(&vcpu->kvm->arch.sca_lock);
2004 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2006 if (!kvm_s390_use_sca_entries()) {
2007 struct bsca_block *sca = vcpu->kvm->arch.sca;
2009 /* we still need the basic sca for the ipte control */
2010 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2011 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2013 read_lock(&vcpu->kvm->arch.sca_lock);
2014 if (vcpu->kvm->arch.use_esca) {
2015 struct esca_block *sca = vcpu->kvm->arch.sca;
2017 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2018 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2019 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2020 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2021 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2023 struct bsca_block *sca = vcpu->kvm->arch.sca;
2025 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2026 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2027 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2028 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2030 read_unlock(&vcpu->kvm->arch.sca_lock);
2033 /* Basic SCA to Extended SCA data copy routines */
2034 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2037 d->sigp_ctrl.c = s->sigp_ctrl.c;
2038 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2041 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2045 d->ipte_control = s->ipte_control;
2047 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2048 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2051 static int sca_switch_to_extended(struct kvm *kvm)
2053 struct bsca_block *old_sca = kvm->arch.sca;
2054 struct esca_block *new_sca;
2055 struct kvm_vcpu *vcpu;
2056 unsigned int vcpu_idx;
2059 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2063 scaoh = (u32)((u64)(new_sca) >> 32);
2064 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2066 kvm_s390_vcpu_block_all(kvm);
2067 write_lock(&kvm->arch.sca_lock);
2069 sca_copy_b_to_e(new_sca, old_sca);
2071 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2072 vcpu->arch.sie_block->scaoh = scaoh;
2073 vcpu->arch.sie_block->scaol = scaol;
2074 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2076 kvm->arch.sca = new_sca;
2077 kvm->arch.use_esca = 1;
2079 write_unlock(&kvm->arch.sca_lock);
2080 kvm_s390_vcpu_unblock_all(kvm);
2082 free_page((unsigned long)old_sca);
2084 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2085 old_sca, kvm->arch.sca);
2089 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2093 if (!kvm_s390_use_sca_entries()) {
2094 if (id < KVM_MAX_VCPUS)
2098 if (id < KVM_S390_BSCA_CPU_SLOTS)
2100 if (!sclp.has_esca || !sclp.has_64bscao)
2103 mutex_lock(&kvm->lock);
2104 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2105 mutex_unlock(&kvm->lock);
2107 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
/*
 * NOTE(review): extraction-damaged fragment -- the rest of the
 * kvm_valid_regs flag list (continuation of line 2114), the
 * MACHINE_HAS_VX condition around line 2129 and the final return are
 * missing; compare against upstream kvm-s390.c.
 *
 * Architecture part of vcpu init: invalidate the pfault token, announce
 * which register sets are synced via kvm_run, and - for ucontrol VMs -
 * create the vcpu's private gmap.
 */
2110 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2112 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2113 kvm_clear_async_pf_completion_queue(vcpu);
2114 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2120 kvm_s390_set_prefix(vcpu, 0);
/* runtime-instrumentation / guarded-storage sync depend on facilities */
2121 if (test_kvm_facility(vcpu->kvm, 64))
2122 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2123 if (test_kvm_facility(vcpu->kvm, 133))
2124 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2125 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2126 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2129 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2131 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2133 if (kvm_is_ucontrol(vcpu->kvm))
2134 return __kvm_ucontrol_vcpu_init(vcpu);
2139 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2140 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2142 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2143 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2144 vcpu->arch.cputm_start = get_tod_clock_fast();
2145 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2148 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2149 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2151 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2152 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2153 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2154 vcpu->arch.cputm_start = 0;
2155 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2158 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2159 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2161 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2162 vcpu->arch.cputm_enabled = true;
2163 __start_cpu_timer_accounting(vcpu);
2166 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2167 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2169 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2170 __stop_cpu_timer_accounting(vcpu);
2171 vcpu->arch.cputm_enabled = false;
2174 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2176 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2177 __enable_cpu_timer_accounting(vcpu);
2181 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2183 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2184 __disable_cpu_timer_accounting(vcpu);
2188 /* set the cpu timer - may only be called from the VCPU thread itself */
2189 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2191 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2192 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2193 if (vcpu->arch.cputm_enabled)
2194 vcpu->arch.cputm_start = get_tod_clock_fast();
2195 vcpu->arch.sie_block->cputm = cputm;
2196 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2200 /* update and get the cpu timer - can also be called from other VCPU threads */
2201 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2206 if (unlikely(!vcpu->arch.cputm_enabled))
2207 return vcpu->arch.sie_block->cputm;
2209 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2211 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2213 * If the writer would ever execute a read in the critical
2214 * section, e.g. in irq context, we have a deadlock.
2216 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2217 value = vcpu->arch.sie_block->cputm;
2218 /* if cputm_start is 0, accounting is being started/stopped */
2219 if (likely(vcpu->arch.cputm_start))
2220 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2221 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2226 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2229 gmap_enable(vcpu->arch.enabled_gmap);
2230 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2231 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2232 __start_cpu_timer_accounting(vcpu);
2236 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2239 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2240 __stop_cpu_timer_accounting(vcpu);
2241 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2242 vcpu->arch.enabled_gmap = gmap_get_enabled();
2243 gmap_disable(vcpu->arch.enabled_gmap);
2247 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2249 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2250 vcpu->arch.sie_block->gpsw.mask = 0UL;
2251 vcpu->arch.sie_block->gpsw.addr = 0UL;
2252 kvm_s390_set_prefix(vcpu, 0);
2253 kvm_s390_set_cpu_timer(vcpu, 0);
2254 vcpu->arch.sie_block->ckc = 0UL;
2255 vcpu->arch.sie_block->todpr = 0;
2256 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2257 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2258 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2259 /* make sure the new fpc will be lazily loaded */
2261 current->thread.fpu.fpc = 0;
2262 vcpu->arch.sie_block->gbea = 1;
2263 vcpu->arch.sie_block->pp = 0;
2264 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2265 kvm_clear_async_pf_completion_queue(vcpu);
2266 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2267 kvm_s390_vcpu_stop(vcpu);
2268 kvm_s390_clear_local_irqs(vcpu);
2271 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2273 mutex_lock(&vcpu->kvm->lock);
2275 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2277 mutex_unlock(&vcpu->kvm->lock);
2278 if (!kvm_is_ucontrol(vcpu->kvm)) {
2279 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2282 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2283 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2284 /* make vcpu_load load the right gmap on the first trigger */
2285 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2288 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2290 if (!test_kvm_facility(vcpu->kvm, 76))
2293 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2295 if (vcpu->kvm->arch.crypto.aes_kw)
2296 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2297 if (vcpu->kvm->arch.crypto.dea_kw)
2298 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2300 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2303 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2305 free_page(vcpu->arch.sie_block->cbrlo);
2306 vcpu->arch.sie_block->cbrlo = 0;
2309 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2311 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2312 if (!vcpu->arch.sie_block->cbrlo)
2315 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2319 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2321 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2323 vcpu->arch.sie_block->ibc = model->ibc;
2324 if (test_kvm_facility(vcpu->kvm, 7))
2325 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/*
 * NOTE(review): extraction-damaged fragment -- the declaration of rc,
 * the continuation of the cpuflags initializer (line 2332), several
 * sclp.has_* conditions guarding the eca bits (lines 2357-2361), the
 * KSS/ICTL condition, error handling for setup_cmma and the final
 * return are missing; compare against upstream kvm-s390.c.
 *
 * Configure a freshly created vcpu's SIE block: base cpuflags, CPU
 * model, execution-control (ecb/eca/ecd) bits per available facilities,
 * CMMA buffer, the clock-comparator wakeup timer and crypto settings.
 */
2328 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2332 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2336 if (test_kvm_facility(vcpu->kvm, 78))
2337 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2338 else if (test_kvm_facility(vcpu->kvm, 8))
2339 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2341 kvm_s390_vcpu_setup_model(vcpu);
2343 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2344 if (MACHINE_HAS_ESOP)
2345 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2346 if (test_kvm_facility(vcpu->kvm, 9))
2347 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2348 if (test_kvm_facility(vcpu->kvm, 73))
2349 vcpu->arch.sie_block->ecb |= ECB_TE;
2351 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2352 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2353 if (test_kvm_facility(vcpu->kvm, 130))
2354 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2355 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2357 vcpu->arch.sie_block->eca |= ECA_CEI;
2359 vcpu->arch.sie_block->eca |= ECA_IB;
2361 vcpu->arch.sie_block->eca |= ECA_SII;
2362 if (sclp.has_sigpif)
2363 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2364 if (test_kvm_facility(vcpu->kvm, 129)) {
2365 vcpu->arch.sie_block->eca |= ECA_VX;
2366 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2368 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2370 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
/* keyless-subset mode vs. key-instruction interception */
2373 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2375 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2377 if (vcpu->kvm->arch.use_cmma) {
2378 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2382 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2383 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2385 kvm_s390_vcpu_crypto_setup(vcpu);
/*
 * NOTE(review): extraction-damaged fragment -- the second parameter line
 * (vcpu id), declaration of rc, the -ENOMEM/-EINVAL error assignments,
 * goto targets/labels and the final returns are missing; compare
 * against upstream kvm-s390.c.
 *
 * Allocate a vcpu and its SIE control block (one zeroed page holding
 * sie block + itdb), wire up local interrupt state and register it with
 * common KVM code. Returns the vcpu or an ERR_PTR on failure.
 */
2390 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2393 struct kvm_vcpu *vcpu;
2394 struct sie_page *sie_page;
2397 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2402 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2406 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2407 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2411 vcpu->arch.sie_block = &sie_page->sie_block;
2412 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2414 /* the real guest size will always be smaller than msl */
2415 vcpu->arch.sie_block->mso = 0;
2416 vcpu->arch.sie_block->msl = sclp.hamax;
2418 vcpu->arch.sie_block->icpua = id;
2419 spin_lock_init(&vcpu->arch.local_int.lock);
2420 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2421 vcpu->arch.local_int.wq = &vcpu->wq;
2422 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2423 seqcount_init(&vcpu->arch.cputm_seqcount);
2425 rc = kvm_vcpu_init(vcpu, kvm, id);
2427 goto out_free_sie_block;
2428 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2429 vcpu->arch.sie_block);
2430 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
/* error unwind */
2434 free_page((unsigned long)(vcpu->arch.sie_block));
2436 kmem_cache_free(kvm_vcpu_cache, vcpu);
/* A vcpu is runnable when it has a deliverable interrupt pending. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
2446 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2448 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2452 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2454 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2457 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2459 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2463 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2465 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2469 * Kick a guest cpu out of SIE and wait until SIE is not running.
2470 * If the CPU is not running (e.g. waiting as idle) the function will
2471 * return immediately. */
2472 void exit_sie(struct kvm_vcpu *vcpu)
2474 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2475 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2479 /* Kick a guest cpu out of SIE to process a request synchronously */
2480 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2482 kvm_make_request(req, vcpu);
2483 kvm_s390_vcpu_request(vcpu);
2486 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2489 struct kvm *kvm = gmap->private;
2490 struct kvm_vcpu *vcpu;
2491 unsigned long prefix;
2494 if (gmap_is_shadow(gmap))
2496 if (start >= 1UL << 31)
2497 /* We are only interested in prefix pages */
2499 kvm_for_each_vcpu(i, vcpu, kvm) {
2500 /* match against both prefix pages */
2501 prefix = kvm_s390_get_prefix(vcpu);
2502 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2503 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2505 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
2517 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2518 struct kvm_one_reg *reg)
2523 case KVM_REG_S390_TODPR:
2524 r = put_user(vcpu->arch.sie_block->todpr,
2525 (u32 __user *)reg->addr);
2527 case KVM_REG_S390_EPOCHDIFF:
2528 r = put_user(vcpu->arch.sie_block->epoch,
2529 (u64 __user *)reg->addr);
2531 case KVM_REG_S390_CPU_TIMER:
2532 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2533 (u64 __user *)reg->addr);
2535 case KVM_REG_S390_CLOCK_COMP:
2536 r = put_user(vcpu->arch.sie_block->ckc,
2537 (u64 __user *)reg->addr);
2539 case KVM_REG_S390_PFTOKEN:
2540 r = put_user(vcpu->arch.pfault_token,
2541 (u64 __user *)reg->addr);
2543 case KVM_REG_S390_PFCOMPARE:
2544 r = put_user(vcpu->arch.pfault_compare,
2545 (u64 __user *)reg->addr);
2547 case KVM_REG_S390_PFSELECT:
2548 r = put_user(vcpu->arch.pfault_select,
2549 (u64 __user *)reg->addr);
2551 case KVM_REG_S390_PP:
2552 r = put_user(vcpu->arch.sie_block->pp,
2553 (u64 __user *)reg->addr);
2555 case KVM_REG_S390_GBEA:
2556 r = put_user(vcpu->arch.sie_block->gbea,
2557 (u64 __user *)reg->addr);
2566 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2567 struct kvm_one_reg *reg)
2573 case KVM_REG_S390_TODPR:
2574 r = get_user(vcpu->arch.sie_block->todpr,
2575 (u32 __user *)reg->addr);
2577 case KVM_REG_S390_EPOCHDIFF:
2578 r = get_user(vcpu->arch.sie_block->epoch,
2579 (u64 __user *)reg->addr);
2581 case KVM_REG_S390_CPU_TIMER:
2582 r = get_user(val, (u64 __user *)reg->addr);
2584 kvm_s390_set_cpu_timer(vcpu, val);
2586 case KVM_REG_S390_CLOCK_COMP:
2587 r = get_user(vcpu->arch.sie_block->ckc,
2588 (u64 __user *)reg->addr);
2590 case KVM_REG_S390_PFTOKEN:
2591 r = get_user(vcpu->arch.pfault_token,
2592 (u64 __user *)reg->addr);
2593 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2594 kvm_clear_async_pf_completion_queue(vcpu);
2596 case KVM_REG_S390_PFCOMPARE:
2597 r = get_user(vcpu->arch.pfault_compare,
2598 (u64 __user *)reg->addr);
2600 case KVM_REG_S390_PFSELECT:
2601 r = get_user(vcpu->arch.pfault_select,
2602 (u64 __user *)reg->addr);
2604 case KVM_REG_S390_PP:
2605 r = get_user(vcpu->arch.sie_block->pp,
2606 (u64 __user *)reg->addr);
2608 case KVM_REG_S390_GBEA:
2609 r = get_user(vcpu->arch.sie_block->gbea,
2610 (u64 __user *)reg->addr);
/* KVM_S390_INITIAL_RESET: perform an initial CPU reset. Always succeeds. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
2625 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2627 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
2631 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2633 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/*
 * KVM_SET_SREGS: install user-supplied access registers (into the run
 * save area) and control registers (directly into the SIE block).
 */
2637 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2638 struct kvm_sregs *sregs)
2640 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2641 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * KVM_GET_SREGS: read back access registers (from the run save area)
 * and control registers (from the SIE block) for user space.
 */
2645 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2646 struct kvm_sregs *sregs)
2648 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2649 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * KVM_SET_FPU: validate and install the guest FP control word and FP
 * registers. An invalid FPC is rejected (test_fp_ctl). On machines
 * with vector support the 16 FPRs are converted into the VX register
 * layout; otherwise they are copied verbatim.
 */
2653 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2655 if (test_fp_ctl(fpu->fpc))
2657 vcpu->run->s.regs.fpc = fpu->fpc;
2659 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2660 (freg_t *) fpu->fprs);
2662 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/*
 * KVM_GET_FPU: return the guest FP state to user space, converting
 * from the VX layout on vector-capable machines.
 */
2666 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2668 /* make sure we have the latest values */
2671 convert_vx_to_fp((freg_t *) fpu->fprs,
2672 (__vector128 *) vcpu->run->s.regs.vrs);
2674 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2675 fpu->fpc = vcpu->run->s.regs.fpc;
/*
 * KVM_S390_SET_INITIAL_PSW: set the guest PSW (mask + address) in the
 * shared run structure. Only permitted while the vcpu is stopped.
 */
2679 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2683 if (!is_vcpu_stopped(vcpu))
2686 vcpu->run->psw_mask = psw.mask;
2687 vcpu->run->psw_addr = psw.addr;
/* KVM_TRANSLATE is not supported on s390. */
2692 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2693 struct kvm_translation *tr)
2695 return -EINVAL; /* not implemented yet */
/* Debug flags user space may pass via KVM_SET_GUEST_DEBUG. */
2698 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2699 KVM_GUESTDBG_USE_HW_BP | \
2700 KVM_GUESTDBG_ENABLE)
/*
 * KVM_SET_GUEST_DEBUG: (re)configure guest debugging. Enabling forces
 * guest PER (CPUSTAT_P) so intercepts are generated; disabling clears
 * PER and any imported breakpoint data. Rejects unknown flags and
 * machines without the guest-PER sclp facility.
 */
2702 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2703 struct kvm_guest_debug *dbg)
2707 vcpu->guest_debug = 0;
2708 kvm_s390_clear_bp_data(vcpu);
2710 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2712 if (!sclp.has_gpere)
2715 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2716 vcpu->guest_debug = dbg->control;
2717 /* enforce guest PER */
2718 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2720 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2721 rc = kvm_s390_import_bp_data(vcpu, dbg);
2723 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2724 vcpu->arch.guestdbg.last_bp = 0;
/* on failure (or disable) leave debugging fully off */
2728 vcpu->guest_debug = 0;
2729 kvm_s390_clear_bp_data(vcpu);
2730 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
/*
 * KVM_GET_MP_STATE: report STOPPED vs OPERATING only.
 */
2736 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2737 struct kvm_mp_state *mp_state)
2739 /* CHECK_STOP and LOAD are not supported yet */
2740 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2741 KVM_MP_STATE_OPERATING;
/*
 * KVM_SET_MP_STATE: start or stop the vcpu as requested by user space.
 * Using this ioctl hands cpu-state control to user space permanently
 * (user_cpu_state_ctrl). LOAD/CHECK_STOP are not supported.
 */
2744 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2745 struct kvm_mp_state *mp_state)
2749 /* user space knows about this interface - let it control the state */
2750 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2752 switch (mp_state->mp_state) {
2753 case KVM_MP_STATE_STOPPED:
2754 kvm_s390_vcpu_stop(vcpu);
2756 case KVM_MP_STATE_OPERATING:
2757 kvm_s390_vcpu_start(vcpu);
2759 case KVM_MP_STATE_LOAD:
2760 case KVM_MP_STATE_CHECK_STOP:
2761 /* fall through - CHECK_STOP and LOAD are not supported yet */
/* True if the interlock-and-broadcast-suppression bit is set for this vcpu. */
2769 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2771 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
/*
 * Process all pending KVM_REQ_* requests for this vcpu before entering
 * SIE: prefix-page reprotection (MMU_RELOAD), TLB flush, IBS
 * enable/disable, operation-exception intercept, and CMMA on/off for
 * migration start/stop. Retries MMU_RELOAD on failure by re-queueing
 * the request. NOTE(review): break/return lines are missing from this
 * listing; code text left untouched.
 */
2774 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2777 kvm_s390_vcpu_request_handled(vcpu);
2778 if (!kvm_request_pending(vcpu))
2781 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2782 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2783 * This ensures that the ipte instruction for this request has
2784 * already finished. We might race against a second unmapper that
2785 * wants to set the blocking bit. Lets just retry the request loop.
2787 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2789 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2790 kvm_s390_get_prefix(vcpu),
2791 PAGE_SIZE * 2, PROT_WRITE);
2793 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2799 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2800 vcpu->arch.sie_block->ihcpu = 0xffff;
2804 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2805 if (!ibs_enabled(vcpu)) {
2806 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2807 atomic_or(CPUSTAT_IBS,
2808 &vcpu->arch.sie_block->cpuflags);
2813 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2814 if (ibs_enabled(vcpu)) {
2815 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2816 atomic_andnot(CPUSTAT_IBS,
2817 &vcpu->arch.sie_block->cpuflags);
2822 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2823 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2827 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2829 * Disable CMMA virtualization; we will emulate the ESSA
2830 * instruction manually, in order to provide additional
2831 * functionalities needed for live migration.
2833 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2837 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2839 * Re-enable CMMA virtualization if CMMA is available and
2842 if ((vcpu->kvm->arch.use_cmma) &&
2843 (vcpu->kvm->mm->context.use_cmma))
2844 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2848 /* nothing to do, just clear the request */
2849 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/*
 * Set the guest TOD clock: compute the epoch delta against the host
 * TOD and propagate it to every vcpu's SIE block while all vcpus are
 * blocked, under kvm->lock.
 */
2854 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2856 struct kvm_vcpu *vcpu;
2859 mutex_lock(&kvm->lock);
2861 kvm->arch.epoch = tod - get_tod_clock();
2862 kvm_s390_vcpu_block_all(kvm);
2863 kvm_for_each_vcpu(i, vcpu, kvm)
2864 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2865 kvm_s390_vcpu_unblock_all(kvm);
2867 mutex_unlock(&kvm->lock);
/* (kerneldoc continues below; resolves a host fault via the guest gmap) */
2871 * kvm_arch_fault_in_page - fault-in guest page if necessary
2872 * @vcpu: The corresponding virtual cpu
2873 * @gpa: Guest physical address
2874 * @writable: Whether the page should be writable or not
2876 * Make sure that a guest page has been faulted-in on the host.
2878 * Return: Zero on success, negative error code otherwise.
2880 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2882 return gmap_fault(vcpu->arch.gmap, gpa,
2883 writable ? FAULT_FLAG_WRITE : 0);
/*
 * Inject a pfault notification for @token: PFAULT_INIT (start_token)
 * goes to the vcpu as an irq, PFAULT_DONE goes to the VM as a
 * floating interrupt. Injection failures are warned, not propagated.
 */
2886 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2887 unsigned long token)
2889 struct kvm_s390_interrupt inti;
2890 struct kvm_s390_irq irq;
2893 irq.u.ext.ext_params2 = token;
2894 irq.type = KVM_S390_INT_PFAULT_INIT;
2895 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2897 inti.type = KVM_S390_INT_PFAULT_DONE;
2898 inti.parm64 = token;
2899 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/* Async-pf hook: page not yet present - inject PFAULT_INIT for the token. */
2903 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2904 struct kvm_async_pf *work)
2906 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2907 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/* Async-pf hook: page faulted in - inject PFAULT_DONE for the token. */
2910 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2911 struct kvm_async_pf *work)
2913 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2914 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/* No-op on s390; notification already happens in page_present. */
2917 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2918 struct kvm_async_pf *work)
2920 /* s390 will always inject the page directly */
/* Always allow completion handling so async-pf bookkeeping is cleaned up. */
2923 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2926 * s390 will always inject the page directly,
2927 * but we still want check_async_completion to cleanup
/*
 * Try to arm an async page fault for the address that faulted in SIE
 * (current->thread.gmap_addr). Bails out unless the guest enabled
 * pfault (valid token, PSW mask matches pfault_select/compare,
 * external interrupts open, no pending irq, CR0 service-signal
 * subclass set, gmap pfault enabled).
 */
2932 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2935 struct kvm_arch_async_pf arch;
2938 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2940 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2941 vcpu->arch.pfault_compare)
2943 if (psw_extint_disabled(vcpu))
2945 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2947 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2949 if (!vcpu->arch.gmap->pfault_enabled)
2952 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2953 hva += current->thread.gmap_addr & ~PAGE_MASK;
2954 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2957 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/*
 * Per-iteration preparation before entering SIE: run async-pf
 * completions, stash gprs 14/15 into the SIE block, deliver pending
 * interrupts (non-ucontrol guests), handle KVM_REQ_* requests, and
 * patch guest PER registers when debugging is active.
 */
2961 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2966 * On s390 notifications for arriving pages will be delivered directly
2967 * to the guest but the house keeping for completed pfaults is
2968 * handled outside the worker.
2970 kvm_check_async_pf_completion(vcpu);
2972 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2973 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
/* bail out if a host machine check is pending */
2978 if (test_cpu_flag(CIF_MCCK_PENDING))
2981 if (!kvm_is_ucontrol(vcpu->kvm)) {
2982 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2987 rc = kvm_s390_handle_requests(vcpu);
2991 if (guestdbg_enabled(vcpu)) {
2992 kvm_s390_backup_guest_per_regs(vcpu);
2993 kvm_s390_patch_guest_per_regs(vcpu);
2996 vcpu->arch.sie_block->icptcode = 0;
2997 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2998 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2999 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/*
 * A fault happened while executing the SIE instruction itself: inject
 * an addressing exception into the guest. The PSW must be forwarded
 * by the faulting instruction's length first (see comment below).
 */
3004 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3006 struct kvm_s390_pgm_info pgm_info = {
3007 .code = PGM_ADDRESSING,
3012 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3013 trace_kvm_s390_sie_fault(vcpu);
3016 * We want to inject an addressing exception, which is defined as a
3017 * suppressing or terminating exception. However, since we came here
3018 * by a DAT access exception, the PSW still points to the faulting
3019 * instruction since DAT exceptions are nullifying. So we've got
3020 * to look up the current opcode to get the length of the instruction
3021 * to be able to forward the PSW.
3023 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3024 ilen = insn_length(opcode);
3028 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3029 * Forward by arbitrary ilc, injection will take care of
3030 * nullification if necessary.
3032 pgm_info = vcpu->arch.pgm;
3035 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3036 kvm_s390_forward_psw(vcpu, ilen);
3037 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/*
 * Handle the result of one SIE execution: restore PER regs and gprs
 * 14/15, then dispatch on exit_reason/icptcode:
 *  -EINTR        -> reinject the machine check stored in the sie_page
 *  icptcode > 0  -> intercept handler, else report SIEIC to user space
 *  != -EFAULT    -> spurious exit (exit_null)
 *  ucontrol      -> report translation exception to user space
 *  gmap_pfault   -> arm async-pf or fault the page in synchronously
 *  otherwise     -> fault inside SIE itself (addressing exception)
 */
3040 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3042 struct mcck_volatile_info *mcck_info;
3043 struct sie_page *sie_page;
3045 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3046 vcpu->arch.sie_block->icptcode);
3047 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3049 if (guestdbg_enabled(vcpu))
3050 kvm_s390_restore_guest_per_regs(vcpu);
3052 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3053 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3055 if (exit_reason == -EINTR) {
3056 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3057 sie_page = container_of(vcpu->arch.sie_block,
3058 struct sie_page, sie_block);
3059 mcck_info = &sie_page->mcck_info;
3060 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3064 if (vcpu->arch.sie_block->icptcode > 0) {
3065 int rc = kvm_handle_sie_intercept(vcpu);
3067 if (rc != -EOPNOTSUPP)
3069 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3070 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3071 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3072 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3074 } else if (exit_reason != -EFAULT) {
3075 vcpu->stat.exit_null++;
3077 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3078 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3079 vcpu->run->s390_ucontrol.trans_exc_code =
3080 current->thread.gmap_addr;
3081 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3083 } else if (current->thread.gmap_pfault) {
3084 trace_kvm_s390_major_guest_pfault(vcpu);
3085 current->thread.gmap_pfault = 0;
3086 if (kvm_arch_setup_async_pf(vcpu))
3088 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3090 return vcpu_post_run_fault_in_sie(vcpu);
/*
 * Main vcpu run loop: pre-run, drop srcu, enter SIE with irqs off and
 * cpu-timer accounting switched to the guest, re-acquire srcu,
 * post-run. Loops until a signal, guestdbg exit, or rc != 0.
 */
3093 static int __vcpu_run(struct kvm_vcpu *vcpu)
3095 int rc, exit_reason;
3098 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3099 * ning the guest), so that memslots (and other stuff) are protected
3101 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3104 rc = vcpu_pre_run(vcpu);
3108 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3110 * As PF_VCPU will be used in fault handler, between
3111 * guest_enter and guest_exit should be no uaccess.
3113 local_irq_disable();
3114 guest_enter_irqoff();
3115 __disable_cpu_timer_accounting(vcpu);
3117 exit_reason = sie64a(vcpu->arch.sie_block,
3118 vcpu->run->s.regs.gprs);
3119 local_irq_disable();
3120 __enable_cpu_timer_accounting(vcpu);
3121 guest_exit_irqoff();
3123 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3125 rc = vcpu_post_run(vcpu, exit_reason);
3126 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3128 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * Copy dirty register state from the shared kvm_run area into the
 * vcpu/SIE block before entering the guest: PSW, prefix, CRs (with
 * TLB flush), arch0 timers, pfault parameters, lazily-enabled RI and
 * guarded-storage facilities, then swap host/guest access, FP/VX and
 * GS register state. Clears kvm_dirty_regs at the end.
 */
3132 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3134 struct runtime_instr_cb *riccb;
3137 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3138 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3139 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3140 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3141 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3142 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3143 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3144 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3145 /* some control register changes require a tlb flush */
3146 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3148 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3149 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3150 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3151 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3152 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3153 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3155 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3156 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3157 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3158 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3159 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3160 kvm_clear_async_pf_completion_queue(vcpu);
3163 * If userspace sets the riccb (e.g. after migration) to a valid state,
3164 * we should enable RI here instead of doing the lazy enablement.
3166 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3167 test_kvm_facility(vcpu->kvm, 64) &&
3169 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3170 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3171 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3174 * If userspace sets the gscb (e.g. after migration) to non-zero,
3175 * we should enable GS here instead of doing the lazy enablement.
3177 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3178 test_kvm_facility(vcpu->kvm, 133) &&
3180 !vcpu->arch.gs_enabled) {
3181 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3182 vcpu->arch.sie_block->ecb |= ECB_GS;
3183 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3184 vcpu->arch.gs_enabled = 1;
3186 save_access_regs(vcpu->arch.host_acrs);
3187 restore_access_regs(vcpu->run->s.regs.acrs);
3188 /* save host (userspace) fprs/vrs */
3190 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3191 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3193 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3195 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3196 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3197 if (test_fp_ctl(current->thread.fpu.fpc))
3198 /* User space provided an invalid FPC, let's clear it */
3199 current->thread.fpu.fpc = 0;
3200 if (MACHINE_HAS_GS) {
3202 __ctl_set_bit(2, 4);
3203 if (current->thread.gs_cb) {
3204 vcpu->arch.host_gscb = current->thread.gs_cb;
3205 save_gs_cb(vcpu->arch.host_gscb);
3207 if (vcpu->arch.gs_enabled) {
3208 current->thread.gs_cb = (struct gs_cb *)
3209 &vcpu->run->s.regs.gscb;
3210 restore_gs_cb(current->thread.gs_cb);
3215 kvm_run->kvm_dirty_regs = 0;
/*
 * Mirror of sync_regs(): copy guest register state back into the
 * shared kvm_run area after running, and restore the host access,
 * FP/VX and guarded-storage register state.
 */
3218 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3220 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3221 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3222 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3223 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3224 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3225 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3226 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3227 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3228 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3229 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3230 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3231 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3232 save_access_regs(vcpu->run->s.regs.acrs);
3233 restore_access_regs(vcpu->arch.host_acrs);
3234 /* Save guest register state */
3236 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3237 /* Restore will be done lazily at return */
3238 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3239 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3240 if (MACHINE_HAS_GS) {
3241 __ctl_set_bit(2, 4);
3242 if (vcpu->arch.gs_enabled)
3243 save_gs_cb(current->thread.gs_cb);
3245 current->thread.gs_cb = vcpu->arch.host_gscb;
3246 restore_gs_cb(vcpu->arch.host_gscb);
3248 if (!vcpu->arch.host_gscb)
3249 __ctl_clear_bit(2, 4);
3250 vcpu->arch.host_gscb = NULL;
/*
 * KVM_RUN entry point: handle immediate_exit and pending guestdbg
 * exits, apply the vcpu sigmask, auto-start the vcpu unless user
 * space controls cpu state, then sync_regs -> __vcpu_run ->
 * store_regs, translating signals to KVM_EXIT_INTR and -EREMOTE to a
 * prepared user space exit.
 */
3255 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3260 if (kvm_run->immediate_exit)
3263 if (guestdbg_exit_pending(vcpu)) {
3264 kvm_s390_prepare_debug_exit(vcpu);
3268 if (vcpu->sigset_active)
3269 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3271 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3272 kvm_s390_vcpu_start(vcpu);
3273 } else if (is_vcpu_stopped(vcpu)) {
3274 pr_err_ratelimited("can't run stopped vcpu %d\n",
3279 sync_regs(vcpu, kvm_run);
3280 enable_cpu_timer_accounting(vcpu);
3283 rc = __vcpu_run(vcpu);
3285 if (signal_pending(current) && !rc) {
3286 kvm_run->exit_reason = KVM_EXIT_INTR;
3290 if (guestdbg_exit_pending(vcpu) && !rc) {
3291 kvm_s390_prepare_debug_exit(vcpu);
3295 if (rc == -EREMOTE) {
3296 /* userspace support is needed, kvm_run has been prepared */
3300 disable_cpu_timer_accounting(vcpu);
3301 store_regs(vcpu, kvm_run);
3303 if (vcpu->sigset_active)
3304 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3306 vcpu->stat.exit_userspace++;
/*
 * Write the architected store-status save area (FP/VX regs, GPRs,
 * PSW, prefix, FPC, TOD programmable reg, CPU timer, clock comparator,
 * access and control registers) at @gpa. Returns -EFAULT on any
 * guest-access failure.
 */
3311 * store status at address
3312 * we have two special cases:
3313 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3314 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3316 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3318 unsigned char archmode = 1;
3319 freg_t fprs[NUM_FPRS];
3324 px = kvm_s390_get_prefix(vcpu);
3325 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3326 if (write_guest_abs(vcpu, 163, &archmode, 1))
3329 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3330 if (write_guest_real(vcpu, 163, &archmode, 1))
3334 gpa -= __LC_FPREGS_SAVE_AREA;
3336 /* manually convert vector registers if necessary */
3337 if (MACHINE_HAS_VX) {
3338 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3339 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3342 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3343 vcpu->run->s.regs.fprs, 128);
3345 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3346 vcpu->run->s.regs.gprs, 128);
3347 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3348 &vcpu->arch.sie_block->gpsw, 16);
3349 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3351 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3352 &vcpu->run->s.regs.fpc, 4);
3353 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3354 &vcpu->arch.sie_block->todpr, 4);
3355 cputm = kvm_s390_get_cpu_timer(vcpu);
3356 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3358 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3359 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3361 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3362 &vcpu->run->s.regs.acrs, 64);
3363 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3364 &vcpu->arch.sie_block->gcr, 128);
3365 return rc ? -EFAULT : 0;
/*
 * Store-status with live register state: refresh the lazily-switched
 * FPC and access registers from the host context first, then delegate
 * to kvm_s390_store_status_unloaded().
 */
3368 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3371 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3372 * switch in the run ioctl. Let's update our copies before we save
3373 * it into the save area
3376 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3377 save_access_regs(vcpu->run->s.regs.acrs);
3379 return kvm_s390_store_status_unloaded(vcpu, addr);
/* Cancel any pending ENABLE_IBS and synchronously request DISABLE_IBS. */
3382 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3384 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3385 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Disable IBS on every vcpu of the VM. */
3388 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3391 struct kvm_vcpu *vcpu;
3393 kvm_for_each_vcpu(i, vcpu, kvm) {
3394 __disable_ibs_on_vcpu(vcpu);
/* Cancel any pending DISABLE_IBS and synchronously request ENABLE_IBS. */
3398 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3402 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3403 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/*
 * Transition a vcpu out of the STOPPED state, serialized by the VM's
 * start_stop_lock. IBS is enabled when this becomes the only running
 * vcpu and disabled everywhere when a second vcpu starts.
 */
3406 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3408 int i, online_vcpus, started_vcpus = 0;
3410 if (!is_vcpu_stopped(vcpu))
3413 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3414 /* Only one cpu at a time may enter/leave the STOPPED state. */
3415 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3416 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3418 for (i = 0; i < online_vcpus; i++) {
3419 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3423 if (started_vcpus == 0) {
3424 /* we're the only active VCPU -> speed it up */
3425 __enable_ibs_on_vcpu(vcpu);
3426 } else if (started_vcpus == 1) {
3428 * As we are starting a second VCPU, we have to disable
3429 * the IBS facility on all VCPUs to remove potentially
3430 * outstanding ENABLE requests.
3432 __disable_ibs_on_all_vcpus(vcpu->kvm);
3435 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3437 * Another VCPU might have used IBS while we were offline.
3438 * Let's play safe and flush the VCPU at startup.
3440 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3441 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Transition a vcpu into the STOPPED state, serialized by the VM's
 * start_stop_lock. Pending SIGP STOP irqs are cleared, IBS is
 * disabled here and re-enabled on the sole remaining running vcpu.
 */
3445 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3447 int i, online_vcpus, started_vcpus = 0;
3448 struct kvm_vcpu *started_vcpu = NULL;
3450 if (is_vcpu_stopped(vcpu))
3453 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3454 /* Only one cpu at a time may enter/leave the STOPPED state. */
3455 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3456 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3458 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3459 kvm_s390_clear_stop_irq(vcpu);
3461 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3462 __disable_ibs_on_vcpu(vcpu);
3464 for (i = 0; i < online_vcpus; i++) {
3465 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3467 started_vcpu = vcpu->kvm->vcpus[i];
3471 if (started_vcpus == 1) {
3473 * As we only have one VCPU left, we want to enable the
3474 * IBS facility for that VCPU to speed it up.
3476 __enable_ibs_on_vcpu(started_vcpu);
3479 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * KVM_ENABLE_CAP (vcpu scope): currently only KVM_CAP_S390_CSS_SUPPORT,
 * which flips the VM-wide css_support flag once.
 */
3483 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3484 struct kvm_enable_cap *cap)
3492 case KVM_CAP_S390_CSS_SUPPORT:
3493 if (!vcpu->kvm->arch.css_support) {
3494 vcpu->kvm->arch.css_support = 1;
3495 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3496 trace_kvm_s390_enable_css(vcpu->kvm);
/*
 * KVM_S390_MEM_OP: read or write guest logical memory on behalf of
 * user space, bounded by MEM_OP_MAX_SIZE, via a temporary vmalloc
 * buffer. CHECK_ONLY only validates the gva range. A positive return
 * is a program-check code; with F_INJECT_EXCEPTION set it is injected
 * into the guest.
 */
3507 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3508 struct kvm_s390_mem_op *mop)
3510 void __user *uaddr = (void __user *)mop->buf;
3511 void *tmpbuf = NULL;
3513 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3514 | KVM_S390_MEMOP_F_CHECK_ONLY;
3516 if (mop->flags & ~supported_flags)
3519 if (mop->size > MEM_OP_MAX_SIZE)
3522 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3523 tmpbuf = vmalloc(mop->size);
3528 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3531 case KVM_S390_MEMOP_LOGICAL_READ:
3532 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3533 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3534 mop->size, GACC_FETCH);
3537 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3539 if (copy_to_user(uaddr, tmpbuf, mop->size))
3543 case KVM_S390_MEMOP_LOGICAL_WRITE:
3544 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3545 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3546 mop->size, GACC_STORE);
3549 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3553 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3559 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3561 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3562 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/*
 * Dispatcher for all s390 vcpu ioctls: interrupt injection, store
 * status, initial PSW/reset, ONE_REG get/set, ucontrol address-space
 * maps, VCPU_FAULT, ENABLE_CAP, MEM_OP and irq-state save/restore.
 * NOTE(review): several "&reg"/"&regs" tokens in this listing were
 * mojibake-corrupted to the single character U+00AE ("®") - restore
 * them when fixing the file's encoding; code text left untouched here.
 */
3568 long kvm_arch_vcpu_ioctl(struct file *filp,
3569 unsigned int ioctl, unsigned long arg)
3571 struct kvm_vcpu *vcpu = filp->private_data;
3572 void __user *argp = (void __user *)arg;
3577 case KVM_S390_IRQ: {
3578 struct kvm_s390_irq s390irq;
3581 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3583 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3586 case KVM_S390_INTERRUPT: {
3587 struct kvm_s390_interrupt s390int;
3588 struct kvm_s390_irq s390irq;
3591 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3593 if (s390int_to_s390irq(&s390int, &s390irq))
3595 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3598 case KVM_S390_STORE_STATUS:
3599 idx = srcu_read_lock(&vcpu->kvm->srcu);
3600 r = kvm_s390_vcpu_store_status(vcpu, arg);
3601 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3603 case KVM_S390_SET_INITIAL_PSW: {
3607 if (copy_from_user(&psw, argp, sizeof(psw)))
3609 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3612 case KVM_S390_INITIAL_RESET:
3613 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3615 case KVM_SET_ONE_REG:
3616 case KVM_GET_ONE_REG: {
3617 struct kvm_one_reg reg;
3619 if (copy_from_user(®, argp, sizeof(reg)))
3621 if (ioctl == KVM_SET_ONE_REG)
3622 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®);
3624 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®);
3627 #ifdef CONFIG_KVM_S390_UCONTROL
3628 case KVM_S390_UCAS_MAP: {
3629 struct kvm_s390_ucas_mapping ucasmap;
3631 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3636 if (!kvm_is_ucontrol(vcpu->kvm)) {
3641 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3642 ucasmap.vcpu_addr, ucasmap.length);
3645 case KVM_S390_UCAS_UNMAP: {
3646 struct kvm_s390_ucas_mapping ucasmap;
3648 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3653 if (!kvm_is_ucontrol(vcpu->kvm)) {
3658 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3663 case KVM_S390_VCPU_FAULT: {
3664 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3667 case KVM_ENABLE_CAP:
3669 struct kvm_enable_cap cap;
3671 if (copy_from_user(&cap, argp, sizeof(cap)))
3673 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3676 case KVM_S390_MEM_OP: {
3677 struct kvm_s390_mem_op mem_op;
3679 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3680 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3685 case KVM_S390_SET_IRQ_STATE: {
3686 struct kvm_s390_irq_state irq_state;
3689 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3691 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3692 irq_state.len == 0 ||
3693 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3697 r = kvm_s390_set_irq_state(vcpu,
3698 (void __user *) irq_state.buf,
3702 case KVM_S390_GET_IRQ_STATE: {
3703 struct kvm_s390_irq_state irq_state;
3706 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3708 if (irq_state.len == 0) {
3712 r = kvm_s390_get_irq_state(vcpu,
3713 (__u8 __user *) irq_state.buf,
/*
 * mmap fault handler for the vcpu fd: ucontrol guests may map the SIE
 * control block page; everything else gets SIGBUS.
 */
3723 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3725 #ifdef CONFIG_KVM_S390_UCONTROL
3726 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3727 && (kvm_is_ucontrol(vcpu->kvm))) {
3728 vmf->page = virt_to_page(vcpu->arch.sie_block);
3729 get_page(vmf->page);
3733 return VM_FAULT_SIGBUS;
/* No per-arch memslot metadata needed on s390. */
3736 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3737 unsigned long npages)
/*
 * Validate a memslot before it is installed: user address and size
 * must be 1MB-segment aligned and the slot must fit below the VM's
 * configured memory limit.
 */
3742 /* Section: memory related */
3743 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3744 struct kvm_memory_slot *memslot,
3745 const struct kvm_userspace_memory_region *mem,
3746 enum kvm_mr_change change)
3748 /* A few sanity checks. We can have memory slots which have to be
3749 located/ended at a segment boundary (1MB). The memory in userland is
3750 ok to be fragmented into various different vmas. It is okay to mmap()
3751 and munmap() stuff in this slot after doing this call at any time */
3753 if (mem->userspace_addr & 0xffffful)
3756 if (mem->memory_size & 0xffffful)
3759 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
/*
 * After a memslot change: re-map the gmap segment, but skip the
 * expensive remap when the slot's address/size did not change.
 */
3765 void kvm_arch_commit_memory_region(struct kvm *kvm,
3766 const struct kvm_userspace_memory_region *mem,
3767 const struct kvm_memory_slot *old,
3768 const struct kvm_memory_slot *new,
3769 enum kvm_mr_change change)
3773 /* If the basics of the memslot do not change, we do not want
3774 * to update the gmap. Every update causes several unnecessary
3775 * segment translation exceptions. This is usually handled just
3776 * fine by the normal fault handler + gmap, but it will also
3777 * cause faults on the prefix page of running guest CPUs.
3779 if (old->userspace_addr == mem->userspace_addr &&
3780 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3781 old->npages * PAGE_SIZE == mem->memory_size)
3784 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3785 mem->guest_phys_addr, mem->memory_size);
3787 pr_warn("failed to commit memory region\n");
/*
 * Build the facility-list mask word @i exposing only facilities that
 * are usable without hypervisor assistance, derived from sclp.hmfai.
 */
3791 static inline unsigned long nonhyp_mask(int i)
3793 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3795 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/* Reset the valid-wakeup marker after a vcpu block period ends. */
3798 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3800 vcpu->valid_wakeup = false;
/*
 * Module init: require the SIEF2 SIE facility, restrict the exported
 * facility list to non-hypervisor-dependent bits, then register with
 * the generic KVM core.
 */
3803 static int __init kvm_s390_init(void)
3807 if (!sclp.has_sief2) {
3808 pr_info("SIE not available\n");
3812 for (i = 0; i < 16; i++)
3813 kvm_s390_fac_list_mask[i] |=
3814 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3816 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/* Module exit hook and module registration / autoload aliases. */
3819 static void __exit kvm_s390_exit(void)
3824 module_init(kvm_s390_init);
3825 module_exit(kvm_s390_exit);
3828 * Enable autoloading of the kvm module.
3829 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3830 * since x86 takes a different approach.
3832 #include <linux/miscdevice.h>
3833 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3834 MODULE_ALIAS("devname:kvm");