arch/s390/kvm/kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
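/*
 * Each entry below maps a counter name onto its offset in struct kvm_vcpu or
 * struct kvm (VCPU_STAT vs. VM_STAT). The common KVM code walks this table
 * and typically exposes each counter under debugfs, e.g. as
 * /sys/kernel/debug/kvm/exit_instruction, aggregated over all VMs.
 */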
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
84         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
85         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
87         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
89         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92         { "deliver_program", VCPU_STAT(deliver_program) },
93         { "deliver_io", VCPU_STAT(deliver_io) },
94         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
96         { "inject_ckc", VCPU_STAT(inject_ckc) },
97         { "inject_cputm", VCPU_STAT(inject_cputm) },
98         { "inject_external_call", VCPU_STAT(inject_external_call) },
99         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
100         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101         { "inject_io", VM_STAT(inject_io) },
102         { "inject_mchk", VCPU_STAT(inject_mchk) },
103         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
104         { "inject_program", VCPU_STAT(inject_program) },
105         { "inject_restart", VCPU_STAT(inject_restart) },
106         { "inject_service_signal", VM_STAT(inject_service_signal) },
107         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110         { "inject_virtio", VM_STAT(inject_virtio) },
111         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
112         { "instruction_gs", VCPU_STAT(instruction_gs) },
113         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
114         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
118         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
119         { "instruction_sck", VCPU_STAT(instruction_sck) },
120         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121         { "instruction_spx", VCPU_STAT(instruction_spx) },
122         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
123         { "instruction_stap", VCPU_STAT(instruction_stap) },
124         { "instruction_iske", VCPU_STAT(instruction_iske) },
125         { "instruction_ri", VCPU_STAT(instruction_ri) },
126         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127         { "instruction_sske", VCPU_STAT(instruction_sske) },
128         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129         { "instruction_essa", VCPU_STAT(instruction_essa) },
130         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
131         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
132         { "instruction_tb", VCPU_STAT(instruction_tb) },
133         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
134         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
135         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
136         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137         { "instruction_sie", VCPU_STAT(instruction_sie) },
138         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
155         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
156         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
158         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
159         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
160         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
161         { NULL }
162 };
163
164 struct kvm_s390_tod_clock_ext {
165         __u8 epoch_idx;
166         __u64 tod;
167         __u8 reserved[7];
168 } __packed;
169
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174
175 /* allow 1m huge page guest backing, if !nested */
176 static int hpage;
177 module_param(hpage, int, 0444);
178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
179
180 /*
181  * For now we handle at most 16 double words as this is what the s390 base
182  * kernel handles and stores in the prefix page. If we ever need to go beyond
183  * this, the code has to change, but the external uapi can stay.
184  */
185 #define SIZE_INTERNAL 16
186
187 /*
188  * Base feature mask that defines default mask for facilities. Consists of the
189  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
190  */
191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
192 /*
193  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
194  * and defines the facilities that can be enabled via a cpu model.
195  */
196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
197
198 static unsigned long kvm_s390_fac_size(void)
199 {
200         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
201         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
202         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
203                 sizeof(S390_lowcore.stfle_fac_list));
204
205         return SIZE_INTERNAL;
206 }
207
208 /* available cpu features supported by kvm */
209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
210 /* available subfunctions indicated via query / "test bit" */
211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
212
213 static struct gmap_notifier gmap_notifier;
214 static struct gmap_notifier vsie_gmap_notifier;
215 debug_info_t *kvm_s390_dbf;
216
217 /* Section: not file related */
218 int kvm_arch_hardware_enable(void)
219 {
220         /* every s390 is virtualization enabled ;-) */
221         return 0;
222 }
223
224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
225                               unsigned long end);
226
227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
228 {
229         u8 delta_idx = 0;
230
231         /*
232          * The TOD jumps by delta; we compensate for this by adding
233          * -delta to the epoch.
234          */
235         delta = -delta;
236
237         /* sign-extension - we're adding to signed values below */
238         if ((s64)delta < 0)
239                 delta_idx = -1;
240
241         scb->epoch += delta;
242         if (scb->ecd & ECD_MEF) {
243                 scb->epdx += delta_idx;
244                 if (scb->epoch < delta)
245                         scb->epdx += 1;
246         }
247 }
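/*
 * Example of the epoch adjustment above (a sketch, not tied to real values):
 * if the host TOD jumped forward by 2, the argument is 2 and we add
 * delta = -2 = 0xffff...fffe with delta_idx = -1. For epoch = 1 the 64-bit
 * add wraps to 0xffff...ffff without carry (the result is not smaller than
 * delta), so only delta_idx is applied to epdx and the combined epdx:epoch
 * value drops by exactly 2. The "epoch < delta" test is the usual carry-out
 * check for an unsigned addition: x + d wrapped iff the result is smaller
 * than d.
 */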
248
249 /*
250  * This callback is executed during stop_machine(). All CPUs are therefore
251  * temporarily stopped. In order not to change guest behavior, we have to
252  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
253  * so a CPU won't be stopped while calculating with the epoch.
254  */
255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
256                           void *v)
257 {
258         struct kvm *kvm;
259         struct kvm_vcpu *vcpu;
260         int i;
261         unsigned long long *delta = v;
262
263         list_for_each_entry(kvm, &vm_list, vm_list) {
264                 kvm_for_each_vcpu(i, vcpu, kvm) {
265                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
266                         if (i == 0) {
267                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
268                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
269                         }
270                         if (vcpu->arch.cputm_enabled)
271                                 vcpu->arch.cputm_start += *delta;
272                         if (vcpu->arch.vsie_block)
273                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
274                                                    *delta);
275                 }
276         }
277         return NOTIFY_OK;
278 }
279
280 static struct notifier_block kvm_clock_notifier = {
281         .notifier_call = kvm_clock_sync,
282 };
283
284 int kvm_arch_hardware_setup(void)
285 {
286         gmap_notifier.notifier_call = kvm_gmap_notifier;
287         gmap_register_pte_notifier(&gmap_notifier);
288         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
289         gmap_register_pte_notifier(&vsie_gmap_notifier);
290         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
291                                        &kvm_clock_notifier);
292         return 0;
293 }
294
295 void kvm_arch_hardware_unsetup(void)
296 {
297         gmap_unregister_pte_notifier(&gmap_notifier);
298         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
299         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
300                                          &kvm_clock_notifier);
301 }
302
303 static void allow_cpu_feat(unsigned long nr)
304 {
305         set_bit_inv(nr, kvm_s390_available_cpu_feat);
306 }
307
308 static inline int plo_test_bit(unsigned char nr)
309 {
310         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
311         int cc;
312
313         asm volatile(
314                 /* Parameter registers are ignored for "test bit" */
315                 "       plo     0,0,0,0(0)\n"
316                 "       ipm     %0\n"
317                 "       srl     %0,28\n"
318                 : "=d" (cc)
319                 : "d" (r0)
320                 : "cc");
321         return cc == 0;
322 }
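/*
 * PLO with the test bit (0x100) set in the function code only queries
 * whether that function code is installed; condition code 0 is taken to
 * mean the function is available. kvm_s390_cpu_feat_init() below collects
 * the answers into a bitmask using the same MSB-first bit numbering
 * (0x80 >> (i & 7)) that the CPU model UAPI uses for query results.
 */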
323
324 static void kvm_s390_cpu_feat_init(void)
325 {
326         int i;
327
328         for (i = 0; i < 256; ++i) {
329                 if (plo_test_bit(i))
330                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
331         }
332
333         if (test_facility(28)) /* TOD-clock steering */
334                 ptff(kvm_s390_available_subfunc.ptff,
335                      sizeof(kvm_s390_available_subfunc.ptff),
336                      PTFF_QAF);
337
338         if (test_facility(17)) { /* MSA */
339                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
340                               kvm_s390_available_subfunc.kmac);
341                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
342                               kvm_s390_available_subfunc.kmc);
343                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
344                               kvm_s390_available_subfunc.km);
345                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
346                               kvm_s390_available_subfunc.kimd);
347                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
348                               kvm_s390_available_subfunc.klmd);
349         }
350         if (test_facility(76)) /* MSA3 */
351                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
352                               kvm_s390_available_subfunc.pckmo);
353         if (test_facility(77)) { /* MSA4 */
354                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
355                               kvm_s390_available_subfunc.kmctr);
356                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
357                               kvm_s390_available_subfunc.kmf);
358                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
359                               kvm_s390_available_subfunc.kmo);
360                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
361                               kvm_s390_available_subfunc.pcc);
362         }
363         if (test_facility(57)) /* MSA5 */
364                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
365                               kvm_s390_available_subfunc.ppno);
366
367         if (test_facility(146)) /* MSA8 */
368                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
369                               kvm_s390_available_subfunc.kma);
370
371         if (MACHINE_HAS_ESOP)
372                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
373         /*
374          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
375          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
376          */
377         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
378             !test_facility(3) || !nested)
379                 return;
380         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
381         if (sclp.has_64bscao)
382                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
383         if (sclp.has_siif)
384                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
385         if (sclp.has_gpere)
386                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
387         if (sclp.has_gsls)
388                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
389         if (sclp.has_ib)
390                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
391         if (sclp.has_cei)
392                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
393         if (sclp.has_ibs)
394                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
395         if (sclp.has_kss)
396                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
397         /*
398          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
399          * all skey handling functions read/set the skey from the PGSTE
400          * instead of the real storage key.
401          *
402          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
403          * pages to be detected as preserved even though they are resident.
404          *
405          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
406          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
407          *
408          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
409          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
410          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
411          *
412          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
413          * cannot easily shadow the SCA because of the ipte lock.
414          */
415 }
416
417 int kvm_arch_init(void *opaque)
418 {
419         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
420         if (!kvm_s390_dbf)
421                 return -ENOMEM;
422
423         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
424                 debug_unregister(kvm_s390_dbf);
425                 return -ENOMEM;
426         }
427
428         kvm_s390_cpu_feat_init();
429
430         /* Register floating interrupt controller interface. */
431         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
432 }
433
434 void kvm_arch_exit(void)
435 {
436         debug_unregister(kvm_s390_dbf);
437 }
438
439 /* Section: device related */
440 long kvm_arch_dev_ioctl(struct file *filp,
441                         unsigned int ioctl, unsigned long arg)
442 {
443         if (ioctl == KVM_S390_ENABLE_SIE)
444                 return s390_enable_sie();
445         return -EINVAL;
446 }
447
448 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
449 {
450         int r;
451
452         switch (ext) {
453         case KVM_CAP_S390_PSW:
454         case KVM_CAP_S390_GMAP:
455         case KVM_CAP_SYNC_MMU:
456 #ifdef CONFIG_KVM_S390_UCONTROL
457         case KVM_CAP_S390_UCONTROL:
458 #endif
459         case KVM_CAP_ASYNC_PF:
460         case KVM_CAP_SYNC_REGS:
461         case KVM_CAP_ONE_REG:
462         case KVM_CAP_ENABLE_CAP:
463         case KVM_CAP_S390_CSS_SUPPORT:
464         case KVM_CAP_IOEVENTFD:
465         case KVM_CAP_DEVICE_CTRL:
466         case KVM_CAP_ENABLE_CAP_VM:
467         case KVM_CAP_S390_IRQCHIP:
468         case KVM_CAP_VM_ATTRIBUTES:
469         case KVM_CAP_MP_STATE:
470         case KVM_CAP_IMMEDIATE_EXIT:
471         case KVM_CAP_S390_INJECT_IRQ:
472         case KVM_CAP_S390_USER_SIGP:
473         case KVM_CAP_S390_USER_STSI:
474         case KVM_CAP_S390_SKEYS:
475         case KVM_CAP_S390_IRQ_STATE:
476         case KVM_CAP_S390_USER_INSTR0:
477         case KVM_CAP_S390_CMMA_MIGRATION:
478         case KVM_CAP_S390_AIS:
479         case KVM_CAP_S390_AIS_MIGRATION:
480                 r = 1;
481                 break;
482         case KVM_CAP_S390_HPAGE_1M:
483                 r = 0;
484                 if (hpage)
485                         r = 1;
486                 break;
487         case KVM_CAP_S390_MEM_OP:
488                 r = MEM_OP_MAX_SIZE;
489                 break;
490         case KVM_CAP_NR_VCPUS:
491         case KVM_CAP_MAX_VCPUS:
492                 r = KVM_S390_BSCA_CPU_SLOTS;
493                 if (!kvm_s390_use_sca_entries())
494                         r = KVM_MAX_VCPUS;
495                 else if (sclp.has_esca && sclp.has_64bscao)
496                         r = KVM_S390_ESCA_CPU_SLOTS;
497                 break;
498         case KVM_CAP_NR_MEMSLOTS:
499                 r = KVM_USER_MEM_SLOTS;
500                 break;
501         case KVM_CAP_S390_COW:
502                 r = MACHINE_HAS_ESOP;
503                 break;
504         case KVM_CAP_S390_VECTOR_REGISTERS:
505                 r = MACHINE_HAS_VX;
506                 break;
507         case KVM_CAP_S390_RI:
508                 r = test_facility(64);
509                 break;
510         case KVM_CAP_S390_GS:
511                 r = test_facility(133);
512                 break;
513         case KVM_CAP_S390_BPB:
514                 r = test_facility(82);
515                 break;
516         default:
517                 r = 0;
518         }
519         return r;
520 }
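/*
 * User space probes the capabilities above with the KVM_CHECK_EXTENSION
 * ioctl on the VM file descriptor. A minimal sketch (error handling
 * omitted, values illustrative):
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	if (max > 0)
 *		;	/* memory operations available, max transfer size in "max" */
 *
 * A positive return value can carry extra information, as for
 * KVM_CAP_S390_MEM_OP and KVM_CAP_MAX_VCPUS above.
 */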
521
522 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
523                                     struct kvm_memory_slot *memslot)
524 {
525         int i;
526         gfn_t cur_gfn, last_gfn;
527         unsigned long gaddr, vmaddr;
528         struct gmap *gmap = kvm->arch.gmap;
529         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
530
531         /* Loop over all guest segments */
532         cur_gfn = memslot->base_gfn;
533         last_gfn = memslot->base_gfn + memslot->npages;
534         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
535                 gaddr = gfn_to_gpa(cur_gfn);
536                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
537                 if (kvm_is_error_hva(vmaddr))
538                         continue;
539
540                 bitmap_zero(bitmap, _PAGE_ENTRIES);
541                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
542                 for (i = 0; i < _PAGE_ENTRIES; i++) {
543                         if (test_bit(i, bitmap))
544                                 mark_page_dirty(kvm, cur_gfn + i);
545                 }
546
547                 if (fatal_signal_pending(current))
548                         return;
549                 cond_resched();
550         }
551 }
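/*
 * Note on the loop above: _PAGE_ENTRIES page-table entries cover one 1 MB
 * segment, so the slot is scanned segment by segment and
 * gmap_sync_dirty_log_pmd() fills a per-segment bitmap of dirty 4k pages
 * (also handling segments backed by 1 MB huge pages). The fatal-signal
 * check lets a dying task bail out of a potentially long scan.
 */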
552
553 /* Section: vm related */
554 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
555
556 /*
557  * Get (and clear) the dirty memory log for a memory slot.
558  */
559 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
560                                struct kvm_dirty_log *log)
561 {
562         int r;
563         unsigned long n;
564         struct kvm_memslots *slots;
565         struct kvm_memory_slot *memslot;
566         int is_dirty = 0;
567
568         if (kvm_is_ucontrol(kvm))
569                 return -EINVAL;
570
571         mutex_lock(&kvm->slots_lock);
572
573         r = -EINVAL;
574         if (log->slot >= KVM_USER_MEM_SLOTS)
575                 goto out;
576
577         slots = kvm_memslots(kvm);
578         memslot = id_to_memslot(slots, log->slot);
579         r = -ENOENT;
580         if (!memslot->dirty_bitmap)
581                 goto out;
582
583         kvm_s390_sync_dirty_log(kvm, memslot);
584         r = kvm_get_dirty_log(kvm, log, &is_dirty);
585         if (r)
586                 goto out;
587
588         /* Clear the dirty log */
589         if (is_dirty) {
590                 n = kvm_dirty_bitmap_bytes(memslot);
591                 memset(memslot->dirty_bitmap, 0, n);
592         }
593         r = 0;
594 out:
595         mutex_unlock(&kvm->slots_lock);
596         return r;
597 }
598
599 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
600 {
601         unsigned int i;
602         struct kvm_vcpu *vcpu;
603
604         kvm_for_each_vcpu(i, vcpu, kvm) {
605                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
606         }
607 }
608
609 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
610 {
611         int r;
612
613         if (cap->flags)
614                 return -EINVAL;
615
616         switch (cap->cap) {
617         case KVM_CAP_S390_IRQCHIP:
618                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
619                 kvm->arch.use_irqchip = 1;
620                 r = 0;
621                 break;
622         case KVM_CAP_S390_USER_SIGP:
623                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
624                 kvm->arch.user_sigp = 1;
625                 r = 0;
626                 break;
627         case KVM_CAP_S390_VECTOR_REGISTERS:
628                 mutex_lock(&kvm->lock);
629                 if (kvm->created_vcpus) {
630                         r = -EBUSY;
631                 } else if (MACHINE_HAS_VX) {
632                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
633                         set_kvm_facility(kvm->arch.model.fac_list, 129);
634                         if (test_facility(134)) {
635                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
636                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
637                         }
638                         if (test_facility(135)) {
639                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
640                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
641                         }
642                         r = 0;
643                 } else
644                         r = -EINVAL;
645                 mutex_unlock(&kvm->lock);
646                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
647                          r ? "(not available)" : "(success)");
648                 break;
649         case KVM_CAP_S390_RI:
650                 r = -EINVAL;
651                 mutex_lock(&kvm->lock);
652                 if (kvm->created_vcpus) {
653                         r = -EBUSY;
654                 } else if (test_facility(64)) {
655                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
656                         set_kvm_facility(kvm->arch.model.fac_list, 64);
657                         r = 0;
658                 }
659                 mutex_unlock(&kvm->lock);
660                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
661                          r ? "(not available)" : "(success)");
662                 break;
663         case KVM_CAP_S390_AIS:
664                 mutex_lock(&kvm->lock);
665                 if (kvm->created_vcpus) {
666                         r = -EBUSY;
667                 } else {
668                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
669                         set_kvm_facility(kvm->arch.model.fac_list, 72);
670                         r = 0;
671                 }
672                 mutex_unlock(&kvm->lock);
673                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
674                          r ? "(not available)" : "(success)");
675                 break;
676         case KVM_CAP_S390_GS:
677                 r = -EINVAL;
678                 mutex_lock(&kvm->lock);
679                 if (kvm->created_vcpus) {
680                         r = -EBUSY;
681                 } else if (test_facility(133)) {
682                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
683                         set_kvm_facility(kvm->arch.model.fac_list, 133);
684                         r = 0;
685                 }
686                 mutex_unlock(&kvm->lock);
687                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
688                          r ? "(not available)" : "(success)");
689                 break;
690         case KVM_CAP_S390_HPAGE_1M:
691                 mutex_lock(&kvm->lock);
692                 if (kvm->created_vcpus)
693                         r = -EBUSY;
694                 else if (!hpage || kvm->arch.use_cmma)
695                         r = -EINVAL;
696                 else {
697                         r = 0;
698                         down_write(&kvm->mm->mmap_sem);
699                         kvm->mm->context.allow_gmap_hpage_1m = 1;
700                         up_write(&kvm->mm->mmap_sem);
701                         /*
702                          * We might have to create fake 4k page
703                          * tables. To prevent the hardware from working on
704                          * stale PGSTEs, we emulate these instructions.
705                          */
706                         kvm->arch.use_skf = 0;
707                         kvm->arch.use_pfmfi = 0;
708                 }
709                 mutex_unlock(&kvm->lock);
710                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
711                          r ? "(not available)" : "(success)");
712                 break;
713         case KVM_CAP_S390_USER_STSI:
714                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
715                 kvm->arch.user_stsi = 1;
716                 r = 0;
717                 break;
718         case KVM_CAP_S390_USER_INSTR0:
719                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
720                 kvm->arch.user_instr0 = 1;
721                 icpt_operexc_on_all_vcpus(kvm);
722                 r = 0;
723                 break;
724         default:
725                 r = -EINVAL;
726                 break;
727         }
728         return r;
729 }
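/*
 * The capabilities above are switched on from user space with KVM_ENABLE_CAP
 * on the VM file descriptor, roughly (illustrative sketch):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Capabilities that extend the CPU model (vector registers, RI, GS, ...)
 * must be enabled before the first VCPU is created, hence the
 * created_vcpus checks under kvm->lock.
 */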
730
731 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
732 {
733         int ret;
734
735         switch (attr->attr) {
736         case KVM_S390_VM_MEM_LIMIT_SIZE:
737                 ret = 0;
738                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
739                          kvm->arch.mem_limit);
740                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
741                         ret = -EFAULT;
742                 break;
743         default:
744                 ret = -ENXIO;
745                 break;
746         }
747         return ret;
748 }
749
750 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
751 {
752         int ret;
753         unsigned int idx;
754         switch (attr->attr) {
755         case KVM_S390_VM_MEM_ENABLE_CMMA:
756                 ret = -ENXIO;
757                 if (!sclp.has_cmma)
758                         break;
759
760                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
761                 mutex_lock(&kvm->lock);
762                 if (kvm->created_vcpus)
763                         ret = -EBUSY;
764                 else if (kvm->mm->context.allow_gmap_hpage_1m)
765                         ret = -EINVAL;
766                 else {
767                         kvm->arch.use_cmma = 1;
768                         /* Not compatible with cmma. */
769                         kvm->arch.use_pfmfi = 0;
770                         ret = 0;
771                 }
772                 mutex_unlock(&kvm->lock);
773                 break;
774         case KVM_S390_VM_MEM_CLR_CMMA:
775                 ret = -ENXIO;
776                 if (!sclp.has_cmma)
777                         break;
778                 ret = -EINVAL;
779                 if (!kvm->arch.use_cmma)
780                         break;
781
782                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
783                 mutex_lock(&kvm->lock);
784                 idx = srcu_read_lock(&kvm->srcu);
785                 s390_reset_cmma(kvm->arch.gmap->mm);
786                 srcu_read_unlock(&kvm->srcu, idx);
787                 mutex_unlock(&kvm->lock);
788                 ret = 0;
789                 break;
790         case KVM_S390_VM_MEM_LIMIT_SIZE: {
791                 unsigned long new_limit;
792
793                 if (kvm_is_ucontrol(kvm))
794                         return -EINVAL;
795
796                 if (get_user(new_limit, (u64 __user *)attr->addr))
797                         return -EFAULT;
798
799                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
800                     new_limit > kvm->arch.mem_limit)
801                         return -E2BIG;
802
803                 if (!new_limit)
804                         return -EINVAL;
805
806                 /* gmap_create takes last usable address */
807                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
808                         new_limit -= 1;
809
810                 ret = -EBUSY;
811                 mutex_lock(&kvm->lock);
812                 if (!kvm->created_vcpus) {
813                         /* gmap_create will round the limit up */
814                         struct gmap *new = gmap_create(current->mm, new_limit);
815
816                         if (!new) {
817                                 ret = -ENOMEM;
818                         } else {
819                                 gmap_remove(kvm->arch.gmap);
820                                 new->private = kvm;
821                                 kvm->arch.gmap = new;
822                                 ret = 0;
823                         }
824                 }
825                 mutex_unlock(&kvm->lock);
826                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
827                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
828                          (void *) kvm->arch.gmap->asce);
829                 break;
830         }
831         default:
832                 ret = -ENXIO;
833                 break;
834         }
835         return ret;
836 }
837
838 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
839
840 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
841 {
842         struct kvm_vcpu *vcpu;
843         int i;
844
845         kvm_s390_vcpu_block_all(kvm);
846
847         kvm_for_each_vcpu(i, vcpu, kvm)
848                 kvm_s390_vcpu_crypto_setup(vcpu);
849
850         kvm_s390_vcpu_unblock_all(kvm);
851 }
852
853 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
854 {
855         if (!test_kvm_facility(kvm, 76))
856                 return -EINVAL;
857
858         mutex_lock(&kvm->lock);
859         switch (attr->attr) {
860         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
861                 get_random_bytes(
862                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
863                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
864                 kvm->arch.crypto.aes_kw = 1;
865                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
866                 break;
867         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
868                 get_random_bytes(
869                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
870                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
871                 kvm->arch.crypto.dea_kw = 1;
872                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
873                 break;
874         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
875                 kvm->arch.crypto.aes_kw = 0;
876                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
877                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
878                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
879                 break;
880         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
881                 kvm->arch.crypto.dea_kw = 0;
882                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
883                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
884                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
885                 break;
886         default:
887                 mutex_unlock(&kvm->lock);
888                 return -ENXIO;
889         }
890
891         kvm_s390_vcpu_crypto_reset_all(kvm);
892         mutex_unlock(&kvm->lock);
893         return 0;
894 }
895
896 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
897 {
898         int cx;
899         struct kvm_vcpu *vcpu;
900
901         kvm_for_each_vcpu(cx, vcpu, kvm)
902                 kvm_s390_sync_request(req, vcpu);
903 }
904
905 /*
906  * Must be called with kvm->srcu held to avoid races on memslots, and with
907  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
908  */
909 static int kvm_s390_vm_start_migration(struct kvm *kvm)
910 {
911         struct kvm_memory_slot *ms;
912         struct kvm_memslots *slots;
913         unsigned long ram_pages = 0;
914         int slotnr;
915
916         /* migration mode already enabled */
917         if (kvm->arch.migration_mode)
918                 return 0;
919         slots = kvm_memslots(kvm);
920         if (!slots || !slots->used_slots)
921                 return -EINVAL;
922
923         if (!kvm->arch.use_cmma) {
924                 kvm->arch.migration_mode = 1;
925                 return 0;
926         }
927         /* mark all the pages in active slots as dirty */
928         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
929                 ms = slots->memslots + slotnr;
930                 /*
931                  * The second half of the bitmap is only used on x86,
932                  * and would be wasted otherwise, so we put it to good
933                  * use here to keep track of the state of the storage
934                  * attributes.
935                  */
936                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
937                 ram_pages += ms->npages;
938         }
939         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
940         kvm->arch.migration_mode = 1;
941         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
942         return 0;
943 }
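/*
 * With CMMA in use, starting migration marks every page of every slot as
 * having dirty storage attributes (reusing the otherwise unused second half
 * of the dirty bitmap) and seeds cmma_dirty_pages with the total page
 * count; the CMMA migration interface (KVM_S390_GET_CMMA_BITS) then works
 * this count down while user space transfers the attributes.
 */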
944
945 /*
946  * Must be called with kvm->slots_lock to avoid races with ourselves and
947  * kvm_s390_vm_start_migration.
948  */
949 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
950 {
951         /* migration mode already disabled */
952         if (!kvm->arch.migration_mode)
953                 return 0;
954         kvm->arch.migration_mode = 0;
955         if (kvm->arch.use_cmma)
956                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
957         return 0;
958 }
959
960 static int kvm_s390_vm_set_migration(struct kvm *kvm,
961                                      struct kvm_device_attr *attr)
962 {
963         int res = -ENXIO;
964
965         mutex_lock(&kvm->slots_lock);
966         switch (attr->attr) {
967         case KVM_S390_VM_MIGRATION_START:
968                 res = kvm_s390_vm_start_migration(kvm);
969                 break;
970         case KVM_S390_VM_MIGRATION_STOP:
971                 res = kvm_s390_vm_stop_migration(kvm);
972                 break;
973         default:
974                 break;
975         }
976         mutex_unlock(&kvm->slots_lock);
977
978         return res;
979 }
980
981 static int kvm_s390_vm_get_migration(struct kvm *kvm,
982                                      struct kvm_device_attr *attr)
983 {
984         u64 mig = kvm->arch.migration_mode;
985
986         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
987                 return -ENXIO;
988
989         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
990                 return -EFAULT;
991         return 0;
992 }
993
994 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
995 {
996         struct kvm_s390_vm_tod_clock gtod;
997
998         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
999                 return -EFAULT;
1000
1001         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1002                 return -EINVAL;
1003         kvm_s390_set_tod_clock(kvm, &gtod);
1004
1005         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1006                 gtod.epoch_idx, gtod.tod);
1007
1008         return 0;
1009 }
1010
1011 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1012 {
1013         u8 gtod_high;
1014
1015         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1016                                            sizeof(gtod_high)))
1017                 return -EFAULT;
1018
1019         if (gtod_high != 0)
1020                 return -EINVAL;
1021         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1022
1023         return 0;
1024 }
1025
1026 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1027 {
1028         struct kvm_s390_vm_tod_clock gtod = { 0 };
1029
1030         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1031                            sizeof(gtod.tod)))
1032                 return -EFAULT;
1033
1034         kvm_s390_set_tod_clock(kvm, &gtod);
1035         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1036         return 0;
1037 }
1038
1039 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1040 {
1041         int ret;
1042
1043         if (attr->flags)
1044                 return -EINVAL;
1045
1046         switch (attr->attr) {
1047         case KVM_S390_VM_TOD_EXT:
1048                 ret = kvm_s390_set_tod_ext(kvm, attr);
1049                 break;
1050         case KVM_S390_VM_TOD_HIGH:
1051                 ret = kvm_s390_set_tod_high(kvm, attr);
1052                 break;
1053         case KVM_S390_VM_TOD_LOW:
1054                 ret = kvm_s390_set_tod_low(kvm, attr);
1055                 break;
1056         default:
1057                 ret = -ENXIO;
1058                 break;
1059         }
1060         return ret;
1061 }
1062
1063 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1064                                    struct kvm_s390_vm_tod_clock *gtod)
1065 {
1066         struct kvm_s390_tod_clock_ext htod;
1067
1068         preempt_disable();
1069
1070         get_tod_clock_ext((char *)&htod);
1071
1072         gtod->tod = htod.tod + kvm->arch.epoch;
1073         gtod->epoch_idx = 0;
1074         if (test_kvm_facility(kvm, 139)) {
1075                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1076                 if (gtod->tod < htod.tod)
1077                         gtod->epoch_idx += 1;
1078         }
1079
1080         preempt_enable();
1081 }
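/*
 * The guest TOD is the host TOD plus the per-VM epoch. With the multiple
 * epoch facility (139) the epoch index extends the clock beyond a 64-bit
 * wrap; the "gtod->tod < htod.tod" test above detects the carry out of the
 * 64-bit addition, mirroring the logic in kvm_clock_sync_scb().
 */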
1082
1083 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1084 {
1085         struct kvm_s390_vm_tod_clock gtod;
1086
1087         memset(&gtod, 0, sizeof(gtod));
1088         kvm_s390_get_tod_clock(kvm, &gtod);
1089         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1090                 return -EFAULT;
1091
1092         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1093                 gtod.epoch_idx, gtod.tod);
1094         return 0;
1095 }
1096
1097 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099         u8 gtod_high = 0;
1100
1101         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1102                                          sizeof(gtod_high)))
1103                 return -EFAULT;
1104         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1105
1106         return 0;
1107 }
1108
1109 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1110 {
1111         u64 gtod;
1112
1113         gtod = kvm_s390_get_tod_clock_fast(kvm);
1114         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1115                 return -EFAULT;
1116         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1117
1118         return 0;
1119 }
1120
1121 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123         int ret;
1124
1125         if (attr->flags)
1126                 return -EINVAL;
1127
1128         switch (attr->attr) {
1129         case KVM_S390_VM_TOD_EXT:
1130                 ret = kvm_s390_get_tod_ext(kvm, attr);
1131                 break;
1132         case KVM_S390_VM_TOD_HIGH:
1133                 ret = kvm_s390_get_tod_high(kvm, attr);
1134                 break;
1135         case KVM_S390_VM_TOD_LOW:
1136                 ret = kvm_s390_get_tod_low(kvm, attr);
1137                 break;
1138         default:
1139                 ret = -ENXIO;
1140                 break;
1141         }
1142         return ret;
1143 }
1144
1145 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1146 {
1147         struct kvm_s390_vm_cpu_processor *proc;
1148         u16 lowest_ibc, unblocked_ibc;
1149         int ret = 0;
1150
1151         mutex_lock(&kvm->lock);
1152         if (kvm->created_vcpus) {
1153                 ret = -EBUSY;
1154                 goto out;
1155         }
1156         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1157         if (!proc) {
1158                 ret = -ENOMEM;
1159                 goto out;
1160         }
1161         if (!copy_from_user(proc, (void __user *)attr->addr,
1162                             sizeof(*proc))) {
1163                 kvm->arch.model.cpuid = proc->cpuid;
1164                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1165                 unblocked_ibc = sclp.ibc & 0xfff;
1166                 if (lowest_ibc && proc->ibc) {
1167                         if (proc->ibc > unblocked_ibc)
1168                                 kvm->arch.model.ibc = unblocked_ibc;
1169                         else if (proc->ibc < lowest_ibc)
1170                                 kvm->arch.model.ibc = lowest_ibc;
1171                         else
1172                                 kvm->arch.model.ibc = proc->ibc;
1173                 }
1174                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1175                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1176                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1177                          kvm->arch.model.ibc,
1178                          kvm->arch.model.cpuid);
1179                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1180                          kvm->arch.model.fac_list[0],
1181                          kvm->arch.model.fac_list[1],
1182                          kvm->arch.model.fac_list[2]);
1183         } else
1184                 ret = -EFAULT;
1185         kfree(proc);
1186 out:
1187         mutex_unlock(&kvm->lock);
1188         return ret;
1189 }
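/*
 * Note on the IBC handling above: the value requested by user space is
 * clamped to the range the machine reports via SCLP (lowest_ibc up to
 * unblocked_ibc). The IBC value essentially makes the guest CPU identify
 * and behave like an earlier machine generation, so values outside the
 * supported window are silently adjusted rather than rejected.
 */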
1190
1191 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1192                                        struct kvm_device_attr *attr)
1193 {
1194         struct kvm_s390_vm_cpu_feat data;
1195
1196         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1197                 return -EFAULT;
1198         if (!bitmap_subset((unsigned long *) data.feat,
1199                            kvm_s390_available_cpu_feat,
1200                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1201                 return -EINVAL;
1202
1203         mutex_lock(&kvm->lock);
1204         if (kvm->created_vcpus) {
1205                 mutex_unlock(&kvm->lock);
1206                 return -EBUSY;
1207         }
1208         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1209                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1210         mutex_unlock(&kvm->lock);
1211         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1212                          data.feat[0],
1213                          data.feat[1],
1214                          data.feat[2]);
1215         return 0;
1216 }
1217
1218 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1219                                           struct kvm_device_attr *attr)
1220 {
1221         /*
1222          * Once supported by kernel + hw, we have to store the subfunctions
1223          * in kvm->arch and remember that user space configured them.
1224          */
1225         return -ENXIO;
1226 }
1227
1228 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230         int ret = -ENXIO;
1231
1232         switch (attr->attr) {
1233         case KVM_S390_VM_CPU_PROCESSOR:
1234                 ret = kvm_s390_set_processor(kvm, attr);
1235                 break;
1236         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1237                 ret = kvm_s390_set_processor_feat(kvm, attr);
1238                 break;
1239         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1240                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1241                 break;
1242         }
1243         return ret;
1244 }
1245
1246 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247 {
1248         struct kvm_s390_vm_cpu_processor *proc;
1249         int ret = 0;
1250
1251         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1252         if (!proc) {
1253                 ret = -ENOMEM;
1254                 goto out;
1255         }
1256         proc->cpuid = kvm->arch.model.cpuid;
1257         proc->ibc = kvm->arch.model.ibc;
1258         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1259                S390_ARCH_FAC_LIST_SIZE_BYTE);
1260         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1261                  kvm->arch.model.ibc,
1262                  kvm->arch.model.cpuid);
1263         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1264                  kvm->arch.model.fac_list[0],
1265                  kvm->arch.model.fac_list[1],
1266                  kvm->arch.model.fac_list[2]);
1267         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1268                 ret = -EFAULT;
1269         kfree(proc);
1270 out:
1271         return ret;
1272 }
1273
1274 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1275 {
1276         struct kvm_s390_vm_cpu_machine *mach;
1277         int ret = 0;
1278
1279         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1280         if (!mach) {
1281                 ret = -ENOMEM;
1282                 goto out;
1283         }
1284         get_cpu_id((struct cpuid *) &mach->cpuid);
1285         mach->ibc = sclp.ibc;
1286         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1287                S390_ARCH_FAC_LIST_SIZE_BYTE);
1288         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1289                sizeof(S390_lowcore.stfle_fac_list));
1290         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1291                  kvm->arch.model.ibc,
1292                  kvm->arch.model.cpuid);
1293         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1294                  mach->fac_mask[0],
1295                  mach->fac_mask[1],
1296                  mach->fac_mask[2]);
1297         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1298                  mach->fac_list[0],
1299                  mach->fac_list[1],
1300                  mach->fac_list[2]);
1301         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1302                 ret = -EFAULT;
1303         kfree(mach);
1304 out:
1305         return ret;
1306 }
1307
1308 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1309                                        struct kvm_device_attr *attr)
1310 {
1311         struct kvm_s390_vm_cpu_feat data;
1312
1313         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1314                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1315         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1316                 return -EFAULT;
1317         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1318                          data.feat[0],
1319                          data.feat[1],
1320                          data.feat[2]);
1321         return 0;
1322 }
1323
1324 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1325                                      struct kvm_device_attr *attr)
1326 {
1327         struct kvm_s390_vm_cpu_feat data;
1328
1329         bitmap_copy((unsigned long *) data.feat,
1330                     kvm_s390_available_cpu_feat,
1331                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1332         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1333                 return -EFAULT;
1334         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1335                          data.feat[0],
1336                          data.feat[1],
1337                          data.feat[2]);
1338         return 0;
1339 }
1340
1341 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1342                                           struct kvm_device_attr *attr)
1343 {
1344         /*
1345          * Once we can actually configure subfunctions (kernel + hw support),
1346          * we have to check if they were already set by user space, if so copy
1347          * them from kvm->arch.
1348          */
1349         return -ENXIO;
1350 }
1351
1352 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1353                                         struct kvm_device_attr *attr)
1354 {
1355         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1356             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1357                 return -EFAULT;
1358         return 0;
1359 }
1360 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1361 {
1362         int ret = -ENXIO;
1363
1364         switch (attr->attr) {
1365         case KVM_S390_VM_CPU_PROCESSOR:
1366                 ret = kvm_s390_get_processor(kvm, attr);
1367                 break;
1368         case KVM_S390_VM_CPU_MACHINE:
1369                 ret = kvm_s390_get_machine(kvm, attr);
1370                 break;
1371         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1372                 ret = kvm_s390_get_processor_feat(kvm, attr);
1373                 break;
1374         case KVM_S390_VM_CPU_MACHINE_FEAT:
1375                 ret = kvm_s390_get_machine_feat(kvm, attr);
1376                 break;
1377         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1378                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1379                 break;
1380         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1381                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1382                 break;
1383         }
1384         return ret;
1385 }
1386
1387 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1388 {
1389         int ret;
1390
1391         switch (attr->group) {
1392         case KVM_S390_VM_MEM_CTRL:
1393                 ret = kvm_s390_set_mem_control(kvm, attr);
1394                 break;
1395         case KVM_S390_VM_TOD:
1396                 ret = kvm_s390_set_tod(kvm, attr);
1397                 break;
1398         case KVM_S390_VM_CPU_MODEL:
1399                 ret = kvm_s390_set_cpu_model(kvm, attr);
1400                 break;
1401         case KVM_S390_VM_CRYPTO:
1402                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1403                 break;
1404         case KVM_S390_VM_MIGRATION:
1405                 ret = kvm_s390_vm_set_migration(kvm, attr);
1406                 break;
1407         default:
1408                 ret = -ENXIO;
1409                 break;
1410         }
1411
1412         return ret;
1413 }
1414
1415 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1416 {
1417         int ret;
1418
1419         switch (attr->group) {
1420         case KVM_S390_VM_MEM_CTRL:
1421                 ret = kvm_s390_get_mem_control(kvm, attr);
1422                 break;
1423         case KVM_S390_VM_TOD:
1424                 ret = kvm_s390_get_tod(kvm, attr);
1425                 break;
1426         case KVM_S390_VM_CPU_MODEL:
1427                 ret = kvm_s390_get_cpu_model(kvm, attr);
1428                 break;
1429         case KVM_S390_VM_MIGRATION:
1430                 ret = kvm_s390_vm_get_migration(kvm, attr);
1431                 break;
1432         default:
1433                 ret = -ENXIO;
1434                 break;
1435         }
1436
1437         return ret;
1438 }
1439
1440 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442         int ret;
1443
1444         switch (attr->group) {
1445         case KVM_S390_VM_MEM_CTRL:
1446                 switch (attr->attr) {
1447                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1448                 case KVM_S390_VM_MEM_CLR_CMMA:
1449                         ret = sclp.has_cmma ? 0 : -ENXIO;
1450                         break;
1451                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1452                         ret = 0;
1453                         break;
1454                 default:
1455                         ret = -ENXIO;
1456                         break;
1457                 }
1458                 break;
1459         case KVM_S390_VM_TOD:
1460                 switch (attr->attr) {
1461                 case KVM_S390_VM_TOD_LOW:
1462                 case KVM_S390_VM_TOD_HIGH:
1463                         ret = 0;
1464                         break;
1465                 default:
1466                         ret = -ENXIO;
1467                         break;
1468                 }
1469                 break;
1470         case KVM_S390_VM_CPU_MODEL:
1471                 switch (attr->attr) {
1472                 case KVM_S390_VM_CPU_PROCESSOR:
1473                 case KVM_S390_VM_CPU_MACHINE:
1474                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1475                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1476                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1477                         ret = 0;
1478                         break;
1479                 /* configuring subfunctions is not supported yet */
1480                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1481                 default:
1482                         ret = -ENXIO;
1483                         break;
1484                 }
1485                 break;
1486         case KVM_S390_VM_CRYPTO:
1487                 switch (attr->attr) {
1488                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1489                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1490                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1491                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1492                         ret = 0;
1493                         break;
1494                 default:
1495                         ret = -ENXIO;
1496                         break;
1497                 }
1498                 break;
1499         case KVM_S390_VM_MIGRATION:
1500                 ret = 0;
1501                 break;
1502         default:
1503                 ret = -ENXIO;
1504                 break;
1505         }
1506
1507         return ret;
1508 }
1509
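/*
 * Read the guest storage keys for args->count frames starting at
 * args->start_gfn and copy them to the user buffer at args->skeydata_addr.
 * Returns KVM_S390_GET_SKEYS_NONE when the guest does not use storage keys.
 */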
1510 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1511 {
1512         uint8_t *keys;
1513         uint64_t hva;
1514         int srcu_idx, i, r = 0;
1515
1516         if (args->flags != 0)
1517                 return -EINVAL;
1518
1519         /* Is this guest using storage keys? */
1520         if (!mm_uses_skeys(current->mm))
1521                 return KVM_S390_GET_SKEYS_NONE;
1522
1523         /* Enforce sane limit on memory allocation */
1524         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1525                 return -EINVAL;
1526
1527         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1528         if (!keys)
1529                 return -ENOMEM;
1530
1531         down_read(&current->mm->mmap_sem);
1532         srcu_idx = srcu_read_lock(&kvm->srcu);
1533         for (i = 0; i < args->count; i++) {
1534                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1535                 if (kvm_is_error_hva(hva)) {
1536                         r = -EFAULT;
1537                         break;
1538                 }
1539
1540                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1541                 if (r)
1542                         break;
1543         }
1544         srcu_read_unlock(&kvm->srcu, srcu_idx);
1545         up_read(&current->mm->mmap_sem);
1546
1547         if (!r) {
1548                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1549                                  sizeof(uint8_t) * args->count);
1550                 if (r)
1551                         r = -EFAULT;
1552         }
1553
1554         kvfree(keys);
1555         return r;
1556 }
1557
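/*
 * Write the guest storage keys for args->count frames starting at
 * args->start_gfn from the user buffer at args->skeydata_addr, enabling
 * storage key handling for the guest (s390_enable_skey) on first use.
 */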
1558 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1559 {
1560         uint8_t *keys;
1561         uint64_t hva;
1562         int srcu_idx, i, r = 0;
1563         bool unlocked;
1564
1565         if (args->flags != 0)
1566                 return -EINVAL;
1567
1568         /* Enforce sane limit on memory allocation */
1569         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1570                 return -EINVAL;
1571
1572         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1573         if (!keys)
1574                 return -ENOMEM;
1575
1576         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1577                            sizeof(uint8_t) * args->count);
1578         if (r) {
1579                 r = -EFAULT;
1580                 goto out;
1581         }
1582
1583         /* Enable storage key handling for the guest */
1584         r = s390_enable_skey();
1585         if (r)
1586                 goto out;
1587
1588         i = 0;
1589         down_read(&current->mm->mmap_sem);
1590         srcu_idx = srcu_read_lock(&kvm->srcu);
1591         while (i < args->count) {
1592                 unlocked = false;
1593                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1594                 if (kvm_is_error_hva(hva)) {
1595                         r = -EFAULT;
1596                         break;
1597                 }
1598
1599                 /* Lowest order bit is reserved */
1600                 if (keys[i] & 0x01) {
1601                         r = -EINVAL;
1602                         break;
1603                 }
1604
1605                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1606                 if (r) {
1607                         r = fixup_user_fault(current, current->mm, hva,
1608                                              FAULT_FLAG_WRITE, &unlocked);
1609                         if (r)
1610                                 break;
1611                 }
1612                 if (!r)
1613                         i++;
1614         }
1615         srcu_read_unlock(&kvm->srcu, srcu_idx);
1616         up_read(&current->mm->mmap_sem);
1617 out:
1618         kvfree(keys);
1619         return r;
1620 }
1621
1622 /*
1623  * Base address and length must be sent at the start of each block, therefore
1624  * it's cheaper to send some clean data, as long as it's less than the size of
1625  * two longs.
1626  */
1627 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
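/*
 * On s390 (64-bit only) this evaluates to 16, i.e. a gap of up to 16 clean
 * pages is still reported inline rather than ending the current block.
 */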
1628 /* use the same limit as for storage keys, for consistency */
1629 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1630
1631 /*
1632  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1633  * address falls in a hole. In that case the index of one of the memslots
1634  * bordering the hole is returned.
1635  */
1636 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1637 {
1638         int start = 0, end = slots->used_slots;
1639         int slot = atomic_read(&slots->lru_slot);
1640         struct kvm_memory_slot *memslots = slots->memslots;
1641
1642         if (gfn >= memslots[slot].base_gfn &&
1643             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1644                 return slot;
1645
1646         while (start < end) {
1647                 slot = start + (end - start) / 2;
1648
1649                 if (gfn >= memslots[slot].base_gfn)
1650                         end = slot;
1651                 else
1652                         start = slot + 1;
1653         }
1654
1655         if (gfn >= memslots[start].base_gfn &&
1656             gfn < memslots[start].base_gfn + memslots[start].npages) {
1657                 atomic_set(&slots->lru_slot, start);
1658         }
1659
1660         return start;
1661 }
1662
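/*
 * Peek mode: read the CMMA attribute bits (from the PGSTEs) of up to bufsize
 * consecutive frames starting at args->start_gfn, without looking at or
 * clearing the CMMA dirty bitmap.
 */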
1663 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1664                               u8 *res, unsigned long bufsize)
1665 {
1666         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1667
1668         args->count = 0;
1669         while (args->count < bufsize) {
1670                 hva = gfn_to_hva(kvm, cur_gfn);
1671                 /*
1672                  * We return an error if the first value was invalid, but we
1673                  * return successfully if at least one value was copied.
1674                  */
1675                 if (kvm_is_error_hva(hva))
1676                         return args->count ? 0 : -EFAULT;
1677                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1678                         pgstev = 0;
1679                 res[args->count++] = (pgstev >> 24) & 0x43;
1680                 cur_gfn++;
1681         }
1682
1683         return 0;
1684 }
1685
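/*
 * Starting at cur_gfn, return the guest frame number of the next page whose
 * bit is set in the per-memslot CMMA dirty bitmap. The scan moves upward
 * through guest memory (memslots are sorted by descending base_gfn) and wraps
 * to the lowest-address slot if cur_gfn lies above the highest memslot; if no
 * set bit is found, the end of the highest memslot (i.e. the end of guest
 * memory) is returned.
 */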
1686 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1687                                               unsigned long cur_gfn)
1688 {
1689         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1690         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1691         unsigned long ofs = cur_gfn - ms->base_gfn;
1692
1693         if (ms->base_gfn + ms->npages <= cur_gfn) {
1694                 slotidx--;
1695                 /* If we are above the highest slot, wrap around */
1696                 if (slotidx < 0)
1697                         slotidx = slots->used_slots - 1;
1698
1699                 ms = slots->memslots + slotidx;
1700                 ofs = 0;
1701         }
1702         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1703         while ((slotidx > 0) && (ofs >= ms->npages)) {
1704                 slotidx--;
1705                 ms = slots->memslots + slotidx;
1706                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1707         }
1708         return ms->base_gfn + ofs;
1709 }
1710
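/*
 * Collect CMMA attribute values starting at the first dirty page at or after
 * args->start_gfn, clearing each visited page's dirty bit as it is read.
 * The run ends at the end of the buffer, at the end of guest memory, or when
 * the next dirty page is more than KVM_S390_MAX_BIT_DISTANCE frames away.
 */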
1711 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1712                              u8 *res, unsigned long bufsize)
1713 {
1714         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1715         struct kvm_memslots *slots = kvm_memslots(kvm);
1716         struct kvm_memory_slot *ms;
1717
1718         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1719         ms = gfn_to_memslot(kvm, cur_gfn);
1720         args->count = 0;
1721         args->start_gfn = cur_gfn;
1722         if (!ms)
1723                 return 0;
1724         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1725         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1726
1727         while (args->count < bufsize) {
1728                 hva = gfn_to_hva(kvm, cur_gfn);
1729                 if (kvm_is_error_hva(hva))
1730                         return 0;
1731                 /* Decrement only if we actually flipped the bit to 0 */
1732                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1733                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1734                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1735                         pgstev = 0;
1736                 /* Save the value */
1737                 res[args->count++] = (pgstev >> 24) & 0x43;
1738                 /* If the next bit is too far away, stop. */
1739                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1740                         return 0;
1741                 /* If we reached the previous "next", find the next one */
1742                 if (cur_gfn == next_gfn)
1743                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1744                 /* Reached the end of memory or of the buffer, stop */
1745                 if ((next_gfn >= mem_end) ||
1746                     (next_gfn - args->start_gfn >= bufsize))
1747                         return 0;
1748                 cur_gfn++;
1749                 /* Reached the end of the current memslot, take the next one. */
1750                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1751                         ms = gfn_to_memslot(kvm, cur_gfn);
1752                         if (!ms)
1753                                 return 0;
1754                 }
1755         }
1756         return 0;
1757 }
1758
1759 /*
1760  * This function searches for the next page with dirty CMMA attributes, and
1761  * saves the attributes in the buffer up to either the end of the buffer or
1762  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1763  * no trailing clean bytes are saved.
1764  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1765  * output buffer will indicate 0 as length.
1766  */
1767 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1768                                   struct kvm_s390_cmma_log *args)
1769 {
1770         unsigned long bufsize;
1771         int srcu_idx, peek, ret;
1772         u8 *values;
1773
1774         if (!kvm->arch.use_cmma)
1775                 return -ENXIO;
1776         /* Invalid/unsupported flags were specified */
1777         if (args->flags & ~KVM_S390_CMMA_PEEK)
1778                 return -EINVAL;
1779         /* Migration mode query, and we are not doing a migration */
1780         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1781         if (!peek && !kvm->arch.migration_mode)
1782                 return -EINVAL;
1783         /* CMMA is disabled or was not used, or the buffer has length zero */
1784         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1785         if (!bufsize || !kvm->mm->context.uses_cmm) {
1786                 memset(args, 0, sizeof(*args));
1787                 return 0;
1788         }
1789         /* We are not peeking, and there are no dirty pages */
1790         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1791                 memset(args, 0, sizeof(*args));
1792                 return 0;
1793         }
1794
1795         values = vmalloc(bufsize);
1796         if (!values)
1797                 return -ENOMEM;
1798
1799         down_read(&kvm->mm->mmap_sem);
1800         srcu_idx = srcu_read_lock(&kvm->srcu);
1801         if (peek)
1802                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1803         else
1804                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1805         srcu_read_unlock(&kvm->srcu, srcu_idx);
1806         up_read(&kvm->mm->mmap_sem);
1807
1808         if (kvm->arch.migration_mode)
1809                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1810         else
1811                 args->remaining = 0;
1812
1813         if (copy_to_user((void __user *)args->values, values, args->count))
1814                 ret = -EFAULT;
1815
1816         vfree(values);
1817         return ret;
1818 }
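/*
 * Userspace retrieves these bits with the KVM_S390_GET_CMMA_BITS vm ioctl;
 * without KVM_S390_CMMA_PEEK in flags, migration mode must be enabled first.
 * Illustrative sketch (vm_fd, buf and buf_len are hypothetical, no error
 * handling), which on success leaves log.count attribute values in buf for
 * the pages starting at log.start_gfn:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buf_len,
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 */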
1819
1820 /*
1821  * This function sets the CMMA attributes for the given pages. If the input
1822  * buffer has zero length, no action is taken, otherwise the attributes are
1823  * set and the mm->context.uses_cmm flag is set.
1824  */
1825 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1826                                   const struct kvm_s390_cmma_log *args)
1827 {
1828         unsigned long hva, mask, pgstev, i;
1829         uint8_t *bits;
1830         int srcu_idx, r = 0;
1831
1832         mask = args->mask;
1833
1834         if (!kvm->arch.use_cmma)
1835                 return -ENXIO;
1836         /* invalid/unsupported flags */
1837         if (args->flags != 0)
1838                 return -EINVAL;
1839         /* Enforce sane limit on memory allocation */
1840         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1841                 return -EINVAL;
1842         /* Nothing to do */
1843         if (args->count == 0)
1844                 return 0;
1845
1846         bits = vmalloc(array_size(sizeof(*bits), args->count));
1847         if (!bits)
1848                 return -ENOMEM;
1849
1850         r = copy_from_user(bits, (void __user *)args->values, args->count);
1851         if (r) {
1852                 r = -EFAULT;
1853                 goto out;
1854         }
1855
1856         down_read(&kvm->mm->mmap_sem);
1857         srcu_idx = srcu_read_lock(&kvm->srcu);
1858         for (i = 0; i < args->count; i++) {
1859                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1860                 if (kvm_is_error_hva(hva)) {
1861                         r = -EFAULT;
1862                         break;
1863                 }
1864
1865                 pgstev = bits[i];
1866                 pgstev = pgstev << 24;
1867                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1868                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1869         }
1870         srcu_read_unlock(&kvm->srcu, srcu_idx);
1871         up_read(&kvm->mm->mmap_sem);
1872
1873         if (!kvm->mm->context.uses_cmm) {
1874                 down_write(&kvm->mm->mmap_sem);
1875                 kvm->mm->context.uses_cmm = 1;
1876                 up_write(&kvm->mm->mmap_sem);
1877         }
1878 out:
1879         vfree(bits);
1880         return r;
1881 }
1882
1883 long kvm_arch_vm_ioctl(struct file *filp,
1884                        unsigned int ioctl, unsigned long arg)
1885 {
1886         struct kvm *kvm = filp->private_data;
1887         void __user *argp = (void __user *)arg;
1888         struct kvm_device_attr attr;
1889         int r;
1890
1891         switch (ioctl) {
1892         case KVM_S390_INTERRUPT: {
1893                 struct kvm_s390_interrupt s390int;
1894
1895                 r = -EFAULT;
1896                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1897                         break;
1898                 r = kvm_s390_inject_vm(kvm, &s390int);
1899                 break;
1900         }
1901         case KVM_ENABLE_CAP: {
1902                 struct kvm_enable_cap cap;
1903                 r = -EFAULT;
1904                 if (copy_from_user(&cap, argp, sizeof(cap)))
1905                         break;
1906                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1907                 break;
1908         }
1909         case KVM_CREATE_IRQCHIP: {
1910                 struct kvm_irq_routing_entry routing;
1911
1912                 r = -EINVAL;
1913                 if (kvm->arch.use_irqchip) {
1914                         /* Set up dummy routing. */
1915                         memset(&routing, 0, sizeof(routing));
1916                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1917                 }
1918                 break;
1919         }
1920         case KVM_SET_DEVICE_ATTR: {
1921                 r = -EFAULT;
1922                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1923                         break;
1924                 r = kvm_s390_vm_set_attr(kvm, &attr);
1925                 break;
1926         }
1927         case KVM_GET_DEVICE_ATTR: {
1928                 r = -EFAULT;
1929                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1930                         break;
1931                 r = kvm_s390_vm_get_attr(kvm, &attr);
1932                 break;
1933         }
1934         case KVM_HAS_DEVICE_ATTR: {
1935                 r = -EFAULT;
1936                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1937                         break;
1938                 r = kvm_s390_vm_has_attr(kvm, &attr);
1939                 break;
1940         }
1941         case KVM_S390_GET_SKEYS: {
1942                 struct kvm_s390_skeys args;
1943
1944                 r = -EFAULT;
1945                 if (copy_from_user(&args, argp,
1946                                    sizeof(struct kvm_s390_skeys)))
1947                         break;
1948                 r = kvm_s390_get_skeys(kvm, &args);
1949                 break;
1950         }
1951         case KVM_S390_SET_SKEYS: {
1952                 struct kvm_s390_skeys args;
1953
1954                 r = -EFAULT;
1955                 if (copy_from_user(&args, argp,
1956                                    sizeof(struct kvm_s390_skeys)))
1957                         break;
1958                 r = kvm_s390_set_skeys(kvm, &args);
1959                 break;
1960         }
1961         case KVM_S390_GET_CMMA_BITS: {
1962                 struct kvm_s390_cmma_log args;
1963
1964                 r = -EFAULT;
1965                 if (copy_from_user(&args, argp, sizeof(args)))
1966                         break;
1967                 mutex_lock(&kvm->slots_lock);
1968                 r = kvm_s390_get_cmma_bits(kvm, &args);
1969                 mutex_unlock(&kvm->slots_lock);
1970                 if (!r) {
1971                         r = copy_to_user(argp, &args, sizeof(args));
1972                         if (r)
1973                                 r = -EFAULT;
1974                 }
1975                 break;
1976         }
1977         case KVM_S390_SET_CMMA_BITS: {
1978                 struct kvm_s390_cmma_log args;
1979
1980                 r = -EFAULT;
1981                 if (copy_from_user(&args, argp, sizeof(args)))
1982                         break;
1983                 mutex_lock(&kvm->slots_lock);
1984                 r = kvm_s390_set_cmma_bits(kvm, &args);
1985                 mutex_unlock(&kvm->slots_lock);
1986                 break;
1987         }
1988         default:
1989                 r = -ENOTTY;
1990         }
1991
1992         return r;
1993 }
1994
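/*
 * Query the AP (adjunct processor) configuration via the PQAP(QCI)
 * instruction, storing the result in the 128-byte buffer at @config, and
 * return the resulting condition code.
 */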
1995 static int kvm_s390_query_ap_config(u8 *config)
1996 {
1997         u32 fcn_code = 0x04000000UL;
1998         u32 cc = 0;
1999
2000         memset(config, 0, 128);
2001         asm volatile(
2002                 "lgr 0,%1\n"
2003                 "lgr 2,%2\n"
2004                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
2005                 "0: ipm %0\n"
2006                 "srl %0,28\n"
2007                 "1:\n"
2008                 EX_TABLE(0b, 1b)
2009                 : "+r" (cc)
2010                 : "r" (fcn_code), "r" (config)
2011                 : "cc", "0", "2", "memory"
2012         );
2013
2014         return cc;
2015 }
2016
2017 static int kvm_s390_apxa_installed(void)
2018 {
2019         u8 config[128];
2020         int cc;
2021
2022         if (test_facility(12)) {
2023                 cc = kvm_s390_query_ap_config(config);
2024
2025                 if (cc)
2026                         pr_err("PQAP(QCI) failed with cc=%d", cc);
2027                 else
2028                         return config[0] & 0x40;
2029         }
2030
2031         return 0;
2032 }
2033
2034 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2035 {
2036         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2037
2038         if (kvm_s390_apxa_installed())
2039                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2040         else
2041                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2042 }
2043
2044 static u64 kvm_s390_get_initial_cpuid(void)
2045 {
2046         struct cpuid cpuid;
2047
2048         get_cpu_id(&cpuid);
2049         cpuid.version = 0xff;
2050         return *((u64 *) &cpuid);
2051 }
2052
2053 static void kvm_s390_crypto_init(struct kvm *kvm)
2054 {
2055         if (!test_kvm_facility(kvm, 76))
2056                 return;
2057
2058         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2059         kvm_s390_set_crycb_format(kvm);
2060
2061         /* Enable AES/DEA protected key functions by default */
2062         kvm->arch.crypto.aes_kw = 1;
2063         kvm->arch.crypto.dea_kw = 1;
2064         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2065                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2066         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2067                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2068 }
2069
2070 static void sca_dispose(struct kvm *kvm)
2071 {
2072         if (kvm->arch.use_esca)
2073                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2074         else
2075                 free_page((unsigned long)(kvm->arch.sca));
2076         kvm->arch.sca = NULL;
2077 }
2078
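/*
 * VM creation: enable SIE for the host mm, allocate the (basic) SCA, the
 * debug feature and the sie_page2 holding the facility lists, initialize the
 * cpu model, crypto and floating interrupt state, and create the guest
 * address space (gmap) unless this is a ucontrol VM.
 */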
2079 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2080 {
2081         gfp_t alloc_flags = GFP_KERNEL;
2082         int i, rc;
2083         char debug_name[16];
2084         static unsigned long sca_offset;
2085
2086         rc = -EINVAL;
2087 #ifdef CONFIG_KVM_S390_UCONTROL
2088         if (type & ~KVM_VM_S390_UCONTROL)
2089                 goto out_err;
2090         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2091                 goto out_err;
2092 #else
2093         if (type)
2094                 goto out_err;
2095 #endif
2096
2097         rc = s390_enable_sie();
2098         if (rc)
2099                 goto out_err;
2100
2101         rc = -ENOMEM;
2102
2103         if (!sclp.has_64bscao)
2104                 alloc_flags |= GFP_DMA;
2105         rwlock_init(&kvm->arch.sca_lock);
2106         /* start with basic SCA */
2107         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2108         if (!kvm->arch.sca)
2109                 goto out_err;
2110         spin_lock(&kvm_lock);
2111         sca_offset += 16;
2112         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2113                 sca_offset = 0;
2114         kvm->arch.sca = (struct bsca_block *)
2115                         ((char *) kvm->arch.sca + sca_offset);
2116         spin_unlock(&kvm_lock);
2117
2118         sprintf(debug_name, "kvm-%u", current->pid);
2119
2120         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2121         if (!kvm->arch.dbf)
2122                 goto out_err;
2123
2124         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2125         kvm->arch.sie_page2 =
2126              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2127         if (!kvm->arch.sie_page2)
2128                 goto out_err;
2129
2130         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2131
2132         for (i = 0; i < kvm_s390_fac_size(); i++) {
2133                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2134                                               (kvm_s390_fac_base[i] |
2135                                                kvm_s390_fac_ext[i]);
2136                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2137                                               kvm_s390_fac_base[i];
2138         }
2139
2140         /* we are always in czam mode - even on pre z14 machines */
2141         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2142         set_kvm_facility(kvm->arch.model.fac_list, 138);
2143         /* we emulate STHYI in kvm */
2144         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2145         set_kvm_facility(kvm->arch.model.fac_list, 74);
2146         if (MACHINE_HAS_TLB_GUEST) {
2147                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2148                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2149         }
2150
2151         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2152         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2153
2154         kvm_s390_crypto_init(kvm);
2155
2156         mutex_init(&kvm->arch.float_int.ais_lock);
2157         spin_lock_init(&kvm->arch.float_int.lock);
2158         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2159                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2160         init_waitqueue_head(&kvm->arch.ipte_wq);
2161         mutex_init(&kvm->arch.ipte_mutex);
2162
2163         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2164         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2165
2166         if (type & KVM_VM_S390_UCONTROL) {
2167                 kvm->arch.gmap = NULL;
2168                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2169         } else {
2170                 if (sclp.hamax == U64_MAX)
2171                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2172                 else
2173                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2174                                                     sclp.hamax + 1);
2175                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2176                 if (!kvm->arch.gmap)
2177                         goto out_err;
2178                 kvm->arch.gmap->private = kvm;
2179                 kvm->arch.gmap->pfault_enabled = 0;
2180         }
2181
2182         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2183         kvm->arch.use_skf = sclp.has_skey;
2184         spin_lock_init(&kvm->arch.start_stop_lock);
2185         kvm_s390_vsie_init(kvm);
2186         kvm_s390_gisa_init(kvm);
2187         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2188
2189         return 0;
2190 out_err:
2191         free_page((unsigned long)kvm->arch.sie_page2);
2192         debug_unregister(kvm->arch.dbf);
2193         sca_dispose(kvm);
2194         KVM_EVENT(3, "creation of vm failed: %d", rc);
2195         return rc;
2196 }
2197
2198 bool kvm_arch_has_vcpu_debugfs(void)
2199 {
2200         return false;
2201 }
2202
2203 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2204 {
2205         return 0;
2206 }
2207
2208 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2209 {
2210         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2211         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2212         kvm_s390_clear_local_irqs(vcpu);
2213         kvm_clear_async_pf_completion_queue(vcpu);
2214         if (!kvm_is_ucontrol(vcpu->kvm))
2215                 sca_del_vcpu(vcpu);
2216
2217         if (kvm_is_ucontrol(vcpu->kvm))
2218                 gmap_remove(vcpu->arch.gmap);
2219
2220         if (vcpu->kvm->arch.use_cmma)
2221                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2222         free_page((unsigned long)(vcpu->arch.sie_block));
2223
2224         kvm_vcpu_uninit(vcpu);
2225         kmem_cache_free(kvm_vcpu_cache, vcpu);
2226 }
2227
2228 static void kvm_free_vcpus(struct kvm *kvm)
2229 {
2230         unsigned int i;
2231         struct kvm_vcpu *vcpu;
2232
2233         kvm_for_each_vcpu(i, vcpu, kvm)
2234                 kvm_arch_vcpu_destroy(vcpu);
2235
2236         mutex_lock(&kvm->lock);
2237         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2238                 kvm->vcpus[i] = NULL;
2239
2240         atomic_set(&kvm->online_vcpus, 0);
2241         mutex_unlock(&kvm->lock);
2242 }
2243
2244 void kvm_arch_destroy_vm(struct kvm *kvm)
2245 {
2246         kvm_free_vcpus(kvm);
2247         sca_dispose(kvm);
2248         debug_unregister(kvm->arch.dbf);
2249         kvm_s390_gisa_destroy(kvm);
2250         free_page((unsigned long)kvm->arch.sie_page2);
2251         if (!kvm_is_ucontrol(kvm))
2252                 gmap_remove(kvm->arch.gmap);
2253         kvm_s390_destroy_adapters(kvm);
2254         kvm_s390_clear_float_irqs(kvm);
2255         kvm_s390_vsie_destroy(kvm);
2256         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2257 }
2258
2259 /* Section: vcpu related */
2260 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2261 {
2262         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2263         if (!vcpu->arch.gmap)
2264                 return -ENOMEM;
2265         vcpu->arch.gmap->private = vcpu->kvm;
2266
2267         return 0;
2268 }
2269
2270 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2271 {
2272         if (!kvm_s390_use_sca_entries())
2273                 return;
2274         read_lock(&vcpu->kvm->arch.sca_lock);
2275         if (vcpu->kvm->arch.use_esca) {
2276                 struct esca_block *sca = vcpu->kvm->arch.sca;
2277
2278                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2279                 sca->cpu[vcpu->vcpu_id].sda = 0;
2280         } else {
2281                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2282
2283                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2284                 sca->cpu[vcpu->vcpu_id].sda = 0;
2285         }
2286         read_unlock(&vcpu->kvm->arch.sca_lock);
2287 }
2288
2289 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2290 {
2291         if (!kvm_s390_use_sca_entries()) {
2292                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2293
2294                 /* we still need the basic sca for the ipte control */
2295                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2296                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2297                 return;
2298         }
2299         read_lock(&vcpu->kvm->arch.sca_lock);
2300         if (vcpu->kvm->arch.use_esca) {
2301                 struct esca_block *sca = vcpu->kvm->arch.sca;
2302
2303                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2304                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2305                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2306                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2307                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2308         } else {
2309                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2310
2311                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2312                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2313                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2314                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2315         }
2316         read_unlock(&vcpu->kvm->arch.sca_lock);
2317 }
2318
2319 /* Basic SCA to Extended SCA data copy routines */
2320 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2321 {
2322         d->sda = s->sda;
2323         d->sigp_ctrl.c = s->sigp_ctrl.c;
2324         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2325 }
2326
2327 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2328 {
2329         int i;
2330
2331         d->ipte_control = s->ipte_control;
2332         d->mcn[0] = s->mcn;
2333         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2334                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2335 }
2336
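/*
 * Replace the basic SCA with an extended SCA: allocate and zero the new
 * block, copy the existing entries over, and repoint every vcpu's SIE block
 * at the new origin while all vcpus are blocked. The old basic SCA page is
 * freed afterwards.
 */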
2337 static int sca_switch_to_extended(struct kvm *kvm)
2338 {
2339         struct bsca_block *old_sca = kvm->arch.sca;
2340         struct esca_block *new_sca;
2341         struct kvm_vcpu *vcpu;
2342         unsigned int vcpu_idx;
2343         u32 scaol, scaoh;
2344
2345         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2346         if (!new_sca)
2347                 return -ENOMEM;
2348
2349         scaoh = (u32)((u64)(new_sca) >> 32);
2350         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2351
2352         kvm_s390_vcpu_block_all(kvm);
2353         write_lock(&kvm->arch.sca_lock);
2354
2355         sca_copy_b_to_e(new_sca, old_sca);
2356
2357         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2358                 vcpu->arch.sie_block->scaoh = scaoh;
2359                 vcpu->arch.sie_block->scaol = scaol;
2360                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2361         }
2362         kvm->arch.sca = new_sca;
2363         kvm->arch.use_esca = 1;
2364
2365         write_unlock(&kvm->arch.sca_lock);
2366         kvm_s390_vcpu_unblock_all(kvm);
2367
2368         free_page((unsigned long)old_sca);
2369
2370         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2371                  old_sca, kvm->arch.sca);
2372         return 0;
2373 }
2374
2375 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2376 {
2377         int rc;
2378
2379         if (!kvm_s390_use_sca_entries()) {
2380                 if (id < KVM_MAX_VCPUS)
2381                         return true;
2382                 return false;
2383         }
2384         if (id < KVM_S390_BSCA_CPU_SLOTS)
2385                 return true;
2386         if (!sclp.has_esca || !sclp.has_64bscao)
2387                 return false;
2388
2389         mutex_lock(&kvm->lock);
2390         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2391         mutex_unlock(&kvm->lock);
2392
2393         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2394 }
2395
2396 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2397 {
2398         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2399         kvm_clear_async_pf_completion_queue(vcpu);
2400         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2401                                     KVM_SYNC_GPRS |
2402                                     KVM_SYNC_ACRS |
2403                                     KVM_SYNC_CRS |
2404                                     KVM_SYNC_ARCH0 |
2405                                     KVM_SYNC_PFAULT;
2406         kvm_s390_set_prefix(vcpu, 0);
2407         if (test_kvm_facility(vcpu->kvm, 64))
2408                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2409         if (test_kvm_facility(vcpu->kvm, 82))
2410                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2411         if (test_kvm_facility(vcpu->kvm, 133))
2412                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2413         if (test_kvm_facility(vcpu->kvm, 156))
2414                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2415         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2416          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2417          */
2418         if (MACHINE_HAS_VX)
2419                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2420         else
2421                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2422
2423         if (kvm_is_ucontrol(vcpu->kvm))
2424                 return __kvm_ucontrol_vcpu_init(vcpu);
2425
2426         return 0;
2427 }
2428
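/*
 * Guest CPU timer accounting: while accounting is running, cputm_start holds
 * the TOD clock value at which it was started; stopping accounting subtracts
 * the elapsed time from the SIE block's cputm. The seqcount allows
 * kvm_s390_get_cpu_timer() to compute a consistent value from other threads
 * without taking a lock.
 */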
2429 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2430 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2431 {
2432         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2433         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2434         vcpu->arch.cputm_start = get_tod_clock_fast();
2435         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2436 }
2437
2438 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2439 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2440 {
2441         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2442         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2443         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2444         vcpu->arch.cputm_start = 0;
2445         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2446 }
2447
2448 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2449 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2450 {
2451         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2452         vcpu->arch.cputm_enabled = true;
2453         __start_cpu_timer_accounting(vcpu);
2454 }
2455
2456 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2457 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2458 {
2459         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2460         __stop_cpu_timer_accounting(vcpu);
2461         vcpu->arch.cputm_enabled = false;
2462 }
2463
2464 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2465 {
2466         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2467         __enable_cpu_timer_accounting(vcpu);
2468         preempt_enable();
2469 }
2470
2471 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2472 {
2473         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2474         __disable_cpu_timer_accounting(vcpu);
2475         preempt_enable();
2476 }
2477
2478 /* set the cpu timer - may only be called from the VCPU thread itself */
2479 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2480 {
2481         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2482         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2483         if (vcpu->arch.cputm_enabled)
2484                 vcpu->arch.cputm_start = get_tod_clock_fast();
2485         vcpu->arch.sie_block->cputm = cputm;
2486         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2487         preempt_enable();
2488 }
2489
2490 /* update and get the cpu timer - can also be called from other VCPU threads */
2491 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2492 {
2493         unsigned int seq;
2494         __u64 value;
2495
2496         if (unlikely(!vcpu->arch.cputm_enabled))
2497                 return vcpu->arch.sie_block->cputm;
2498
2499         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2500         do {
2501                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2502                 /*
2503                  * If the writer would ever execute a read in the critical
2504                  * section, e.g. in irq context, we have a deadlock.
2505                  */
2506                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2507                 value = vcpu->arch.sie_block->cputm;
2508                 /* if cputm_start is 0, accounting is being started/stopped */
2509                 if (likely(vcpu->arch.cputm_start))
2510                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2511         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2512         preempt_enable();
2513         return value;
2514 }
2515
2516 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2517 {
2518
2519         gmap_enable(vcpu->arch.enabled_gmap);
2520         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2521         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2522                 __start_cpu_timer_accounting(vcpu);
2523         vcpu->cpu = cpu;
2524 }
2525
2526 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2527 {
2528         vcpu->cpu = -1;
2529         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2530                 __stop_cpu_timer_accounting(vcpu);
2531         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2532         vcpu->arch.enabled_gmap = gmap_get_enabled();
2533         gmap_disable(vcpu->arch.enabled_gmap);
2534
2535 }
2536
2537 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2538 {
2539         /* this equals initial cpu reset in the POP, but we don't switch to ESA */
2540         vcpu->arch.sie_block->gpsw.mask = 0UL;
2541         vcpu->arch.sie_block->gpsw.addr = 0UL;
2542         kvm_s390_set_prefix(vcpu, 0);
2543         kvm_s390_set_cpu_timer(vcpu, 0);
2544         vcpu->arch.sie_block->ckc       = 0UL;
2545         vcpu->arch.sie_block->todpr     = 0;
2546         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2547         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2548                                         CR0_INTERRUPT_KEY_SUBMASK |
2549                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2550         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2551                                         CR14_UNUSED_33 |
2552                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2553         /* make sure the new fpc will be lazily loaded */
2554         save_fpu_regs();
2555         current->thread.fpu.fpc = 0;
2556         vcpu->arch.sie_block->gbea = 1;
2557         vcpu->arch.sie_block->pp = 0;
2558         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2559         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2560         kvm_clear_async_pf_completion_queue(vcpu);
2561         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2562                 kvm_s390_vcpu_stop(vcpu);
2563         kvm_s390_clear_local_irqs(vcpu);
2564 }
2565
2566 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2567 {
2568         mutex_lock(&vcpu->kvm->lock);
2569         preempt_disable();
2570         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2571         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2572         preempt_enable();
2573         mutex_unlock(&vcpu->kvm->lock);
2574         if (!kvm_is_ucontrol(vcpu->kvm)) {
2575                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2576                 sca_add_vcpu(vcpu);
2577         }
2578         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2579                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2580         /* make vcpu_load load the right gmap on the first trigger */
2581         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2582 }
2583
2584 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2585 {
2586         if (!test_kvm_facility(vcpu->kvm, 76))
2587                 return;
2588
2589         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2590
2591         if (vcpu->kvm->arch.crypto.aes_kw)
2592                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2593         if (vcpu->kvm->arch.crypto.dea_kw)
2594                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2595
2596         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2597 }
2598
2599 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2600 {
2601         free_page(vcpu->arch.sie_block->cbrlo);
2602         vcpu->arch.sie_block->cbrlo = 0;
2603 }
2604
2605 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2606 {
2607         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2608         if (!vcpu->arch.sie_block->cbrlo)
2609                 return -ENOMEM;
2610         return 0;
2611 }
2612
2613 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2614 {
2615         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2616
2617         vcpu->arch.sie_block->ibc = model->ibc;
2618         if (test_kvm_facility(vcpu->kvm, 7))
2619                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2620 }
2621
2622 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2623 {
2624         int rc = 0;
2625
2626         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2627                                                     CPUSTAT_SM |
2628                                                     CPUSTAT_STOPPED);
2629
2630         if (test_kvm_facility(vcpu->kvm, 78))
2631                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2632         else if (test_kvm_facility(vcpu->kvm, 8))
2633                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2634
2635         kvm_s390_vcpu_setup_model(vcpu);
2636
2637         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2638         if (MACHINE_HAS_ESOP)
2639                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2640         if (test_kvm_facility(vcpu->kvm, 9))
2641                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2642         if (test_kvm_facility(vcpu->kvm, 73))
2643                 vcpu->arch.sie_block->ecb |= ECB_TE;
2644
2645         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2646                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2647         if (test_kvm_facility(vcpu->kvm, 130))
2648                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2649         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2650         if (sclp.has_cei)
2651                 vcpu->arch.sie_block->eca |= ECA_CEI;
2652         if (sclp.has_ib)
2653                 vcpu->arch.sie_block->eca |= ECA_IB;
2654         if (sclp.has_siif)
2655                 vcpu->arch.sie_block->eca |= ECA_SII;
2656         if (sclp.has_sigpif)
2657                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2658         if (test_kvm_facility(vcpu->kvm, 129)) {
2659                 vcpu->arch.sie_block->eca |= ECA_VX;
2660                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2661         }
2662         if (test_kvm_facility(vcpu->kvm, 139))
2663                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2664         if (test_kvm_facility(vcpu->kvm, 156))
2665                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2666         if (vcpu->arch.sie_block->gd) {
2667                 vcpu->arch.sie_block->eca |= ECA_AIV;
2668                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2669                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2670         }
2671         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2672                                         | SDNXC;
2673         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2674
2675         if (sclp.has_kss)
2676                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2677         else
2678                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2679
2680         if (vcpu->kvm->arch.use_cmma) {
2681                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2682                 if (rc)
2683                         return rc;
2684         }
2685         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2686         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2687
2688         kvm_s390_vcpu_crypto_setup(vcpu);
2689
2690         return rc;
2691 }
2692
2693 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2694                                       unsigned int id)
2695 {
2696         struct kvm_vcpu *vcpu;
2697         struct sie_page *sie_page;
2698         int rc = -EINVAL;
2699
2700         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2701                 goto out;
2702
2703         rc = -ENOMEM;
2704
2705         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2706         if (!vcpu)
2707                 goto out;
2708
2709         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2710         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2711         if (!sie_page)
2712                 goto out_free_cpu;
2713
2714         vcpu->arch.sie_block = &sie_page->sie_block;
2715         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2716
2717         /* the real guest size will always be smaller than msl */
2718         vcpu->arch.sie_block->mso = 0;
2719         vcpu->arch.sie_block->msl = sclp.hamax;
2720
2721         vcpu->arch.sie_block->icpua = id;
2722         spin_lock_init(&vcpu->arch.local_int.lock);
2723         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2724         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2725                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2726         seqcount_init(&vcpu->arch.cputm_seqcount);
2727
2728         rc = kvm_vcpu_init(vcpu, kvm, id);
2729         if (rc)
2730                 goto out_free_sie_block;
2731         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2732                  vcpu->arch.sie_block);
2733         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2734
2735         return vcpu;
2736 out_free_sie_block:
2737         free_page((unsigned long)(vcpu->arch.sie_block));
2738 out_free_cpu:
2739         kmem_cache_free(kvm_vcpu_cache, vcpu);
2740 out:
2741         return ERR_PTR(rc);
2742 }
2743
2744 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2745 {
2746         return kvm_s390_vcpu_has_irq(vcpu, 0);
2747 }
2748
2749 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2750 {
2751         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2752 }
2753
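/*
 * kvm_s390_vcpu_block()/unblock() toggle PROG_BLOCK_SIE in prog20, and
 * kvm_s390_vcpu_request()/request_handled() toggle PROG_REQUEST; the "set"
 * variants additionally kick the vcpu out of (v)SIE via exit_sie() and wait
 * for it to leave.
 */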
2754 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2755 {
2756         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2757         exit_sie(vcpu);
2758 }
2759
2760 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2761 {
2762         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2763 }
2764
2765 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2766 {
2767         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2768         exit_sie(vcpu);
2769 }
2770
2771 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
2772 {
2773         return atomic_read(&vcpu->arch.sie_block->prog20) &
2774                (PROG_BLOCK_SIE | PROG_REQUEST);
2775 }
2776
2777 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2778 {
2779         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2780 }
2781
2782 /*
2783  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
2784  * If the CPU is not currently running in (v)SIE (e.g. it is idle), the
2785  * function returns immediately. */
2786 void exit_sie(struct kvm_vcpu *vcpu)
2787 {
2788         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2789         kvm_s390_vsie_kick(vcpu);
2790         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2791                 cpu_relax();
2792 }
2793
2794 /* Kick a guest cpu out of SIE to process a request synchronously */
2795 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2796 {
2797         kvm_make_request(req, vcpu);
2798         kvm_s390_vcpu_request(vcpu);
2799 }
2800
2801 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2802                               unsigned long end)
2803 {
2804         struct kvm *kvm = gmap->private;
2805         struct kvm_vcpu *vcpu;
2806         unsigned long prefix;
2807         int i;
2808
2809         if (gmap_is_shadow(gmap))
2810                 return;
2811         if (start >= 1UL << 31)
2812                 /* We are only interested in prefix pages */
2813                 return;
2814         kvm_for_each_vcpu(i, vcpu, kvm) {
2815                 /* match against both prefix pages */
2816                 prefix = kvm_s390_get_prefix(vcpu);
2817                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2818                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2819                                    start, end);
2820                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2821                 }
2822         }
2823 }
2824
2825 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2826 {
2827         /* kvm common code refers to this, but never calls it */
2828         BUG();
2829         return 0;
2830 }
2831
2832 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2833                                            struct kvm_one_reg *reg)
2834 {
2835         int r = -EINVAL;
2836
2837         switch (reg->id) {
2838         case KVM_REG_S390_TODPR:
2839                 r = put_user(vcpu->arch.sie_block->todpr,
2840                              (u32 __user *)reg->addr);
2841                 break;
2842         case KVM_REG_S390_EPOCHDIFF:
2843                 r = put_user(vcpu->arch.sie_block->epoch,
2844                              (u64 __user *)reg->addr);
2845                 break;
2846         case KVM_REG_S390_CPU_TIMER:
2847                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2848                              (u64 __user *)reg->addr);
2849                 break;
2850         case KVM_REG_S390_CLOCK_COMP:
2851                 r = put_user(vcpu->arch.sie_block->ckc,
2852                              (u64 __user *)reg->addr);
2853                 break;
2854         case KVM_REG_S390_PFTOKEN:
2855                 r = put_user(vcpu->arch.pfault_token,
2856                              (u64 __user *)reg->addr);
2857                 break;
2858         case KVM_REG_S390_PFCOMPARE:
2859                 r = put_user(vcpu->arch.pfault_compare,
2860                              (u64 __user *)reg->addr);
2861                 break;
2862         case KVM_REG_S390_PFSELECT:
2863                 r = put_user(vcpu->arch.pfault_select,
2864                              (u64 __user *)reg->addr);
2865                 break;
2866         case KVM_REG_S390_PP:
2867                 r = put_user(vcpu->arch.sie_block->pp,
2868                              (u64 __user *)reg->addr);
2869                 break;
2870         case KVM_REG_S390_GBEA:
2871                 r = put_user(vcpu->arch.sie_block->gbea,
2872                              (u64 __user *)reg->addr);
2873                 break;
2874         default:
2875                 break;
2876         }
2877
2878         return r;
2879 }
2880
2881 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2882                                            struct kvm_one_reg *reg)
2883 {
2884         int r = -EINVAL;
2885         __u64 val;
2886
2887         switch (reg->id) {
2888         case KVM_REG_S390_TODPR:
2889                 r = get_user(vcpu->arch.sie_block->todpr,
2890                              (u32 __user *)reg->addr);
2891                 break;
2892         case KVM_REG_S390_EPOCHDIFF:
2893                 r = get_user(vcpu->arch.sie_block->epoch,
2894                              (u64 __user *)reg->addr);
2895                 break;
2896         case KVM_REG_S390_CPU_TIMER:
2897                 r = get_user(val, (u64 __user *)reg->addr);
2898                 if (!r)
2899                         kvm_s390_set_cpu_timer(vcpu, val);
2900                 break;
2901         case KVM_REG_S390_CLOCK_COMP:
2902                 r = get_user(vcpu->arch.sie_block->ckc,
2903                              (u64 __user *)reg->addr);
2904                 break;
2905         case KVM_REG_S390_PFTOKEN:
2906                 r = get_user(vcpu->arch.pfault_token,
2907                              (u64 __user *)reg->addr);
2908                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2909                         kvm_clear_async_pf_completion_queue(vcpu);
2910                 break;
2911         case KVM_REG_S390_PFCOMPARE:
2912                 r = get_user(vcpu->arch.pfault_compare,
2913                              (u64 __user *)reg->addr);
2914                 break;
2915         case KVM_REG_S390_PFSELECT:
2916                 r = get_user(vcpu->arch.pfault_select,
2917                              (u64 __user *)reg->addr);
2918                 break;
2919         case KVM_REG_S390_PP:
2920                 r = get_user(vcpu->arch.sie_block->pp,
2921                              (u64 __user *)reg->addr);
2922                 break;
2923         case KVM_REG_S390_GBEA:
2924                 r = get_user(vcpu->arch.sie_block->gbea,
2925                              (u64 __user *)reg->addr);
2926                 break;
2927         default:
2928                 break;
2929         }
2930
2931         return r;
2932 }
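
/*
 * Illustrative userspace sketch for the one-reg interface above (a sketch,
 * assuming "vcpu_fd" is an open vCPU file descriptor; the name is
 * hypothetical):
 *
 *	__u64 cputm = 0;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *
 * On success cputm holds the guest CPU timer. KVM_SET_ONE_REG works the same
 * way in the other direction and is routed to
 * kvm_arch_vcpu_ioctl_set_one_reg() above.
 */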
2933
2934 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2935 {
2936         kvm_s390_vcpu_initial_reset(vcpu);
2937         return 0;
2938 }
2939
2940 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2941 {
2942         vcpu_load(vcpu);
2943         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2944         vcpu_put(vcpu);
2945         return 0;
2946 }
2947
2948 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2949 {
2950         vcpu_load(vcpu);
2951         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2952         vcpu_put(vcpu);
2953         return 0;
2954 }
2955
2956 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2957                                   struct kvm_sregs *sregs)
2958 {
2959         vcpu_load(vcpu);
2960
2961         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2962         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2963
2964         vcpu_put(vcpu);
2965         return 0;
2966 }
2967
2968 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2969                                   struct kvm_sregs *sregs)
2970 {
2971         vcpu_load(vcpu);
2972
2973         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2974         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2975
2976         vcpu_put(vcpu);
2977         return 0;
2978 }
2979
2980 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2981 {
2982         int ret = 0;
2983
2984         vcpu_load(vcpu);
2985
2986         if (test_fp_ctl(fpu->fpc)) {
2987                 ret = -EINVAL;
2988                 goto out;
2989         }
2990         vcpu->run->s.regs.fpc = fpu->fpc;
2991         if (MACHINE_HAS_VX)
2992                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2993                                  (freg_t *) fpu->fprs);
2994         else
2995                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2996
2997 out:
2998         vcpu_put(vcpu);
2999         return ret;
3000 }
3001
3002 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3003 {
3004         vcpu_load(vcpu);
3005
3006         /* make sure we have the latest values */
3007         save_fpu_regs();
3008         if (MACHINE_HAS_VX)
3009                 convert_vx_to_fp((freg_t *) fpu->fprs,
3010                                  (__vector128 *) vcpu->run->s.regs.vrs);
3011         else
3012                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3013         fpu->fpc = vcpu->run->s.regs.fpc;
3014
3015         vcpu_put(vcpu);
3016         return 0;
3017 }
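
/*
 * Illustrative userspace sketch for KVM_GET_FPU/KVM_SET_FPU (assuming
 * "vcpu_fd" is an open vCPU fd; hypothetical name). With the vector facility
 * the 16 FPRs are converted from the vector registers, as done above.
 *
 *	struct kvm_fpu fpu;
 *
 *	ioctl(vcpu_fd, KVM_GET_FPU, &fpu);
 *	fpu.fpc = 0;
 *	ioctl(vcpu_fd, KVM_SET_FPU, &fpu);
 *
 * Setting an invalid FPC is rejected with -EINVAL by the handler above.
 */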
3018
3019 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3020 {
3021         int rc = 0;
3022
3023         if (!is_vcpu_stopped(vcpu))
3024                 rc = -EBUSY;
3025         else {
3026                 vcpu->run->psw_mask = psw.mask;
3027                 vcpu->run->psw_addr = psw.addr;
3028         }
3029         return rc;
3030 }
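
/*
 * Illustrative userspace sketch: KVM_S390_SET_INITIAL_PSW only succeeds while
 * the vCPU is stopped (see the -EBUSY check above). Assuming "vcpu_fd" is an
 * open vCPU fd and the guest entry point is at 0x10000 (example values):
 *
 *	struct kvm_s390_psw psw = {
 *		.mask = 0x0000000180000000ULL,
 *		.addr = 0x10000,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw);
 *
 * The mask value shown selects 64-bit addressing mode (EA and BA bits set).
 */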
3031
3032 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3033                                   struct kvm_translation *tr)
3034 {
3035         return -EINVAL; /* not implemented yet */
3036 }
3037
3038 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3039                               KVM_GUESTDBG_USE_HW_BP | \
3040                               KVM_GUESTDBG_ENABLE)
3041
3042 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3043                                         struct kvm_guest_debug *dbg)
3044 {
3045         int rc = 0;
3046
3047         vcpu_load(vcpu);
3048
3049         vcpu->guest_debug = 0;
3050         kvm_s390_clear_bp_data(vcpu);
3051
3052         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3053                 rc = -EINVAL;
3054                 goto out;
3055         }
3056         if (!sclp.has_gpere) {
3057                 rc = -EINVAL;
3058                 goto out;
3059         }
3060
3061         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3062                 vcpu->guest_debug = dbg->control;
3063                 /* enforce guest PER */
3064                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3065
3066                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3067                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3068         } else {
3069                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3070                 vcpu->arch.guestdbg.last_bp = 0;
3071         }
3072
3073         if (rc) {
3074                 vcpu->guest_debug = 0;
3075                 kvm_s390_clear_bp_data(vcpu);
3076                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3077         }
3078
3079 out:
3080         vcpu_put(vcpu);
3081         return rc;
3082 }
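
/*
 * Illustrative userspace sketch: enabling single-stepping via the guest debug
 * interface above (requires sclp.has_gpere). Assuming "vcpu_fd" is an open
 * vCPU fd:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *
 * Calling the ioctl again without KVM_GUESTDBG_ENABLE in .control disables
 * guest debugging and clears CPUSTAT_P, as handled in the else branch above.
 */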
3083
3084 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3085                                     struct kvm_mp_state *mp_state)
3086 {
3087         int ret;
3088
3089         vcpu_load(vcpu);
3090
3091         /* CHECK_STOP and LOAD are not supported yet */
3092         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3093                                       KVM_MP_STATE_OPERATING;
3094
3095         vcpu_put(vcpu);
3096         return ret;
3097 }
3098
3099 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3100                                     struct kvm_mp_state *mp_state)
3101 {
3102         int rc = 0;
3103
3104         vcpu_load(vcpu);
3105
3106         /* user space knows about this interface - let it control the state */
3107         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3108
3109         switch (mp_state->mp_state) {
3110         case KVM_MP_STATE_STOPPED:
3111                 kvm_s390_vcpu_stop(vcpu);
3112                 break;
3113         case KVM_MP_STATE_OPERATING:
3114                 kvm_s390_vcpu_start(vcpu);
3115                 break;
3116         case KVM_MP_STATE_LOAD:
3117         case KVM_MP_STATE_CHECK_STOP:
3118                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3119         default:
3120                 rc = -ENXIO;
3121         }
3122
3123         vcpu_put(vcpu);
3124         return rc;
3125 }
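
/*
 * Illustrative userspace sketch: stopping and restarting a vCPU through the
 * MP state interface above; the first call also marks the VM as using
 * user-controlled CPU state. Assuming "vcpu_fd" is an open vCPU fd:
 *
 *	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 *	st.mp_state = KVM_MP_STATE_OPERATING;
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 *
 * KVM_MP_STATE_LOAD and KVM_MP_STATE_CHECK_STOP are rejected with -ENXIO.
 */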
3126
3127 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3128 {
3129         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3130 }
3131
3132 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3133 {
3134 retry:
3135         kvm_s390_vcpu_request_handled(vcpu);
3136         if (!kvm_request_pending(vcpu))
3137                 return 0;
3138         /*
3139          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3140          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3141          * This ensures that the ipte instruction for this request has
3142          * already finished. We might race against a second unmapper that
3143          * wants to set the blocking bit. Let's just retry the request loop.
3144          */
3145         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3146                 int rc;
3147                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3148                                           kvm_s390_get_prefix(vcpu),
3149                                           PAGE_SIZE * 2, PROT_WRITE);
3150                 if (rc) {
3151                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3152                         return rc;
3153                 }
3154                 goto retry;
3155         }
3156
3157         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3158                 vcpu->arch.sie_block->ihcpu = 0xffff;
3159                 goto retry;
3160         }
3161
3162         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3163                 if (!ibs_enabled(vcpu)) {
3164                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3165                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3166                 }
3167                 goto retry;
3168         }
3169
3170         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3171                 if (ibs_enabled(vcpu)) {
3172                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3173                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3174                 }
3175                 goto retry;
3176         }
3177
3178         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3179                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3180                 goto retry;
3181         }
3182
3183         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3184                 /*
3185                  * Disable CMM virtualization; we will emulate the ESSA
3186                  * instruction manually, in order to provide additional
3187                  * functionality needed for live migration.
3188                  */
3189                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3190                 goto retry;
3191         }
3192
3193         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3194                 /*
3195                  * Re-enable CMM virtualization if CMMA is available and
3196                  * CMM has been used.
3197                  */
3198                 if ((vcpu->kvm->arch.use_cmma) &&
3199                     (vcpu->kvm->mm->context.uses_cmm))
3200                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3201                 goto retry;
3202         }
3203
3204         /* nothing to do, just clear the request */
3205         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3206
3207         return 0;
3208 }
3209
3210 void kvm_s390_set_tod_clock(struct kvm *kvm,
3211                             const struct kvm_s390_vm_tod_clock *gtod)
3212 {
3213         struct kvm_vcpu *vcpu;
3214         struct kvm_s390_tod_clock_ext htod;
3215         int i;
3216
3217         mutex_lock(&kvm->lock);
3218         preempt_disable();
3219
3220         get_tod_clock_ext((char *)&htod);
3221
3222         kvm->arch.epoch = gtod->tod - htod.tod;
3223         kvm->arch.epdx = 0;
3224         if (test_kvm_facility(kvm, 139)) {
3225                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3226                 if (kvm->arch.epoch > gtod->tod)
3227                         kvm->arch.epdx -= 1;
3228         }
3229
3230         kvm_s390_vcpu_block_all(kvm);
3231         kvm_for_each_vcpu(i, vcpu, kvm) {
3232                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3233                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3234         }
3235
3236         kvm_s390_vcpu_unblock_all(kvm);
3237         preempt_enable();
3238         mutex_unlock(&kvm->lock);
3239 }
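
/*
 * Worked example for the epoch computation above: with facility 139 the guest
 * TOD is the 128-bit pair <epoch_idx, tod> and the epoch difference is the
 * 128-bit subtraction guest - host. The low 64 bits may wrap:
 *
 *	epoch = gtod->tod - htod.tod;
 *	epdx  = gtod->epoch_idx - htod.epoch_idx;
 *	if (epoch > gtod->tod)
 *		epdx -= 1;
 *
 * E.g. gtod->tod = 0x1000 and htod.tod = 0x2000 give epoch = 0xffff...f000,
 * which is larger than gtod->tod, so a borrow is taken from epdx.
 */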
3240
3241 /**
3242  * kvm_arch_fault_in_page - fault-in guest page if necessary
3243  * @vcpu: The corresponding virtual cpu
3244  * @gpa: Guest physical address
3245  * @writable: Whether the page should be writable or not
3246  *
3247  * Make sure that a guest page has been faulted-in on the host.
3248  *
3249  * Return: Zero on success, negative error code otherwise.
3250  */
3251 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3252 {
3253         return gmap_fault(vcpu->arch.gmap, gpa,
3254                           writable ? FAULT_FLAG_WRITE : 0);
3255 }
3256
3257 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3258                                       unsigned long token)
3259 {
3260         struct kvm_s390_interrupt inti;
3261         struct kvm_s390_irq irq;
3262
3263         if (start_token) {
3264                 irq.u.ext.ext_params2 = token;
3265                 irq.type = KVM_S390_INT_PFAULT_INIT;
3266                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3267         } else {
3268                 inti.type = KVM_S390_INT_PFAULT_DONE;
3269                 inti.parm64 = token;
3270                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3271         }
3272 }
3273
3274 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3275                                      struct kvm_async_pf *work)
3276 {
3277         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3278         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3279 }
3280
3281 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3282                                  struct kvm_async_pf *work)
3283 {
3284         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3285         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3286 }
3287
3288 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3289                                struct kvm_async_pf *work)
3290 {
3291         /* s390 will always inject the page directly */
3292 }
3293
3294 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3295 {
3296         /*
3297          * s390 will always inject the page directly,
3298          * but we still want kvm_check_async_pf_completion to clean up
3299          */
3300         return true;
3301 }
3302
3303 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3304 {
3305         hva_t hva;
3306         struct kvm_arch_async_pf arch;
3307         int rc;
3308
3309         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3310                 return 0;
3311         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3312             vcpu->arch.pfault_compare)
3313                 return 0;
3314         if (psw_extint_disabled(vcpu))
3315                 return 0;
3316         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3317                 return 0;
3318         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3319                 return 0;
3320         if (!vcpu->arch.gmap->pfault_enabled)
3321                 return 0;
3322
3323         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3324         hva += current->thread.gmap_addr & ~PAGE_MASK;
3325         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3326                 return 0;
3327
3328         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3329         return rc;
3330 }
3331
3332 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3333 {
3334         int rc, cpuflags;
3335
3336         /*
3337          * On s390 notifications for arriving pages will be delivered directly
3338          * to the guest but the housekeeping for completed pfaults is
3339          * handled outside the worker.
3340          */
3341         kvm_check_async_pf_completion(vcpu);
3342
3343         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3344         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3345
3346         if (need_resched())
3347                 schedule();
3348
3349         if (test_cpu_flag(CIF_MCCK_PENDING))
3350                 s390_handle_mcck();
3351
3352         if (!kvm_is_ucontrol(vcpu->kvm)) {
3353                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3354                 if (rc)
3355                         return rc;
3356         }
3357
3358         rc = kvm_s390_handle_requests(vcpu);
3359         if (rc)
3360                 return rc;
3361
3362         if (guestdbg_enabled(vcpu)) {
3363                 kvm_s390_backup_guest_per_regs(vcpu);
3364                 kvm_s390_patch_guest_per_regs(vcpu);
3365         }
3366
3367         vcpu->arch.sie_block->icptcode = 0;
3368         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3369         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3370         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3371
3372         return 0;
3373 }
3374
3375 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3376 {
3377         struct kvm_s390_pgm_info pgm_info = {
3378                 .code = PGM_ADDRESSING,
3379         };
3380         u8 opcode, ilen;
3381         int rc;
3382
3383         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3384         trace_kvm_s390_sie_fault(vcpu);
3385
3386         /*
3387          * We want to inject an addressing exception, which is defined as a
3388          * suppressing or terminating exception. However, since we came here
3389          * by a DAT access exception, the PSW still points to the faulting
3390          * instruction since DAT exceptions are nullifying. So we've got
3391          * to look up the current opcode to get the length of the instruction
3392          * to be able to forward the PSW.
3393          */
3394         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3395         ilen = insn_length(opcode);
3396         if (rc < 0) {
3397                 return rc;
3398         } else if (rc) {
3399                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3400                  * Forward by arbitrary ilc, injection will take care of
3401                  * nullification if necessary.
3402                  */
3403                 pgm_info = vcpu->arch.pgm;
3404                 ilen = 4;
3405         }
3406         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3407         kvm_s390_forward_psw(vcpu, ilen);
3408         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3409 }
3410
3411 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3412 {
3413         struct mcck_volatile_info *mcck_info;
3414         struct sie_page *sie_page;
3415
3416         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3417                    vcpu->arch.sie_block->icptcode);
3418         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3419
3420         if (guestdbg_enabled(vcpu))
3421                 kvm_s390_restore_guest_per_regs(vcpu);
3422
3423         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3424         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3425
3426         if (exit_reason == -EINTR) {
3427                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3428                 sie_page = container_of(vcpu->arch.sie_block,
3429                                         struct sie_page, sie_block);
3430                 mcck_info = &sie_page->mcck_info;
3431                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3432                 return 0;
3433         }
3434
3435         if (vcpu->arch.sie_block->icptcode > 0) {
3436                 int rc = kvm_handle_sie_intercept(vcpu);
3437
3438                 if (rc != -EOPNOTSUPP)
3439                         return rc;
3440                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3441                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3442                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3443                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3444                 return -EREMOTE;
3445         } else if (exit_reason != -EFAULT) {
3446                 vcpu->stat.exit_null++;
3447                 return 0;
3448         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3449                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3450                 vcpu->run->s390_ucontrol.trans_exc_code =
3451                                                 current->thread.gmap_addr;
3452                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3453                 return -EREMOTE;
3454         } else if (current->thread.gmap_pfault) {
3455                 trace_kvm_s390_major_guest_pfault(vcpu);
3456                 current->thread.gmap_pfault = 0;
3457                 if (kvm_arch_setup_async_pf(vcpu))
3458                         return 0;
3459                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3460         }
3461         return vcpu_post_run_fault_in_sie(vcpu);
3462 }
3463
3464 static int __vcpu_run(struct kvm_vcpu *vcpu)
3465 {
3466         int rc, exit_reason;
3467
3468         /*
3469          * We try to hold kvm->srcu during most of vcpu_run (except when
3470          * running the guest), so that memslots (and other stuff) are protected
3471          */
3472         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3473
3474         do {
3475                 rc = vcpu_pre_run(vcpu);
3476                 if (rc)
3477                         break;
3478
3479                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3480                 /*
3481                  * As PF_VCPU will be used in the fault handler, there
3482                  * must be no uaccess between guest_enter and guest_exit.
3483                  */
3484                 local_irq_disable();
3485                 guest_enter_irqoff();
3486                 __disable_cpu_timer_accounting(vcpu);
3487                 local_irq_enable();
3488                 exit_reason = sie64a(vcpu->arch.sie_block,
3489                                      vcpu->run->s.regs.gprs);
3490                 local_irq_disable();
3491                 __enable_cpu_timer_accounting(vcpu);
3492                 guest_exit_irqoff();
3493                 local_irq_enable();
3494                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3495
3496                 rc = vcpu_post_run(vcpu, exit_reason);
3497         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3498
3499         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3500         return rc;
3501 }
3502
3503 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3504 {
3505         struct runtime_instr_cb *riccb;
3506         struct gs_cb *gscb;
3507
3508         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3509         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3510         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3511         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3512         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3513                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3514         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3515                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3516                 /* some control register changes require a tlb flush */
3517                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3518         }
3519         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3520                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3521                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3522                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3523                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3524                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3525         }
3526         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3527                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3528                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3529                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3530                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3531                         kvm_clear_async_pf_completion_queue(vcpu);
3532         }
3533         /*
3534          * If userspace sets the riccb (e.g. after migration) to a valid state,
3535          * we should enable RI here instead of doing the lazy enablement.
3536          */
3537         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3538             test_kvm_facility(vcpu->kvm, 64) &&
3539             riccb->v &&
3540             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3541                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3542                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3543         }
3544         /*
3545          * If userspace sets the gscb (e.g. after migration) to non-zero,
3546          * we should enable GS here instead of doing the lazy enablement.
3547          */
3548         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3549             test_kvm_facility(vcpu->kvm, 133) &&
3550             gscb->gssm &&
3551             !vcpu->arch.gs_enabled) {
3552                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3553                 vcpu->arch.sie_block->ecb |= ECB_GS;
3554                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3555                 vcpu->arch.gs_enabled = 1;
3556         }
3557         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3558             test_kvm_facility(vcpu->kvm, 82)) {
3559                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3560                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3561         }
3562         save_access_regs(vcpu->arch.host_acrs);
3563         restore_access_regs(vcpu->run->s.regs.acrs);
3564         /* save host (userspace) fprs/vrs */
3565         save_fpu_regs();
3566         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3567         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3568         if (MACHINE_HAS_VX)
3569                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3570         else
3571                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3572         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3573         if (test_fp_ctl(current->thread.fpu.fpc))
3574                 /* User space provided an invalid FPC, let's clear it */
3575                 current->thread.fpu.fpc = 0;
3576         if (MACHINE_HAS_GS) {
3577                 preempt_disable();
3578                 __ctl_set_bit(2, 4);
3579                 if (current->thread.gs_cb) {
3580                         vcpu->arch.host_gscb = current->thread.gs_cb;
3581                         save_gs_cb(vcpu->arch.host_gscb);
3582                 }
3583                 if (vcpu->arch.gs_enabled) {
3584                         current->thread.gs_cb = (struct gs_cb *)
3585                                                 &vcpu->run->s.regs.gscb;
3586                         restore_gs_cb(current->thread.gs_cb);
3587                 }
3588                 preempt_enable();
3589         }
3590         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3591
3592         kvm_run->kvm_dirty_regs = 0;
3593 }
3594
3595 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3596 {
3597         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3598         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3599         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3600         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3601         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3602         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3603         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3604         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3605         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3606         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3607         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3608         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3609         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3610         save_access_regs(vcpu->run->s.regs.acrs);
3611         restore_access_regs(vcpu->arch.host_acrs);
3612         /* Save guest register state */
3613         save_fpu_regs();
3614         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3615         /* Restore will be done lazily at return */
3616         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3617         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3618         if (MACHINE_HAS_GS) {
3619                 __ctl_set_bit(2, 4);
3620                 if (vcpu->arch.gs_enabled)
3621                         save_gs_cb(current->thread.gs_cb);
3622                 preempt_disable();
3623                 current->thread.gs_cb = vcpu->arch.host_gscb;
3624                 restore_gs_cb(vcpu->arch.host_gscb);
3625                 preempt_enable();
3626                 if (!vcpu->arch.host_gscb)
3627                         __ctl_clear_bit(2, 4);
3628                 vcpu->arch.host_gscb = NULL;
3629         }
3630         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3631 }
3632
3633 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3634 {
3635         int rc;
3636
3637         if (kvm_run->immediate_exit)
3638                 return -EINTR;
3639
3640         vcpu_load(vcpu);
3641
3642         if (guestdbg_exit_pending(vcpu)) {
3643                 kvm_s390_prepare_debug_exit(vcpu);
3644                 rc = 0;
3645                 goto out;
3646         }
3647
3648         kvm_sigset_activate(vcpu);
3649
3650         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3651                 kvm_s390_vcpu_start(vcpu);
3652         } else if (is_vcpu_stopped(vcpu)) {
3653                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3654                                    vcpu->vcpu_id);
3655                 rc = -EINVAL;
3656                 goto out;
3657         }
3658
3659         sync_regs(vcpu, kvm_run);
3660         enable_cpu_timer_accounting(vcpu);
3661
3662         might_fault();
3663         rc = __vcpu_run(vcpu);
3664
3665         if (signal_pending(current) && !rc) {
3666                 kvm_run->exit_reason = KVM_EXIT_INTR;
3667                 rc = -EINTR;
3668         }
3669
3670         if (guestdbg_exit_pending(vcpu) && !rc)  {
3671                 kvm_s390_prepare_debug_exit(vcpu);
3672                 rc = 0;
3673         }
3674
3675         if (rc == -EREMOTE) {
3676                 /* userspace support is needed, kvm_run has been prepared */
3677                 rc = 0;
3678         }
3679
3680         disable_cpu_timer_accounting(vcpu);
3681         store_regs(vcpu, kvm_run);
3682
3683         kvm_sigset_deactivate(vcpu);
3684
3685         vcpu->stat.exit_userspace++;
3686 out:
3687         vcpu_put(vcpu);
3688         return rc;
3689 }
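
/*
 * Illustrative userspace run loop for the ioctl above (a sketch, assuming
 * "vcpu_fd" is an open vCPU fd and "run" points to the vCPU's mmap()ed
 * struct kvm_run, sized per KVM_GET_VCPU_MMAP_SIZE):
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, NULL) >= 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC) {
 *			handle_intercept(run->s390_sieic.icptcode,
 *					 run->s390_sieic.ipa,
 *					 run->s390_sieic.ipb);
 *		}
 *	}
 *
 * handle_intercept() is a hypothetical userspace helper; the -EREMOTE returns
 * from __vcpu_run() are what surface here as KVM_EXIT_S390_SIEIC.
 */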
3690
3691 /*
3692  * store status at address
3693  * we have two special cases:
3694  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3695  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3696  */
3697 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3698 {
3699         unsigned char archmode = 1;
3700         freg_t fprs[NUM_FPRS];
3701         unsigned int px;
3702         u64 clkcomp, cputm;
3703         int rc;
3704
3705         px = kvm_s390_get_prefix(vcpu);
3706         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3707                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3708                         return -EFAULT;
3709                 gpa = 0;
3710         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3711                 if (write_guest_real(vcpu, 163, &archmode, 1))
3712                         return -EFAULT;
3713                 gpa = px;
3714         } else
3715                 gpa -= __LC_FPREGS_SAVE_AREA;
3716
3717         /* manually convert vector registers if necessary */
3718         if (MACHINE_HAS_VX) {
3719                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3720                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3721                                      fprs, 128);
3722         } else {
3723                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3724                                      vcpu->run->s.regs.fprs, 128);
3725         }
3726         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3727                               vcpu->run->s.regs.gprs, 128);
3728         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3729                               &vcpu->arch.sie_block->gpsw, 16);
3730         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3731                               &px, 4);
3732         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3733                               &vcpu->run->s.regs.fpc, 4);
3734         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3735                               &vcpu->arch.sie_block->todpr, 4);
3736         cputm = kvm_s390_get_cpu_timer(vcpu);
3737         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3738                               &cputm, 8);
3739         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3740         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3741                               &clkcomp, 8);
3742         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3743                               &vcpu->run->s.regs.acrs, 64);
3744         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3745                               &vcpu->arch.sie_block->gcr, 128);
3746         return rc ? -EFAULT : 0;
3747 }
3748
3749 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3750 {
3751         /*
3752          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3753          * switch in the run ioctl. Let's update our copies before we save
3754          * them into the save area.
3755          */
3756         save_fpu_regs();
3757         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3758         save_access_regs(vcpu->run->s.regs.acrs);
3759
3760         return kvm_s390_store_status_unloaded(vcpu, addr);
3761 }
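
/*
 * Illustrative userspace sketch: KVM_S390_STORE_STATUS takes the destination
 * address directly as the ioctl argument. Assuming "vcpu_fd" is an open vCPU
 * fd:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 *
 * stores the status at absolute address 0x1200, as described in the comment
 * above kvm_s390_store_status_unloaded().
 */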
3762
3763 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3764 {
3765         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3766         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3767 }
3768
3769 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3770 {
3771         unsigned int i;
3772         struct kvm_vcpu *vcpu;
3773
3774         kvm_for_each_vcpu(i, vcpu, kvm) {
3775                 __disable_ibs_on_vcpu(vcpu);
3776         }
3777 }
3778
3779 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3780 {
3781         if (!sclp.has_ibs)
3782                 return;
3783         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3784         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3785 }
3786
3787 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3788 {
3789         int i, online_vcpus, started_vcpus = 0;
3790
3791         if (!is_vcpu_stopped(vcpu))
3792                 return;
3793
3794         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3795         /* Only one cpu at a time may enter/leave the STOPPED state. */
3796         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3797         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3798
3799         for (i = 0; i < online_vcpus; i++) {
3800                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3801                         started_vcpus++;
3802         }
3803
3804         if (started_vcpus == 0) {
3805                 /* we're the only active VCPU -> speed it up */
3806                 __enable_ibs_on_vcpu(vcpu);
3807         } else if (started_vcpus == 1) {
3808                 /*
3809                  * As we are starting a second VCPU, we have to disable
3810                  * the IBS facility on all VCPUs to remove potentially
3811                  * outstanding ENABLE requests.
3812                  */
3813                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3814         }
3815
3816         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3817         /*
3818          * Another VCPU might have used IBS while we were offline.
3819          * Let's play safe and flush the VCPU at startup.
3820          */
3821         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3822         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3823         return;
3824 }
3825
3826 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3827 {
3828         int i, online_vcpus, started_vcpus = 0;
3829         struct kvm_vcpu *started_vcpu = NULL;
3830
3831         if (is_vcpu_stopped(vcpu))
3832                 return;
3833
3834         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3835         /* Only one cpu at a time may enter/leave the STOPPED state. */
3836         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3837         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3838
3839         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3840         kvm_s390_clear_stop_irq(vcpu);
3841
3842         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3843         __disable_ibs_on_vcpu(vcpu);
3844
3845         for (i = 0; i < online_vcpus; i++) {
3846                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3847                         started_vcpus++;
3848                         started_vcpu = vcpu->kvm->vcpus[i];
3849                 }
3850         }
3851
3852         if (started_vcpus == 1) {
3853                 /*
3854                  * As we only have one VCPU left, we want to enable the
3855                  * IBS facility for that VCPU to speed it up.
3856                  */
3857                 __enable_ibs_on_vcpu(started_vcpu);
3858         }
3859
3860         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3861         return;
3862 }
3863
3864 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3865                                      struct kvm_enable_cap *cap)
3866 {
3867         int r;
3868
3869         if (cap->flags)
3870                 return -EINVAL;
3871
3872         switch (cap->cap) {
3873         case KVM_CAP_S390_CSS_SUPPORT:
3874                 if (!vcpu->kvm->arch.css_support) {
3875                         vcpu->kvm->arch.css_support = 1;
3876                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3877                         trace_kvm_s390_enable_css(vcpu->kvm);
3878                 }
3879                 r = 0;
3880                 break;
3881         default:
3882                 r = -EINVAL;
3883                 break;
3884         }
3885         return r;
3886 }
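
/*
 * Illustrative userspace sketch: enabling CSS support through this per-vCPU
 * capability (the flag itself is per-VM). Assuming "vcpu_fd" is an open vCPU
 * fd:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 *
 * A non-zero .flags value is rejected with -EINVAL.
 */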
3887
3888 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3889                                   struct kvm_s390_mem_op *mop)
3890 {
3891         void __user *uaddr = (void __user *)mop->buf;
3892         void *tmpbuf = NULL;
3893         int r, srcu_idx;
3894         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3895                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3896
3897         if (mop->flags & ~supported_flags)
3898                 return -EINVAL;
3899
3900         if (mop->size > MEM_OP_MAX_SIZE)
3901                 return -E2BIG;
3902
3903         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3904                 tmpbuf = vmalloc(mop->size);
3905                 if (!tmpbuf)
3906                         return -ENOMEM;
3907         }
3908
3909         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3910
3911         switch (mop->op) {
3912         case KVM_S390_MEMOP_LOGICAL_READ:
3913                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3914                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3915                                             mop->size, GACC_FETCH);
3916                         break;
3917                 }
3918                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3919                 if (r == 0) {
3920                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3921                                 r = -EFAULT;
3922                 }
3923                 break;
3924         case KVM_S390_MEMOP_LOGICAL_WRITE:
3925                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3926                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3927                                             mop->size, GACC_STORE);
3928                         break;
3929                 }
3930                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3931                         r = -EFAULT;
3932                         break;
3933                 }
3934                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3935                 break;
3936         default:
3937                 r = -EINVAL;
3938         }
3939
3940         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3941
3942         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3943                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3944
3945         vfree(tmpbuf);
3946         return r;
3947 }
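
/*
 * Illustrative userspace sketch for KVM_S390_MEM_OP (assuming "vcpu_fd" is an
 * open vCPU fd and "buf" is a userspace buffer of at least 4096 bytes):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = 4096,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY in .flags only the access check is
 * performed and no data is copied, as handled above.
 */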
3948
3949 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3950                                unsigned int ioctl, unsigned long arg)
3951 {
3952         struct kvm_vcpu *vcpu = filp->private_data;
3953         void __user *argp = (void __user *)arg;
3954
3955         switch (ioctl) {
3956         case KVM_S390_IRQ: {
3957                 struct kvm_s390_irq s390irq;
3958
3959                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3960                         return -EFAULT;
3961                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3962         }
3963         case KVM_S390_INTERRUPT: {
3964                 struct kvm_s390_interrupt s390int;
3965                 struct kvm_s390_irq s390irq;
3966
3967                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3968                         return -EFAULT;
3969                 if (s390int_to_s390irq(&s390int, &s390irq))
3970                         return -EINVAL;
3971                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3972         }
3973         }
3974         return -ENOIOCTLCMD;
3975 }
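
/*
 * Illustrative userspace sketch: injecting an emergency-signal interrupt via
 * the asynchronous KVM_S390_IRQ ioctl handled above. Assuming "vcpu_fd" is
 * the fd of the target vCPU and 1 is the CPU address of the signalling vCPU
 * (example value):
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = 1,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */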
3976
3977 long kvm_arch_vcpu_ioctl(struct file *filp,
3978                          unsigned int ioctl, unsigned long arg)
3979 {
3980         struct kvm_vcpu *vcpu = filp->private_data;
3981         void __user *argp = (void __user *)arg;
3982         int idx;
3983         long r;
3984
3985         vcpu_load(vcpu);
3986
3987         switch (ioctl) {
3988         case KVM_S390_STORE_STATUS:
3989                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3990                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3991                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3992                 break;
3993         case KVM_S390_SET_INITIAL_PSW: {
3994                 psw_t psw;
3995
3996                 r = -EFAULT;
3997                 if (copy_from_user(&psw, argp, sizeof(psw)))
3998                         break;
3999                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4000                 break;
4001         }
4002         case KVM_S390_INITIAL_RESET:
4003                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4004                 break;
4005         case KVM_SET_ONE_REG:
4006         case KVM_GET_ONE_REG: {
4007                 struct kvm_one_reg reg;
4008                 r = -EFAULT;
4009                 if (copy_from_user(&reg, argp, sizeof(reg)))
4010                         break;
4011                 if (ioctl == KVM_SET_ONE_REG)
4012                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4013                 else
4014                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4015                 break;
4016         }
4017 #ifdef CONFIG_KVM_S390_UCONTROL
4018         case KVM_S390_UCAS_MAP: {
4019                 struct kvm_s390_ucas_mapping ucasmap;
4020
4021                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4022                         r = -EFAULT;
4023                         break;
4024                 }
4025
4026                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4027                         r = -EINVAL;
4028                         break;
4029                 }
4030
4031                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4032                                      ucasmap.vcpu_addr, ucasmap.length);
4033                 break;
4034         }
4035         case KVM_S390_UCAS_UNMAP: {
4036                 struct kvm_s390_ucas_mapping ucasmap;
4037
4038                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4039                         r = -EFAULT;
4040                         break;
4041                 }
4042
4043                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4044                         r = -EINVAL;
4045                         break;
4046                 }
4047
4048                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4049                         ucasmap.length);
4050                 break;
4051         }
4052 #endif
4053         case KVM_S390_VCPU_FAULT: {
4054                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4055                 break;
4056         }
4057         case KVM_ENABLE_CAP:
4058         {
4059                 struct kvm_enable_cap cap;
4060                 r = -EFAULT;
4061                 if (copy_from_user(&cap, argp, sizeof(cap)))
4062                         break;
4063                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4064                 break;
4065         }
4066         case KVM_S390_MEM_OP: {
4067                 struct kvm_s390_mem_op mem_op;
4068
4069                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4070                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4071                 else
4072                         r = -EFAULT;
4073                 break;
4074         }
4075         case KVM_S390_SET_IRQ_STATE: {
4076                 struct kvm_s390_irq_state irq_state;
4077
4078                 r = -EFAULT;
4079                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4080                         break;
4081                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4082                     irq_state.len == 0 ||
4083                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4084                         r = -EINVAL;
4085                         break;
4086                 }
4087                 /* do not use irq_state.flags, it will break old QEMUs */
4088                 r = kvm_s390_set_irq_state(vcpu,
4089                                            (void __user *) irq_state.buf,
4090                                            irq_state.len);
4091                 break;
4092         }
4093         case KVM_S390_GET_IRQ_STATE: {
4094                 struct kvm_s390_irq_state irq_state;
4095
4096                 r = -EFAULT;
4097                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4098                         break;
4099                 if (irq_state.len == 0) {
4100                         r = -EINVAL;
4101                         break;
4102                 }
4103                 /* do not use irq_state.flags, it will break old QEMUs */
4104                 r = kvm_s390_get_irq_state(vcpu,
4105                                            (__u8 __user *)  irq_state.buf,
4106                                            irq_state.len);
4107                 break;
4108         }
4109         default:
4110                 r = -ENOTTY;
4111         }
4112
4113         vcpu_put(vcpu);
4114         return r;
4115 }
4116
4117 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4118 {
4119 #ifdef CONFIG_KVM_S390_UCONTROL
4120         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4121                  && (kvm_is_ucontrol(vcpu->kvm))) {
4122                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4123                 get_page(vmf->page);
4124                 return 0;
4125         }
4126 #endif
4127         return VM_FAULT_SIGBUS;
4128 }
4129
4130 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4131                             unsigned long npages)
4132 {
4133         return 0;
4134 }
4135
4136 /* Section: memory related */
4137 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4138                                    struct kvm_memory_slot *memslot,
4139                                    const struct kvm_userspace_memory_region *mem,
4140                                    enum kvm_mr_change change)
4141 {
4142         /* A few sanity checks. Memory slots have to start and end on a
4143            segment boundary (1 MB). The memory in userland may be fragmented
4144            into various different vmas. It is okay to mmap() and munmap()
4145            in this slot at any time after this call. */
4146
4147         if (mem->userspace_addr & 0xffffful)
4148                 return -EINVAL;
4149
4150         if (mem->memory_size & 0xffffful)
4151                 return -EINVAL;
4152
4153         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4154                 return -EINVAL;
4155
4156         return 0;
4157 }
4158
4159 void kvm_arch_commit_memory_region(struct kvm *kvm,
4160                                 const struct kvm_userspace_memory_region *mem,
4161                                 const struct kvm_memory_slot *old,
4162                                 const struct kvm_memory_slot *new,
4163                                 enum kvm_mr_change change)
4164 {
4165         int rc;
4166
4167         /* If the basics of the memslot do not change, we do not want
4168          * to update the gmap. Every update causes several unnecessary
4169          * segment translation exceptions. This is usually handled just
4170          * fine by the normal fault handler + gmap, but it will also
4171          * cause faults on the prefix page of running guest CPUs.
4172          */
4173         if (old->userspace_addr == mem->userspace_addr &&
4174             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4175             old->npages * PAGE_SIZE == mem->memory_size)
4176                 return;
4177
4178         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4179                 mem->guest_phys_addr, mem->memory_size);
4180         if (rc)
4181                 pr_warn("failed to commit memory region\n");
4182         return;
4183 }
4184
4185 static inline unsigned long nonhyp_mask(int i)
4186 {
4187         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4188
4189         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4190 }
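
/*
 * Worked example for nonhyp_mask() above: sclp.hmfai holds one 2-bit field per
 * facility-list word, and (hmfai << i * 2) >> 30 extracts the field for word
 * i. A field value of 0 returns 0x0000ffffffffffff, 1 returns
 * 0x00000000ffffffff, 2 returns 0x000000000000ffff and 3 returns 0; the larger
 * the value, the fewer host facility bits are mirrored into kvm_s390_fac_base
 * by kvm_s390_init() below.
 */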
4191
4192 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4193 {
4194         vcpu->valid_wakeup = false;
4195 }
4196
4197 static int __init kvm_s390_init(void)
4198 {
4199         int i;
4200
4201         if (!sclp.has_sief2) {
4202                 pr_info("SIE not available\n");
4203                 return -ENODEV;
4204         }
4205
4206         if (nested && hpage) {
4207                 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
4208                 return -EINVAL;
4209         }
4210
4211         for (i = 0; i < 16; i++)
4212                 kvm_s390_fac_base[i] |=
4213                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4214
4215         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4216 }
4217
4218 static void __exit kvm_s390_exit(void)
4219 {
4220         kvm_exit();
4221 }
4222
4223 module_init(kvm_s390_init);
4224 module_exit(kvm_s390_exit);
4225
4226 /*
4227  * Enable autoloading of the kvm module.
4228  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4229  * since x86 takes a different approach.
4230  */
4231 #include <linux/miscdevice.h>
4232 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4233 MODULE_ALIAS("devname:kvm");