1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
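/*
 * Each entry below maps a debugfs file name to a counter offset in the
 * per-VCPU or per-VM statistics (via the VCPU_STAT/VM_STAT helpers above).
 */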
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
84         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
85         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
87         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
89         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92         { "deliver_program", VCPU_STAT(deliver_program) },
93         { "deliver_io", VCPU_STAT(deliver_io) },
94         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
96         { "inject_ckc", VCPU_STAT(inject_ckc) },
97         { "inject_cputm", VCPU_STAT(inject_cputm) },
98         { "inject_external_call", VCPU_STAT(inject_external_call) },
99         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
100         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101         { "inject_io", VM_STAT(inject_io) },
102         { "inject_mchk", VCPU_STAT(inject_mchk) },
103         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
104         { "inject_program", VCPU_STAT(inject_program) },
105         { "inject_restart", VCPU_STAT(inject_restart) },
106         { "inject_service_signal", VM_STAT(inject_service_signal) },
107         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110         { "inject_virtio", VM_STAT(inject_virtio) },
111         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
112         { "instruction_gs", VCPU_STAT(instruction_gs) },
113         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
114         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
118         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
119         { "instruction_sck", VCPU_STAT(instruction_sck) },
120         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121         { "instruction_spx", VCPU_STAT(instruction_spx) },
122         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
123         { "instruction_stap", VCPU_STAT(instruction_stap) },
124         { "instruction_iske", VCPU_STAT(instruction_iske) },
125         { "instruction_ri", VCPU_STAT(instruction_ri) },
126         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127         { "instruction_sske", VCPU_STAT(instruction_sske) },
128         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129         { "instruction_essa", VCPU_STAT(instruction_essa) },
130         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
131         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
132         { "instruction_tb", VCPU_STAT(instruction_tb) },
133         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
134         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
135         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
136         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137         { "instruction_sie", VCPU_STAT(instruction_sie) },
138         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
155         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
156         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
158         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
159         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
160         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
161         { NULL }
162 };
163
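/*
 * Format of the extended TOD clock as stored by STORE CLOCK EXTENDED:
 * one epoch index byte followed by the 64-bit TOD value used by KVM.
 */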
164 struct kvm_s390_tod_clock_ext {
165         __u8 epoch_idx;
166         __u64 tod;
167         __u8 reserved[7];
168 } __packed;
169
170 /* allow nested virtualization in KVM (if enabled by user space) */
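/* off by default; typically enabled via the kvm.nested=1 module parameter */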
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174
175
176 /*
177  * For now we handle at most 16 double words as this is what the s390 base
178  * kernel handles and stores in the prefix page. If we ever need to go beyond
179  * this, it requires changes to the code, but the external uapi can stay.
180  */
181 #define SIZE_INTERNAL 16
182
183 /*
184  * Base feature mask that defines the default mask for facilities. Consists of the
185  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
186  */
187 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
188 /*
189  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
190  * and defines the facilities that can be enabled via a cpu model.
191  */
192 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
193
194 static unsigned long kvm_s390_fac_size(void)
195 {
196         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
197         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
198         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
199                 sizeof(S390_lowcore.stfle_fac_list));
200
201         return SIZE_INTERNAL;
202 }
203
204 /* available cpu features supported by kvm */
205 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
206 /* available subfunctions indicated via query / "test bit" */
207 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
208
209 static struct gmap_notifier gmap_notifier;
210 static struct gmap_notifier vsie_gmap_notifier;
211 debug_info_t *kvm_s390_dbf;
212
213 /* Section: not file related */
214 int kvm_arch_hardware_enable(void)
215 {
216         /* every s390 is virtualization enabled ;-) */
217         return 0;
218 }
219
220 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
221                               unsigned long end);
222
223 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
224 {
225         u8 delta_idx = 0;
226
227         /*
228          * The TOD jumps by delta; we have to compensate for this by adding
229          * -delta to the epoch.
230          */
231         delta = -delta;
232
233         /* sign-extension - we're adding to signed values below */
234         if ((s64)delta < 0)
235                 delta_idx = -1;
236
237         scb->epoch += delta;
238         if (scb->ecd & ECD_MEF) {
239                 scb->epdx += delta_idx;
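                /* an unsigned wrap of the epoch addition above carries into the epoch index */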
240                 if (scb->epoch < delta)
241                         scb->epdx += 1;
242         }
243 }
244
245 /*
246  * This callback is executed during stop_machine(). All CPUs are therefore
247  * temporarily stopped. In order not to change guest behavior, we have to
248  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
249  * so a CPU won't be stopped while calculating with the epoch.
250  */
251 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
252                           void *v)
253 {
254         struct kvm *kvm;
255         struct kvm_vcpu *vcpu;
256         int i;
257         unsigned long long *delta = v;
258
259         list_for_each_entry(kvm, &vm_list, vm_list) {
260                 kvm_for_each_vcpu(i, vcpu, kvm) {
261                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
262                         if (i == 0) {
263                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
264                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
265                         }
266                         if (vcpu->arch.cputm_enabled)
267                                 vcpu->arch.cputm_start += *delta;
268                         if (vcpu->arch.vsie_block)
269                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
270                                                    *delta);
271                 }
272         }
273         return NOTIFY_OK;
274 }
275
276 static struct notifier_block kvm_clock_notifier = {
277         .notifier_call = kvm_clock_sync,
278 };
279
280 int kvm_arch_hardware_setup(void)
281 {
282         gmap_notifier.notifier_call = kvm_gmap_notifier;
283         gmap_register_pte_notifier(&gmap_notifier);
284         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
285         gmap_register_pte_notifier(&vsie_gmap_notifier);
286         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
287                                        &kvm_clock_notifier);
288         return 0;
289 }
290
291 void kvm_arch_hardware_unsetup(void)
292 {
293         gmap_unregister_pte_notifier(&gmap_notifier);
294         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
295         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
296                                          &kvm_clock_notifier);
297 }
298
299 static void allow_cpu_feat(unsigned long nr)
300 {
301         set_bit_inv(nr, kvm_s390_available_cpu_feat);
302 }
303
304 static inline int plo_test_bit(unsigned char nr)
305 {
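        /* 0x100 sets the "test bit" flag, so PLO only reports whether function code nr exists */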
306         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
307         int cc;
308
309         asm volatile(
310                 /* Parameter registers are ignored for "test bit" */
311                 "       plo     0,0,0,0(0)\n"
312                 "       ipm     %0\n"
313                 "       srl     %0,28\n"
314                 : "=d" (cc)
315                 : "d" (r0)
316                 : "cc");
317         return cc == 0;
318 }
319
320 static void kvm_s390_cpu_feat_init(void)
321 {
322         int i;
323
324         for (i = 0; i < 256; ++i) {
325                 if (plo_test_bit(i))
326                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
327         }
328
329         if (test_facility(28)) /* TOD-clock steering */
330                 ptff(kvm_s390_available_subfunc.ptff,
331                      sizeof(kvm_s390_available_subfunc.ptff),
332                      PTFF_QAF);
333
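        /*
         * Query the installed subfunctions of the CPACF crypto facilities; the
         * resulting masks are exposed to user space via the CPU model attributes.
         */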
334         if (test_facility(17)) { /* MSA */
335                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
336                               kvm_s390_available_subfunc.kmac);
337                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
338                               kvm_s390_available_subfunc.kmc);
339                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
340                               kvm_s390_available_subfunc.km);
341                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
342                               kvm_s390_available_subfunc.kimd);
343                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
344                               kvm_s390_available_subfunc.klmd);
345         }
346         if (test_facility(76)) /* MSA3 */
347                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
348                               kvm_s390_available_subfunc.pckmo);
349         if (test_facility(77)) { /* MSA4 */
350                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
351                               kvm_s390_available_subfunc.kmctr);
352                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
353                               kvm_s390_available_subfunc.kmf);
354                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
355                               kvm_s390_available_subfunc.kmo);
356                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
357                               kvm_s390_available_subfunc.pcc);
358         }
359         if (test_facility(57)) /* MSA5 */
360                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
361                               kvm_s390_available_subfunc.ppno);
362
363         if (test_facility(146)) /* MSA8 */
364                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
365                               kvm_s390_available_subfunc.kma);
366
367         if (MACHINE_HAS_ESOP)
368                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
369         /*
370          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
371          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
372          */
373         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
374             !test_facility(3) || !nested)
375                 return;
376         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
377         if (sclp.has_64bscao)
378                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
379         if (sclp.has_siif)
380                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
381         if (sclp.has_gpere)
382                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
383         if (sclp.has_gsls)
384                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
385         if (sclp.has_ib)
386                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
387         if (sclp.has_cei)
388                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
389         if (sclp.has_ibs)
390                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
391         if (sclp.has_kss)
392                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
393         /*
394          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
395          * all skey handling functions read/set the skey from the PGSTE
396          * instead of the real storage key.
397          *
398          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
399          * pages being detected as preserved although they are resident.
400          *
401          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
402          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
403          *
404          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
405          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
406          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
407          *
408          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
409          * cannot easily shadow the SCA because of the ipte lock.
410          */
411 }
412
413 int kvm_arch_init(void *opaque)
414 {
415         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
416         if (!kvm_s390_dbf)
417                 return -ENOMEM;
418
419         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
420                 debug_unregister(kvm_s390_dbf);
421                 return -ENOMEM;
422         }
423
424         kvm_s390_cpu_feat_init();
425
426         /* Register floating interrupt controller interface. */
427         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
428 }
429
430 void kvm_arch_exit(void)
431 {
432         debug_unregister(kvm_s390_dbf);
433 }
434
435 /* Section: device related */
436 long kvm_arch_dev_ioctl(struct file *filp,
437                         unsigned int ioctl, unsigned long arg)
438 {
439         if (ioctl == KVM_S390_ENABLE_SIE)
440                 return s390_enable_sie();
441         return -EINVAL;
442 }
443
444 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
445 {
446         int r;
447
448         switch (ext) {
449         case KVM_CAP_S390_PSW:
450         case KVM_CAP_S390_GMAP:
451         case KVM_CAP_SYNC_MMU:
452 #ifdef CONFIG_KVM_S390_UCONTROL
453         case KVM_CAP_S390_UCONTROL:
454 #endif
455         case KVM_CAP_ASYNC_PF:
456         case KVM_CAP_SYNC_REGS:
457         case KVM_CAP_ONE_REG:
458         case KVM_CAP_ENABLE_CAP:
459         case KVM_CAP_S390_CSS_SUPPORT:
460         case KVM_CAP_IOEVENTFD:
461         case KVM_CAP_DEVICE_CTRL:
462         case KVM_CAP_ENABLE_CAP_VM:
463         case KVM_CAP_S390_IRQCHIP:
464         case KVM_CAP_VM_ATTRIBUTES:
465         case KVM_CAP_MP_STATE:
466         case KVM_CAP_IMMEDIATE_EXIT:
467         case KVM_CAP_S390_INJECT_IRQ:
468         case KVM_CAP_S390_USER_SIGP:
469         case KVM_CAP_S390_USER_STSI:
470         case KVM_CAP_S390_SKEYS:
471         case KVM_CAP_S390_IRQ_STATE:
472         case KVM_CAP_S390_USER_INSTR0:
473         case KVM_CAP_S390_CMMA_MIGRATION:
474         case KVM_CAP_S390_AIS:
475         case KVM_CAP_S390_AIS_MIGRATION:
476                 r = 1;
477                 break;
478         case KVM_CAP_S390_MEM_OP:
479                 r = MEM_OP_MAX_SIZE;
480                 break;
481         case KVM_CAP_NR_VCPUS:
482         case KVM_CAP_MAX_VCPUS:
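                /*
                 * The possible number of VCPUs depends on the SCA format: the
                 * basic SCA has fewer slots than the extended SCA (ESCA);
                 * without SCA entries the generic KVM limit applies.
                 */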
483                 r = KVM_S390_BSCA_CPU_SLOTS;
484                 if (!kvm_s390_use_sca_entries())
485                         r = KVM_MAX_VCPUS;
486                 else if (sclp.has_esca && sclp.has_64bscao)
487                         r = KVM_S390_ESCA_CPU_SLOTS;
488                 break;
489         case KVM_CAP_NR_MEMSLOTS:
490                 r = KVM_USER_MEM_SLOTS;
491                 break;
492         case KVM_CAP_S390_COW:
493                 r = MACHINE_HAS_ESOP;
494                 break;
495         case KVM_CAP_S390_VECTOR_REGISTERS:
496                 r = MACHINE_HAS_VX;
497                 break;
498         case KVM_CAP_S390_RI:
499                 r = test_facility(64);
500                 break;
501         case KVM_CAP_S390_GS:
502                 r = test_facility(133);
503                 break;
504         case KVM_CAP_S390_BPB:
505                 r = test_facility(82);
506                 break;
507         default:
508                 r = 0;
509         }
510         return r;
511 }
512
513 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
514                                         struct kvm_memory_slot *memslot)
515 {
516         gfn_t cur_gfn, last_gfn;
517         unsigned long address;
518         struct gmap *gmap = kvm->arch.gmap;
519
520         /* Loop over all guest pages */
521         last_gfn = memslot->base_gfn + memslot->npages;
522         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
523                 address = gfn_to_hva_memslot(memslot, cur_gfn);
524
525                 if (test_and_clear_guest_dirty(gmap->mm, address))
526                         mark_page_dirty(kvm, cur_gfn);
527                 if (fatal_signal_pending(current))
528                         return;
529                 cond_resched();
530         }
531 }
532
533 /* Section: vm related */
534 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
535
536 /*
537  * Get (and clear) the dirty memory log for a memory slot.
538  */
539 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
540                                struct kvm_dirty_log *log)
541 {
542         int r;
543         unsigned long n;
544         struct kvm_memslots *slots;
545         struct kvm_memory_slot *memslot;
546         int is_dirty = 0;
547
548         if (kvm_is_ucontrol(kvm))
549                 return -EINVAL;
550
551         mutex_lock(&kvm->slots_lock);
552
553         r = -EINVAL;
554         if (log->slot >= KVM_USER_MEM_SLOTS)
555                 goto out;
556
557         slots = kvm_memslots(kvm);
558         memslot = id_to_memslot(slots, log->slot);
559         r = -ENOENT;
560         if (!memslot->dirty_bitmap)
561                 goto out;
562
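        /* harvest dirty bits from the guest mapping before handing the log to user space */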
563         kvm_s390_sync_dirty_log(kvm, memslot);
564         r = kvm_get_dirty_log(kvm, log, &is_dirty);
565         if (r)
566                 goto out;
567
568         /* Clear the dirty log */
569         if (is_dirty) {
570                 n = kvm_dirty_bitmap_bytes(memslot);
571                 memset(memslot->dirty_bitmap, 0, n);
572         }
573         r = 0;
574 out:
575         mutex_unlock(&kvm->slots_lock);
576         return r;
577 }
578
579 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
580 {
581         unsigned int i;
582         struct kvm_vcpu *vcpu;
583
584         kvm_for_each_vcpu(i, vcpu, kvm) {
585                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
586         }
587 }
588
589 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
590 {
591         int r;
592
593         if (cap->flags)
594                 return -EINVAL;
595
596         switch (cap->cap) {
597         case KVM_CAP_S390_IRQCHIP:
598                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
599                 kvm->arch.use_irqchip = 1;
600                 r = 0;
601                 break;
602         case KVM_CAP_S390_USER_SIGP:
603                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
604                 kvm->arch.user_sigp = 1;
605                 r = 0;
606                 break;
607         case KVM_CAP_S390_VECTOR_REGISTERS:
608                 mutex_lock(&kvm->lock);
609                 if (kvm->created_vcpus) {
610                         r = -EBUSY;
611                 } else if (MACHINE_HAS_VX) {
612                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
613                         set_kvm_facility(kvm->arch.model.fac_list, 129);
614                         if (test_facility(134)) {
615                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
616                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
617                         }
618                         if (test_facility(135)) {
619                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
620                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
621                         }
622                         r = 0;
623                 } else
624                         r = -EINVAL;
625                 mutex_unlock(&kvm->lock);
626                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
627                          r ? "(not available)" : "(success)");
628                 break;
629         case KVM_CAP_S390_RI:
630                 r = -EINVAL;
631                 mutex_lock(&kvm->lock);
632                 if (kvm->created_vcpus) {
633                         r = -EBUSY;
634                 } else if (test_facility(64)) {
635                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
636                         set_kvm_facility(kvm->arch.model.fac_list, 64);
637                         r = 0;
638                 }
639                 mutex_unlock(&kvm->lock);
640                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
641                          r ? "(not available)" : "(success)");
642                 break;
643         case KVM_CAP_S390_AIS:
644                 mutex_lock(&kvm->lock);
645                 if (kvm->created_vcpus) {
646                         r = -EBUSY;
647                 } else {
648                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
649                         set_kvm_facility(kvm->arch.model.fac_list, 72);
650                         r = 0;
651                 }
652                 mutex_unlock(&kvm->lock);
653                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
654                          r ? "(not available)" : "(success)");
655                 break;
656         case KVM_CAP_S390_GS:
657                 r = -EINVAL;
658                 mutex_lock(&kvm->lock);
659                 if (kvm->created_vcpus) {
660                         r = -EBUSY;
661                 } else if (test_facility(133)) {
662                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
663                         set_kvm_facility(kvm->arch.model.fac_list, 133);
664                         r = 0;
665                 }
666                 mutex_unlock(&kvm->lock);
667                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
668                          r ? "(not available)" : "(success)");
669                 break;
670         case KVM_CAP_S390_USER_STSI:
671                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
672                 kvm->arch.user_stsi = 1;
673                 r = 0;
674                 break;
675         case KVM_CAP_S390_USER_INSTR0:
676                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
677                 kvm->arch.user_instr0 = 1;
678                 icpt_operexc_on_all_vcpus(kvm);
679                 r = 0;
680                 break;
681         default:
682                 r = -EINVAL;
683                 break;
684         }
685         return r;
686 }
687
688 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
689 {
690         int ret;
691
692         switch (attr->attr) {
693         case KVM_S390_VM_MEM_LIMIT_SIZE:
694                 ret = 0;
695                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
696                          kvm->arch.mem_limit);
697                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
698                         ret = -EFAULT;
699                 break;
700         default:
701                 ret = -ENXIO;
702                 break;
703         }
704         return ret;
705 }
706
707 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
708 {
709         int ret;
710         unsigned int idx;
711         switch (attr->attr) {
712         case KVM_S390_VM_MEM_ENABLE_CMMA:
713                 ret = -ENXIO;
714                 if (!sclp.has_cmma)
715                         break;
716
717                 ret = -EBUSY;
718                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
719                 mutex_lock(&kvm->lock);
720                 if (!kvm->created_vcpus) {
721                         kvm->arch.use_cmma = 1;
722                         /* Not compatible with cmma. */
723                         kvm->arch.use_pfmfi = 0;
724                         ret = 0;
725                 }
726                 mutex_unlock(&kvm->lock);
727                 break;
728         case KVM_S390_VM_MEM_CLR_CMMA:
729                 ret = -ENXIO;
730                 if (!sclp.has_cmma)
731                         break;
732                 ret = -EINVAL;
733                 if (!kvm->arch.use_cmma)
734                         break;
735
736                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
737                 mutex_lock(&kvm->lock);
738                 idx = srcu_read_lock(&kvm->srcu);
739                 s390_reset_cmma(kvm->arch.gmap->mm);
740                 srcu_read_unlock(&kvm->srcu, idx);
741                 mutex_unlock(&kvm->lock);
742                 ret = 0;
743                 break;
744         case KVM_S390_VM_MEM_LIMIT_SIZE: {
745                 unsigned long new_limit;
746
747                 if (kvm_is_ucontrol(kvm))
748                         return -EINVAL;
749
750                 if (get_user(new_limit, (u64 __user *)attr->addr))
751                         return -EFAULT;
752
753                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
754                     new_limit > kvm->arch.mem_limit)
755                         return -E2BIG;
756
757                 if (!new_limit)
758                         return -EINVAL;
759
760                 /* gmap_create takes last usable address */
761                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
762                         new_limit -= 1;
763
764                 ret = -EBUSY;
765                 mutex_lock(&kvm->lock);
766                 if (!kvm->created_vcpus) {
767                         /* gmap_create will round the limit up */
768                         struct gmap *new = gmap_create(current->mm, new_limit);
769
770                         if (!new) {
771                                 ret = -ENOMEM;
772                         } else {
773                                 gmap_remove(kvm->arch.gmap);
774                                 new->private = kvm;
775                                 kvm->arch.gmap = new;
776                                 ret = 0;
777                         }
778                 }
779                 mutex_unlock(&kvm->lock);
780                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
781                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
782                          (void *) kvm->arch.gmap->asce);
783                 break;
784         }
785         default:
786                 ret = -ENXIO;
787                 break;
788         }
789         return ret;
790 }
791
792 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
793
794 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
795 {
796         struct kvm_vcpu *vcpu;
797         int i;
798
799         kvm_s390_vcpu_block_all(kvm);
800
801         kvm_for_each_vcpu(i, vcpu, kvm)
802                 kvm_s390_vcpu_crypto_setup(vcpu);
803
804         kvm_s390_vcpu_unblock_all(kvm);
805 }
806
807 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
808 {
809         if (!test_kvm_facility(kvm, 76))
810                 return -EINVAL;
811
812         mutex_lock(&kvm->lock);
813         switch (attr->attr) {
814         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
815                 get_random_bytes(
816                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
817                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
818                 kvm->arch.crypto.aes_kw = 1;
819                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
820                 break;
821         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
822                 get_random_bytes(
823                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
824                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
825                 kvm->arch.crypto.dea_kw = 1;
826                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
827                 break;
828         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
829                 kvm->arch.crypto.aes_kw = 0;
830                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
831                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
832                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
833                 break;
834         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
835                 kvm->arch.crypto.dea_kw = 0;
836                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
837                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
838                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
839                 break;
840         default:
841                 mutex_unlock(&kvm->lock);
842                 return -ENXIO;
843         }
844
845         kvm_s390_vcpu_crypto_reset_all(kvm);
846         mutex_unlock(&kvm->lock);
847         return 0;
848 }
849
850 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
851 {
852         int cx;
853         struct kvm_vcpu *vcpu;
854
855         kvm_for_each_vcpu(cx, vcpu, kvm)
856                 kvm_s390_sync_request(req, vcpu);
857 }
858
859 /*
860  * Must be called with kvm->srcu held to avoid races on memslots, and with
861  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
862  */
863 static int kvm_s390_vm_start_migration(struct kvm *kvm)
864 {
865         struct kvm_memory_slot *ms;
866         struct kvm_memslots *slots;
867         unsigned long ram_pages = 0;
868         int slotnr;
869
870         /* migration mode already enabled */
871         if (kvm->arch.migration_mode)
872                 return 0;
873         slots = kvm_memslots(kvm);
874         if (!slots || !slots->used_slots)
875                 return -EINVAL;
876
877         if (!kvm->arch.use_cmma) {
878                 kvm->arch.migration_mode = 1;
879                 return 0;
880         }
881         /* mark all the pages in active slots as dirty */
882         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
883                 ms = slots->memslots + slotnr;
884                 /*
885                  * The second half of the bitmap is only used on x86,
886                  * and would be wasted otherwise, so we put it to good
887                  * use here to keep track of the state of the storage
888                  * attributes.
889                  */
890                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
891                 ram_pages += ms->npages;
892         }
893         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
894         kvm->arch.migration_mode = 1;
895         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
896         return 0;
897 }
898
899 /*
900  * Must be called with kvm->slots_lock to avoid races with ourselves and
901  * kvm_s390_vm_start_migration.
902  */
903 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
904 {
905         /* migration mode already disabled */
906         if (!kvm->arch.migration_mode)
907                 return 0;
908         kvm->arch.migration_mode = 0;
909         if (kvm->arch.use_cmma)
910                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
911         return 0;
912 }
913
914 static int kvm_s390_vm_set_migration(struct kvm *kvm,
915                                      struct kvm_device_attr *attr)
916 {
917         int res = -ENXIO;
918
919         mutex_lock(&kvm->slots_lock);
920         switch (attr->attr) {
921         case KVM_S390_VM_MIGRATION_START:
922                 res = kvm_s390_vm_start_migration(kvm);
923                 break;
924         case KVM_S390_VM_MIGRATION_STOP:
925                 res = kvm_s390_vm_stop_migration(kvm);
926                 break;
927         default:
928                 break;
929         }
930         mutex_unlock(&kvm->slots_lock);
931
932         return res;
933 }
934
935 static int kvm_s390_vm_get_migration(struct kvm *kvm,
936                                      struct kvm_device_attr *attr)
937 {
938         u64 mig = kvm->arch.migration_mode;
939
940         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
941                 return -ENXIO;
942
943         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
944                 return -EFAULT;
945         return 0;
946 }
947
948 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
949 {
950         struct kvm_s390_vm_tod_clock gtod;
951
952         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
953                 return -EFAULT;
954
955         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
956                 return -EINVAL;
957         kvm_s390_set_tod_clock(kvm, &gtod);
958
959         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
960                 gtod.epoch_idx, gtod.tod);
961
962         return 0;
963 }
964
965 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
966 {
967         u8 gtod_high;
968
969         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
970                                            sizeof(gtod_high)))
971                 return -EFAULT;
972
973         if (gtod_high != 0)
974                 return -EINVAL;
975         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
976
977         return 0;
978 }
979
980 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
981 {
982         struct kvm_s390_vm_tod_clock gtod = { 0 };
983
984         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
985                            sizeof(gtod.tod)))
986                 return -EFAULT;
987
988         kvm_s390_set_tod_clock(kvm, &gtod);
989         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
990         return 0;
991 }
992
993 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
994 {
995         int ret;
996
997         if (attr->flags)
998                 return -EINVAL;
999
1000         switch (attr->attr) {
1001         case KVM_S390_VM_TOD_EXT:
1002                 ret = kvm_s390_set_tod_ext(kvm, attr);
1003                 break;
1004         case KVM_S390_VM_TOD_HIGH:
1005                 ret = kvm_s390_set_tod_high(kvm, attr);
1006                 break;
1007         case KVM_S390_VM_TOD_LOW:
1008                 ret = kvm_s390_set_tod_low(kvm, attr);
1009                 break;
1010         default:
1011                 ret = -ENXIO;
1012                 break;
1013         }
1014         return ret;
1015 }
1016
1017 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1018                                    struct kvm_s390_vm_tod_clock *gtod)
1019 {
1020         struct kvm_s390_tod_clock_ext htod;
1021
1022         preempt_disable();
1023
1024         get_tod_clock_ext((char *)&htod);
1025
1026         gtod->tod = htod.tod + kvm->arch.epoch;
1027         gtod->epoch_idx = 0;
1028         if (test_kvm_facility(kvm, 139)) {
1029                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
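                /* a wrap of the 64-bit TOD addition above carries into the epoch index */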
1030                 if (gtod->tod < htod.tod)
1031                         gtod->epoch_idx += 1;
1032         }
1033
1034         preempt_enable();
1035 }
1036
1037 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1038 {
1039         struct kvm_s390_vm_tod_clock gtod;
1040
1041         memset(&gtod, 0, sizeof(gtod));
1042         kvm_s390_get_tod_clock(kvm, &gtod);
1043         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1044                 return -EFAULT;
1045
1046         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1047                 gtod.epoch_idx, gtod.tod);
1048         return 0;
1049 }
1050
1051 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1052 {
1053         u8 gtod_high = 0;
1054
1055         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1056                                          sizeof(gtod_high)))
1057                 return -EFAULT;
1058         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1059
1060         return 0;
1061 }
1062
1063 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1064 {
1065         u64 gtod;
1066
1067         gtod = kvm_s390_get_tod_clock_fast(kvm);
1068         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1069                 return -EFAULT;
1070         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1071
1072         return 0;
1073 }
1074
1075 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1076 {
1077         int ret;
1078
1079         if (attr->flags)
1080                 return -EINVAL;
1081
1082         switch (attr->attr) {
1083         case KVM_S390_VM_TOD_EXT:
1084                 ret = kvm_s390_get_tod_ext(kvm, attr);
1085                 break;
1086         case KVM_S390_VM_TOD_HIGH:
1087                 ret = kvm_s390_get_tod_high(kvm, attr);
1088                 break;
1089         case KVM_S390_VM_TOD_LOW:
1090                 ret = kvm_s390_get_tod_low(kvm, attr);
1091                 break;
1092         default:
1093                 ret = -ENXIO;
1094                 break;
1095         }
1096         return ret;
1097 }
1098
1099 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1100 {
1101         struct kvm_s390_vm_cpu_processor *proc;
1102         u16 lowest_ibc, unblocked_ibc;
1103         int ret = 0;
1104
1105         mutex_lock(&kvm->lock);
1106         if (kvm->created_vcpus) {
1107                 ret = -EBUSY;
1108                 goto out;
1109         }
1110         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1111         if (!proc) {
1112                 ret = -ENOMEM;
1113                 goto out;
1114         }
1115         if (!copy_from_user(proc, (void __user *)attr->addr,
1116                             sizeof(*proc))) {
1117                 kvm->arch.model.cpuid = proc->cpuid;
1118                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1119                 unblocked_ibc = sclp.ibc & 0xfff;
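                /* clamp the requested IBC value to the range supported by the machine */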
1120                 if (lowest_ibc && proc->ibc) {
1121                         if (proc->ibc > unblocked_ibc)
1122                                 kvm->arch.model.ibc = unblocked_ibc;
1123                         else if (proc->ibc < lowest_ibc)
1124                                 kvm->arch.model.ibc = lowest_ibc;
1125                         else
1126                                 kvm->arch.model.ibc = proc->ibc;
1127                 }
1128                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1129                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1130                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1131                          kvm->arch.model.ibc,
1132                          kvm->arch.model.cpuid);
1133                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1134                          kvm->arch.model.fac_list[0],
1135                          kvm->arch.model.fac_list[1],
1136                          kvm->arch.model.fac_list[2]);
1137         } else
1138                 ret = -EFAULT;
1139         kfree(proc);
1140 out:
1141         mutex_unlock(&kvm->lock);
1142         return ret;
1143 }
1144
1145 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1146                                        struct kvm_device_attr *attr)
1147 {
1148         struct kvm_s390_vm_cpu_feat data;
1149
1150         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1151                 return -EFAULT;
1152         if (!bitmap_subset((unsigned long *) data.feat,
1153                            kvm_s390_available_cpu_feat,
1154                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1155                 return -EINVAL;
1156
1157         mutex_lock(&kvm->lock);
1158         if (kvm->created_vcpus) {
1159                 mutex_unlock(&kvm->lock);
1160                 return -EBUSY;
1161         }
1162         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1163                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1164         mutex_unlock(&kvm->lock);
1165         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1166                          data.feat[0],
1167                          data.feat[1],
1168                          data.feat[2]);
1169         return 0;
1170 }
1171
1172 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1173                                           struct kvm_device_attr *attr)
1174 {
1175         /*
1176          * Once supported by kernel + hw, we have to store the subfunctions
1177          * in kvm->arch and remember that user space configured them.
1178          */
1179         return -ENXIO;
1180 }
1181
1182 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1183 {
1184         int ret = -ENXIO;
1185
1186         switch (attr->attr) {
1187         case KVM_S390_VM_CPU_PROCESSOR:
1188                 ret = kvm_s390_set_processor(kvm, attr);
1189                 break;
1190         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1191                 ret = kvm_s390_set_processor_feat(kvm, attr);
1192                 break;
1193         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1194                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1195                 break;
1196         }
1197         return ret;
1198 }
1199
1200 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202         struct kvm_s390_vm_cpu_processor *proc;
1203         int ret = 0;
1204
1205         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1206         if (!proc) {
1207                 ret = -ENOMEM;
1208                 goto out;
1209         }
1210         proc->cpuid = kvm->arch.model.cpuid;
1211         proc->ibc = kvm->arch.model.ibc;
1212         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1213                S390_ARCH_FAC_LIST_SIZE_BYTE);
1214         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1215                  kvm->arch.model.ibc,
1216                  kvm->arch.model.cpuid);
1217         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1218                  kvm->arch.model.fac_list[0],
1219                  kvm->arch.model.fac_list[1],
1220                  kvm->arch.model.fac_list[2]);
1221         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1222                 ret = -EFAULT;
1223         kfree(proc);
1224 out:
1225         return ret;
1226 }
1227
1228 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230         struct kvm_s390_vm_cpu_machine *mach;
1231         int ret = 0;
1232
1233         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1234         if (!mach) {
1235                 ret = -ENOMEM;
1236                 goto out;
1237         }
1238         get_cpu_id((struct cpuid *) &mach->cpuid);
1239         mach->ibc = sclp.ibc;
1240         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1241                S390_ARCH_FAC_LIST_SIZE_BYTE);
1242         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1243                sizeof(S390_lowcore.stfle_fac_list));
1244         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1245                  kvm->arch.model.ibc,
1246                  kvm->arch.model.cpuid);
1247         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1248                  mach->fac_mask[0],
1249                  mach->fac_mask[1],
1250                  mach->fac_mask[2]);
1251         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1252                  mach->fac_list[0],
1253                  mach->fac_list[1],
1254                  mach->fac_list[2]);
1255         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1256                 ret = -EFAULT;
1257         kfree(mach);
1258 out:
1259         return ret;
1260 }
1261
1262 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1263                                        struct kvm_device_attr *attr)
1264 {
1265         struct kvm_s390_vm_cpu_feat data;
1266
1267         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1268                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1269         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1270                 return -EFAULT;
1271         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1272                          data.feat[0],
1273                          data.feat[1],
1274                          data.feat[2]);
1275         return 0;
1276 }
1277
1278 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1279                                      struct kvm_device_attr *attr)
1280 {
1281         struct kvm_s390_vm_cpu_feat data;
1282
1283         bitmap_copy((unsigned long *) data.feat,
1284                     kvm_s390_available_cpu_feat,
1285                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1286         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1287                 return -EFAULT;
1288         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1289                          data.feat[0],
1290                          data.feat[1],
1291                          data.feat[2]);
1292         return 0;
1293 }
1294
1295 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1296                                           struct kvm_device_attr *attr)
1297 {
1298         /*
1299          * Once we can actually configure subfunctions (kernel + hw support),
1300          * we have to check if they were already set by user space, if so copy
1301          * them from kvm->arch.
1302          */
1303         return -ENXIO;
1304 }
1305
1306 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1307                                         struct kvm_device_attr *attr)
1308 {
1309         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1310             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1311                 return -EFAULT;
1312         return 0;
1313 }
1314 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1315 {
1316         int ret = -ENXIO;
1317
1318         switch (attr->attr) {
1319         case KVM_S390_VM_CPU_PROCESSOR:
1320                 ret = kvm_s390_get_processor(kvm, attr);
1321                 break;
1322         case KVM_S390_VM_CPU_MACHINE:
1323                 ret = kvm_s390_get_machine(kvm, attr);
1324                 break;
1325         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1326                 ret = kvm_s390_get_processor_feat(kvm, attr);
1327                 break;
1328         case KVM_S390_VM_CPU_MACHINE_FEAT:
1329                 ret = kvm_s390_get_machine_feat(kvm, attr);
1330                 break;
1331         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1332                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1333                 break;
1334         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1335                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1336                 break;
1337         }
1338         return ret;
1339 }
1340
1341 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1342 {
1343         int ret;
1344
1345         switch (attr->group) {
1346         case KVM_S390_VM_MEM_CTRL:
1347                 ret = kvm_s390_set_mem_control(kvm, attr);
1348                 break;
1349         case KVM_S390_VM_TOD:
1350                 ret = kvm_s390_set_tod(kvm, attr);
1351                 break;
1352         case KVM_S390_VM_CPU_MODEL:
1353                 ret = kvm_s390_set_cpu_model(kvm, attr);
1354                 break;
1355         case KVM_S390_VM_CRYPTO:
1356                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1357                 break;
1358         case KVM_S390_VM_MIGRATION:
1359                 ret = kvm_s390_vm_set_migration(kvm, attr);
1360                 break;
1361         default:
1362                 ret = -ENXIO;
1363                 break;
1364         }
1365
1366         return ret;
1367 }
1368
1369 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1370 {
1371         int ret;
1372
1373         switch (attr->group) {
1374         case KVM_S390_VM_MEM_CTRL:
1375                 ret = kvm_s390_get_mem_control(kvm, attr);
1376                 break;
1377         case KVM_S390_VM_TOD:
1378                 ret = kvm_s390_get_tod(kvm, attr);
1379                 break;
1380         case KVM_S390_VM_CPU_MODEL:
1381                 ret = kvm_s390_get_cpu_model(kvm, attr);
1382                 break;
1383         case KVM_S390_VM_MIGRATION:
1384                 ret = kvm_s390_vm_get_migration(kvm, attr);
1385                 break;
1386         default:
1387                 ret = -ENXIO;
1388                 break;
1389         }
1390
1391         return ret;
1392 }
1393
1394 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1395 {
1396         int ret;
1397
1398         switch (attr->group) {
1399         case KVM_S390_VM_MEM_CTRL:
1400                 switch (attr->attr) {
1401                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1402                 case KVM_S390_VM_MEM_CLR_CMMA:
1403                         ret = sclp.has_cmma ? 0 : -ENXIO;
1404                         break;
1405                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1406                         ret = 0;
1407                         break;
1408                 default:
1409                         ret = -ENXIO;
1410                         break;
1411                 }
1412                 break;
1413         case KVM_S390_VM_TOD:
1414                 switch (attr->attr) {
1415                 case KVM_S390_VM_TOD_LOW:
1416                 case KVM_S390_VM_TOD_HIGH:
1417                         ret = 0;
1418                         break;
1419                 default:
1420                         ret = -ENXIO;
1421                         break;
1422                 }
1423                 break;
1424         case KVM_S390_VM_CPU_MODEL:
1425                 switch (attr->attr) {
1426                 case KVM_S390_VM_CPU_PROCESSOR:
1427                 case KVM_S390_VM_CPU_MACHINE:
1428                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1429                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1430                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1431                         ret = 0;
1432                         break;
1433                 /* configuring subfunctions is not supported yet */
1434                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1435                 default:
1436                         ret = -ENXIO;
1437                         break;
1438                 }
1439                 break;
1440         case KVM_S390_VM_CRYPTO:
1441                 switch (attr->attr) {
1442                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1443                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1444                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1445                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1446                         ret = 0;
1447                         break;
1448                 default:
1449                         ret = -ENXIO;
1450                         break;
1451                 }
1452                 break;
1453         case KVM_S390_VM_MIGRATION:
1454                 ret = 0;
1455                 break;
1456         default:
1457                 ret = -ENXIO;
1458                 break;
1459         }
1460
1461         return ret;
1462 }
1463
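/*
 * Handler for KVM_S390_GET_SKEYS: read the guest storage key of every page
 * in the range [start_gfn, start_gfn + count) into the user buffer at
 * args->skeydata_addr. Returns KVM_S390_GET_SKEYS_NONE if the guest has
 * never used storage keys, and -EFAULT if a gfn has no backing or the
 * copy-out fails.
 */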
1464 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1465 {
1466         uint8_t *keys;
1467         uint64_t hva;
1468         int srcu_idx, i, r = 0;
1469
1470         if (args->flags != 0)
1471                 return -EINVAL;
1472
1473         /* Is this guest using storage keys? */
1474         if (!mm_uses_skeys(current->mm))
1475                 return KVM_S390_GET_SKEYS_NONE;
1476
1477         /* Enforce sane limit on memory allocation */
1478         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1479                 return -EINVAL;
1480
1481         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1482         if (!keys)
1483                 return -ENOMEM;
1484
1485         down_read(&current->mm->mmap_sem);
1486         srcu_idx = srcu_read_lock(&kvm->srcu);
1487         for (i = 0; i < args->count; i++) {
1488                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1489                 if (kvm_is_error_hva(hva)) {
1490                         r = -EFAULT;
1491                         break;
1492                 }
1493
1494                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1495                 if (r)
1496                         break;
1497         }
1498         srcu_read_unlock(&kvm->srcu, srcu_idx);
1499         up_read(&current->mm->mmap_sem);
1500
1501         if (!r) {
1502                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1503                                  sizeof(uint8_t) * args->count);
1504                 if (r)
1505                         r = -EFAULT;
1506         }
1507
1508         kvfree(keys);
1509         return r;
1510 }
1511
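/*
 * Handler for KVM_S390_SET_SKEYS: enable storage key handling for the guest
 * (if not already enabled) and set the storage key of every page in the
 * range [start_gfn, start_gfn + count) from the user buffer. The reserved
 * lowest-order bit of each key must be zero, otherwise -EINVAL is returned.
 */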
1512 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1513 {
1514         uint8_t *keys;
1515         uint64_t hva;
1516         int srcu_idx, i, r = 0;
1517
1518         if (args->flags != 0)
1519                 return -EINVAL;
1520
1521         /* Enforce sane limit on memory allocation */
1522         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1523                 return -EINVAL;
1524
1525         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1526         if (!keys)
1527                 return -ENOMEM;
1528
1529         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1530                            sizeof(uint8_t) * args->count);
1531         if (r) {
1532                 r = -EFAULT;
1533                 goto out;
1534         }
1535
1536         /* Enable storage key handling for the guest */
1537         r = s390_enable_skey();
1538         if (r)
1539                 goto out;
1540
1541         down_read(&current->mm->mmap_sem);
1542         srcu_idx = srcu_read_lock(&kvm->srcu);
1543         for (i = 0; i < args->count; i++) {
1544                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1545                 if (kvm_is_error_hva(hva)) {
1546                         r = -EFAULT;
1547                         break;
1548                 }
1549
1550                 /* Lowest order bit is reserved */
1551                 if (keys[i] & 0x01) {
1552                         r = -EINVAL;
1553                         break;
1554                 }
1555
1556                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1557                 if (r)
1558                         break;
1559         }
1560         srcu_read_unlock(&kvm->srcu, srcu_idx);
1561         up_read(&current->mm->mmap_sem);
1562 out:
1563         kvfree(keys);
1564         return r;
1565 }
1566
1567 /*
1568  * Base address and length must be sent at the start of each block; it is
1569  * therefore cheaper to send some clean data, as long as it is less than the
1570  * size of two longs.
1571  */
1572 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1573 /* for consistency */
1574 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
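/*
 * Worked example: on s390x a long/pointer is 8 bytes, so the base address and
 * length of a new block cost 16 bytes, i.e. as much as 16 one-byte CMMA
 * values. Padding a gap of up to KVM_S390_MAX_BIT_DISTANCE clean pages with
 * their values is therefore never more expensive than starting a new block.
 */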
1575
1576 /*
1577  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1578  * address falls in a hole. In that case the index of one of the memslots
1579  * bordering the hole is returned.
1580  */
1581 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1582 {
1583         int start = 0, end = slots->used_slots;
1584         int slot = atomic_read(&slots->lru_slot);
1585         struct kvm_memory_slot *memslots = slots->memslots;
1586
1587         if (gfn >= memslots[slot].base_gfn &&
1588             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1589                 return slot;
1590
1591         while (start < end) {
1592                 slot = start + (end - start) / 2;
1593
1594                 if (gfn >= memslots[slot].base_gfn)
1595                         end = slot;
1596                 else
1597                         start = slot + 1;
1598         }
1599
1600         if (gfn >= memslots[start].base_gfn &&
1601             gfn < memslots[start].base_gfn + memslots[start].npages) {
1602                 atomic_set(&slots->lru_slot, start);
1603         }
1604
1605         return start;
1606 }
1607
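/*
 * Peek mode: read the CMMA value of every page in the range starting at
 * args->start_gfn, regardless of whether the page is marked dirty, until the
 * buffer is full or an unmapped gfn is hit. args->count is set to the number
 * of values stored.
 */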
1608 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1609                               u8 *res, unsigned long bufsize)
1610 {
1611         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1612
1613         args->count = 0;
1614         while (args->count < bufsize) {
1615                 hva = gfn_to_hva(kvm, cur_gfn);
1616                 /*
1617                  * We return an error if the first value was invalid, but we
1618                  * return successfully if at least one value was copied.
1619                  */
1620                 if (kvm_is_error_hva(hva))
1621                         return args->count ? 0 : -EFAULT;
1622                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1623                         pgstev = 0;
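                /*
                 * The 0x43 mask keeps the usage-state bits and the NODAT bit
                 * of the PGSTE (cf. the _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT
                 * mask applied when setting the bits below), shifted down so
                 * they fit in one byte per page.
                 */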
1624                 res[args->count++] = (pgstev >> 24) & 0x43;
1625                 cur_gfn++;
1626         }
1627
1628         return 0;
1629 }
1630
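/*
 * Return the guest frame number of the next page, searching from cur_gfn,
 * whose bit is set in the per-memslot CMMA dirty bitmap. Memslots are sorted
 * by descending base_gfn, so walking towards lower slot indices means walking
 * towards higher guest addresses.
 */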
1631 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1632                                               unsigned long cur_gfn)
1633 {
1634         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1635         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1636         unsigned long ofs = cur_gfn - ms->base_gfn;
1637
1638         if (ms->base_gfn + ms->npages <= cur_gfn) {
1639                 slotidx--;
1640                 /* If we are above the highest slot, wrap around */
1641                 if (slotidx < 0)
1642                         slotidx = slots->used_slots - 1;
1643
1644                 ms = slots->memslots + slotidx;
1645                 ofs = 0;
1646         }
1647         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1648         while ((slotidx > 0) && (ofs >= ms->npages)) {
1649                 slotidx--;
1650                 ms = slots->memslots + slotidx;
1651                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1652         }
1653         return ms->base_gfn + ofs;
1654 }
1655
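/*
 * Harvest CMMA values for dirty pages, starting at args->start_gfn (which is
 * advanced to the first dirty page). Clean pages in between are included as
 * long as the gap to the next dirty page does not exceed
 * KVM_S390_MAX_BIT_DISTANCE; the scan also stops at the end of the buffer or
 * of guest memory.
 */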
1656 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1657                              u8 *res, unsigned long bufsize)
1658 {
1659         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1660         struct kvm_memslots *slots = kvm_memslots(kvm);
1661         struct kvm_memory_slot *ms;
1662
1663         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1664         ms = gfn_to_memslot(kvm, cur_gfn);
1665         args->count = 0;
1666         args->start_gfn = cur_gfn;
1667         if (!ms)
1668                 return 0;
1669         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1670         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1671
1672         while (args->count < bufsize) {
1673                 hva = gfn_to_hva(kvm, cur_gfn);
1674                 if (kvm_is_error_hva(hva))
1675                         return 0;
1676                 /* Decrement only if we actually flipped the bit to 0 */
1677                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1678                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1679                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1680                         pgstev = 0;
1681                 /* Save the value */
1682                 res[args->count++] = (pgstev >> 24) & 0x43;
1683                 /* If the next bit is too far away, stop. */
1684                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1685                         return 0;
1686                 /* If we reached the previous "next", find the next one */
1687                 if (cur_gfn == next_gfn)
1688                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1689                 /* Reached the end of memory or of the buffer, stop */
1690                 if ((next_gfn >= mem_end) ||
1691                     (next_gfn - args->start_gfn >= bufsize))
1692                         return 0;
1693                 cur_gfn++;
1694                 /* Reached the end of the current memslot, take the next one. */
1695                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1696                         ms = gfn_to_memslot(kvm, cur_gfn);
1697                         if (!ms)
1698                                 return 0;
1699                 }
1700         }
1701         return 0;
1702 }
1703
1704 /*
1705  * This function searches for the next page with dirty CMMA attributes, and
1706  * saves the attributes in the buffer until either the end of the buffer is
1707  * reached or a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is
1708  * found; no trailing clean bytes are saved.
1709  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1710  * output buffer will indicate 0 as length.
1711  */
1712 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1713                                   struct kvm_s390_cmma_log *args)
1714 {
1715         unsigned long bufsize;
1716         int srcu_idx, peek, ret;
1717         u8 *values;
1718
1719         if (!kvm->arch.use_cmma)
1720                 return -ENXIO;
1721         /* Invalid/unsupported flags were specified */
1722         if (args->flags & ~KVM_S390_CMMA_PEEK)
1723                 return -EINVAL;
1724         /* Migration mode query, and we are not doing a migration */
1725         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1726         if (!peek && !kvm->arch.migration_mode)
1727                 return -EINVAL;
1728         /* CMMA is disabled or was not used, or the buffer has length zero */
1729         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1730         if (!bufsize || !kvm->mm->context.uses_cmm) {
1731                 memset(args, 0, sizeof(*args));
1732                 return 0;
1733         }
1734         /* We are not peeking, and there are no dirty pages */
1735         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1736                 memset(args, 0, sizeof(*args));
1737                 return 0;
1738         }
1739
1740         values = vmalloc(bufsize);
1741         if (!values)
1742                 return -ENOMEM;
1743
1744         down_read(&kvm->mm->mmap_sem);
1745         srcu_idx = srcu_read_lock(&kvm->srcu);
1746         if (peek)
1747                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1748         else
1749                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1750         srcu_read_unlock(&kvm->srcu, srcu_idx);
1751         up_read(&kvm->mm->mmap_sem);
1752
1753         if (kvm->arch.migration_mode)
1754                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1755         else
1756                 args->remaining = 0;
1757
1758         if (copy_to_user((void __user *)args->values, values, args->count))
1759                 ret = -EFAULT;
1760
1761         vfree(values);
1762         return ret;
1763 }
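
/*
 * Illustrative userspace sketch (not part of this file), assuming the UAPI
 * definitions from <linux/kvm.h>: peek at the CMMA values of the first 512
 * guest pages without requiring migration mode. Error handling is omitted.
 *
 *	__u8 buf[512];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = sizeof(buf),
 *		.flags     = KVM_S390_CMMA_PEEK,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *	(on return, log.count holds the number of values actually stored)
 */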
1764
1765 /*
1766  * This function sets the CMMA attributes for the given pages. If the input
1767  * buffer has zero length, no action is taken, otherwise the attributes are
1768  * set and the mm->context.uses_cmm flag is set.
1769  */
1770 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1771                                   const struct kvm_s390_cmma_log *args)
1772 {
1773         unsigned long hva, mask, pgstev, i;
1774         uint8_t *bits;
1775         int srcu_idx, r = 0;
1776
1777         mask = args->mask;
1778
1779         if (!kvm->arch.use_cmma)
1780                 return -ENXIO;
1781         /* invalid/unsupported flags */
1782         if (args->flags != 0)
1783                 return -EINVAL;
1784         /* Enforce sane limit on memory allocation */
1785         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1786                 return -EINVAL;
1787         /* Nothing to do */
1788         if (args->count == 0)
1789                 return 0;
1790
1791         bits = vmalloc(array_size(sizeof(*bits), args->count));
1792         if (!bits)
1793                 return -ENOMEM;
1794
1795         r = copy_from_user(bits, (void __user *)args->values, args->count);
1796         if (r) {
1797                 r = -EFAULT;
1798                 goto out;
1799         }
1800
1801         down_read(&kvm->mm->mmap_sem);
1802         srcu_idx = srcu_read_lock(&kvm->srcu);
1803         for (i = 0; i < args->count; i++) {
1804                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1805                 if (kvm_is_error_hva(hva)) {
1806                         r = -EFAULT;
1807                         break;
1808                 }
1809
1810                 pgstev = bits[i];
1811                 pgstev = pgstev << 24;
1812                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1813                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1814         }
1815         srcu_read_unlock(&kvm->srcu, srcu_idx);
1816         up_read(&kvm->mm->mmap_sem);
1817
1818         if (!kvm->mm->context.uses_cmm) {
1819                 down_write(&kvm->mm->mmap_sem);
1820                 kvm->mm->context.uses_cmm = 1;
1821                 up_write(&kvm->mm->mmap_sem);
1822         }
1823 out:
1824         vfree(bits);
1825         return r;
1826 }
1827
1828 long kvm_arch_vm_ioctl(struct file *filp,
1829                        unsigned int ioctl, unsigned long arg)
1830 {
1831         struct kvm *kvm = filp->private_data;
1832         void __user *argp = (void __user *)arg;
1833         struct kvm_device_attr attr;
1834         int r;
1835
1836         switch (ioctl) {
1837         case KVM_S390_INTERRUPT: {
1838                 struct kvm_s390_interrupt s390int;
1839
1840                 r = -EFAULT;
1841                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1842                         break;
1843                 r = kvm_s390_inject_vm(kvm, &s390int);
1844                 break;
1845         }
1846         case KVM_ENABLE_CAP: {
1847                 struct kvm_enable_cap cap;
1848                 r = -EFAULT;
1849                 if (copy_from_user(&cap, argp, sizeof(cap)))
1850                         break;
1851                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1852                 break;
1853         }
1854         case KVM_CREATE_IRQCHIP: {
1855                 struct kvm_irq_routing_entry routing;
1856
1857                 r = -EINVAL;
1858                 if (kvm->arch.use_irqchip) {
1859                         /* Set up dummy routing. */
1860                         memset(&routing, 0, sizeof(routing));
1861                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1862                 }
1863                 break;
1864         }
1865         case KVM_SET_DEVICE_ATTR: {
1866                 r = -EFAULT;
1867                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1868                         break;
1869                 r = kvm_s390_vm_set_attr(kvm, &attr);
1870                 break;
1871         }
1872         case KVM_GET_DEVICE_ATTR: {
1873                 r = -EFAULT;
1874                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1875                         break;
1876                 r = kvm_s390_vm_get_attr(kvm, &attr);
1877                 break;
1878         }
1879         case KVM_HAS_DEVICE_ATTR: {
1880                 r = -EFAULT;
1881                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1882                         break;
1883                 r = kvm_s390_vm_has_attr(kvm, &attr);
1884                 break;
1885         }
1886         case KVM_S390_GET_SKEYS: {
1887                 struct kvm_s390_skeys args;
1888
1889                 r = -EFAULT;
1890                 if (copy_from_user(&args, argp,
1891                                    sizeof(struct kvm_s390_skeys)))
1892                         break;
1893                 r = kvm_s390_get_skeys(kvm, &args);
1894                 break;
1895         }
1896         case KVM_S390_SET_SKEYS: {
1897                 struct kvm_s390_skeys args;
1898
1899                 r = -EFAULT;
1900                 if (copy_from_user(&args, argp,
1901                                    sizeof(struct kvm_s390_skeys)))
1902                         break;
1903                 r = kvm_s390_set_skeys(kvm, &args);
1904                 break;
1905         }
1906         case KVM_S390_GET_CMMA_BITS: {
1907                 struct kvm_s390_cmma_log args;
1908
1909                 r = -EFAULT;
1910                 if (copy_from_user(&args, argp, sizeof(args)))
1911                         break;
1912                 mutex_lock(&kvm->slots_lock);
1913                 r = kvm_s390_get_cmma_bits(kvm, &args);
1914                 mutex_unlock(&kvm->slots_lock);
1915                 if (!r) {
1916                         r = copy_to_user(argp, &args, sizeof(args));
1917                         if (r)
1918                                 r = -EFAULT;
1919                 }
1920                 break;
1921         }
1922         case KVM_S390_SET_CMMA_BITS: {
1923                 struct kvm_s390_cmma_log args;
1924
1925                 r = -EFAULT;
1926                 if (copy_from_user(&args, argp, sizeof(args)))
1927                         break;
1928                 mutex_lock(&kvm->slots_lock);
1929                 r = kvm_s390_set_cmma_bits(kvm, &args);
1930                 mutex_unlock(&kvm->slots_lock);
1931                 break;
1932         }
1933         default:
1934                 r = -ENOTTY;
1935         }
1936
1937         return r;
1938 }
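
/*
 * Illustrative userspace sketch (not part of this file), assuming the UAPI
 * definitions from <linux/kvm.h>: query the migration mode state through the
 * attribute interface dispatched above. Error handling is omitted.
 *
 *	__u64 state;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_STATUS,
 *		.addr  = (__u64)(unsigned long)&state,
 *	};
 *	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *		ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */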
1939
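/*
 * Query the AP configuration via PQAP with the QCI subfunction. The function
 * code is passed in general register 0 and the address of the 128-byte
 * response block in general register 2; ".long 0xb2af0000" encodes the PQAP
 * instruction. The condition code is returned (non-zero means failure).
 */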
1940 static int kvm_s390_query_ap_config(u8 *config)
1941 {
1942         u32 fcn_code = 0x04000000UL;
1943         u32 cc = 0;
1944
1945         memset(config, 0, 128);
1946         asm volatile(
1947                 "lgr 0,%1\n"
1948                 "lgr 2,%2\n"
1949                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1950                 "0: ipm %0\n"
1951                 "srl %0,28\n"
1952                 "1:\n"
1953                 EX_TABLE(0b, 1b)
1954                 : "+r" (cc)
1955                 : "r" (fcn_code), "r" (config)
1956                 : "cc", "0", "2", "memory"
1957         );
1958
1959         return cc;
1960 }
1961
1962 static int kvm_s390_apxa_installed(void)
1963 {
1964         u8 config[128];
1965         int cc;
1966
1967         if (test_facility(12)) {
1968                 cc = kvm_s390_query_ap_config(config);
1969
1970                 if (cc)
1971                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1972                 else
1973                         return config[0] & 0x40;
1974         }
1975
1976         return 0;
1977 }
1978
1979 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1980 {
1981         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1982
1983         if (kvm_s390_apxa_installed())
1984                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1985         else
1986                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1987 }
1988
1989 static u64 kvm_s390_get_initial_cpuid(void)
1990 {
1991         struct cpuid cpuid;
1992
1993         get_cpu_id(&cpuid);
1994         cpuid.version = 0xff;
1995         return *((u64 *) &cpuid);
1996 }
1997
1998 static void kvm_s390_crypto_init(struct kvm *kvm)
1999 {
2000         if (!test_kvm_facility(kvm, 76))
2001                 return;
2002
2003         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2004         kvm_s390_set_crycb_format(kvm);
2005
2006         /* Enable AES/DEA protected key functions by default */
2007         kvm->arch.crypto.aes_kw = 1;
2008         kvm->arch.crypto.dea_kw = 1;
2009         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2010                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2011         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2012                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2013 }
2014
2015 static void sca_dispose(struct kvm *kvm)
2016 {
2017         if (kvm->arch.use_esca)
2018                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2019         else
2020                 free_page((unsigned long)(kvm->arch.sca));
2021         kvm->arch.sca = NULL;
2022 }
2023
2024 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2025 {
2026         gfp_t alloc_flags = GFP_KERNEL;
2027         int i, rc;
2028         char debug_name[16];
2029         static unsigned long sca_offset;
2030
2031         rc = -EINVAL;
2032 #ifdef CONFIG_KVM_S390_UCONTROL
2033         if (type & ~KVM_VM_S390_UCONTROL)
2034                 goto out_err;
2035         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2036                 goto out_err;
2037 #else
2038         if (type)
2039                 goto out_err;
2040 #endif
2041
2042         rc = s390_enable_sie();
2043         if (rc)
2044                 goto out_err;
2045
2046         rc = -ENOMEM;
2047
2048         if (!sclp.has_64bscao)
2049                 alloc_flags |= GFP_DMA;
2050         rwlock_init(&kvm->arch.sca_lock);
2051         /* start with basic SCA */
2052         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2053         if (!kvm->arch.sca)
2054                 goto out_err;
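        /*
         * Each VM gets its own zeroed page for the basic SCA; the global
         * sca_offset staggers where within that page the block starts, in
         * 16-byte steps, wrapping before the block would cross the page
         * boundary (presumably to spread the SCAs of different VMs over
         * different cache lines).
         */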
2055         spin_lock(&kvm_lock);
2056         sca_offset += 16;
2057         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2058                 sca_offset = 0;
2059         kvm->arch.sca = (struct bsca_block *)
2060                         ((char *) kvm->arch.sca + sca_offset);
2061         spin_unlock(&kvm_lock);
2062
2063         sprintf(debug_name, "kvm-%u", current->pid);
2064
2065         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2066         if (!kvm->arch.dbf)
2067                 goto out_err;
2068
2069         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2070         kvm->arch.sie_page2 =
2071              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2072         if (!kvm->arch.sie_page2)
2073                 goto out_err;
2074
2075         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2076
2077         for (i = 0; i < kvm_s390_fac_size(); i++) {
2078                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2079                                               (kvm_s390_fac_base[i] |
2080                                                kvm_s390_fac_ext[i]);
2081                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2082                                               kvm_s390_fac_base[i];
2083         }
2084
2085         /* we are always in czam mode - even on pre-z14 machines */
2086         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2087         set_kvm_facility(kvm->arch.model.fac_list, 138);
2088         /* we emulate STHYI in kvm */
2089         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2090         set_kvm_facility(kvm->arch.model.fac_list, 74);
2091         if (MACHINE_HAS_TLB_GUEST) {
2092                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2093                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2094         }
2095
2096         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2097         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2098
2099         kvm_s390_crypto_init(kvm);
2100
2101         mutex_init(&kvm->arch.float_int.ais_lock);
2102         spin_lock_init(&kvm->arch.float_int.lock);
2103         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2104                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2105         init_waitqueue_head(&kvm->arch.ipte_wq);
2106         mutex_init(&kvm->arch.ipte_mutex);
2107
2108         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2109         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2110
2111         if (type & KVM_VM_S390_UCONTROL) {
2112                 kvm->arch.gmap = NULL;
2113                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2114         } else {
2115                 if (sclp.hamax == U64_MAX)
2116                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2117                 else
2118                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2119                                                     sclp.hamax + 1);
2120                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2121                 if (!kvm->arch.gmap)
2122                         goto out_err;
2123                 kvm->arch.gmap->private = kvm;
2124                 kvm->arch.gmap->pfault_enabled = 0;
2125         }
2126
2127         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2128         kvm->arch.use_skf = sclp.has_skey;
2129         spin_lock_init(&kvm->arch.start_stop_lock);
2130         kvm_s390_vsie_init(kvm);
2131         kvm_s390_gisa_init(kvm);
2132         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2133
2134         return 0;
2135 out_err:
2136         free_page((unsigned long)kvm->arch.sie_page2);
2137         debug_unregister(kvm->arch.dbf);
2138         sca_dispose(kvm);
2139         KVM_EVENT(3, "creation of vm failed: %d", rc);
2140         return rc;
2141 }
2142
2143 bool kvm_arch_has_vcpu_debugfs(void)
2144 {
2145         return false;
2146 }
2147
2148 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2149 {
2150         return 0;
2151 }
2152
2153 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2154 {
2155         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2156         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2157         kvm_s390_clear_local_irqs(vcpu);
2158         kvm_clear_async_pf_completion_queue(vcpu);
2159         if (!kvm_is_ucontrol(vcpu->kvm))
2160                 sca_del_vcpu(vcpu);
2161
2162         if (kvm_is_ucontrol(vcpu->kvm))
2163                 gmap_remove(vcpu->arch.gmap);
2164
2165         if (vcpu->kvm->arch.use_cmma)
2166                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2167         free_page((unsigned long)(vcpu->arch.sie_block));
2168
2169         kvm_vcpu_uninit(vcpu);
2170         kmem_cache_free(kvm_vcpu_cache, vcpu);
2171 }
2172
2173 static void kvm_free_vcpus(struct kvm *kvm)
2174 {
2175         unsigned int i;
2176         struct kvm_vcpu *vcpu;
2177
2178         kvm_for_each_vcpu(i, vcpu, kvm)
2179                 kvm_arch_vcpu_destroy(vcpu);
2180
2181         mutex_lock(&kvm->lock);
2182         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2183                 kvm->vcpus[i] = NULL;
2184
2185         atomic_set(&kvm->online_vcpus, 0);
2186         mutex_unlock(&kvm->lock);
2187 }
2188
2189 void kvm_arch_destroy_vm(struct kvm *kvm)
2190 {
2191         kvm_free_vcpus(kvm);
2192         sca_dispose(kvm);
2193         debug_unregister(kvm->arch.dbf);
2194         kvm_s390_gisa_destroy(kvm);
2195         free_page((unsigned long)kvm->arch.sie_page2);
2196         if (!kvm_is_ucontrol(kvm))
2197                 gmap_remove(kvm->arch.gmap);
2198         kvm_s390_destroy_adapters(kvm);
2199         kvm_s390_clear_float_irqs(kvm);
2200         kvm_s390_vsie_destroy(kvm);
2201         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2202 }
2203
2204 /* Section: vcpu related */
2205 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2206 {
2207         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2208         if (!vcpu->arch.gmap)
2209                 return -ENOMEM;
2210         vcpu->arch.gmap->private = vcpu->kvm;
2211
2212         return 0;
2213 }
2214
2215 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2216 {
2217         if (!kvm_s390_use_sca_entries())
2218                 return;
2219         read_lock(&vcpu->kvm->arch.sca_lock);
2220         if (vcpu->kvm->arch.use_esca) {
2221                 struct esca_block *sca = vcpu->kvm->arch.sca;
2222
2223                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2224                 sca->cpu[vcpu->vcpu_id].sda = 0;
2225         } else {
2226                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2227
2228                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2229                 sca->cpu[vcpu->vcpu_id].sda = 0;
2230         }
2231         read_unlock(&vcpu->kvm->arch.sca_lock);
2232 }
2233
2234 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2235 {
2236         if (!kvm_s390_use_sca_entries()) {
2237                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2238
2239                 /* we still need the basic sca for the ipte control */
2240                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2241                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2242                 return;
2243         }
2244         read_lock(&vcpu->kvm->arch.sca_lock);
2245         if (vcpu->kvm->arch.use_esca) {
2246                 struct esca_block *sca = vcpu->kvm->arch.sca;
2247
2248                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2249                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2250                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2251                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2252                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2253         } else {
2254                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2255
2256                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2257                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2258                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2259                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2260         }
2261         read_unlock(&vcpu->kvm->arch.sca_lock);
2262 }
2263
2264 /* Basic SCA to Extended SCA data copy routines */
2265 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2266 {
2267         d->sda = s->sda;
2268         d->sigp_ctrl.c = s->sigp_ctrl.c;
2269         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2270 }
2271
2272 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2273 {
2274         int i;
2275
2276         d->ipte_control = s->ipte_control;
2277         d->mcn[0] = s->mcn;
2278         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2279                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2280 }
2281
2282 static int sca_switch_to_extended(struct kvm *kvm)
2283 {
2284         struct bsca_block *old_sca = kvm->arch.sca;
2285         struct esca_block *new_sca;
2286         struct kvm_vcpu *vcpu;
2287         unsigned int vcpu_idx;
2288         u32 scaol, scaoh;
2289
2290         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2291         if (!new_sca)
2292                 return -ENOMEM;
2293
2294         scaoh = (u32)((u64)(new_sca) >> 32);
2295         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2296
2297         kvm_s390_vcpu_block_all(kvm);
2298         write_lock(&kvm->arch.sca_lock);
2299
2300         sca_copy_b_to_e(new_sca, old_sca);
2301
2302         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2303                 vcpu->arch.sie_block->scaoh = scaoh;
2304                 vcpu->arch.sie_block->scaol = scaol;
2305                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2306         }
2307         kvm->arch.sca = new_sca;
2308         kvm->arch.use_esca = 1;
2309
2310         write_unlock(&kvm->arch.sca_lock);
2311         kvm_s390_vcpu_unblock_all(kvm);
2312
2313         free_page((unsigned long)old_sca);
2314
2315         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2316                  old_sca, kvm->arch.sca);
2317         return 0;
2318 }
2319
2320 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2321 {
2322         int rc;
2323
2324         if (!kvm_s390_use_sca_entries()) {
2325                 if (id < KVM_MAX_VCPUS)
2326                         return true;
2327                 return false;
2328         }
2329         if (id < KVM_S390_BSCA_CPU_SLOTS)
2330                 return true;
2331         if (!sclp.has_esca || !sclp.has_64bscao)
2332                 return false;
2333
2334         mutex_lock(&kvm->lock);
2335         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2336         mutex_unlock(&kvm->lock);
2337
2338         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2339 }
2340
2341 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2342 {
2343         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2344         kvm_clear_async_pf_completion_queue(vcpu);
2345         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2346                                     KVM_SYNC_GPRS |
2347                                     KVM_SYNC_ACRS |
2348                                     KVM_SYNC_CRS |
2349                                     KVM_SYNC_ARCH0 |
2350                                     KVM_SYNC_PFAULT;
2351         kvm_s390_set_prefix(vcpu, 0);
2352         if (test_kvm_facility(vcpu->kvm, 64))
2353                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2354         if (test_kvm_facility(vcpu->kvm, 82))
2355                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2356         if (test_kvm_facility(vcpu->kvm, 133))
2357                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2358         if (test_kvm_facility(vcpu->kvm, 156))
2359                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2360         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2361          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2362          */
2363         if (MACHINE_HAS_VX)
2364                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2365         else
2366                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2367
2368         if (kvm_is_ucontrol(vcpu->kvm))
2369                 return __kvm_ucontrol_vcpu_init(vcpu);
2370
2371         return 0;
2372 }
2373
2374 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2375 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2376 {
2377         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2378         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2379         vcpu->arch.cputm_start = get_tod_clock_fast();
2380         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2381 }
2382
2383 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2384 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2385 {
2386         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2387         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2388         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2389         vcpu->arch.cputm_start = 0;
2390         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2391 }
2392
2393 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2394 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2395 {
2396         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2397         vcpu->arch.cputm_enabled = true;
2398         __start_cpu_timer_accounting(vcpu);
2399 }
2400
2401 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2402 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2403 {
2404         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2405         __stop_cpu_timer_accounting(vcpu);
2406         vcpu->arch.cputm_enabled = false;
2407 }
2408
2409 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2410 {
2411         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2412         __enable_cpu_timer_accounting(vcpu);
2413         preempt_enable();
2414 }
2415
2416 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2417 {
2418         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2419         __disable_cpu_timer_accounting(vcpu);
2420         preempt_enable();
2421 }
2422
2423 /* set the cpu timer - may only be called from the VCPU thread itself */
2424 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2425 {
2426         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2427         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2428         if (vcpu->arch.cputm_enabled)
2429                 vcpu->arch.cputm_start = get_tod_clock_fast();
2430         vcpu->arch.sie_block->cputm = cputm;
2431         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2432         preempt_enable();
2433 }
2434
2435 /* update and get the cpu timer - can also be called from other VCPU threads */
2436 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2437 {
2438         unsigned int seq;
2439         __u64 value;
2440
2441         if (unlikely(!vcpu->arch.cputm_enabled))
2442                 return vcpu->arch.sie_block->cputm;
2443
2444         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2445         do {
2446                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2447                 /*
2448                  * If the writer would ever execute a read in the critical
2449                  * section, e.g. in irq context, we have a deadlock.
2450                  */
2451                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2452                 value = vcpu->arch.sie_block->cputm;
2453                 /* if cputm_start is 0, accounting is being started/stopped */
2454                 if (likely(vcpu->arch.cputm_start))
2455                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2456         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2457         preempt_enable();
2458         return value;
2459 }
2460
2461 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2462 {
2463
2464         gmap_enable(vcpu->arch.enabled_gmap);
2465         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2466         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2467                 __start_cpu_timer_accounting(vcpu);
2468         vcpu->cpu = cpu;
2469 }
2470
2471 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2472 {
2473         vcpu->cpu = -1;
2474         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2475                 __stop_cpu_timer_accounting(vcpu);
2476         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2477         vcpu->arch.enabled_gmap = gmap_get_enabled();
2478         gmap_disable(vcpu->arch.enabled_gmap);
2479
2480 }
2481
2482 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2483 {
2484         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2485         vcpu->arch.sie_block->gpsw.mask = 0UL;
2486         vcpu->arch.sie_block->gpsw.addr = 0UL;
2487         kvm_s390_set_prefix(vcpu, 0);
2488         kvm_s390_set_cpu_timer(vcpu, 0);
2489         vcpu->arch.sie_block->ckc       = 0UL;
2490         vcpu->arch.sie_block->todpr     = 0;
2491         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2492         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2493                                         CR0_INTERRUPT_KEY_SUBMASK |
2494                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2495         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2496                                         CR14_UNUSED_33 |
2497                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2498         /* make sure the new fpc will be lazily loaded */
2499         save_fpu_regs();
2500         current->thread.fpu.fpc = 0;
2501         vcpu->arch.sie_block->gbea = 1;
2502         vcpu->arch.sie_block->pp = 0;
2503         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2504         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2505         kvm_clear_async_pf_completion_queue(vcpu);
2506         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2507                 kvm_s390_vcpu_stop(vcpu);
2508         kvm_s390_clear_local_irqs(vcpu);
2509 }
2510
2511 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2512 {
2513         mutex_lock(&vcpu->kvm->lock);
2514         preempt_disable();
2515         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2516         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2517         preempt_enable();
2518         mutex_unlock(&vcpu->kvm->lock);
2519         if (!kvm_is_ucontrol(vcpu->kvm)) {
2520                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2521                 sca_add_vcpu(vcpu);
2522         }
2523         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2524                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2525         /* make vcpu_load load the right gmap on the first trigger */
2526         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2527 }
2528
2529 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2530 {
2531         if (!test_kvm_facility(vcpu->kvm, 76))
2532                 return;
2533
2534         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2535
2536         if (vcpu->kvm->arch.crypto.aes_kw)
2537                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2538         if (vcpu->kvm->arch.crypto.dea_kw)
2539                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2540
2541         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2542 }
2543
2544 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2545 {
2546         free_page(vcpu->arch.sie_block->cbrlo);
2547         vcpu->arch.sie_block->cbrlo = 0;
2548 }
2549
2550 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2551 {
2552         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2553         if (!vcpu->arch.sie_block->cbrlo)
2554                 return -ENOMEM;
2555         return 0;
2556 }
2557
2558 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2559 {
2560         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2561
2562         vcpu->arch.sie_block->ibc = model->ibc;
2563         if (test_kvm_facility(vcpu->kvm, 7))
2564                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2565 }
2566
2567 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2568 {
2569         int rc = 0;
2570
2571         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2572                                                     CPUSTAT_SM |
2573                                                     CPUSTAT_STOPPED);
2574
2575         if (test_kvm_facility(vcpu->kvm, 78))
2576                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2577         else if (test_kvm_facility(vcpu->kvm, 8))
2578                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2579
2580         kvm_s390_vcpu_setup_model(vcpu);
2581
2582         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2583         if (MACHINE_HAS_ESOP)
2584                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2585         if (test_kvm_facility(vcpu->kvm, 9))
2586                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2587         if (test_kvm_facility(vcpu->kvm, 73))
2588                 vcpu->arch.sie_block->ecb |= ECB_TE;
2589
2590         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2591                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2592         if (test_kvm_facility(vcpu->kvm, 130))
2593                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2594         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2595         if (sclp.has_cei)
2596                 vcpu->arch.sie_block->eca |= ECA_CEI;
2597         if (sclp.has_ib)
2598                 vcpu->arch.sie_block->eca |= ECA_IB;
2599         if (sclp.has_siif)
2600                 vcpu->arch.sie_block->eca |= ECA_SII;
2601         if (sclp.has_sigpif)
2602                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2603         if (test_kvm_facility(vcpu->kvm, 129)) {
2604                 vcpu->arch.sie_block->eca |= ECA_VX;
2605                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2606         }
2607         if (test_kvm_facility(vcpu->kvm, 139))
2608                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2609         if (test_kvm_facility(vcpu->kvm, 156))
2610                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2611         if (vcpu->arch.sie_block->gd) {
2612                 vcpu->arch.sie_block->eca |= ECA_AIV;
2613                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2614                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2615         }
2616         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2617                                         | SDNXC;
2618         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2619
2620         if (sclp.has_kss)
2621                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2622         else
2623                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2624
2625         if (vcpu->kvm->arch.use_cmma) {
2626                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2627                 if (rc)
2628                         return rc;
2629         }
2630         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2631         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2632
2633         kvm_s390_vcpu_crypto_setup(vcpu);
2634
2635         return rc;
2636 }
2637
2638 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2639                                       unsigned int id)
2640 {
2641         struct kvm_vcpu *vcpu;
2642         struct sie_page *sie_page;
2643         int rc = -EINVAL;
2644
2645         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2646                 goto out;
2647
2648         rc = -ENOMEM;
2649
2650         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2651         if (!vcpu)
2652                 goto out;
2653
2654         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2655         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2656         if (!sie_page)
2657                 goto out_free_cpu;
2658
2659         vcpu->arch.sie_block = &sie_page->sie_block;
2660         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2661
2662         /* the real guest size will always be smaller than msl */
2663         vcpu->arch.sie_block->mso = 0;
2664         vcpu->arch.sie_block->msl = sclp.hamax;
2665
2666         vcpu->arch.sie_block->icpua = id;
2667         spin_lock_init(&vcpu->arch.local_int.lock);
2668         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2669         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2670                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2671         seqcount_init(&vcpu->arch.cputm_seqcount);
2672
2673         rc = kvm_vcpu_init(vcpu, kvm, id);
2674         if (rc)
2675                 goto out_free_sie_block;
2676         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2677                  vcpu->arch.sie_block);
2678         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2679
2680         return vcpu;
2681 out_free_sie_block:
2682         free_page((unsigned long)(vcpu->arch.sie_block));
2683 out_free_cpu:
2684         kmem_cache_free(kvm_vcpu_cache, vcpu);
2685 out:
2686         return ERR_PTR(rc);
2687 }
2688
2689 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2690 {
2691         return kvm_s390_vcpu_has_irq(vcpu, 0);
2692 }
2693
2694 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2695 {
2696         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2697 }
2698
2699 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2700 {
2701         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2702         exit_sie(vcpu);
2703 }
2704
2705 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2706 {
2707         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2708 }
2709
2710 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2711 {
2712         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2713         exit_sie(vcpu);
2714 }
2715
2716 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2717 {
2718         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2719 }
2720
2721 /*
2722  * Kick a guest cpu out of SIE and wait until SIE is not running. If the
2723  * CPU is not running (e.g. waiting as idle) the function returns immediately.
2724  */
2725 void exit_sie(struct kvm_vcpu *vcpu)
2726 {
2727         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2728         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2729                 cpu_relax();
2730 }
2731
2732 /* Kick a guest cpu out of SIE to process a request synchronously */
2733 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2734 {
2735         kvm_make_request(req, vcpu);
2736         kvm_s390_vcpu_request(vcpu);
2737 }
2738
2739 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2740                               unsigned long end)
2741 {
2742         struct kvm *kvm = gmap->private;
2743         struct kvm_vcpu *vcpu;
2744         unsigned long prefix;
2745         int i;
2746
2747         if (gmap_is_shadow(gmap))
2748                 return;
2749         if (start >= 1UL << 31)
2750                 /* We are only interested in prefix pages */
2751                 return;
2752         kvm_for_each_vcpu(i, vcpu, kvm) {
2753                 /* match against both prefix pages */
2754                 prefix = kvm_s390_get_prefix(vcpu);
2755                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2756                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2757                                    start, end);
2758                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2759                 }
2760         }
2761 }
2762
2763 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2764 {
2765         /* kvm common code refers to this, but never calls it */
2766         BUG();
2767         return 0;
2768 }
2769
2770 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2771                                            struct kvm_one_reg *reg)
2772 {
2773         int r = -EINVAL;
2774
2775         switch (reg->id) {
2776         case KVM_REG_S390_TODPR:
2777                 r = put_user(vcpu->arch.sie_block->todpr,
2778                              (u32 __user *)reg->addr);
2779                 break;
2780         case KVM_REG_S390_EPOCHDIFF:
2781                 r = put_user(vcpu->arch.sie_block->epoch,
2782                              (u64 __user *)reg->addr);
2783                 break;
2784         case KVM_REG_S390_CPU_TIMER:
2785                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2786                              (u64 __user *)reg->addr);
2787                 break;
2788         case KVM_REG_S390_CLOCK_COMP:
2789                 r = put_user(vcpu->arch.sie_block->ckc,
2790                              (u64 __user *)reg->addr);
2791                 break;
2792         case KVM_REG_S390_PFTOKEN:
2793                 r = put_user(vcpu->arch.pfault_token,
2794                              (u64 __user *)reg->addr);
2795                 break;
2796         case KVM_REG_S390_PFCOMPARE:
2797                 r = put_user(vcpu->arch.pfault_compare,
2798                              (u64 __user *)reg->addr);
2799                 break;
2800         case KVM_REG_S390_PFSELECT:
2801                 r = put_user(vcpu->arch.pfault_select,
2802                              (u64 __user *)reg->addr);
2803                 break;
2804         case KVM_REG_S390_PP:
2805                 r = put_user(vcpu->arch.sie_block->pp,
2806                              (u64 __user *)reg->addr);
2807                 break;
2808         case KVM_REG_S390_GBEA:
2809                 r = put_user(vcpu->arch.sie_block->gbea,
2810                              (u64 __user *)reg->addr);
2811                 break;
2812         default:
2813                 break;
2814         }
2815
2816         return r;
2817 }
2818
2819 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2820                                            struct kvm_one_reg *reg)
2821 {
2822         int r = -EINVAL;
2823         __u64 val;
2824
2825         switch (reg->id) {
2826         case KVM_REG_S390_TODPR:
2827                 r = get_user(vcpu->arch.sie_block->todpr,
2828                              (u32 __user *)reg->addr);
2829                 break;
2830         case KVM_REG_S390_EPOCHDIFF:
2831                 r = get_user(vcpu->arch.sie_block->epoch,
2832                              (u64 __user *)reg->addr);
2833                 break;
2834         case KVM_REG_S390_CPU_TIMER:
2835                 r = get_user(val, (u64 __user *)reg->addr);
2836                 if (!r)
2837                         kvm_s390_set_cpu_timer(vcpu, val);
2838                 break;
2839         case KVM_REG_S390_CLOCK_COMP:
2840                 r = get_user(vcpu->arch.sie_block->ckc,
2841                              (u64 __user *)reg->addr);
2842                 break;
2843         case KVM_REG_S390_PFTOKEN:
2844                 r = get_user(vcpu->arch.pfault_token,
2845                              (u64 __user *)reg->addr);
2846                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2847                         kvm_clear_async_pf_completion_queue(vcpu);
2848                 break;
2849         case KVM_REG_S390_PFCOMPARE:
2850                 r = get_user(vcpu->arch.pfault_compare,
2851                              (u64 __user *)reg->addr);
2852                 break;
2853         case KVM_REG_S390_PFSELECT:
2854                 r = get_user(vcpu->arch.pfault_select,
2855                              (u64 __user *)reg->addr);
2856                 break;
2857         case KVM_REG_S390_PP:
2858                 r = get_user(vcpu->arch.sie_block->pp,
2859                              (u64 __user *)reg->addr);
2860                 break;
2861         case KVM_REG_S390_GBEA:
2862                 r = get_user(vcpu->arch.sie_block->gbea,
2863                              (u64 __user *)reg->addr);
2864                 break;
2865         default:
2866                 break;
2867         }
2868
2869         return r;
2870 }
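/*
 * Hedged userspace sketch (not part of this file): reading and writing one
 * of the registers handled above through the ONE_REG interface. vcpu_fd is
 * assumed to come from KVM_CREATE_VCPU; error handling is trimmed.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	static int get_cpu_timer(int vcpu_fd, __u64 *val)
 *	{
 *		struct kvm_one_reg reg = {
 *			.id   = KVM_REG_S390_CPU_TIMER,
 *			.addr = (__u64)(unsigned long)val,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *	}
 *
 *	static int set_cpu_timer(int vcpu_fd, __u64 val)
 *	{
 *		struct kvm_one_reg reg = {
 *			.id   = KVM_REG_S390_CPU_TIMER,
 *			.addr = (__u64)(unsigned long)&val,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 *	}
 */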
2871
2872 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2873 {
2874         kvm_s390_vcpu_initial_reset(vcpu);
2875         return 0;
2876 }
2877
2878 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2879 {
2880         vcpu_load(vcpu);
2881         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2882         vcpu_put(vcpu);
2883         return 0;
2884 }
2885
2886 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2887 {
2888         vcpu_load(vcpu);
2889         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2890         vcpu_put(vcpu);
2891         return 0;
2892 }
2893
2894 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2895                                   struct kvm_sregs *sregs)
2896 {
2897         vcpu_load(vcpu);
2898
2899         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2900         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2901
2902         vcpu_put(vcpu);
2903         return 0;
2904 }
2905
2906 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2907                                   struct kvm_sregs *sregs)
2908 {
2909         vcpu_load(vcpu);
2910
2911         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2912         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2913
2914         vcpu_put(vcpu);
2915         return 0;
2916 }
2917
2918 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2919 {
2920         int ret = 0;
2921
2922         vcpu_load(vcpu);
2923
2924         if (test_fp_ctl(fpu->fpc)) {
2925                 ret = -EINVAL;
2926                 goto out;
2927         }
2928         vcpu->run->s.regs.fpc = fpu->fpc;
2929         if (MACHINE_HAS_VX)
2930                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2931                                  (freg_t *) fpu->fprs);
2932         else
2933                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2934
2935 out:
2936         vcpu_put(vcpu);
2937         return ret;
2938 }
2939
2940 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2941 {
2942         vcpu_load(vcpu);
2943
2944         /* make sure we have the latest values */
2945         save_fpu_regs();
2946         if (MACHINE_HAS_VX)
2947                 convert_vx_to_fp((freg_t *) fpu->fprs,
2948                                  (__vector128 *) vcpu->run->s.regs.vrs);
2949         else
2950                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2951         fpu->fpc = vcpu->run->s.regs.fpc;
2952
2953         vcpu_put(vcpu);
2954         return 0;
2955 }
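/*
 * Hedged userspace sketch: saving and restoring the guest FPU state with
 * KVM_GET_FPU/KVM_SET_FPU. On s390, struct kvm_fpu carries the fpc and the
 * 16 floating point registers; an invalid fpc is rejected by the
 * test_fp_ctl() check above. vcpu_fd is assumed.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	static int clear_guest_fpc(int vcpu_fd)
 *	{
 *		struct kvm_fpu fpu;
 *
 *		if (ioctl(vcpu_fd, KVM_GET_FPU, &fpu))
 *			return -1;
 *		fpu.fpc = 0;
 *		return ioctl(vcpu_fd, KVM_SET_FPU, &fpu);
 *	}
 */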
2956
2957 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2958 {
2959         int rc = 0;
2960
2961         if (!is_vcpu_stopped(vcpu))
2962                 rc = -EBUSY;
2963         else {
2964                 vcpu->run->psw_mask = psw.mask;
2965                 vcpu->run->psw_addr = psw.addr;
2966         }
2967         return rc;
2968 }
2969
2970 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2971                                   struct kvm_translation *tr)
2972 {
2973         return -EINVAL; /* not implemented yet */
2974 }
2975
2976 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2977                               KVM_GUESTDBG_USE_HW_BP | \
2978                               KVM_GUESTDBG_ENABLE)
2979
2980 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2981                                         struct kvm_guest_debug *dbg)
2982 {
2983         int rc = 0;
2984
2985         vcpu_load(vcpu);
2986
2987         vcpu->guest_debug = 0;
2988         kvm_s390_clear_bp_data(vcpu);
2989
2990         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2991                 rc = -EINVAL;
2992                 goto out;
2993         }
2994         if (!sclp.has_gpere) {
2995                 rc = -EINVAL;
2996                 goto out;
2997         }
2998
2999         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3000                 vcpu->guest_debug = dbg->control;
3001                 /* enforce guest PER */
3002                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3003
3004                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3005                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3006         } else {
3007                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3008                 vcpu->arch.guestdbg.last_bp = 0;
3009         }
3010
3011         if (rc) {
3012                 vcpu->guest_debug = 0;
3013                 kvm_s390_clear_bp_data(vcpu);
3014                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3015         }
3016
3017 out:
3018         vcpu_put(vcpu);
3019         return rc;
3020 }
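/*
 * Hedged userspace sketch: enabling single-step debugging for a vcpu. Only
 * the bits in VALID_GUESTDBG_FLAGS above are accepted; hardware breakpoints
 * would additionally pass breakpoint data via dbg.arch together with
 * KVM_GUESTDBG_USE_HW_BP. vcpu_fd is assumed.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	static int enable_singlestep(int vcpu_fd)
 *	{
 *		struct kvm_guest_debug dbg = {
 *			.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *	}
 */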
3021
3022 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3023                                     struct kvm_mp_state *mp_state)
3024 {
3025         int ret;
3026
3027         vcpu_load(vcpu);
3028
3029         /* CHECK_STOP and LOAD are not supported yet */
3030         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3031                                       KVM_MP_STATE_OPERATING;
3032
3033         vcpu_put(vcpu);
3034         return ret;
3035 }
3036
3037 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3038                                     struct kvm_mp_state *mp_state)
3039 {
3040         int rc = 0;
3041
3042         vcpu_load(vcpu);
3043
3044         /* user space knows about this interface - let it control the state */
3045         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3046
3047         switch (mp_state->mp_state) {
3048         case KVM_MP_STATE_STOPPED:
3049                 kvm_s390_vcpu_stop(vcpu);
3050                 break;
3051         case KVM_MP_STATE_OPERATING:
3052                 kvm_s390_vcpu_start(vcpu);
3053                 break;
3054         case KVM_MP_STATE_LOAD:
3055         case KVM_MP_STATE_CHECK_STOP:
3056                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3057         default:
3058                 rc = -ENXIO;
3059         }
3060
3061         vcpu_put(vcpu);
3062         return rc;
3063 }
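/*
 * Hedged userspace sketch: stopping and restarting a vcpu via the mp_state
 * interface handled above. Note that the first KVM_SET_MP_STATE call flips
 * the VM to user controlled cpu state (user_cpu_state_ctrl). vcpu_fd is
 * assumed.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	static int set_vcpu_state(int vcpu_fd, __u32 state)
 *	{
 *		struct kvm_mp_state mp = { .mp_state = state };
 *
 *		return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 *	}
 *
 *	// set_vcpu_state(vcpu_fd, KVM_MP_STATE_STOPPED);
 *	// set_vcpu_state(vcpu_fd, KVM_MP_STATE_OPERATING);
 */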
3064
3065 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3066 {
3067         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3068 }
3069
3070 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3071 {
3072 retry:
3073         kvm_s390_vcpu_request_handled(vcpu);
3074         if (!kvm_request_pending(vcpu))
3075                 return 0;
3076         /*
3077          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3078          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3079          * This ensures that the ipte instruction for this request has
3080          * already finished. We might race against a second unmapper that
3081          * wants to set the blocking bit. Let's just retry the request loop.
3082          */
3083         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3084                 int rc;
3085                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3086                                           kvm_s390_get_prefix(vcpu),
3087                                           PAGE_SIZE * 2, PROT_WRITE);
3088                 if (rc) {
3089                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3090                         return rc;
3091                 }
3092                 goto retry;
3093         }
3094
3095         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3096                 vcpu->arch.sie_block->ihcpu = 0xffff;
3097                 goto retry;
3098         }
3099
3100         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3101                 if (!ibs_enabled(vcpu)) {
3102                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3103                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3104                 }
3105                 goto retry;
3106         }
3107
3108         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3109                 if (ibs_enabled(vcpu)) {
3110                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3111                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3112                 }
3113                 goto retry;
3114         }
3115
3116         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3117                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3118                 goto retry;
3119         }
3120
3121         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3122                 /*
3123                  * Disable CMM virtualization; we will emulate the ESSA
3124                  * instruction manually, in order to provide additional
3125                  * functionalities needed for live migration.
3126                  */
3127                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3128                 goto retry;
3129         }
3130
3131         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3132                 /*
3133                  * Re-enable CMM virtualization if CMMA is available and
3134                  * CMM has been used.
3135                  */
3136                 if ((vcpu->kvm->arch.use_cmma) &&
3137                     (vcpu->kvm->mm->context.uses_cmm))
3138                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3139                 goto retry;
3140         }
3141
3142         /* nothing to do, just clear the request */
3143         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3144
3145         return 0;
3146 }
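/*
 * Hedged in-kernel sketch of the request mechanism consumed above: a
 * producer raises a request bit on the vcpu, which is then handled in
 * kvm_s390_handle_requests() before the next SIE entry.
 *
 *	// asynchronous: just mark the request, e.g. force a TLB flush
 *	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 *
 *	// synchronous variant used elsewhere in this file: raise the request
 *	// and wait until the vcpu has left SIE and seen it
 *	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
 */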
3147
3148 void kvm_s390_set_tod_clock(struct kvm *kvm,
3149                             const struct kvm_s390_vm_tod_clock *gtod)
3150 {
3151         struct kvm_vcpu *vcpu;
3152         struct kvm_s390_tod_clock_ext htod;
3153         int i;
3154
3155         mutex_lock(&kvm->lock);
3156         preempt_disable();
3157
3158         get_tod_clock_ext((char *)&htod);
3159
3160         kvm->arch.epoch = gtod->tod - htod.tod;
3161         kvm->arch.epdx = 0;
3162         if (test_kvm_facility(kvm, 139)) {
3163                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3164                 if (kvm->arch.epoch > gtod->tod)
3165                         kvm->arch.epdx -= 1;
3166         }
3167
3168         kvm_s390_vcpu_block_all(kvm);
3169         kvm_for_each_vcpu(i, vcpu, kvm) {
3170                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3171                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3172         }
3173
3174         kvm_s390_vcpu_unblock_all(kvm);
3175         preempt_enable();
3176         mutex_unlock(&kvm->lock);
3177 }
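/*
 * Worked example (hedged, illustrative numbers) for the carry handling
 * above: with host TOD 0xfffffffffffffffe and a requested guest TOD of
 * 0x2, the 64-bit subtraction gtod->tod - htod.tod wraps to 0x4. Since
 * that result is larger than gtod->tod, a borrow occurred and epdx is
 * decremented by one, so that the 128-bit sum of the host clock and the
 * (epdx, epoch) offset yields the requested guest clock again.
 */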
3178
3179 /**
3180  * kvm_arch_fault_in_page - fault-in guest page if necessary
3181  * @vcpu: The corresponding virtual cpu
3182  * @gpa: Guest physical address
3183  * @writable: Whether the page should be writable or not
3184  *
3185  * Make sure that a guest page has been faulted-in on the host.
3186  *
3187  * Return: Zero on success, negative error code otherwise.
3188  */
3189 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3190 {
3191         return gmap_fault(vcpu->arch.gmap, gpa,
3192                           writable ? FAULT_FLAG_WRITE : 0);
3193 }
3194
3195 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3196                                       unsigned long token)
3197 {
3198         struct kvm_s390_interrupt inti;
3199         struct kvm_s390_irq irq;
3200
3201         if (start_token) {
3202                 irq.u.ext.ext_params2 = token;
3203                 irq.type = KVM_S390_INT_PFAULT_INIT;
3204                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3205         } else {
3206                 inti.type = KVM_S390_INT_PFAULT_DONE;
3207                 inti.parm64 = token;
3208                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3209         }
3210 }
3211
3212 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3213                                      struct kvm_async_pf *work)
3214 {
3215         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3216         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3217 }
3218
3219 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3220                                  struct kvm_async_pf *work)
3221 {
3222         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3223         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3224 }
3225
3226 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3227                                struct kvm_async_pf *work)
3228 {
3229         /* s390 will always inject the page directly */
3230 }
3231
3232 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3233 {
3234         /*
3235          * s390 will always inject the page directly,
3236          * but we still want check_async_completion to clean up
3237          */
3238         return true;
3239 }
3240
3241 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3242 {
3243         hva_t hva;
3244         struct kvm_arch_async_pf arch;
3245         int rc;
3246
3247         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3248                 return 0;
3249         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3250             vcpu->arch.pfault_compare)
3251                 return 0;
3252         if (psw_extint_disabled(vcpu))
3253                 return 0;
3254         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3255                 return 0;
3256         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3257                 return 0;
3258         if (!vcpu->arch.gmap->pfault_enabled)
3259                 return 0;
3260
3261         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3262         hva += current->thread.gmap_addr & ~PAGE_MASK;
3263         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3264                 return 0;
3265
3266         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3267         return rc;
3268 }
3269
3270 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3271 {
3272         int rc, cpuflags;
3273
3274         /*
3275          * On s390 notifications for arriving pages will be delivered directly
3276          * to the guest, but the housekeeping for completed pfaults is
3277          * handled outside the worker.
3278          */
3279         kvm_check_async_pf_completion(vcpu);
3280
3281         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3282         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3283
3284         if (need_resched())
3285                 schedule();
3286
3287         if (test_cpu_flag(CIF_MCCK_PENDING))
3288                 s390_handle_mcck();
3289
3290         if (!kvm_is_ucontrol(vcpu->kvm)) {
3291                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3292                 if (rc)
3293                         return rc;
3294         }
3295
3296         rc = kvm_s390_handle_requests(vcpu);
3297         if (rc)
3298                 return rc;
3299
3300         if (guestdbg_enabled(vcpu)) {
3301                 kvm_s390_backup_guest_per_regs(vcpu);
3302                 kvm_s390_patch_guest_per_regs(vcpu);
3303         }
3304
3305         vcpu->arch.sie_block->icptcode = 0;
3306         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3307         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3308         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3309
3310         return 0;
3311 }
3312
3313 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3314 {
3315         struct kvm_s390_pgm_info pgm_info = {
3316                 .code = PGM_ADDRESSING,
3317         };
3318         u8 opcode, ilen;
3319         int rc;
3320
3321         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3322         trace_kvm_s390_sie_fault(vcpu);
3323
3324         /*
3325          * We want to inject an addressing exception, which is defined as a
3326          * suppressing or terminating exception. However, since we came here
3327          * by a DAT access exception, the PSW still points to the faulting
3328          * instruction since DAT exceptions are nullifying. So we've got
3329          * to look up the current opcode to get the length of the instruction
3330          * to be able to forward the PSW.
3331          */
3332         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3333         ilen = insn_length(opcode);
3334         if (rc < 0) {
3335                 return rc;
3336         } else if (rc) {
3337                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3338                  * Forward by arbitrary ilc, injection will take care of
3339                  * nullification if necessary.
3340                  */
3341                 pgm_info = vcpu->arch.pgm;
3342                 ilen = 4;
3343         }
3344         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3345         kvm_s390_forward_psw(vcpu, ilen);
3346         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3347 }
3348
3349 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3350 {
3351         struct mcck_volatile_info *mcck_info;
3352         struct sie_page *sie_page;
3353
3354         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3355                    vcpu->arch.sie_block->icptcode);
3356         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3357
3358         if (guestdbg_enabled(vcpu))
3359                 kvm_s390_restore_guest_per_regs(vcpu);
3360
3361         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3362         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3363
3364         if (exit_reason == -EINTR) {
3365                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3366                 sie_page = container_of(vcpu->arch.sie_block,
3367                                         struct sie_page, sie_block);
3368                 mcck_info = &sie_page->mcck_info;
3369                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3370                 return 0;
3371         }
3372
3373         if (vcpu->arch.sie_block->icptcode > 0) {
3374                 int rc = kvm_handle_sie_intercept(vcpu);
3375
3376                 if (rc != -EOPNOTSUPP)
3377                         return rc;
3378                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3379                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3380                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3381                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3382                 return -EREMOTE;
3383         } else if (exit_reason != -EFAULT) {
3384                 vcpu->stat.exit_null++;
3385                 return 0;
3386         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3387                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3388                 vcpu->run->s390_ucontrol.trans_exc_code =
3389                                                 current->thread.gmap_addr;
3390                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3391                 return -EREMOTE;
3392         } else if (current->thread.gmap_pfault) {
3393                 trace_kvm_s390_major_guest_pfault(vcpu);
3394                 current->thread.gmap_pfault = 0;
3395                 if (kvm_arch_setup_async_pf(vcpu))
3396                         return 0;
3397                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3398         }
3399         return vcpu_post_run_fault_in_sie(vcpu);
3400 }
3401
3402 static int __vcpu_run(struct kvm_vcpu *vcpu)
3403 {
3404         int rc, exit_reason;
3405
3406         /*
3407          * We try to hold kvm->srcu during most of vcpu_run (except when
3408          * running the guest), so that memslots (and other stuff) are protected
3409          */
3410         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3411
3412         do {
3413                 rc = vcpu_pre_run(vcpu);
3414                 if (rc)
3415                         break;
3416
3417                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3418                 /*
3419                  * As PF_VCPU will be used in the fault handler, there must be
3420                  * no uaccess between guest_enter and guest_exit.
3421                  */
3422                 local_irq_disable();
3423                 guest_enter_irqoff();
3424                 __disable_cpu_timer_accounting(vcpu);
3425                 local_irq_enable();
3426                 exit_reason = sie64a(vcpu->arch.sie_block,
3427                                      vcpu->run->s.regs.gprs);
3428                 local_irq_disable();
3429                 __enable_cpu_timer_accounting(vcpu);
3430                 guest_exit_irqoff();
3431                 local_irq_enable();
3432                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3433
3434                 rc = vcpu_post_run(vcpu, exit_reason);
3435         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3436
3437         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3438         return rc;
3439 }
3440
3441 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3442 {
3443         struct runtime_instr_cb *riccb;
3444         struct gs_cb *gscb;
3445
3446         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3447         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3448         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3449         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3450         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3451                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3452         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3453                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3454                 /* some control register changes require a tlb flush */
3455                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3456         }
3457         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3458                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3459                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3460                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3461                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3462                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3463         }
3464         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3465                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3466                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3467                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3468                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3469                         kvm_clear_async_pf_completion_queue(vcpu);
3470         }
3471         /*
3472          * If userspace sets the riccb (e.g. after migration) to a valid state,
3473          * we should enable RI here instead of doing the lazy enablement.
3474          */
3475         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3476             test_kvm_facility(vcpu->kvm, 64) &&
3477             riccb->v &&
3478             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3479                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3480                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3481         }
3482         /*
3483          * If userspace sets the gscb (e.g. after migration) to non-zero,
3484          * we should enable GS here instead of doing the lazy enablement.
3485          */
3486         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3487             test_kvm_facility(vcpu->kvm, 133) &&
3488             gscb->gssm &&
3489             !vcpu->arch.gs_enabled) {
3490                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3491                 vcpu->arch.sie_block->ecb |= ECB_GS;
3492                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3493                 vcpu->arch.gs_enabled = 1;
3494         }
3495         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3496             test_kvm_facility(vcpu->kvm, 82)) {
3497                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3498                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3499         }
3500         save_access_regs(vcpu->arch.host_acrs);
3501         restore_access_regs(vcpu->run->s.regs.acrs);
3502         /* save host (userspace) fprs/vrs */
3503         save_fpu_regs();
3504         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3505         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3506         if (MACHINE_HAS_VX)
3507                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3508         else
3509                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3510         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3511         if (test_fp_ctl(current->thread.fpu.fpc))
3512                 /* User space provided an invalid FPC, let's clear it */
3513                 current->thread.fpu.fpc = 0;
3514         if (MACHINE_HAS_GS) {
3515                 preempt_disable();
3516                 __ctl_set_bit(2, 4);
3517                 if (current->thread.gs_cb) {
3518                         vcpu->arch.host_gscb = current->thread.gs_cb;
3519                         save_gs_cb(vcpu->arch.host_gscb);
3520                 }
3521                 if (vcpu->arch.gs_enabled) {
3522                         current->thread.gs_cb = (struct gs_cb *)
3523                                                 &vcpu->run->s.regs.gscb;
3524                         restore_gs_cb(current->thread.gs_cb);
3525                 }
3526                 preempt_enable();
3527         }
3528         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3529
3530         kvm_run->kvm_dirty_regs = 0;
3531 }
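/*
 * Hedged userspace sketch of the dirty register protocol consumed by
 * sync_regs() above: userspace updates the shared register area in the
 * mmap'ed kvm_run and marks the touched groups in kvm_dirty_regs before
 * issuing the next KVM_RUN. run, vcpu_fd, new_prefix and new_cr0 are
 * assumed placeholders.
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->s.regs.crs[0] = new_cr0;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX | KVM_SYNC_CRS;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 */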
3532
3533 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3534 {
3535         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3536         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3537         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3538         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3539         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3540         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3541         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3542         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3543         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3544         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3545         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3546         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3547         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3548         save_access_regs(vcpu->run->s.regs.acrs);
3549         restore_access_regs(vcpu->arch.host_acrs);
3550         /* Save guest register state */
3551         save_fpu_regs();
3552         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3553         /* Restore will be done lazily at return */
3554         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3555         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3556         if (MACHINE_HAS_GS) {
3557                 __ctl_set_bit(2, 4);
3558                 if (vcpu->arch.gs_enabled)
3559                         save_gs_cb(current->thread.gs_cb);
3560                 preempt_disable();
3561                 current->thread.gs_cb = vcpu->arch.host_gscb;
3562                 restore_gs_cb(vcpu->arch.host_gscb);
3563                 preempt_enable();
3564                 if (!vcpu->arch.host_gscb)
3565                         __ctl_clear_bit(2, 4);
3566                 vcpu->arch.host_gscb = NULL;
3567         }
3568         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3569 }
3570
3571 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3572 {
3573         int rc;
3574
3575         if (kvm_run->immediate_exit)
3576                 return -EINTR;
3577
3578         vcpu_load(vcpu);
3579
3580         if (guestdbg_exit_pending(vcpu)) {
3581                 kvm_s390_prepare_debug_exit(vcpu);
3582                 rc = 0;
3583                 goto out;
3584         }
3585
3586         kvm_sigset_activate(vcpu);
3587
3588         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3589                 kvm_s390_vcpu_start(vcpu);
3590         } else if (is_vcpu_stopped(vcpu)) {
3591                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3592                                    vcpu->vcpu_id);
3593                 rc = -EINVAL;
3594                 goto out;
3595         }
3596
3597         sync_regs(vcpu, kvm_run);
3598         enable_cpu_timer_accounting(vcpu);
3599
3600         might_fault();
3601         rc = __vcpu_run(vcpu);
3602
3603         if (signal_pending(current) && !rc) {
3604                 kvm_run->exit_reason = KVM_EXIT_INTR;
3605                 rc = -EINTR;
3606         }
3607
3608         if (guestdbg_exit_pending(vcpu) && !rc)  {
3609                 kvm_s390_prepare_debug_exit(vcpu);
3610                 rc = 0;
3611         }
3612
3613         if (rc == -EREMOTE) {
3614                 /* userspace support is needed, kvm_run has been prepared */
3615                 rc = 0;
3616         }
3617
3618         disable_cpu_timer_accounting(vcpu);
3619         store_regs(vcpu, kvm_run);
3620
3621         kvm_sigset_deactivate(vcpu);
3622
3623         vcpu->stat.exit_userspace++;
3624 out:
3625         vcpu_put(vcpu);
3626         return rc;
3627 }
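/*
 * Hedged userspace sketch of the run loop that ends up in this ioctl
 * handler (kvm_fd is the /dev/kvm fd, vcpu_fd comes from KVM_CREATE_VCPU;
 * error handling is trimmed):
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *
 *	int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *			break;		// e.g. EINTR when a signal arrived
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			;		// intercept that needs userspace help
 *	}
 */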
3628
3629 /*
3630  * store status at address
3631  * we have two special cases:
3632  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3633  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3634  */
3635 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3636 {
3637         unsigned char archmode = 1;
3638         freg_t fprs[NUM_FPRS];
3639         unsigned int px;
3640         u64 clkcomp, cputm;
3641         int rc;
3642
3643         px = kvm_s390_get_prefix(vcpu);
3644         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3645                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3646                         return -EFAULT;
3647                 gpa = 0;
3648         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3649                 if (write_guest_real(vcpu, 163, &archmode, 1))
3650                         return -EFAULT;
3651                 gpa = px;
3652         } else
3653                 gpa -= __LC_FPREGS_SAVE_AREA;
3654
3655         /* manually convert vector registers if necessary */
3656         if (MACHINE_HAS_VX) {
3657                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3658                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3659                                      fprs, 128);
3660         } else {
3661                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3662                                      vcpu->run->s.regs.fprs, 128);
3663         }
3664         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3665                               vcpu->run->s.regs.gprs, 128);
3666         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3667                               &vcpu->arch.sie_block->gpsw, 16);
3668         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3669                               &px, 4);
3670         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3671                               &vcpu->run->s.regs.fpc, 4);
3672         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3673                               &vcpu->arch.sie_block->todpr, 4);
3674         cputm = kvm_s390_get_cpu_timer(vcpu);
3675         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3676                               &cputm, 8);
3677         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3678         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3679                               &clkcomp, 8);
3680         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3681                               &vcpu->run->s.regs.acrs, 64);
3682         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3683                               &vcpu->arch.sie_block->gcr, 128);
3684         return rc ? -EFAULT : 0;
3685 }
3686
3687 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3688 {
3689         /*
3690          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3691          * switch in the run ioctl. Let's update our copies before we save
3692          * them into the save area
3693          */
3694         save_fpu_regs();
3695         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3696         save_access_regs(vcpu->run->s.regs.acrs);
3697
3698         return kvm_s390_store_status_unloaded(vcpu, addr);
3699 }
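/*
 * Hedged userspace sketch for KVM_S390_STORE_STATUS (dispatched from
 * kvm_arch_vcpu_ioctl() below): the unsigned long argument is the absolute
 * guest address of the save area, or one of the two special values
 * described in the comment above. vcpu_fd and gpa are assumed.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	int r = ioctl(vcpu_fd, KVM_S390_STORE_STATUS, gpa);
 */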
3700
3701 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3702 {
3703         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3704         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3705 }
3706
3707 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3708 {
3709         unsigned int i;
3710         struct kvm_vcpu *vcpu;
3711
3712         kvm_for_each_vcpu(i, vcpu, kvm) {
3713                 __disable_ibs_on_vcpu(vcpu);
3714         }
3715 }
3716
3717 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3718 {
3719         if (!sclp.has_ibs)
3720                 return;
3721         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3722         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3723 }
3724
3725 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3726 {
3727         int i, online_vcpus, started_vcpus = 0;
3728
3729         if (!is_vcpu_stopped(vcpu))
3730                 return;
3731
3732         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3733         /* Only one cpu at a time may enter/leave the STOPPED state. */
3734         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3735         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3736
3737         for (i = 0; i < online_vcpus; i++) {
3738                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3739                         started_vcpus++;
3740         }
3741
3742         if (started_vcpus == 0) {
3743                 /* we're the only active VCPU -> speed it up */
3744                 __enable_ibs_on_vcpu(vcpu);
3745         } else if (started_vcpus == 1) {
3746                 /*
3747                  * As we are starting a second VCPU, we have to disable
3748                  * the IBS facility on all VCPUs to remove potentially
3749                  * outstanding ENABLE requests.
3750                  */
3751                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3752         }
3753
3754         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3755         /*
3756          * Another VCPU might have used IBS while we were offline.
3757          * Let's play safe and flush the VCPU at startup.
3758          */
3759         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3760         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3761         return;
3762 }
3763
3764 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3765 {
3766         int i, online_vcpus, started_vcpus = 0;
3767         struct kvm_vcpu *started_vcpu = NULL;
3768
3769         if (is_vcpu_stopped(vcpu))
3770                 return;
3771
3772         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3773         /* Only one cpu at a time may enter/leave the STOPPED state. */
3774         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3775         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3776
3777         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3778         kvm_s390_clear_stop_irq(vcpu);
3779
3780         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3781         __disable_ibs_on_vcpu(vcpu);
3782
3783         for (i = 0; i < online_vcpus; i++) {
3784                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3785                         started_vcpus++;
3786                         started_vcpu = vcpu->kvm->vcpus[i];
3787                 }
3788         }
3789
3790         if (started_vcpus == 1) {
3791                 /*
3792                  * As we only have one VCPU left, we want to enable the
3793                  * IBS facility for that VCPU to speed it up.
3794                  */
3795                 __enable_ibs_on_vcpu(started_vcpu);
3796         }
3797
3798         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3799         return;
3800 }
3801
3802 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3803                                      struct kvm_enable_cap *cap)
3804 {
3805         int r;
3806
3807         if (cap->flags)
3808                 return -EINVAL;
3809
3810         switch (cap->cap) {
3811         case KVM_CAP_S390_CSS_SUPPORT:
3812                 if (!vcpu->kvm->arch.css_support) {
3813                         vcpu->kvm->arch.css_support = 1;
3814                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3815                         trace_kvm_s390_enable_css(vcpu->kvm);
3816                 }
3817                 r = 0;
3818                 break;
3819         default:
3820                 r = -EINVAL;
3821                 break;
3822         }
3823         return r;
3824 }
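/*
 * Hedged userspace sketch: enabling the CSS support capability handled
 * above; cap.flags must be zero or the ioctl is rejected. vcpu_fd is
 * assumed.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	int r = ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */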
3825
3826 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3827                                   struct kvm_s390_mem_op *mop)
3828 {
3829         void __user *uaddr = (void __user *)mop->buf;
3830         void *tmpbuf = NULL;
3831         int r, srcu_idx;
3832         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3833                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3834
3835         if (mop->flags & ~supported_flags)
3836                 return -EINVAL;
3837
3838         if (mop->size > MEM_OP_MAX_SIZE)
3839                 return -E2BIG;
3840
3841         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3842                 tmpbuf = vmalloc(mop->size);
3843                 if (!tmpbuf)
3844                         return -ENOMEM;
3845         }
3846
3847         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3848
3849         switch (mop->op) {
3850         case KVM_S390_MEMOP_LOGICAL_READ:
3851                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3852                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3853                                             mop->size, GACC_FETCH);
3854                         break;
3855                 }
3856                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3857                 if (r == 0) {
3858                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3859                                 r = -EFAULT;
3860                 }
3861                 break;
3862         case KVM_S390_MEMOP_LOGICAL_WRITE:
3863                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3864                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3865                                             mop->size, GACC_STORE);
3866                         break;
3867                 }
3868                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3869                         r = -EFAULT;
3870                         break;
3871                 }
3872                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3873                 break;
3874         default:
3875                 r = -EINVAL;
3876         }
3877
3878         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3879
3880         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3881                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3882
3883         vfree(tmpbuf);
3884         return r;
3885 }
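/*
 * Hedged userspace sketch for the memory operation ioctl above: read 256
 * bytes from guest logical address 0x10000 through access register 0. A
 * probe would set KVM_S390_MEMOP_F_CHECK_ONLY and leave buf unused.
 * vcpu_fd is assumed.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	__u8 data[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = sizeof(data),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)data,
 *		.ar    = 0,
 *	};
 *
 *	int r = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */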
3886
3887 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3888                                unsigned int ioctl, unsigned long arg)
3889 {
3890         struct kvm_vcpu *vcpu = filp->private_data;
3891         void __user *argp = (void __user *)arg;
3892
3893         switch (ioctl) {
3894         case KVM_S390_IRQ: {
3895                 struct kvm_s390_irq s390irq;
3896
3897                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3898                         return -EFAULT;
3899                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3900         }
3901         case KVM_S390_INTERRUPT: {
3902                 struct kvm_s390_interrupt s390int;
3903                 struct kvm_s390_irq s390irq;
3904
3905                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3906                         return -EFAULT;
3907                 if (s390int_to_s390irq(&s390int, &s390irq))
3908                         return -EINVAL;
3909                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3910         }
3911         }
3912         return -ENOIOCTLCMD;
3913 }
3914
3915 long kvm_arch_vcpu_ioctl(struct file *filp,
3916                          unsigned int ioctl, unsigned long arg)
3917 {
3918         struct kvm_vcpu *vcpu = filp->private_data;
3919         void __user *argp = (void __user *)arg;
3920         int idx;
3921         long r;
3922
3923         vcpu_load(vcpu);
3924
3925         switch (ioctl) {
3926         case KVM_S390_STORE_STATUS:
3927                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3928                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3929                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3930                 break;
3931         case KVM_S390_SET_INITIAL_PSW: {
3932                 psw_t psw;
3933
3934                 r = -EFAULT;
3935                 if (copy_from_user(&psw, argp, sizeof(psw)))
3936                         break;
3937                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3938                 break;
3939         }
3940         case KVM_S390_INITIAL_RESET:
3941                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3942                 break;
3943         case KVM_SET_ONE_REG:
3944         case KVM_GET_ONE_REG: {
3945                 struct kvm_one_reg reg;
3946                 r = -EFAULT;
3947                 if (copy_from_user(&reg, argp, sizeof(reg)))
3948                         break;
3949                 if (ioctl == KVM_SET_ONE_REG)
3950                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3951                 else
3952                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3953                 break;
3954         }
3955 #ifdef CONFIG_KVM_S390_UCONTROL
3956         case KVM_S390_UCAS_MAP: {
3957                 struct kvm_s390_ucas_mapping ucasmap;
3958
3959                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3960                         r = -EFAULT;
3961                         break;
3962                 }
3963
3964                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3965                         r = -EINVAL;
3966                         break;
3967                 }
3968
3969                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3970                                      ucasmap.vcpu_addr, ucasmap.length);
3971                 break;
3972         }
3973         case KVM_S390_UCAS_UNMAP: {
3974                 struct kvm_s390_ucas_mapping ucasmap;
3975
3976                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3977                         r = -EFAULT;
3978                         break;
3979                 }
3980
3981                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3982                         r = -EINVAL;
3983                         break;
3984                 }
3985
3986                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3987                         ucasmap.length);
3988                 break;
3989         }
3990 #endif
3991         case KVM_S390_VCPU_FAULT: {
3992                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3993                 break;
3994         }
3995         case KVM_ENABLE_CAP:
3996         {
3997                 struct kvm_enable_cap cap;
3998                 r = -EFAULT;
3999                 if (copy_from_user(&cap, argp, sizeof(cap)))
4000                         break;
4001                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4002                 break;
4003         }
4004         case KVM_S390_MEM_OP: {
4005                 struct kvm_s390_mem_op mem_op;
4006
4007                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4008                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4009                 else
4010                         r = -EFAULT;
4011                 break;
4012         }
4013         case KVM_S390_SET_IRQ_STATE: {
4014                 struct kvm_s390_irq_state irq_state;
4015
4016                 r = -EFAULT;
4017                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4018                         break;
4019                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4020                     irq_state.len == 0 ||
4021                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4022                         r = -EINVAL;
4023                         break;
4024                 }
4025                 /* do not use irq_state.flags, it will break old QEMUs */
4026                 r = kvm_s390_set_irq_state(vcpu,
4027                                            (void __user *) irq_state.buf,
4028                                            irq_state.len);
4029                 break;
4030         }
4031         case KVM_S390_GET_IRQ_STATE: {
4032                 struct kvm_s390_irq_state irq_state;
4033
4034                 r = -EFAULT;
4035                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4036                         break;
4037                 if (irq_state.len == 0) {
4038                         r = -EINVAL;
4039                         break;
4040                 }
4041                 /* do not use irq_state.flags, it will break old QEMUs */
4042                 r = kvm_s390_get_irq_state(vcpu,
4043                                            (__u8 __user *)  irq_state.buf,
4044                                            irq_state.len);
4045                 break;
4046         }
4047         default:
4048                 r = -ENOTTY;
4049         }
4050
4051         vcpu_put(vcpu);
4052         return r;
4053 }
4054
4055 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4056 {
4057 #ifdef CONFIG_KVM_S390_UCONTROL
4058         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4059                  && (kvm_is_ucontrol(vcpu->kvm))) {
4060                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4061                 get_page(vmf->page);
4062                 return 0;
4063         }
4064 #endif
4065         return VM_FAULT_SIGBUS;
4066 }
4067
4068 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4069                             unsigned long npages)
4070 {
4071         return 0;
4072 }
4073
4074 /* Section: memory related */
4075 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4076                                    struct kvm_memory_slot *memslot,
4077                                    const struct kvm_userspace_memory_region *mem,
4078                                    enum kvm_mr_change change)
4079 {
4080         /* A few sanity checks. Memory slots have to start and end at a
4081            segment boundary (1 MB). The memory in userland may be fragmented
4082            into various different vmas. It is okay to mmap() and munmap()
4083            stuff in this slot after doing this call at any time */
4084
4085         if (mem->userspace_addr & 0xffffful)
4086                 return -EINVAL;
4087
4088         if (mem->memory_size & 0xffffful)
4089                 return -EINVAL;
4090
4091         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4092                 return -EINVAL;
4093
4094         return 0;
4095 }
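/*
 * Hedged userspace sketch matching the checks above: the userspace address
 * and the size of an s390 memslot must be 1 MB aligned and the slot must
 * fit below the memory limit. vm_fd is assumed, and the mmap'ed backing is
 * assumed to be 1 MB aligned (a real VMM would align it explicitly).
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *
 *	size_t size = 256 << 20;	// 256 MB, a multiple of 1 MB
 *	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = size,
 *		.userspace_addr  = (__u64)(unsigned long)mem,
 *	};
 *
 *	int r = ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */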
4096
4097 void kvm_arch_commit_memory_region(struct kvm *kvm,
4098                                 const struct kvm_userspace_memory_region *mem,
4099                                 const struct kvm_memory_slot *old,
4100                                 const struct kvm_memory_slot *new,
4101                                 enum kvm_mr_change change)
4102 {
4103         int rc;
4104
4105         /* If the basics of the memslot do not change, we do not want
4106          * to update the gmap. Every update causes several unnecessary
4107          * segment translation exceptions. This is usually handled just
4108          * fine by the normal fault handler + gmap, but it will also
4109          * cause faults on the prefix page of running guest CPUs.
4110          */
4111         if (old->userspace_addr == mem->userspace_addr &&
4112             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4113             old->npages * PAGE_SIZE == mem->memory_size)
4114                 return;
4115
4116         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4117                 mem->guest_phys_addr, mem->memory_size);
4118         if (rc)
4119                 pr_warn("failed to commit memory region\n");
4120         return;
4121 }
4122
4123 static inline unsigned long nonhyp_mask(int i)
4124 {
4125         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4126
4127         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4128 }
4129
4130 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4131 {
4132         vcpu->valid_wakeup = false;
4133 }
4134
4135 static int __init kvm_s390_init(void)
4136 {
4137         int i;
4138
4139         if (!sclp.has_sief2) {
4140                 pr_info("SIE not available\n");
4141                 return -ENODEV;
4142         }
4143
4144         for (i = 0; i < 16; i++)
4145                 kvm_s390_fac_base[i] |=
4146                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4147
4148         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4149 }
4150
4151 static void __exit kvm_s390_exit(void)
4152 {
4153         kvm_exit();
4154 }
4155
4156 module_init(kvm_s390_init);
4157 module_exit(kvm_s390_exit);
4158
4159 /*
4160  * Enable autoloading of the kvm module.
4161  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4162  * since x86 takes a different approach.
4163  */
4164 #include <linux/miscdevice.h>
4165 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4166 MODULE_ALIAS("devname:kvm");