arch/s390/kvm/kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
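/*
 * Counters exported through KVM's common debugfs statistics interface.
 * VCPU_STAT entries name fields in struct kvm_vcpu (stat.x), VM_STAT
 * entries name fields in struct kvm; see the two macros above.
 */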
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
84         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
85         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
87         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
89         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92         { "deliver_program", VCPU_STAT(deliver_program) },
93         { "deliver_io", VCPU_STAT(deliver_io) },
94         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
96         { "inject_ckc", VCPU_STAT(inject_ckc) },
97         { "inject_cputm", VCPU_STAT(inject_cputm) },
98         { "inject_external_call", VCPU_STAT(inject_external_call) },
99         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
100         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101         { "inject_io", VM_STAT(inject_io) },
102         { "inject_mchk", VCPU_STAT(inject_mchk) },
103         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
104         { "inject_program", VCPU_STAT(inject_program) },
105         { "inject_restart", VCPU_STAT(inject_restart) },
106         { "inject_service_signal", VM_STAT(inject_service_signal) },
107         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110         { "inject_virtio", VM_STAT(inject_virtio) },
111         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
112         { "instruction_gs", VCPU_STAT(instruction_gs) },
113         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
114         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
118         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
119         { "instruction_sck", VCPU_STAT(instruction_sck) },
120         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121         { "instruction_spx", VCPU_STAT(instruction_spx) },
122         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
123         { "instruction_stap", VCPU_STAT(instruction_stap) },
124         { "instruction_iske", VCPU_STAT(instruction_iske) },
125         { "instruction_ri", VCPU_STAT(instruction_ri) },
126         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127         { "instruction_sske", VCPU_STAT(instruction_sske) },
128         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129         { "instruction_essa", VCPU_STAT(instruction_essa) },
130         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
131         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
132         { "instruction_tb", VCPU_STAT(instruction_tb) },
133         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
134         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
135         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
136         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137         { "instruction_sie", VCPU_STAT(instruction_sie) },
138         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
155         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
156         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
158         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
159         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
160         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
161         { NULL }
162 };
163
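/*
 * Layout of the 16-byte clock value stored by STORE CLOCK EXTENDED:
 * the epoch index, the 64-bit TOD portion that KVM works with, and the
 * remaining low-order/programmable bytes, which are not used here.
 */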
164 struct kvm_s390_tod_clock_ext {
165         __u8 epoch_idx;
166         __u64 tod;
167         __u8 reserved[7];
168 } __packed;
169
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174
175
176 /*
177  * For now we handle at most 16 double words as this is what the s390 base
178  * kernel handles and stores in the prefix page. If we ever need to go beyond
179  * this, it will require code changes, but the external uapi can stay.
180  */
181 #define SIZE_INTERNAL 16
182
183 /*
184  * Base feature mask that defines default mask for facilities. Consists of the
185  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
186  */
187 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
188 /*
189  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
190  * and defines the facilities that can be enabled via a cpu model.
191  */
192 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
193
194 static unsigned long kvm_s390_fac_size(void)
195 {
196         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
197         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
198         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
199                 sizeof(S390_lowcore.stfle_fac_list));
200
201         return SIZE_INTERNAL;
202 }
203
204 /* available cpu features supported by kvm */
205 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
206 /* available subfunctions indicated via query / "test bit" */
207 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
208
209 static struct gmap_notifier gmap_notifier;
210 static struct gmap_notifier vsie_gmap_notifier;
211 debug_info_t *kvm_s390_dbf;
212
213 /* Section: not file related */
214 int kvm_arch_hardware_enable(void)
215 {
216         /* every s390 is virtualization enabled ;-) */
217         return 0;
218 }
219
220 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
221                               unsigned long end);
222
223 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
224 {
225         u8 delta_idx = 0;
226
227         /*
228          * The TOD jumps by delta; we have to compensate for this by adding
229          * -delta to the epoch.
230          */
231         delta = -delta;
232
233         /* sign-extension - we're adding to signed values below */
234         if ((s64)delta < 0)
235                 delta_idx = -1;
236
237         scb->epoch += delta;
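        /*
         * With the multiple-epoch facility, the epoch index (epdx) acts as
         * the high part of a multi-word value: add the sign extension of
         * delta plus the carry out of the 64-bit addition above, where the
         * carry shows up as the sum wrapping around (scb->epoch < delta).
         */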
238         if (scb->ecd & ECD_MEF) {
239                 scb->epdx += delta_idx;
240                 if (scb->epoch < delta)
241                         scb->epdx += 1;
242         }
243 }
244
245 /*
246  * This callback is executed during stop_machine(). All CPUs are therefore
247  * temporarily stopped. In order not to change guest behavior, we have to
248  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
249  * so a CPU won't be stopped while calculating with the epoch.
250  */
251 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
252                           void *v)
253 {
254         struct kvm *kvm;
255         struct kvm_vcpu *vcpu;
256         int i;
257         unsigned long long *delta = v;
258
259         list_for_each_entry(kvm, &vm_list, vm_list) {
260                 kvm_for_each_vcpu(i, vcpu, kvm) {
261                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
262                         if (i == 0) {
263                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
264                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
265                         }
266                         if (vcpu->arch.cputm_enabled)
267                                 vcpu->arch.cputm_start += *delta;
268                         if (vcpu->arch.vsie_block)
269                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
270                                                    *delta);
271                 }
272         }
273         return NOTIFY_OK;
274 }
275
276 static struct notifier_block kvm_clock_notifier = {
277         .notifier_call = kvm_clock_sync,
278 };
279
280 int kvm_arch_hardware_setup(void)
281 {
282         gmap_notifier.notifier_call = kvm_gmap_notifier;
283         gmap_register_pte_notifier(&gmap_notifier);
284         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
285         gmap_register_pte_notifier(&vsie_gmap_notifier);
286         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
287                                        &kvm_clock_notifier);
288         return 0;
289 }
290
291 void kvm_arch_hardware_unsetup(void)
292 {
293         gmap_unregister_pte_notifier(&gmap_notifier);
294         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
295         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
296                                          &kvm_clock_notifier);
297 }
298
299 static void allow_cpu_feat(unsigned long nr)
300 {
301         set_bit_inv(nr, kvm_s390_available_cpu_feat);
302 }
303
304 static inline int plo_test_bit(unsigned char nr)
305 {
306         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
307         int cc;
308
309         asm volatile(
310                 /* Parameter registers are ignored for "test bit" */
311                 "       plo     0,0,0,0(0)\n"
312                 "       ipm     %0\n"
313                 "       srl     %0,28\n"
314                 : "=d" (cc)
315                 : "d" (r0)
316                 : "cc");
317         return cc == 0;
318 }
319
320 static void kvm_s390_cpu_feat_init(void)
321 {
322         int i;
323
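        /*
         * Probe all 256 PERFORM LOCKED OPERATION function codes using the
         * "test bit" form and record the available ones in the query-style
         * bitmask (function code 0 maps to the most significant bit of
         * byte 0).
         */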
324         for (i = 0; i < 256; ++i) {
325                 if (plo_test_bit(i))
326                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
327         }
328
329         if (test_facility(28)) /* TOD-clock steering */
330                 ptff(kvm_s390_available_subfunc.ptff,
331                      sizeof(kvm_s390_available_subfunc.ptff),
332                      PTFF_QAF);
333
334         if (test_facility(17)) { /* MSA */
335                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
336                               kvm_s390_available_subfunc.kmac);
337                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
338                               kvm_s390_available_subfunc.kmc);
339                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
340                               kvm_s390_available_subfunc.km);
341                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
342                               kvm_s390_available_subfunc.kimd);
343                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
344                               kvm_s390_available_subfunc.klmd);
345         }
346         if (test_facility(76)) /* MSA3 */
347                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
348                               kvm_s390_available_subfunc.pckmo);
349         if (test_facility(77)) { /* MSA4 */
350                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
351                               kvm_s390_available_subfunc.kmctr);
352                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
353                               kvm_s390_available_subfunc.kmf);
354                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
355                               kvm_s390_available_subfunc.kmo);
356                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
357                               kvm_s390_available_subfunc.pcc);
358         }
359         if (test_facility(57)) /* MSA5 */
360                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
361                               kvm_s390_available_subfunc.ppno);
362
363         if (test_facility(146)) /* MSA8 */
364                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
365                               kvm_s390_available_subfunc.kma);
366
367         if (MACHINE_HAS_ESOP)
368                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
369         /*
370          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
371          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
372          */
373         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
374             !test_facility(3) || !nested)
375                 return;
376         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
377         if (sclp.has_64bscao)
378                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
379         if (sclp.has_siif)
380                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
381         if (sclp.has_gpere)
382                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
383         if (sclp.has_gsls)
384                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
385         if (sclp.has_ib)
386                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
387         if (sclp.has_cei)
388                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
389         if (sclp.has_ibs)
390                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
391         if (sclp.has_kss)
392                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
393         /*
394          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
395          * all skey handling functions read/set the skey from the PGSTE
396          * instead of the real storage key.
397          *
398          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
399          * pages to be detected as preserved although they are resident.
400          *
401          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
402          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
403          *
404          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
405          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
406          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
407          *
408          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
409          * cannot easily shadow the SCA because of the ipte lock.
410          */
411 }
412
413 int kvm_arch_init(void *opaque)
414 {
415         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
416         if (!kvm_s390_dbf)
417                 return -ENOMEM;
418
419         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
420                 debug_unregister(kvm_s390_dbf);
421                 return -ENOMEM;
422         }
423
424         kvm_s390_cpu_feat_init();
425
426         /* Register floating interrupt controller interface. */
427         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
428 }
429
430 void kvm_arch_exit(void)
431 {
432         debug_unregister(kvm_s390_dbf);
433 }
434
435 /* Section: device related */
436 long kvm_arch_dev_ioctl(struct file *filp,
437                         unsigned int ioctl, unsigned long arg)
438 {
439         if (ioctl == KVM_S390_ENABLE_SIE)
440                 return s390_enable_sie();
441         return -EINVAL;
442 }
443
444 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
445 {
446         int r;
447
448         switch (ext) {
449         case KVM_CAP_S390_PSW:
450         case KVM_CAP_S390_GMAP:
451         case KVM_CAP_SYNC_MMU:
452 #ifdef CONFIG_KVM_S390_UCONTROL
453         case KVM_CAP_S390_UCONTROL:
454 #endif
455         case KVM_CAP_ASYNC_PF:
456         case KVM_CAP_SYNC_REGS:
457         case KVM_CAP_ONE_REG:
458         case KVM_CAP_ENABLE_CAP:
459         case KVM_CAP_S390_CSS_SUPPORT:
460         case KVM_CAP_IOEVENTFD:
461         case KVM_CAP_DEVICE_CTRL:
462         case KVM_CAP_ENABLE_CAP_VM:
463         case KVM_CAP_S390_IRQCHIP:
464         case KVM_CAP_VM_ATTRIBUTES:
465         case KVM_CAP_MP_STATE:
466         case KVM_CAP_IMMEDIATE_EXIT:
467         case KVM_CAP_S390_INJECT_IRQ:
468         case KVM_CAP_S390_USER_SIGP:
469         case KVM_CAP_S390_USER_STSI:
470         case KVM_CAP_S390_SKEYS:
471         case KVM_CAP_S390_IRQ_STATE:
472         case KVM_CAP_S390_USER_INSTR0:
473         case KVM_CAP_S390_CMMA_MIGRATION:
474         case KVM_CAP_S390_AIS:
475         case KVM_CAP_S390_AIS_MIGRATION:
476                 r = 1;
477                 break;
478         case KVM_CAP_S390_MEM_OP:
479                 r = MEM_OP_MAX_SIZE;
480                 break;
481         case KVM_CAP_NR_VCPUS:
482         case KVM_CAP_MAX_VCPUS:
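                /*
                 * The vcpu limit depends on the SCA format: basic SCA
                 * slots by default, extended SCA slots when ESCA and the
                 * 64-bit SCAO are available, and KVM_MAX_VCPUS when SCA
                 * entries are not used at all.
                 */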
483                 r = KVM_S390_BSCA_CPU_SLOTS;
484                 if (!kvm_s390_use_sca_entries())
485                         r = KVM_MAX_VCPUS;
486                 else if (sclp.has_esca && sclp.has_64bscao)
487                         r = KVM_S390_ESCA_CPU_SLOTS;
488                 break;
489         case KVM_CAP_NR_MEMSLOTS:
490                 r = KVM_USER_MEM_SLOTS;
491                 break;
492         case KVM_CAP_S390_COW:
493                 r = MACHINE_HAS_ESOP;
494                 break;
495         case KVM_CAP_S390_VECTOR_REGISTERS:
496                 r = MACHINE_HAS_VX;
497                 break;
498         case KVM_CAP_S390_RI:
499                 r = test_facility(64);
500                 break;
501         case KVM_CAP_S390_GS:
502                 r = test_facility(133);
503                 break;
504         case KVM_CAP_S390_BPB:
505                 r = test_facility(82);
506                 break;
507         default:
508                 r = 0;
509         }
510         return r;
511 }
512
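/*
 * Transfer per-page dirty state from the host page tables (gmap) into the
 * memslot's dirty bitmap. Called under kvm->slots_lock from the dirty-log
 * ioctl below; bails out early if a fatal signal is pending.
 */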
513 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
514                                         struct kvm_memory_slot *memslot)
515 {
516         gfn_t cur_gfn, last_gfn;
517         unsigned long address;
518         struct gmap *gmap = kvm->arch.gmap;
519
520         /* Loop over all guest pages */
521         last_gfn = memslot->base_gfn + memslot->npages;
522         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
523                 address = gfn_to_hva_memslot(memslot, cur_gfn);
524
525                 if (test_and_clear_guest_dirty(gmap->mm, address))
526                         mark_page_dirty(kvm, cur_gfn);
527                 if (fatal_signal_pending(current))
528                         return;
529                 cond_resched();
530         }
531 }
532
533 /* Section: vm related */
534 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
535
536 /*
537  * Get (and clear) the dirty memory log for a memory slot.
538  */
539 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
540                                struct kvm_dirty_log *log)
541 {
542         int r;
543         unsigned long n;
544         struct kvm_memslots *slots;
545         struct kvm_memory_slot *memslot;
546         int is_dirty = 0;
547
548         if (kvm_is_ucontrol(kvm))
549                 return -EINVAL;
550
551         mutex_lock(&kvm->slots_lock);
552
553         r = -EINVAL;
554         if (log->slot >= KVM_USER_MEM_SLOTS)
555                 goto out;
556
557         slots = kvm_memslots(kvm);
558         memslot = id_to_memslot(slots, log->slot);
559         r = -ENOENT;
560         if (!memslot->dirty_bitmap)
561                 goto out;
562
563         kvm_s390_sync_dirty_log(kvm, memslot);
564         r = kvm_get_dirty_log(kvm, log, &is_dirty);
565         if (r)
566                 goto out;
567
568         /* Clear the dirty log */
569         if (is_dirty) {
570                 n = kvm_dirty_bitmap_bytes(memslot);
571                 memset(memslot->dirty_bitmap, 0, n);
572         }
573         r = 0;
574 out:
575         mutex_unlock(&kvm->slots_lock);
576         return r;
577 }
578
579 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
580 {
581         unsigned int i;
582         struct kvm_vcpu *vcpu;
583
584         kvm_for_each_vcpu(i, vcpu, kvm) {
585                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
586         }
587 }
588
589 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
590 {
591         int r;
592
593         if (cap->flags)
594                 return -EINVAL;
595
596         switch (cap->cap) {
597         case KVM_CAP_S390_IRQCHIP:
598                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
599                 kvm->arch.use_irqchip = 1;
600                 r = 0;
601                 break;
602         case KVM_CAP_S390_USER_SIGP:
603                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
604                 kvm->arch.user_sigp = 1;
605                 r = 0;
606                 break;
607         case KVM_CAP_S390_VECTOR_REGISTERS:
608                 mutex_lock(&kvm->lock);
609                 if (kvm->created_vcpus) {
610                         r = -EBUSY;
611                 } else if (MACHINE_HAS_VX) {
612                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
613                         set_kvm_facility(kvm->arch.model.fac_list, 129);
614                         if (test_facility(134)) {
615                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
616                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
617                         }
618                         if (test_facility(135)) {
619                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
620                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
621                         }
622                         r = 0;
623                 } else
624                         r = -EINVAL;
625                 mutex_unlock(&kvm->lock);
626                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
627                          r ? "(not available)" : "(success)");
628                 break;
629         case KVM_CAP_S390_RI:
630                 r = -EINVAL;
631                 mutex_lock(&kvm->lock);
632                 if (kvm->created_vcpus) {
633                         r = -EBUSY;
634                 } else if (test_facility(64)) {
635                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
636                         set_kvm_facility(kvm->arch.model.fac_list, 64);
637                         r = 0;
638                 }
639                 mutex_unlock(&kvm->lock);
640                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
641                          r ? "(not available)" : "(success)");
642                 break;
643         case KVM_CAP_S390_AIS:
644                 mutex_lock(&kvm->lock);
645                 if (kvm->created_vcpus) {
646                         r = -EBUSY;
647                 } else {
648                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
649                         set_kvm_facility(kvm->arch.model.fac_list, 72);
650                         r = 0;
651                 }
652                 mutex_unlock(&kvm->lock);
653                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
654                          r ? "(not available)" : "(success)");
655                 break;
656         case KVM_CAP_S390_GS:
657                 r = -EINVAL;
658                 mutex_lock(&kvm->lock);
659                 if (kvm->created_vcpus) {
660                         r = -EBUSY;
661                 } else if (test_facility(133)) {
662                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
663                         set_kvm_facility(kvm->arch.model.fac_list, 133);
664                         r = 0;
665                 }
666                 mutex_unlock(&kvm->lock);
667                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
668                          r ? "(not available)" : "(success)");
669                 break;
670         case KVM_CAP_S390_USER_STSI:
671                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
672                 kvm->arch.user_stsi = 1;
673                 r = 0;
674                 break;
675         case KVM_CAP_S390_USER_INSTR0:
676                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
677                 kvm->arch.user_instr0 = 1;
678                 icpt_operexc_on_all_vcpus(kvm);
679                 r = 0;
680                 break;
681         default:
682                 r = -EINVAL;
683                 break;
684         }
685         return r;
686 }
687
688 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
689 {
690         int ret;
691
692         switch (attr->attr) {
693         case KVM_S390_VM_MEM_LIMIT_SIZE:
694                 ret = 0;
695                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
696                          kvm->arch.mem_limit);
697                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
698                         ret = -EFAULT;
699                 break;
700         default:
701                 ret = -ENXIO;
702                 break;
703         }
704         return ret;
705 }
706
707 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
708 {
709         int ret;
710         unsigned int idx;
711         switch (attr->attr) {
712         case KVM_S390_VM_MEM_ENABLE_CMMA:
713                 ret = -ENXIO;
714                 if (!sclp.has_cmma)
715                         break;
716
717                 ret = -EBUSY;
718                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
719                 mutex_lock(&kvm->lock);
720                 if (!kvm->created_vcpus) {
721                         kvm->arch.use_cmma = 1;
722                         /* Not compatible with cmma. */
723                         kvm->arch.use_pfmfi = 0;
724                         ret = 0;
725                 }
726                 mutex_unlock(&kvm->lock);
727                 break;
728         case KVM_S390_VM_MEM_CLR_CMMA:
729                 ret = -ENXIO;
730                 if (!sclp.has_cmma)
731                         break;
732                 ret = -EINVAL;
733                 if (!kvm->arch.use_cmma)
734                         break;
735
736                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
737                 mutex_lock(&kvm->lock);
738                 idx = srcu_read_lock(&kvm->srcu);
739                 s390_reset_cmma(kvm->arch.gmap->mm);
740                 srcu_read_unlock(&kvm->srcu, idx);
741                 mutex_unlock(&kvm->lock);
742                 ret = 0;
743                 break;
744         case KVM_S390_VM_MEM_LIMIT_SIZE: {
745                 unsigned long new_limit;
746
747                 if (kvm_is_ucontrol(kvm))
748                         return -EINVAL;
749
750                 if (get_user(new_limit, (u64 __user *)attr->addr))
751                         return -EFAULT;
752
753                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
754                     new_limit > kvm->arch.mem_limit)
755                         return -E2BIG;
756
757                 if (!new_limit)
758                         return -EINVAL;
759
760                 /* gmap_create takes last usable address */
761                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
762                         new_limit -= 1;
763
764                 ret = -EBUSY;
765                 mutex_lock(&kvm->lock);
766                 if (!kvm->created_vcpus) {
767                         /* gmap_create will round the limit up */
768                         struct gmap *new = gmap_create(current->mm, new_limit);
769
770                         if (!new) {
771                                 ret = -ENOMEM;
772                         } else {
773                                 gmap_remove(kvm->arch.gmap);
774                                 new->private = kvm;
775                                 kvm->arch.gmap = new;
776                                 ret = 0;
777                         }
778                 }
779                 mutex_unlock(&kvm->lock);
780                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
781                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
782                          (void *) kvm->arch.gmap->asce);
783                 break;
784         }
785         default:
786                 ret = -ENXIO;
787                 break;
788         }
789         return ret;
790 }
791
792 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
793
794 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
795 {
796         struct kvm_vcpu *vcpu;
797         int i;
798
799         kvm_s390_vcpu_block_all(kvm);
800
801         kvm_for_each_vcpu(i, vcpu, kvm)
802                 kvm_s390_vcpu_crypto_setup(vcpu);
803
804         kvm_s390_vcpu_unblock_all(kvm);
805 }
806
807 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
808 {
809         if (!test_kvm_facility(kvm, 76))
810                 return -EINVAL;
811
812         mutex_lock(&kvm->lock);
813         switch (attr->attr) {
814         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
815                 get_random_bytes(
816                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
817                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
818                 kvm->arch.crypto.aes_kw = 1;
819                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
820                 break;
821         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
822                 get_random_bytes(
823                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
824                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
825                 kvm->arch.crypto.dea_kw = 1;
826                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
827                 break;
828         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
829                 kvm->arch.crypto.aes_kw = 0;
830                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
831                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
832                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
833                 break;
834         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
835                 kvm->arch.crypto.dea_kw = 0;
836                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
837                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
838                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
839                 break;
840         default:
841                 mutex_unlock(&kvm->lock);
842                 return -ENXIO;
843         }
844
845         kvm_s390_vcpu_crypto_reset_all(kvm);
846         mutex_unlock(&kvm->lock);
847         return 0;
848 }
849
850 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
851 {
852         int cx;
853         struct kvm_vcpu *vcpu;
854
855         kvm_for_each_vcpu(cx, vcpu, kvm)
856                 kvm_s390_sync_request(req, vcpu);
857 }
858
859 /*
860  * Must be called with kvm->srcu held to avoid races on memslots, and with
861  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
862  */
863 static int kvm_s390_vm_start_migration(struct kvm *kvm)
864 {
865         struct kvm_memory_slot *ms;
866         struct kvm_memslots *slots;
867         unsigned long ram_pages = 0;
868         int slotnr;
869
870         /* migration mode already enabled */
871         if (kvm->arch.migration_mode)
872                 return 0;
873         slots = kvm_memslots(kvm);
874         if (!slots || !slots->used_slots)
875                 return -EINVAL;
876
877         if (!kvm->arch.use_cmma) {
878                 kvm->arch.migration_mode = 1;
879                 return 0;
880         }
881         /* mark all the pages in active slots as dirty */
882         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
883                 ms = slots->memslots + slotnr;
884                 /*
885                  * The second half of the bitmap is only used on x86,
886                  * and would be wasted otherwise, so we put it to good
887                  * use here to keep track of the state of the storage
888                  * attributes.
889                  */
890                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
891                 ram_pages += ms->npages;
892         }
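        /*
         * At the start of migration every page is considered to have dirty
         * storage attributes; the counter lets the CMMA migration ioctl
         * (KVM_S390_GET_CMMA_BITS, handled later in this file) report how
         * many such pages remain.
         */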
893         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
894         kvm->arch.migration_mode = 1;
895         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
896         return 0;
897 }
898
899 /*
900  * Must be called with kvm->slots_lock to avoid races with ourselves and
901  * kvm_s390_vm_start_migration.
902  */
903 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
904 {
905         /* migration mode already disabled */
906         if (!kvm->arch.migration_mode)
907                 return 0;
908         kvm->arch.migration_mode = 0;
909         if (kvm->arch.use_cmma)
910                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
911         return 0;
912 }
913
914 static int kvm_s390_vm_set_migration(struct kvm *kvm,
915                                      struct kvm_device_attr *attr)
916 {
917         int res = -ENXIO;
918
919         mutex_lock(&kvm->slots_lock);
920         switch (attr->attr) {
921         case KVM_S390_VM_MIGRATION_START:
922                 res = kvm_s390_vm_start_migration(kvm);
923                 break;
924         case KVM_S390_VM_MIGRATION_STOP:
925                 res = kvm_s390_vm_stop_migration(kvm);
926                 break;
927         default:
928                 break;
929         }
930         mutex_unlock(&kvm->slots_lock);
931
932         return res;
933 }
934
935 static int kvm_s390_vm_get_migration(struct kvm *kvm,
936                                      struct kvm_device_attr *attr)
937 {
938         u64 mig = kvm->arch.migration_mode;
939
940         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
941                 return -ENXIO;
942
943         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
944                 return -EFAULT;
945         return 0;
946 }
947
948 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
949 {
950         struct kvm_s390_vm_tod_clock gtod;
951
952         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
953                 return -EFAULT;
954
955         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
956                 return -EINVAL;
957         kvm_s390_set_tod_clock(kvm, &gtod);
958
959         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
960                 gtod.epoch_idx, gtod.tod);
961
962         return 0;
963 }
964
965 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
966 {
967         u8 gtod_high;
968
969         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
970                                            sizeof(gtod_high)))
971                 return -EFAULT;
972
973         if (gtod_high != 0)
974                 return -EINVAL;
975         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
976
977         return 0;
978 }
979
980 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
981 {
982         struct kvm_s390_vm_tod_clock gtod = { 0 };
983
984         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
985                            sizeof(gtod.tod)))
986                 return -EFAULT;
987
988         kvm_s390_set_tod_clock(kvm, &gtod);
989         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
990         return 0;
991 }
992
993 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
994 {
995         int ret;
996
997         if (attr->flags)
998                 return -EINVAL;
999
1000         switch (attr->attr) {
1001         case KVM_S390_VM_TOD_EXT:
1002                 ret = kvm_s390_set_tod_ext(kvm, attr);
1003                 break;
1004         case KVM_S390_VM_TOD_HIGH:
1005                 ret = kvm_s390_set_tod_high(kvm, attr);
1006                 break;
1007         case KVM_S390_VM_TOD_LOW:
1008                 ret = kvm_s390_set_tod_low(kvm, attr);
1009                 break;
1010         default:
1011                 ret = -ENXIO;
1012                 break;
1013         }
1014         return ret;
1015 }
1016
1017 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1018                                    struct kvm_s390_vm_tod_clock *gtod)
1019 {
1020         struct kvm_s390_tod_clock_ext htod;
1021
1022         preempt_disable();
1023
1024         get_tod_clock_ext((char *)&htod);
1025
1026         gtod->tod = htod.tod + kvm->arch.epoch;
1027         gtod->epoch_idx = 0;
1028         if (test_kvm_facility(kvm, 139)) {
1029                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
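                /*
                 * A carry from the 64-bit TOD addition above propagates
                 * into the epoch index.
                 */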
1030                 if (gtod->tod < htod.tod)
1031                         gtod->epoch_idx += 1;
1032         }
1033
1034         preempt_enable();
1035 }
1036
1037 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1038 {
1039         struct kvm_s390_vm_tod_clock gtod;
1040
1041         memset(&gtod, 0, sizeof(gtod));
1042         kvm_s390_get_tod_clock(kvm, &gtod);
1043         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1044                 return -EFAULT;
1045
1046         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1047                 gtod.epoch_idx, gtod.tod);
1048         return 0;
1049 }
1050
1051 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1052 {
1053         u8 gtod_high = 0;
1054
1055         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1056                                          sizeof(gtod_high)))
1057                 return -EFAULT;
1058         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1059
1060         return 0;
1061 }
1062
1063 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1064 {
1065         u64 gtod;
1066
1067         gtod = kvm_s390_get_tod_clock_fast(kvm);
1068         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1069                 return -EFAULT;
1070         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1071
1072         return 0;
1073 }
1074
1075 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1076 {
1077         int ret;
1078
1079         if (attr->flags)
1080                 return -EINVAL;
1081
1082         switch (attr->attr) {
1083         case KVM_S390_VM_TOD_EXT:
1084                 ret = kvm_s390_get_tod_ext(kvm, attr);
1085                 break;
1086         case KVM_S390_VM_TOD_HIGH:
1087                 ret = kvm_s390_get_tod_high(kvm, attr);
1088                 break;
1089         case KVM_S390_VM_TOD_LOW:
1090                 ret = kvm_s390_get_tod_low(kvm, attr);
1091                 break;
1092         default:
1093                 ret = -ENXIO;
1094                 break;
1095         }
1096         return ret;
1097 }
1098
1099 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1100 {
1101         struct kvm_s390_vm_cpu_processor *proc;
1102         u16 lowest_ibc, unblocked_ibc;
1103         int ret = 0;
1104
1105         mutex_lock(&kvm->lock);
1106         if (kvm->created_vcpus) {
1107                 ret = -EBUSY;
1108                 goto out;
1109         }
1110         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1111         if (!proc) {
1112                 ret = -ENOMEM;
1113                 goto out;
1114         }
1115         if (!copy_from_user(proc, (void __user *)attr->addr,
1116                             sizeof(*proc))) {
1117                 kvm->arch.model.cpuid = proc->cpuid;
1118                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1119                 unblocked_ibc = sclp.ibc & 0xfff;
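                /*
                 * Clamp the requested IBC value into the range the machine
                 * supports: no lower than the lowest IBC and no higher than
                 * the unblocked IBC reported by the SCLP.
                 */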
1120                 if (lowest_ibc && proc->ibc) {
1121                         if (proc->ibc > unblocked_ibc)
1122                                 kvm->arch.model.ibc = unblocked_ibc;
1123                         else if (proc->ibc < lowest_ibc)
1124                                 kvm->arch.model.ibc = lowest_ibc;
1125                         else
1126                                 kvm->arch.model.ibc = proc->ibc;
1127                 }
1128                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1129                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1130                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1131                          kvm->arch.model.ibc,
1132                          kvm->arch.model.cpuid);
1133                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1134                          kvm->arch.model.fac_list[0],
1135                          kvm->arch.model.fac_list[1],
1136                          kvm->arch.model.fac_list[2]);
1137         } else
1138                 ret = -EFAULT;
1139         kfree(proc);
1140 out:
1141         mutex_unlock(&kvm->lock);
1142         return ret;
1143 }
1144
1145 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1146                                        struct kvm_device_attr *attr)
1147 {
1148         struct kvm_s390_vm_cpu_feat data;
1149
1150         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1151                 return -EFAULT;
1152         if (!bitmap_subset((unsigned long *) data.feat,
1153                            kvm_s390_available_cpu_feat,
1154                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1155                 return -EINVAL;
1156
1157         mutex_lock(&kvm->lock);
1158         if (kvm->created_vcpus) {
1159                 mutex_unlock(&kvm->lock);
1160                 return -EBUSY;
1161         }
1162         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1163                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1164         mutex_unlock(&kvm->lock);
1165         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1166                          data.feat[0],
1167                          data.feat[1],
1168                          data.feat[2]);
1169         return 0;
1170 }
1171
1172 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1173                                           struct kvm_device_attr *attr)
1174 {
1175         /*
1176          * Once supported by kernel + hw, we have to store the subfunctions
1177          * in kvm->arch and remember that user space configured them.
1178          */
1179         return -ENXIO;
1180 }
1181
1182 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1183 {
1184         int ret = -ENXIO;
1185
1186         switch (attr->attr) {
1187         case KVM_S390_VM_CPU_PROCESSOR:
1188                 ret = kvm_s390_set_processor(kvm, attr);
1189                 break;
1190         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1191                 ret = kvm_s390_set_processor_feat(kvm, attr);
1192                 break;
1193         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1194                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1195                 break;
1196         }
1197         return ret;
1198 }
1199
1200 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202         struct kvm_s390_vm_cpu_processor *proc;
1203         int ret = 0;
1204
1205         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1206         if (!proc) {
1207                 ret = -ENOMEM;
1208                 goto out;
1209         }
1210         proc->cpuid = kvm->arch.model.cpuid;
1211         proc->ibc = kvm->arch.model.ibc;
1212         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1213                S390_ARCH_FAC_LIST_SIZE_BYTE);
1214         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1215                  kvm->arch.model.ibc,
1216                  kvm->arch.model.cpuid);
1217         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1218                  kvm->arch.model.fac_list[0],
1219                  kvm->arch.model.fac_list[1],
1220                  kvm->arch.model.fac_list[2]);
1221         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1222                 ret = -EFAULT;
1223         kfree(proc);
1224 out:
1225         return ret;
1226 }
1227
1228 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230         struct kvm_s390_vm_cpu_machine *mach;
1231         int ret = 0;
1232
1233         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1234         if (!mach) {
1235                 ret = -ENOMEM;
1236                 goto out;
1237         }
1238         get_cpu_id((struct cpuid *) &mach->cpuid);
1239         mach->ibc = sclp.ibc;
1240         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1241                S390_ARCH_FAC_LIST_SIZE_BYTE);
1242         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1243                sizeof(S390_lowcore.stfle_fac_list));
1244         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1245                  kvm->arch.model.ibc,
1246                  kvm->arch.model.cpuid);
1247         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1248                  mach->fac_mask[0],
1249                  mach->fac_mask[1],
1250                  mach->fac_mask[2]);
1251         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1252                  mach->fac_list[0],
1253                  mach->fac_list[1],
1254                  mach->fac_list[2]);
1255         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1256                 ret = -EFAULT;
1257         kfree(mach);
1258 out:
1259         return ret;
1260 }
1261
1262 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1263                                        struct kvm_device_attr *attr)
1264 {
1265         struct kvm_s390_vm_cpu_feat data;
1266
1267         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1268                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1269         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1270                 return -EFAULT;
1271         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1272                          data.feat[0],
1273                          data.feat[1],
1274                          data.feat[2]);
1275         return 0;
1276 }
1277
1278 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1279                                      struct kvm_device_attr *attr)
1280 {
1281         struct kvm_s390_vm_cpu_feat data;
1282
1283         bitmap_copy((unsigned long *) data.feat,
1284                     kvm_s390_available_cpu_feat,
1285                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1286         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1287                 return -EFAULT;
1288         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1289                          data.feat[0],
1290                          data.feat[1],
1291                          data.feat[2]);
1292         return 0;
1293 }
1294
1295 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1296                                           struct kvm_device_attr *attr)
1297 {
1298         /*
1299          * Once we can actually configure subfunctions (kernel + hw support),
1300          * we have to check if they were already set by user space and, if so, copy
1301          * them from kvm->arch.
1302          */
1303         return -ENXIO;
1304 }
1305
1306 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1307                                         struct kvm_device_attr *attr)
1308 {
1309         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1310             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1311                 return -EFAULT;
1312         return 0;
1313 }
1314 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1315 {
1316         int ret = -ENXIO;
1317
1318         switch (attr->attr) {
1319         case KVM_S390_VM_CPU_PROCESSOR:
1320                 ret = kvm_s390_get_processor(kvm, attr);
1321                 break;
1322         case KVM_S390_VM_CPU_MACHINE:
1323                 ret = kvm_s390_get_machine(kvm, attr);
1324                 break;
1325         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1326                 ret = kvm_s390_get_processor_feat(kvm, attr);
1327                 break;
1328         case KVM_S390_VM_CPU_MACHINE_FEAT:
1329                 ret = kvm_s390_get_machine_feat(kvm, attr);
1330                 break;
1331         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1332                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1333                 break;
1334         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1335                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1336                 break;
1337         }
1338         return ret;
1339 }
1340
1341 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1342 {
1343         int ret;
1344
1345         switch (attr->group) {
1346         case KVM_S390_VM_MEM_CTRL:
1347                 ret = kvm_s390_set_mem_control(kvm, attr);
1348                 break;
1349         case KVM_S390_VM_TOD:
1350                 ret = kvm_s390_set_tod(kvm, attr);
1351                 break;
1352         case KVM_S390_VM_CPU_MODEL:
1353                 ret = kvm_s390_set_cpu_model(kvm, attr);
1354                 break;
1355         case KVM_S390_VM_CRYPTO:
1356                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1357                 break;
1358         case KVM_S390_VM_MIGRATION:
1359                 ret = kvm_s390_vm_set_migration(kvm, attr);
1360                 break;
1361         default:
1362                 ret = -ENXIO;
1363                 break;
1364         }
1365
1366         return ret;
1367 }
1368
1369 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1370 {
1371         int ret;
1372
1373         switch (attr->group) {
1374         case KVM_S390_VM_MEM_CTRL:
1375                 ret = kvm_s390_get_mem_control(kvm, attr);
1376                 break;
1377         case KVM_S390_VM_TOD:
1378                 ret = kvm_s390_get_tod(kvm, attr);
1379                 break;
1380         case KVM_S390_VM_CPU_MODEL:
1381                 ret = kvm_s390_get_cpu_model(kvm, attr);
1382                 break;
1383         case KVM_S390_VM_MIGRATION:
1384                 ret = kvm_s390_vm_get_migration(kvm, attr);
1385                 break;
1386         default:
1387                 ret = -ENXIO;
1388                 break;
1389         }
1390
1391         return ret;
1392 }
1393
1394 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1395 {
1396         int ret;
1397
1398         switch (attr->group) {
1399         case KVM_S390_VM_MEM_CTRL:
1400                 switch (attr->attr) {
1401                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1402                 case KVM_S390_VM_MEM_CLR_CMMA:
1403                         ret = sclp.has_cmma ? 0 : -ENXIO;
1404                         break;
1405                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1406                         ret = 0;
1407                         break;
1408                 default:
1409                         ret = -ENXIO;
1410                         break;
1411                 }
1412                 break;
1413         case KVM_S390_VM_TOD:
1414                 switch (attr->attr) {
1415                 case KVM_S390_VM_TOD_LOW:
1416                 case KVM_S390_VM_TOD_HIGH:
1417                         ret = 0;
1418                         break;
1419                 default:
1420                         ret = -ENXIO;
1421                         break;
1422                 }
1423                 break;
1424         case KVM_S390_VM_CPU_MODEL:
1425                 switch (attr->attr) {
1426                 case KVM_S390_VM_CPU_PROCESSOR:
1427                 case KVM_S390_VM_CPU_MACHINE:
1428                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1429                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1430                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1431                         ret = 0;
1432                         break;
1433                 /* configuring subfunctions is not supported yet */
1434                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1435                 default:
1436                         ret = -ENXIO;
1437                         break;
1438                 }
1439                 break;
1440         case KVM_S390_VM_CRYPTO:
1441                 switch (attr->attr) {
1442                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1443                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1444                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1445                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1446                         ret = 0;
1447                         break;
1448                 default:
1449                         ret = -ENXIO;
1450                         break;
1451                 }
1452                 break;
1453         case KVM_S390_VM_MIGRATION:
1454                 ret = 0;
1455                 break;
1456         default:
1457                 ret = -ENXIO;
1458                 break;
1459         }
1460
1461         return ret;
1462 }
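
/*
 * Illustrative userspace sketch (an assumption, not part of this file): the
 * three handlers above back the KVM_SET/GET/HAS_DEVICE_ATTR vm ioctls.
 * Starting migration mode, for example, could look roughly like this, where
 * vm_fd is a hypothetical open VM file descriptor:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	if (!ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
 *		ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */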
1463
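/*
 * Copy the guest storage keys for args->count consecutive guest frames,
 * starting at args->start_gfn, to the userspace buffer at
 * args->skeydata_addr. Returns KVM_S390_GET_SKEYS_NONE if the guest does
 * not use storage keys at all.
 */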
1464 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1465 {
1466         uint8_t *keys;
1467         uint64_t hva;
1468         int srcu_idx, i, r = 0;
1469
1470         if (args->flags != 0)
1471                 return -EINVAL;
1472
1473         /* Is this guest using storage keys? */
1474         if (!mm_uses_skeys(current->mm))
1475                 return KVM_S390_GET_SKEYS_NONE;
1476
1477         /* Enforce sane limit on memory allocation */
1478         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1479                 return -EINVAL;
1480
1481         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1482         if (!keys)
1483                 return -ENOMEM;
1484
1485         down_read(&current->mm->mmap_sem);
1486         srcu_idx = srcu_read_lock(&kvm->srcu);
1487         for (i = 0; i < args->count; i++) {
1488                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1489                 if (kvm_is_error_hva(hva)) {
1490                         r = -EFAULT;
1491                         break;
1492                 }
1493
1494                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1495                 if (r)
1496                         break;
1497         }
1498         srcu_read_unlock(&kvm->srcu, srcu_idx);
1499         up_read(&current->mm->mmap_sem);
1500
1501         if (!r) {
1502                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1503                                  sizeof(uint8_t) * args->count);
1504                 if (r)
1505                         r = -EFAULT;
1506         }
1507
1508         kvfree(keys);
1509         return r;
1510 }
1511
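/*
 * Set the guest storage keys for args->count consecutive guest frames,
 * starting at args->start_gfn, from the userspace buffer at
 * args->skeydata_addr. Storage key handling is enabled for the guest on
 * first use.
 */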
1512 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1513 {
1514         uint8_t *keys;
1515         uint64_t hva;
1516         int srcu_idx, i, r = 0;
1517
1518         if (args->flags != 0)
1519                 return -EINVAL;
1520
1521         /* Enforce sane limit on memory allocation */
1522         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1523                 return -EINVAL;
1524
1525         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1526         if (!keys)
1527                 return -ENOMEM;
1528
1529         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1530                            sizeof(uint8_t) * args->count);
1531         if (r) {
1532                 r = -EFAULT;
1533                 goto out;
1534         }
1535
1536         /* Enable storage key handling for the guest */
1537         r = s390_enable_skey();
1538         if (r)
1539                 goto out;
1540
1541         down_read(&current->mm->mmap_sem);
1542         srcu_idx = srcu_read_lock(&kvm->srcu);
1543         for (i = 0; i < args->count; i++) {
1544                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1545                 if (kvm_is_error_hva(hva)) {
1546                         r = -EFAULT;
1547                         break;
1548                 }
1549
1550                 /* Lowest order bit is reserved */
1551                 if (keys[i] & 0x01) {
1552                         r = -EINVAL;
1553                         break;
1554                 }
1555
1556                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1557                 if (r)
1558                         break;
1559         }
1560         srcu_read_unlock(&kvm->srcu, srcu_idx);
1561         up_read(&current->mm->mmap_sem);
1562 out:
1563         kvfree(keys);
1564         return r;
1565 }
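
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * reading the storage keys of the first `count` guest frames with the
 * KVM_S390_GET_SKEYS vm ioctl; vm_fd and keybuf are hypothetical.
 *
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn = 0,
 *		.count = count,
 *		.skeydata_addr = (__u64)(unsigned long)keybuf,
 *	};
 *	ret = ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 *	A return value of KVM_S390_GET_SKEYS_NONE means the guest does not
 *	use storage keys and nothing was copied.
 */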
1566
1567 /*
1568  * Base address and length must be sent at the start of each block, so it
1569  * is cheaper to send a short run of clean values than to start a new
1570  * block, as long as that run is smaller than the size of two longs.
1571  */
1572 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1574 /* use the same size limit as for storage keys, for consistency */
1574 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1575
1576 /*
1577  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1578  * address falls in a hole. In that case the index of one of the memslots
1579  * bordering the hole is returned.
1580  */
1581 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1582 {
1583         int start = 0, end = slots->used_slots;
1584         int slot = atomic_read(&slots->lru_slot);
1585         struct kvm_memory_slot *memslots = slots->memslots;
1586
1587         if (gfn >= memslots[slot].base_gfn &&
1588             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1589                 return slot;
1590
1591         while (start < end) {
1592                 slot = start + (end - start) / 2;
1593
1594                 if (gfn >= memslots[slot].base_gfn)
1595                         end = slot;
1596                 else
1597                         start = slot + 1;
1598         }
1599
1600         if (gfn >= memslots[start].base_gfn &&
1601             gfn < memslots[start].base_gfn + memslots[start].npages) {
1602                 atomic_set(&slots->lru_slot, start);
1603         }
1604
1605         return start;
1606 }
1607
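/*
 * Peek at the CMMA attributes (pgste values) of up to bufsize consecutive
 * guest frames starting at args->start_gfn, without consuming any dirty
 * bits. The walk stops at the first frame outside of guest memory.
 */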
1608 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1609                               u8 *res, unsigned long bufsize)
1610 {
1611         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1612
1613         args->count = 0;
1614         while (args->count < bufsize) {
1615                 hva = gfn_to_hva(kvm, cur_gfn);
1616                 /*
1617                  * We return an error if the first value was invalid, but we
1618                  * return successfully if at least one value was copied.
1619                  */
1620                 if (kvm_is_error_hva(hva))
1621                         return args->count ? 0 : -EFAULT;
1622                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1623                         pgstev = 0;
1624                 res[args->count++] = (pgstev >> 24) & 0x43;
1625                 cur_gfn++;
1626         }
1627
1628         return 0;
1629 }
1630
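/*
 * Find the guest frame number of the next page that is marked dirty in the
 * CMMA dirty bitmap, starting the search at cur_gfn. The caller still has
 * to check that the returned gfn lies within guest memory.
 */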
1631 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1632                                               unsigned long cur_gfn)
1633 {
1634         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1635         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1636         unsigned long ofs = cur_gfn - ms->base_gfn;
1637
1638         if (ms->base_gfn + ms->npages <= cur_gfn) {
1639                 slotidx--;
1640                 /* If we are above the highest slot, wrap around */
1641                 if (slotidx < 0)
1642                         slotidx = slots->used_slots - 1;
1643
1644                 ms = slots->memslots + slotidx;
1645                 ofs = 0;
1646         }
1647         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1648         while ((slotidx > 0) && (ofs >= ms->npages)) {
1649                 slotidx--;
1650                 ms = slots->memslots + slotidx;
1651                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1652         }
1653         return ms->base_gfn + ofs;
1654 }
1655
1656 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1657                              u8 *res, unsigned long bufsize)
1658 {
1659         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1660         struct kvm_memslots *slots = kvm_memslots(kvm);
1661         struct kvm_memory_slot *ms;
1662
1663         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1664         ms = gfn_to_memslot(kvm, cur_gfn);
1665         args->count = 0;
1666         args->start_gfn = cur_gfn;
1667         if (!ms)
1668                 return 0;
1669         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1670         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1671
1672         while (args->count < bufsize) {
1673                 hva = gfn_to_hva(kvm, cur_gfn);
1674                 if (kvm_is_error_hva(hva))
1675                         return 0;
1676                 /* Decrement only if we actually flipped the bit to 0 */
1677                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1678                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1679                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1680                         pgstev = 0;
1681                 /* Save the value */
1682                 res[args->count++] = (pgstev >> 24) & 0x43;
1683                 /* If the next bit is too far away, stop. */
1684                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1685                         return 0;
1686                 /* If we reached the previous "next", find the next one */
1687                 if (cur_gfn == next_gfn)
1688                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1689                 /* Reached the end of memory or of the buffer, stop */
1690                 if ((next_gfn >= mem_end) ||
1691                     (next_gfn - args->start_gfn >= bufsize))
1692                         return 0;
1693                 cur_gfn++;
1694                 /* Reached the end of the current memslot, take the next one. */
1695                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1696                         ms = gfn_to_memslot(kvm, cur_gfn);
1697                         if (!ms)
1698                                 return 0;
1699                 }
1700         }
1701         return 0;
1702 }
1703
1704 /*
1705  * This function searches for the next page with dirty CMMA attributes, and
1706  * saves the attributes in the buffer up to either the end of the buffer or
1707  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1708  * no trailing clean bytes are saved.
1709  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1710  * output buffer will indicate 0 as length.
1711  */
1712 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1713                                   struct kvm_s390_cmma_log *args)
1714 {
1715         unsigned long bufsize;
1716         int srcu_idx, peek, ret;
1717         u8 *values;
1718
1719         if (!kvm->arch.use_cmma)
1720                 return -ENXIO;
1721         /* Invalid/unsupported flags were specified */
1722         if (args->flags & ~KVM_S390_CMMA_PEEK)
1723                 return -EINVAL;
1724         /* Migration mode query, and we are not doing a migration */
1725         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1726         if (!peek && !kvm->arch.migration_mode)
1727                 return -EINVAL;
1728         /* CMMA is disabled or was not used, or the buffer has length zero */
1729         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1730         if (!bufsize || !kvm->mm->context.uses_cmm) {
1731                 memset(args, 0, sizeof(*args));
1732                 return 0;
1733         }
1734         /* We are not peeking, and there are no dirty pages */
1735         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1736                 memset(args, 0, sizeof(*args));
1737                 return 0;
1738         }
1739
1740         values = vmalloc(bufsize);
1741         if (!values)
1742                 return -ENOMEM;
1743
1744         down_read(&kvm->mm->mmap_sem);
1745         srcu_idx = srcu_read_lock(&kvm->srcu);
1746         if (peek)
1747                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1748         else
1749                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1750         srcu_read_unlock(&kvm->srcu, srcu_idx);
1751         up_read(&kvm->mm->mmap_sem);
1752
1753         if (kvm->arch.migration_mode)
1754                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1755         else
1756                 args->remaining = 0;
1757
1758         if (copy_to_user((void __user *)args->values, values, args->count))
1759                 ret = -EFAULT;
1760
1761         vfree(values);
1762         return ret;
1763 }
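
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * draining the CMMA log with the KVM_S390_GET_CMMA_BITS vm ioctl after
 * migration mode has been started; vm_fd, buf and send_cmma_values() are
 * hypothetical and error handling is omitted.
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.flags = 0,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	do {
 *		log.count = sizeof(buf);
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) || !log.count)
 *			break;
 *		send_cmma_values(log.start_gfn, buf, log.count);
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 *
 * (setting .flags to KVM_S390_CMMA_PEEK would read the values without
 *  consuming the dirty bits, as done by kvm_s390_peek_cmma() above)
 */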
1764
1765 /*
1766  * This function sets the CMMA attributes for the given pages. If the input
1767  * buffer has zero length, no action is taken, otherwise the attributes are
1768  * set and the mm->context.uses_cmm flag is set.
1769  */
1770 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1771                                   const struct kvm_s390_cmma_log *args)
1772 {
1773         unsigned long hva, mask, pgstev, i;
1774         uint8_t *bits;
1775         int srcu_idx, r = 0;
1776
1777         mask = args->mask;
1778
1779         if (!kvm->arch.use_cmma)
1780                 return -ENXIO;
1781         /* invalid/unsupported flags */
1782         if (args->flags != 0)
1783                 return -EINVAL;
1784         /* Enforce sane limit on memory allocation */
1785         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1786                 return -EINVAL;
1787         /* Nothing to do */
1788         if (args->count == 0)
1789                 return 0;
1790
1791         bits = vmalloc(array_size(sizeof(*bits), args->count));
1792         if (!bits)
1793                 return -ENOMEM;
1794
1795         r = copy_from_user(bits, (void __user *)args->values, args->count);
1796         if (r) {
1797                 r = -EFAULT;
1798                 goto out;
1799         }
1800
1801         down_read(&kvm->mm->mmap_sem);
1802         srcu_idx = srcu_read_lock(&kvm->srcu);
1803         for (i = 0; i < args->count; i++) {
1804                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1805                 if (kvm_is_error_hva(hva)) {
1806                         r = -EFAULT;
1807                         break;
1808                 }
1809
1810                 pgstev = bits[i];
1811                 pgstev = pgstev << 24;
1812                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1813                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1814         }
1815         srcu_read_unlock(&kvm->srcu, srcu_idx);
1816         up_read(&kvm->mm->mmap_sem);
1817
1818         if (!kvm->mm->context.uses_cmm) {
1819                 down_write(&kvm->mm->mmap_sem);
1820                 kvm->mm->context.uses_cmm = 1;
1821                 up_write(&kvm->mm->mmap_sem);
1822         }
1823 out:
1824         vfree(bits);
1825         return r;
1826 }
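
/*
 * Illustrative counterpart on the migration destination (an assumption, not
 * part of this file): the received values are applied with the
 * KVM_S390_SET_CMMA_BITS vm ioctl; start_gfn, count, buf and vm_fd are
 * hypothetical.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = start_gfn,
 *		.count = count,
 *		.flags = 0,
 *		.mask = ~0ULL,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */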
1827
1828 long kvm_arch_vm_ioctl(struct file *filp,
1829                        unsigned int ioctl, unsigned long arg)
1830 {
1831         struct kvm *kvm = filp->private_data;
1832         void __user *argp = (void __user *)arg;
1833         struct kvm_device_attr attr;
1834         int r;
1835
1836         switch (ioctl) {
1837         case KVM_S390_INTERRUPT: {
1838                 struct kvm_s390_interrupt s390int;
1839
1840                 r = -EFAULT;
1841                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1842                         break;
1843                 r = kvm_s390_inject_vm(kvm, &s390int);
1844                 break;
1845         }
1846         case KVM_ENABLE_CAP: {
1847                 struct kvm_enable_cap cap;
1848                 r = -EFAULT;
1849                 if (copy_from_user(&cap, argp, sizeof(cap)))
1850                         break;
1851                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1852                 break;
1853         }
1854         case KVM_CREATE_IRQCHIP: {
1855                 struct kvm_irq_routing_entry routing;
1856
1857                 r = -EINVAL;
1858                 if (kvm->arch.use_irqchip) {
1859                         /* Set up dummy routing. */
1860                         memset(&routing, 0, sizeof(routing));
1861                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1862                 }
1863                 break;
1864         }
1865         case KVM_SET_DEVICE_ATTR: {
1866                 r = -EFAULT;
1867                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1868                         break;
1869                 r = kvm_s390_vm_set_attr(kvm, &attr);
1870                 break;
1871         }
1872         case KVM_GET_DEVICE_ATTR: {
1873                 r = -EFAULT;
1874                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1875                         break;
1876                 r = kvm_s390_vm_get_attr(kvm, &attr);
1877                 break;
1878         }
1879         case KVM_HAS_DEVICE_ATTR: {
1880                 r = -EFAULT;
1881                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1882                         break;
1883                 r = kvm_s390_vm_has_attr(kvm, &attr);
1884                 break;
1885         }
1886         case KVM_S390_GET_SKEYS: {
1887                 struct kvm_s390_skeys args;
1888
1889                 r = -EFAULT;
1890                 if (copy_from_user(&args, argp,
1891                                    sizeof(struct kvm_s390_skeys)))
1892                         break;
1893                 r = kvm_s390_get_skeys(kvm, &args);
1894                 break;
1895         }
1896         case KVM_S390_SET_SKEYS: {
1897                 struct kvm_s390_skeys args;
1898
1899                 r = -EFAULT;
1900                 if (copy_from_user(&args, argp,
1901                                    sizeof(struct kvm_s390_skeys)))
1902                         break;
1903                 r = kvm_s390_set_skeys(kvm, &args);
1904                 break;
1905         }
1906         case KVM_S390_GET_CMMA_BITS: {
1907                 struct kvm_s390_cmma_log args;
1908
1909                 r = -EFAULT;
1910                 if (copy_from_user(&args, argp, sizeof(args)))
1911                         break;
1912                 mutex_lock(&kvm->slots_lock);
1913                 r = kvm_s390_get_cmma_bits(kvm, &args);
1914                 mutex_unlock(&kvm->slots_lock);
1915                 if (!r) {
1916                         r = copy_to_user(argp, &args, sizeof(args));
1917                         if (r)
1918                                 r = -EFAULT;
1919                 }
1920                 break;
1921         }
1922         case KVM_S390_SET_CMMA_BITS: {
1923                 struct kvm_s390_cmma_log args;
1924
1925                 r = -EFAULT;
1926                 if (copy_from_user(&args, argp, sizeof(args)))
1927                         break;
1928                 mutex_lock(&kvm->slots_lock);
1929                 r = kvm_s390_set_cmma_bits(kvm, &args);
1930                 mutex_unlock(&kvm->slots_lock);
1931                 break;
1932         }
1933         default:
1934                 r = -ENOTTY;
1935         }
1936
1937         return r;
1938 }
1939
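/*
 * Execute PQAP with the QCI function code to retrieve the Adjunct Processor
 * configuration into the 128 byte buffer at config. Returns the resulting
 * condition code, 0 on success.
 */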
1940 static int kvm_s390_query_ap_config(u8 *config)
1941 {
1942         u32 fcn_code = 0x04000000UL;
1943         u32 cc = 0;
1944
1945         memset(config, 0, 128);
1946         asm volatile(
1947                 "lgr 0,%1\n"
1948                 "lgr 2,%2\n"
1949                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1950                 "0: ipm %0\n"
1951                 "srl %0,28\n"
1952                 "1:\n"
1953                 EX_TABLE(0b, 1b)
1954                 : "+r" (cc)
1955                 : "r" (fcn_code), "r" (config)
1956                 : "cc", "0", "2", "memory"
1957         );
1958
1959         return cc;
1960 }
1961
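/*
 * Report whether the AP extended addressing (APXA) facility is installed,
 * based on the PQAP(QCI) response. Returns 0 if the query facility
 * (facility bit 12) is unavailable or the query fails.
 */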
1962 static int kvm_s390_apxa_installed(void)
1963 {
1964         u8 config[128];
1965         int cc;
1966
1967         if (test_facility(12)) {
1968                 cc = kvm_s390_query_ap_config(config);
1969
1970                 if (cc)
1971                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1972                 else
1973                         return config[0] & 0x40;
1974         }
1975
1976         return 0;
1977 }
1978
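/*
 * Initialize the crypto control block descriptor: point it at the CRYCB and
 * select format 2 when APXA is installed, format 1 otherwise.
 */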
1979 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1980 {
1981         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1982
1983         if (kvm_s390_apxa_installed())
1984                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1985         else
1986                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1987 }
1988
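/*
 * Build the initial guest CPU id from the host CPU id, with the version
 * field overridden to 0xff.
 */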
1989 static u64 kvm_s390_get_initial_cpuid(void)
1990 {
1991         struct cpuid cpuid;
1992
1993         get_cpu_id(&cpuid);
1994         cpuid.version = 0xff;
1995         return *((u64 *) &cpuid);
1996 }
1997
1998 static void kvm_s390_crypto_init(struct kvm *kvm)
1999 {
2000         if (!test_kvm_facility(kvm, 76))
2001                 return;
2002
2003         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2004         kvm_s390_set_crycb_format(kvm);
2005
2006         /* Enable AES/DEA protected key functions by default */
2007         kvm->arch.crypto.aes_kw = 1;
2008         kvm->arch.crypto.dea_kw = 1;
2009         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2010                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2011         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2012                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2013 }
2014
2015 static void sca_dispose(struct kvm *kvm)
2016 {
2017         if (kvm->arch.use_esca)
2018                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2019         else
2020                 free_page((unsigned long)(kvm->arch.sca));
2021         kvm->arch.sca = NULL;
2022 }
2023
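/*
 * Create the architecture specific parts of a VM: the SCA, the debug
 * feature, the facility lists and CPU model data, crypto setup, floating
 * interrupt lists and, unless this is a ucontrol VM, the guest address
 * space (gmap).
 */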
2024 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2025 {
2026         gfp_t alloc_flags = GFP_KERNEL;
2027         int i, rc;
2028         char debug_name[16];
2029         static unsigned long sca_offset;
2030
2031         rc = -EINVAL;
2032 #ifdef CONFIG_KVM_S390_UCONTROL
2033         if (type & ~KVM_VM_S390_UCONTROL)
2034                 goto out_err;
2035         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2036                 goto out_err;
2037 #else
2038         if (type)
2039                 goto out_err;
2040 #endif
2041
2042         rc = s390_enable_sie();
2043         if (rc)
2044                 goto out_err;
2045
2046         rc = -ENOMEM;
2047
2048         if (!sclp.has_64bscao)
2049                 alloc_flags |= GFP_DMA;
2050         rwlock_init(&kvm->arch.sca_lock);
2051         /* start with basic SCA */
2052         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2053         if (!kvm->arch.sca)
2054                 goto out_err;
2055         spin_lock(&kvm_lock);
2056         sca_offset += 16;
2057         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2058                 sca_offset = 0;
2059         kvm->arch.sca = (struct bsca_block *)
2060                         ((char *) kvm->arch.sca + sca_offset);
2061         spin_unlock(&kvm_lock);
2062
2063         sprintf(debug_name, "kvm-%u", current->pid);
2064
2065         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2066         if (!kvm->arch.dbf)
2067                 goto out_err;
2068
2069         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2070         kvm->arch.sie_page2 =
2071              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2072         if (!kvm->arch.sie_page2)
2073                 goto out_err;
2074
2075         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2076
2077         for (i = 0; i < kvm_s390_fac_size(); i++) {
2078                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2079                                               (kvm_s390_fac_base[i] |
2080                                                kvm_s390_fac_ext[i]);
2081                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2082                                               kvm_s390_fac_base[i];
2083         }
2084
2085         /* we are always in czam mode - even on pre z14 machines */
2086         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2087         set_kvm_facility(kvm->arch.model.fac_list, 138);
2088         /* we emulate STHYI in kvm */
2089         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2090         set_kvm_facility(kvm->arch.model.fac_list, 74);
2091         if (MACHINE_HAS_TLB_GUEST) {
2092                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2093                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2094         }
2095
2096         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2097         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2098
2099         kvm_s390_crypto_init(kvm);
2100
2101         mutex_init(&kvm->arch.float_int.ais_lock);
2102         spin_lock_init(&kvm->arch.float_int.lock);
2103         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2104                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2105         init_waitqueue_head(&kvm->arch.ipte_wq);
2106         mutex_init(&kvm->arch.ipte_mutex);
2107
2108         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2109         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2110
2111         if (type & KVM_VM_S390_UCONTROL) {
2112                 kvm->arch.gmap = NULL;
2113                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2114         } else {
2115                 if (sclp.hamax == U64_MAX)
2116                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2117                 else
2118                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2119                                                     sclp.hamax + 1);
2120                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2121                 if (!kvm->arch.gmap)
2122                         goto out_err;
2123                 kvm->arch.gmap->private = kvm;
2124                 kvm->arch.gmap->pfault_enabled = 0;
2125         }
2126
2127         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2128         kvm->arch.use_skf = sclp.has_skey;
2129         spin_lock_init(&kvm->arch.start_stop_lock);
2130         kvm_s390_vsie_init(kvm);
2131         kvm_s390_gisa_init(kvm);
2132         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2133
2134         return 0;
2135 out_err:
2136         free_page((unsigned long)kvm->arch.sie_page2);
2137         debug_unregister(kvm->arch.dbf);
2138         sca_dispose(kvm);
2139         KVM_EVENT(3, "creation of vm failed: %d", rc);
2140         return rc;
2141 }
2142
2143 bool kvm_arch_has_vcpu_debugfs(void)
2144 {
2145         return false;
2146 }
2147
2148 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2149 {
2150         return 0;
2151 }
2152
2153 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2154 {
2155         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2156         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2157         kvm_s390_clear_local_irqs(vcpu);
2158         kvm_clear_async_pf_completion_queue(vcpu);
2159         if (!kvm_is_ucontrol(vcpu->kvm))
2160                 sca_del_vcpu(vcpu);
2161
2162         if (kvm_is_ucontrol(vcpu->kvm))
2163                 gmap_remove(vcpu->arch.gmap);
2164
2165         if (vcpu->kvm->arch.use_cmma)
2166                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2167         free_page((unsigned long)(vcpu->arch.sie_block));
2168
2169         kvm_vcpu_uninit(vcpu);
2170         kmem_cache_free(kvm_vcpu_cache, vcpu);
2171 }
2172
2173 static void kvm_free_vcpus(struct kvm *kvm)
2174 {
2175         unsigned int i;
2176         struct kvm_vcpu *vcpu;
2177
2178         kvm_for_each_vcpu(i, vcpu, kvm)
2179                 kvm_arch_vcpu_destroy(vcpu);
2180
2181         mutex_lock(&kvm->lock);
2182         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2183                 kvm->vcpus[i] = NULL;
2184
2185         atomic_set(&kvm->online_vcpus, 0);
2186         mutex_unlock(&kvm->lock);
2187 }
2188
2189 void kvm_arch_destroy_vm(struct kvm *kvm)
2190 {
2191         kvm_free_vcpus(kvm);
2192         sca_dispose(kvm);
2193         debug_unregister(kvm->arch.dbf);
2194         kvm_s390_gisa_destroy(kvm);
2195         free_page((unsigned long)kvm->arch.sie_page2);
2196         if (!kvm_is_ucontrol(kvm))
2197                 gmap_remove(kvm->arch.gmap);
2198         kvm_s390_destroy_adapters(kvm);
2199         kvm_s390_clear_float_irqs(kvm);
2200         kvm_s390_vsie_destroy(kvm);
2201         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2202 }
2203
2204 /* Section: vcpu related */
2205 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2206 {
2207         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2208         if (!vcpu->arch.gmap)
2209                 return -ENOMEM;
2210         vcpu->arch.gmap->private = vcpu->kvm;
2211
2212         return 0;
2213 }
2214
2215 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2216 {
2217         if (!kvm_s390_use_sca_entries())
2218                 return;
2219         read_lock(&vcpu->kvm->arch.sca_lock);
2220         if (vcpu->kvm->arch.use_esca) {
2221                 struct esca_block *sca = vcpu->kvm->arch.sca;
2222
2223                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2224                 sca->cpu[vcpu->vcpu_id].sda = 0;
2225         } else {
2226                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2227
2228                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2229                 sca->cpu[vcpu->vcpu_id].sda = 0;
2230         }
2231         read_unlock(&vcpu->kvm->arch.sca_lock);
2232 }
2233
2234 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2235 {
2236         if (!kvm_s390_use_sca_entries()) {
2237                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2238
2239                 /* we still need the basic sca for the ipte control */
2240                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2241                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2242                 return;
2243         }
2244         read_lock(&vcpu->kvm->arch.sca_lock);
2245         if (vcpu->kvm->arch.use_esca) {
2246                 struct esca_block *sca = vcpu->kvm->arch.sca;
2247
2248                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2249                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2250                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2251                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2252                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2253         } else {
2254                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2255
2256                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2257                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2258                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2259                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2260         }
2261         read_unlock(&vcpu->kvm->arch.sca_lock);
2262 }
2263
2264 /* Basic SCA to Extended SCA data copy routines */
2265 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2266 {
2267         d->sda = s->sda;
2268         d->sigp_ctrl.c = s->sigp_ctrl.c;
2269         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2270 }
2271
2272 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2273 {
2274         int i;
2275
2276         d->ipte_control = s->ipte_control;
2277         d->mcn[0] = s->mcn;
2278         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2279                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2280 }
2281
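/*
 * Replace the basic SCA (up to 64 vcpus) with an extended SCA (up to 248
 * vcpus) while all vcpus are blocked, rewriting the SCA origin in every
 * SIE control block.
 */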
2282 static int sca_switch_to_extended(struct kvm *kvm)
2283 {
2284         struct bsca_block *old_sca = kvm->arch.sca;
2285         struct esca_block *new_sca;
2286         struct kvm_vcpu *vcpu;
2287         unsigned int vcpu_idx;
2288         u32 scaol, scaoh;
2289
2290         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2291         if (!new_sca)
2292                 return -ENOMEM;
2293
2294         scaoh = (u32)((u64)(new_sca) >> 32);
2295         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2296
2297         kvm_s390_vcpu_block_all(kvm);
2298         write_lock(&kvm->arch.sca_lock);
2299
2300         sca_copy_b_to_e(new_sca, old_sca);
2301
2302         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2303                 vcpu->arch.sie_block->scaoh = scaoh;
2304                 vcpu->arch.sie_block->scaol = scaol;
2305                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2306         }
2307         kvm->arch.sca = new_sca;
2308         kvm->arch.use_esca = 1;
2309
2310         write_unlock(&kvm->arch.sca_lock);
2311         kvm_s390_vcpu_unblock_all(kvm);
2312
2313         free_page((unsigned long)old_sca);
2314
2315         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2316                  old_sca, kvm->arch.sca);
2317         return 0;
2318 }
2319
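/*
 * Check whether a vcpu with the given id fits into the current SCA, and
 * switch to the extended SCA first if that is necessary and possible.
 */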
2320 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2321 {
2322         int rc;
2323
2324         if (!kvm_s390_use_sca_entries()) {
2325                 if (id < KVM_MAX_VCPUS)
2326                         return true;
2327                 return false;
2328         }
2329         if (id < KVM_S390_BSCA_CPU_SLOTS)
2330                 return true;
2331         if (!sclp.has_esca || !sclp.has_64bscao)
2332                 return false;
2333
2334         mutex_lock(&kvm->lock);
2335         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2336         mutex_unlock(&kvm->lock);
2337
2338         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2339 }
2340
2341 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2342 {
2343         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2344         kvm_clear_async_pf_completion_queue(vcpu);
2345         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2346                                     KVM_SYNC_GPRS |
2347                                     KVM_SYNC_ACRS |
2348                                     KVM_SYNC_CRS |
2349                                     KVM_SYNC_ARCH0 |
2350                                     KVM_SYNC_PFAULT;
2351         kvm_s390_set_prefix(vcpu, 0);
2352         if (test_kvm_facility(vcpu->kvm, 64))
2353                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2354         if (test_kvm_facility(vcpu->kvm, 82))
2355                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2356         if (test_kvm_facility(vcpu->kvm, 133))
2357                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2358         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2359          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2360          */
2361         if (MACHINE_HAS_VX)
2362                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2363         else
2364                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2365
2366         if (kvm_is_ucontrol(vcpu->kvm))
2367                 return __kvm_ucontrol_vcpu_init(vcpu);
2368
2369         return 0;
2370 }
2371
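/*
 * Guest CPU timer accounting: while it is enabled for a loaded vcpu, the
 * host TOD time elapsed since cputm_start is subtracted from the SIE cpu
 * timer on demand. The seqcount allows other threads to read a consistent
 * value concurrently; see kvm_s390_get_cpu_timer() below.
 */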
2372 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2373 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2374 {
2375         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2376         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2377         vcpu->arch.cputm_start = get_tod_clock_fast();
2378         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2379 }
2380
2381 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2382 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2383 {
2384         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2385         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2386         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2387         vcpu->arch.cputm_start = 0;
2388         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2389 }
2390
2391 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2392 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2393 {
2394         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2395         vcpu->arch.cputm_enabled = true;
2396         __start_cpu_timer_accounting(vcpu);
2397 }
2398
2399 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2400 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2401 {
2402         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2403         __stop_cpu_timer_accounting(vcpu);
2404         vcpu->arch.cputm_enabled = false;
2405 }
2406
2407 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2408 {
2409         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2410         __enable_cpu_timer_accounting(vcpu);
2411         preempt_enable();
2412 }
2413
2414 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2415 {
2416         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2417         __disable_cpu_timer_accounting(vcpu);
2418         preempt_enable();
2419 }
2420
2421 /* set the cpu timer - may only be called from the VCPU thread itself */
2422 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2423 {
2424         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2425         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2426         if (vcpu->arch.cputm_enabled)
2427                 vcpu->arch.cputm_start = get_tod_clock_fast();
2428         vcpu->arch.sie_block->cputm = cputm;
2429         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2430         preempt_enable();
2431 }
2432
2433 /* update and get the cpu timer - can also be called from other VCPU threads */
2434 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2435 {
2436         unsigned int seq;
2437         __u64 value;
2438
2439         if (unlikely(!vcpu->arch.cputm_enabled))
2440                 return vcpu->arch.sie_block->cputm;
2441
2442         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2443         do {
2444                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2445                 /*
2446                  * If the writer would ever execute a read in the critical
2447                  * section, e.g. in irq context, we have a deadlock.
2448                  */
2449                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2450                 value = vcpu->arch.sie_block->cputm;
2451                 /* if cputm_start is 0, accounting is being started/stopped */
2452                 if (likely(vcpu->arch.cputm_start))
2453                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2454         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2455         preempt_enable();
2456         return value;
2457 }
2458
2459 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2460 {
2461
2462         gmap_enable(vcpu->arch.enabled_gmap);
2463         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2464         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2465                 __start_cpu_timer_accounting(vcpu);
2466         vcpu->cpu = cpu;
2467 }
2468
2469 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2470 {
2471         vcpu->cpu = -1;
2472         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2473                 __stop_cpu_timer_accounting(vcpu);
2474         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2475         vcpu->arch.enabled_gmap = gmap_get_enabled();
2476         gmap_disable(vcpu->arch.enabled_gmap);
2477
2478 }
2479
2480 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2481 {
2482         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2483         vcpu->arch.sie_block->gpsw.mask = 0UL;
2484         vcpu->arch.sie_block->gpsw.addr = 0UL;
2485         kvm_s390_set_prefix(vcpu, 0);
2486         kvm_s390_set_cpu_timer(vcpu, 0);
2487         vcpu->arch.sie_block->ckc       = 0UL;
2488         vcpu->arch.sie_block->todpr     = 0;
2489         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2490         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2491                                         CR0_INTERRUPT_KEY_SUBMASK |
2492                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2493         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2494                                         CR14_UNUSED_33 |
2495                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2496         /* make sure the new fpc will be lazily loaded */
2497         save_fpu_regs();
2498         current->thread.fpu.fpc = 0;
2499         vcpu->arch.sie_block->gbea = 1;
2500         vcpu->arch.sie_block->pp = 0;
2501         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2502         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2503         kvm_clear_async_pf_completion_queue(vcpu);
2504         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2505                 kvm_s390_vcpu_stop(vcpu);
2506         kvm_s390_clear_local_irqs(vcpu);
2507 }
2508
2509 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2510 {
2511         mutex_lock(&vcpu->kvm->lock);
2512         preempt_disable();
2513         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2514         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2515         preempt_enable();
2516         mutex_unlock(&vcpu->kvm->lock);
2517         if (!kvm_is_ucontrol(vcpu->kvm)) {
2518                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2519                 sca_add_vcpu(vcpu);
2520         }
2521         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2522                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2523         /* make vcpu_load load the right gmap on the first trigger */
2524         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2525 }
2526
2527 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2528 {
2529         if (!test_kvm_facility(vcpu->kvm, 76))
2530                 return;
2531
2532         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2533
2534         if (vcpu->kvm->arch.crypto.aes_kw)
2535                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2536         if (vcpu->kvm->arch.crypto.dea_kw)
2537                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2538
2539         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2540 }
2541
2542 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2543 {
2544         free_page(vcpu->arch.sie_block->cbrlo);
2545         vcpu->arch.sie_block->cbrlo = 0;
2546 }
2547
2548 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2549 {
2550         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2551         if (!vcpu->arch.sie_block->cbrlo)
2552                 return -ENOMEM;
2553         return 0;
2554 }
2555
2556 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2557 {
2558         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2559
2560         vcpu->arch.sie_block->ibc = model->ibc;
2561         if (test_kvm_facility(vcpu->kvm, 7))
2562                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2563 }
2564
2565 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2566 {
2567         int rc = 0;
2568
2569         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2570                                                     CPUSTAT_SM |
2571                                                     CPUSTAT_STOPPED);
2572
2573         if (test_kvm_facility(vcpu->kvm, 78))
2574                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2575         else if (test_kvm_facility(vcpu->kvm, 8))
2576                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2577
2578         kvm_s390_vcpu_setup_model(vcpu);
2579
2580         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2581         if (MACHINE_HAS_ESOP)
2582                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2583         if (test_kvm_facility(vcpu->kvm, 9))
2584                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2585         if (test_kvm_facility(vcpu->kvm, 73))
2586                 vcpu->arch.sie_block->ecb |= ECB_TE;
2587
2588         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2589                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2590         if (test_kvm_facility(vcpu->kvm, 130))
2591                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2592         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2593         if (sclp.has_cei)
2594                 vcpu->arch.sie_block->eca |= ECA_CEI;
2595         if (sclp.has_ib)
2596                 vcpu->arch.sie_block->eca |= ECA_IB;
2597         if (sclp.has_siif)
2598                 vcpu->arch.sie_block->eca |= ECA_SII;
2599         if (sclp.has_sigpif)
2600                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2601         if (test_kvm_facility(vcpu->kvm, 129)) {
2602                 vcpu->arch.sie_block->eca |= ECA_VX;
2603                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2604         }
2605         if (test_kvm_facility(vcpu->kvm, 139))
2606                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2607
2608         if (vcpu->arch.sie_block->gd) {
2609                 vcpu->arch.sie_block->eca |= ECA_AIV;
2610                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2611                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2612         }
2613         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2614                                         | SDNXC;
2615         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2616
2617         if (sclp.has_kss)
2618                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2619         else
2620                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2621
2622         if (vcpu->kvm->arch.use_cmma) {
2623                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2624                 if (rc)
2625                         return rc;
2626         }
2627         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2628         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2629
2630         kvm_s390_vcpu_crypto_setup(vcpu);
2631
2632         return rc;
2633 }
2634
2635 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2636                                       unsigned int id)
2637 {
2638         struct kvm_vcpu *vcpu;
2639         struct sie_page *sie_page;
2640         int rc = -EINVAL;
2641
2642         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2643                 goto out;
2644
2645         rc = -ENOMEM;
2646
2647         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2648         if (!vcpu)
2649                 goto out;
2650
2651         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2652         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2653         if (!sie_page)
2654                 goto out_free_cpu;
2655
2656         vcpu->arch.sie_block = &sie_page->sie_block;
2657         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2658
2659         /* the real guest size will always be smaller than msl */
2660         vcpu->arch.sie_block->mso = 0;
2661         vcpu->arch.sie_block->msl = sclp.hamax;
2662
2663         vcpu->arch.sie_block->icpua = id;
2664         spin_lock_init(&vcpu->arch.local_int.lock);
2665         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2666         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2667                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2668         seqcount_init(&vcpu->arch.cputm_seqcount);
2669
2670         rc = kvm_vcpu_init(vcpu, kvm, id);
2671         if (rc)
2672                 goto out_free_sie_block;
2673         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2674                  vcpu->arch.sie_block);
2675         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2676
2677         return vcpu;
2678 out_free_sie_block:
2679         free_page((unsigned long)(vcpu->arch.sie_block));
2680 out_free_cpu:
2681         kmem_cache_free(kvm_vcpu_cache, vcpu);
2682 out:
2683         return ERR_PTR(rc);
2684 }
2685
2686 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2687 {
2688         return kvm_s390_vcpu_has_irq(vcpu, 0);
2689 }
2690
2691 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2692 {
2693         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2694 }
2695
2696 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2697 {
2698         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2699         exit_sie(vcpu);
2700 }
2701
2702 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2703 {
2704         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2705 }
2706
2707 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2708 {
2709         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2710         exit_sie(vcpu);
2711 }
2712
2713 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2714 {
2715         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2716 }
2717
2718 /*
2719  * Kick a guest cpu out of SIE and wait until SIE is not running.
2720  * If the CPU is not running (e.g. waiting as idle) the function will
2721  * return immediately. */
2722 void exit_sie(struct kvm_vcpu *vcpu)
2723 {
2724         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2725         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2726                 cpu_relax();
2727 }
2728
2729 /* Kick a guest cpu out of SIE to process a request synchronously */
2730 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2731 {
2732         kvm_make_request(req, vcpu);
2733         kvm_s390_vcpu_request(vcpu);
2734 }
2735
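/*
 * Notifier for invalidations of host mappings of guest memory: any vcpu
 * whose prefix pages intersect the invalidated range is asked to reload
 * its MMU (and thus its prefix mapping).
 */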
2736 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2737                               unsigned long end)
2738 {
2739         struct kvm *kvm = gmap->private;
2740         struct kvm_vcpu *vcpu;
2741         unsigned long prefix;
2742         int i;
2743
2744         if (gmap_is_shadow(gmap))
2745                 return;
2746         if (start >= 1UL << 31)
2747                 /* We are only interested in prefix pages */
2748                 return;
2749         kvm_for_each_vcpu(i, vcpu, kvm) {
2750                 /* match against both prefix pages */
2751                 prefix = kvm_s390_get_prefix(vcpu);
2752                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2753                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2754                                    start, end);
2755                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2756                 }
2757         }
2758 }
2759
2760 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2761 {
2762         /* kvm common code refers to this, but never calls it */
2763         BUG();
2764         return 0;
2765 }
2766
2767 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2768                                            struct kvm_one_reg *reg)
2769 {
2770         int r = -EINVAL;
2771
2772         switch (reg->id) {
2773         case KVM_REG_S390_TODPR:
2774                 r = put_user(vcpu->arch.sie_block->todpr,
2775                              (u32 __user *)reg->addr);
2776                 break;
2777         case KVM_REG_S390_EPOCHDIFF:
2778                 r = put_user(vcpu->arch.sie_block->epoch,
2779                              (u64 __user *)reg->addr);
2780                 break;
2781         case KVM_REG_S390_CPU_TIMER:
2782                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2783                              (u64 __user *)reg->addr);
2784                 break;
2785         case KVM_REG_S390_CLOCK_COMP:
2786                 r = put_user(vcpu->arch.sie_block->ckc,
2787                              (u64 __user *)reg->addr);
2788                 break;
2789         case KVM_REG_S390_PFTOKEN:
2790                 r = put_user(vcpu->arch.pfault_token,
2791                              (u64 __user *)reg->addr);
2792                 break;
2793         case KVM_REG_S390_PFCOMPARE:
2794                 r = put_user(vcpu->arch.pfault_compare,
2795                              (u64 __user *)reg->addr);
2796                 break;
2797         case KVM_REG_S390_PFSELECT:
2798                 r = put_user(vcpu->arch.pfault_select,
2799                              (u64 __user *)reg->addr);
2800                 break;
2801         case KVM_REG_S390_PP:
2802                 r = put_user(vcpu->arch.sie_block->pp,
2803                              (u64 __user *)reg->addr);
2804                 break;
2805         case KVM_REG_S390_GBEA:
2806                 r = put_user(vcpu->arch.sie_block->gbea,
2807                              (u64 __user *)reg->addr);
2808                 break;
2809         default:
2810                 break;
2811         }
2812
2813         return r;
2814 }
2815
2816 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2817                                            struct kvm_one_reg *reg)
2818 {
2819         int r = -EINVAL;
2820         __u64 val;
2821
2822         switch (reg->id) {
2823         case KVM_REG_S390_TODPR:
2824                 r = get_user(vcpu->arch.sie_block->todpr,
2825                              (u32 __user *)reg->addr);
2826                 break;
2827         case KVM_REG_S390_EPOCHDIFF:
2828                 r = get_user(vcpu->arch.sie_block->epoch,
2829                              (u64 __user *)reg->addr);
2830                 break;
2831         case KVM_REG_S390_CPU_TIMER:
2832                 r = get_user(val, (u64 __user *)reg->addr);
2833                 if (!r)
2834                         kvm_s390_set_cpu_timer(vcpu, val);
2835                 break;
2836         case KVM_REG_S390_CLOCK_COMP:
2837                 r = get_user(vcpu->arch.sie_block->ckc,
2838                              (u64 __user *)reg->addr);
2839                 break;
2840         case KVM_REG_S390_PFTOKEN:
2841                 r = get_user(vcpu->arch.pfault_token,
2842                              (u64 __user *)reg->addr);
2843                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2844                         kvm_clear_async_pf_completion_queue(vcpu);
2845                 break;
2846         case KVM_REG_S390_PFCOMPARE:
2847                 r = get_user(vcpu->arch.pfault_compare,
2848                              (u64 __user *)reg->addr);
2849                 break;
2850         case KVM_REG_S390_PFSELECT:
2851                 r = get_user(vcpu->arch.pfault_select,
2852                              (u64 __user *)reg->addr);
2853                 break;
2854         case KVM_REG_S390_PP:
2855                 r = get_user(vcpu->arch.sie_block->pp,
2856                              (u64 __user *)reg->addr);
2857                 break;
2858         case KVM_REG_S390_GBEA:
2859                 r = get_user(vcpu->arch.sie_block->gbea,
2860                              (u64 __user *)reg->addr);
2861                 break;
2862         default:
2863                 break;
2864         }
2865
2866         return r;
2867 }
2868
2869 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2870 {
2871         kvm_s390_vcpu_initial_reset(vcpu);
2872         return 0;
2873 }
2874
2875 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2876 {
2877         vcpu_load(vcpu);
2878         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2879         vcpu_put(vcpu);
2880         return 0;
2881 }
2882
2883 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2884 {
2885         vcpu_load(vcpu);
2886         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2887         vcpu_put(vcpu);
2888         return 0;
2889 }
2890
2891 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2892                                   struct kvm_sregs *sregs)
2893 {
2894         vcpu_load(vcpu);
2895
2896         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2897         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2898
2899         vcpu_put(vcpu);
2900         return 0;
2901 }
2902
2903 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2904                                   struct kvm_sregs *sregs)
2905 {
2906         vcpu_load(vcpu);
2907
2908         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2909         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2910
2911         vcpu_put(vcpu);
2912         return 0;
2913 }
2914
2915 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2916 {
2917         int ret = 0;
2918
2919         vcpu_load(vcpu);
2920
2921         if (test_fp_ctl(fpu->fpc)) {
2922                 ret = -EINVAL;
2923                 goto out;
2924         }
2925         vcpu->run->s.regs.fpc = fpu->fpc;
2926         if (MACHINE_HAS_VX)
2927                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2928                                  (freg_t *) fpu->fprs);
2929         else
2930                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2931
2932 out:
2933         vcpu_put(vcpu);
2934         return ret;
2935 }
2936
2937 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2938 {
2939         vcpu_load(vcpu);
2940
2941         /* make sure we have the latest values */
2942         save_fpu_regs();
2943         if (MACHINE_HAS_VX)
2944                 convert_vx_to_fp((freg_t *) fpu->fprs,
2945                                  (__vector128 *) vcpu->run->s.regs.vrs);
2946         else
2947                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2948         fpu->fpc = vcpu->run->s.regs.fpc;
2949
2950         vcpu_put(vcpu);
2951         return 0;
2952 }
2953
2954 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2955 {
2956         int rc = 0;
2957
2958         if (!is_vcpu_stopped(vcpu))
2959                 rc = -EBUSY;
2960         else {
2961                 vcpu->run->psw_mask = psw.mask;
2962                 vcpu->run->psw_addr = psw.addr;
2963         }
2964         return rc;
2965 }
2966
2967 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2968                                   struct kvm_translation *tr)
2969 {
2970         return -EINVAL; /* not implemented yet */
2971 }
2972
2973 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2974                               KVM_GUESTDBG_USE_HW_BP | \
2975                               KVM_GUESTDBG_ENABLE)
2976
2977 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2978                                         struct kvm_guest_debug *dbg)
2979 {
2980         int rc = 0;
2981
2982         vcpu_load(vcpu);
2983
2984         vcpu->guest_debug = 0;
2985         kvm_s390_clear_bp_data(vcpu);
2986
2987         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2988                 rc = -EINVAL;
2989                 goto out;
2990         }
2991         if (!sclp.has_gpere) {
2992                 rc = -EINVAL;
2993                 goto out;
2994         }
2995
2996         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2997                 vcpu->guest_debug = dbg->control;
2998                 /* enforce guest PER */
2999                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3000
3001                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3002                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3003         } else {
3004                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3005                 vcpu->arch.guestdbg.last_bp = 0;
3006         }
3007
3008         if (rc) {
3009                 vcpu->guest_debug = 0;
3010                 kvm_s390_clear_bp_data(vcpu);
3011                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3012         }
3013
3014 out:
3015         vcpu_put(vcpu);
3016         return rc;
3017 }
3018
3019 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3020                                     struct kvm_mp_state *mp_state)
3021 {
3022         int ret;
3023
3024         vcpu_load(vcpu);
3025
3026         /* CHECK_STOP and LOAD are not supported yet */
3027         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3028                                       KVM_MP_STATE_OPERATING;
3029
3030         vcpu_put(vcpu);
3031         return ret;
3032 }
3033
3034 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3035                                     struct kvm_mp_state *mp_state)
3036 {
3037         int rc = 0;
3038
3039         vcpu_load(vcpu);
3040
3041         /* user space knows about this interface - let it control the state */
3042         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3043
3044         switch (mp_state->mp_state) {
3045         case KVM_MP_STATE_STOPPED:
3046                 kvm_s390_vcpu_stop(vcpu);
3047                 break;
3048         case KVM_MP_STATE_OPERATING:
3049                 kvm_s390_vcpu_start(vcpu);
3050                 break;
3051         case KVM_MP_STATE_LOAD:
3052         case KVM_MP_STATE_CHECK_STOP:
3053                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3054         default:
3055                 rc = -ENXIO;
3056         }
3057
3058         vcpu_put(vcpu);
3059         return rc;
3060 }
3061
3062 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3063 {
3064         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3065 }
3066
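/*
 * Process the requests posted for this vcpu before it re-enters SIE:
 * re-arm the prefix-page ipte notifier (MMU_RELOAD), force a TLB flush,
 * toggle IBS, enable the operation-exception intercept, and switch CMM
 * interpretation off/on around migration. Each handled request restarts
 * the scan via the retry label.
 */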
3067 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3068 {
3069 retry:
3070         kvm_s390_vcpu_request_handled(vcpu);
3071         if (!kvm_request_pending(vcpu))
3072                 return 0;
3073         /*
3074          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3075          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3076          * This ensures that the ipte instruction for this request has
3077          * already finished. We might race against a second unmapper that
3078          * wants to set the blocking bit. Let's just retry the request loop.
3079          */
3080         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3081                 int rc;
3082                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3083                                           kvm_s390_get_prefix(vcpu),
3084                                           PAGE_SIZE * 2, PROT_WRITE);
3085                 if (rc) {
3086                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3087                         return rc;
3088                 }
3089                 goto retry;
3090         }
3091
3092         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3093                 vcpu->arch.sie_block->ihcpu = 0xffff;
3094                 goto retry;
3095         }
3096
3097         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3098                 if (!ibs_enabled(vcpu)) {
3099                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3100                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3101                 }
3102                 goto retry;
3103         }
3104
3105         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3106                 if (ibs_enabled(vcpu)) {
3107                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3108                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3109                 }
3110                 goto retry;
3111         }
3112
3113         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3114                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3115                 goto retry;
3116         }
3117
3118         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3119                 /*
3120                  * Disable CMM virtualization; we will emulate the ESSA
3121                  * instruction manually, in order to provide additional
3122                  * functionalities needed for live migration.
3123                  */
3124                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3125                 goto retry;
3126         }
3127
3128         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3129                 /*
3130                  * Re-enable CMM virtualization if CMMA is available and
3131                  * CMM has been used.
3132                  */
3133                 if ((vcpu->kvm->arch.use_cmma) &&
3134                     (vcpu->kvm->mm->context.uses_cmm))
3135                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3136                 goto retry;
3137         }
3138
3139         /* nothing to do, just clear the request */
3140         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3141
3142         return 0;
3143 }
3144
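/*
 * Set the guest TOD clock: the difference between the requested guest TOD
 * and the current host TOD becomes the per-VM epoch (plus the extended
 * epoch index when facility 139 is available), which is then copied into
 * every vcpu's SIE block while all vcpus are blocked.
 */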
3145 void kvm_s390_set_tod_clock(struct kvm *kvm,
3146                             const struct kvm_s390_vm_tod_clock *gtod)
3147 {
3148         struct kvm_vcpu *vcpu;
3149         struct kvm_s390_tod_clock_ext htod;
3150         int i;
3151
3152         mutex_lock(&kvm->lock);
3153         preempt_disable();
3154
3155         get_tod_clock_ext((char *)&htod);
3156
3157         kvm->arch.epoch = gtod->tod - htod.tod;
3158         kvm->arch.epdx = 0;
3159         if (test_kvm_facility(kvm, 139)) {
3160                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3161                 if (kvm->arch.epoch > gtod->tod)
3162                         kvm->arch.epdx -= 1;
3163         }
3164
3165         kvm_s390_vcpu_block_all(kvm);
3166         kvm_for_each_vcpu(i, vcpu, kvm) {
3167                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3168                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3169         }
3170
3171         kvm_s390_vcpu_unblock_all(kvm);
3172         preempt_enable();
3173         mutex_unlock(&kvm->lock);
3174 }
3175
3176 /**
3177  * kvm_arch_fault_in_page - fault-in guest page if necessary
3178  * @vcpu: The corresponding virtual cpu
3179  * @gpa: Guest physical address
3180  * @writable: Whether the page should be writable or not
3181  *
3182  * Make sure that a guest page has been faulted-in on the host.
3183  *
3184  * Return: Zero on success, negative error code otherwise.
3185  */
3186 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3187 {
3188         return gmap_fault(vcpu->arch.gmap, gpa,
3189                           writable ? FAULT_FLAG_WRITE : 0);
3190 }
3191
3192 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3193                                       unsigned long token)
3194 {
3195         struct kvm_s390_interrupt inti;
3196         struct kvm_s390_irq irq;
3197
3198         if (start_token) {
3199                 irq.u.ext.ext_params2 = token;
3200                 irq.type = KVM_S390_INT_PFAULT_INIT;
3201                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3202         } else {
3203                 inti.type = KVM_S390_INT_PFAULT_DONE;
3204                 inti.parm64 = token;
3205                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3206         }
3207 }
3208
3209 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3210                                      struct kvm_async_pf *work)
3211 {
3212         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3213         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3214 }
3215
3216 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3217                                  struct kvm_async_pf *work)
3218 {
3219         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3220         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3221 }
3222
3223 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3224                                struct kvm_async_pf *work)
3225 {
3226         /* s390 will always inject the page directly */
3227 }
3228
3229 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3230 {
3231         /*
3232          * s390 will always inject the page directly,
3233          * but we still want check_async_completion to clean up
3234          */
3235         return true;
3236 }
3237
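/*
 * Decide whether the current host fault may be handled asynchronously.
 * Returns 0 (no async handling) unless the pfault handshake is armed, the
 * PSW bits selected by pfault_select match pfault_compare, external
 * interrupts are enabled, no interrupt is already pending, the
 * service-signal subclass is open and pfault is enabled on the gmap; only
 * then is async_pf work queued for the faulting gmap address.
 */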
3238 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3239 {
3240         hva_t hva;
3241         struct kvm_arch_async_pf arch;
3242         int rc;
3243
3244         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3245                 return 0;
3246         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3247             vcpu->arch.pfault_compare)
3248                 return 0;
3249         if (psw_extint_disabled(vcpu))
3250                 return 0;
3251         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3252                 return 0;
3253         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3254                 return 0;
3255         if (!vcpu->arch.gmap->pfault_enabled)
3256                 return 0;
3257
3258         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3259         hva += current->thread.gmap_addr & ~PAGE_MASK;
3260         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3261                 return 0;
3262
3263         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3264         return rc;
3265 }
3266
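/*
 * Work done before every SIE entry: async pfault completion housekeeping,
 * stashing gprs 14/15 in the SIE block, rescheduling or handling pending
 * machine checks if needed, delivering pending guest interrupts (except
 * for ucontrol guests), processing vcpu requests and patching the PER
 * control registers when guest debugging is active.
 */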
3267 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3268 {
3269         int rc, cpuflags;
3270
3271         /*
3272          * On s390 notifications for arriving pages will be delivered directly
3273          * to the guest but the housekeeping for completed pfaults is
3274          * handled outside the worker.
3275          */
3276         kvm_check_async_pf_completion(vcpu);
3277
3278         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3279         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3280
3281         if (need_resched())
3282                 schedule();
3283
3284         if (test_cpu_flag(CIF_MCCK_PENDING))
3285                 s390_handle_mcck();
3286
3287         if (!kvm_is_ucontrol(vcpu->kvm)) {
3288                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3289                 if (rc)
3290                         return rc;
3291         }
3292
3293         rc = kvm_s390_handle_requests(vcpu);
3294         if (rc)
3295                 return rc;
3296
3297         if (guestdbg_enabled(vcpu)) {
3298                 kvm_s390_backup_guest_per_regs(vcpu);
3299                 kvm_s390_patch_guest_per_regs(vcpu);
3300         }
3301
3302         vcpu->arch.sie_block->icptcode = 0;
3303         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3304         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3305         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3306
3307         return 0;
3308 }
3309
3310 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3311 {
3312         struct kvm_s390_pgm_info pgm_info = {
3313                 .code = PGM_ADDRESSING,
3314         };
3315         u8 opcode, ilen;
3316         int rc;
3317
3318         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3319         trace_kvm_s390_sie_fault(vcpu);
3320
3321         /*
3322          * We want to inject an addressing exception, which is defined as a
3323          * suppressing or terminating exception. However, since we came here
3324          * by a DAT access exception, the PSW still points to the faulting
3325          * instruction since DAT exceptions are nullifying. So we've got
3326          * to look up the current opcode to get the length of the instruction
3327          * to be able to forward the PSW.
3328          */
3329         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3330         ilen = insn_length(opcode);
3331         if (rc < 0) {
3332                 return rc;
3333         } else if (rc) {
3334                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3335                  * Forward by arbitrary ilc, injection will take care of
3336                  * nullification if necessary.
3337                  */
3338                 pgm_info = vcpu->arch.pgm;
3339                 ilen = 4;
3340         }
3341         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3342         kvm_s390_forward_psw(vcpu, ilen);
3343         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3344 }
3345
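/*
 * Work done after every SIE exit: restore PER registers and gprs 14/15,
 * reinject a host machine check (exit_reason == -EINTR), dispatch
 * intercepts (unhandled ones go to userspace as KVM_EXIT_S390_SIEIC),
 * report translation faults for ucontrol guests, try async pfault
 * handling for guest faults, or fall back to injecting an addressing
 * exception via vcpu_post_run_fault_in_sie().
 */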
3346 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3347 {
3348         struct mcck_volatile_info *mcck_info;
3349         struct sie_page *sie_page;
3350
3351         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3352                    vcpu->arch.sie_block->icptcode);
3353         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3354
3355         if (guestdbg_enabled(vcpu))
3356                 kvm_s390_restore_guest_per_regs(vcpu);
3357
3358         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3359         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3360
3361         if (exit_reason == -EINTR) {
3362                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3363                 sie_page = container_of(vcpu->arch.sie_block,
3364                                         struct sie_page, sie_block);
3365                 mcck_info = &sie_page->mcck_info;
3366                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3367                 return 0;
3368         }
3369
3370         if (vcpu->arch.sie_block->icptcode > 0) {
3371                 int rc = kvm_handle_sie_intercept(vcpu);
3372
3373                 if (rc != -EOPNOTSUPP)
3374                         return rc;
3375                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3376                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3377                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3378                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3379                 return -EREMOTE;
3380         } else if (exit_reason != -EFAULT) {
3381                 vcpu->stat.exit_null++;
3382                 return 0;
3383         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3384                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3385                 vcpu->run->s390_ucontrol.trans_exc_code =
3386                                                 current->thread.gmap_addr;
3387                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3388                 return -EREMOTE;
3389         } else if (current->thread.gmap_pfault) {
3390                 trace_kvm_s390_major_guest_pfault(vcpu);
3391                 current->thread.gmap_pfault = 0;
3392                 if (kvm_arch_setup_async_pf(vcpu))
3393                         return 0;
3394                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3395         }
3396         return vcpu_post_run_fault_in_sie(vcpu);
3397 }
3398
3399 static int __vcpu_run(struct kvm_vcpu *vcpu)
3400 {
3401         int rc, exit_reason;
3402
3403         /*
3404          * We try to hold kvm->srcu during most of vcpu_run (except when
3405          * running the guest), so that memslots (and other stuff) are protected.
3406          */
3407         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3408
3409         do {
3410                 rc = vcpu_pre_run(vcpu);
3411                 if (rc)
3412                         break;
3413
3414                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3415                 /*
3416                  * As PF_VCPU will be used in the fault handler, there must be
3417                  * no uaccess between guest_enter and guest_exit.
3418                  */
3419                 local_irq_disable();
3420                 guest_enter_irqoff();
3421                 __disable_cpu_timer_accounting(vcpu);
3422                 local_irq_enable();
3423                 exit_reason = sie64a(vcpu->arch.sie_block,
3424                                      vcpu->run->s.regs.gprs);
3425                 local_irq_disable();
3426                 __enable_cpu_timer_accounting(vcpu);
3427                 guest_exit_irqoff();
3428                 local_irq_enable();
3429                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3430
3431                 rc = vcpu_post_run(vcpu, exit_reason);
3432         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3433
3434         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3435         return rc;
3436 }
3437
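/*
 * Transfer the register state that userspace marked dirty in kvm_run into
 * the vcpu/SIE block before entering the guest: PSW, prefix, control
 * registers, timers and pfault handshake values, plus lazy enablement of
 * runtime instrumentation, guarded storage and the branch prediction
 * control when the corresponding facilities are available. Host access,
 * floating-point/vector and guarded-storage state is saved and replaced
 * by the guest's.
 */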
3438 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3439 {
3440         struct runtime_instr_cb *riccb;
3441         struct gs_cb *gscb;
3442
3443         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3444         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3445         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3446         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3447         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3448                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3449         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3450                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3451                 /* some control register changes require a tlb flush */
3452                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3453         }
3454         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3455                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3456                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3457                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3458                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3459                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3460         }
3461         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3462                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3463                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3464                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3465                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3466                         kvm_clear_async_pf_completion_queue(vcpu);
3467         }
3468         /*
3469          * If userspace sets the riccb (e.g. after migration) to a valid state,
3470          * we should enable RI here instead of doing the lazy enablement.
3471          */
3472         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3473             test_kvm_facility(vcpu->kvm, 64) &&
3474             riccb->v &&
3475             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3476                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3477                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3478         }
3479         /*
3480          * If userspace sets the gscb (e.g. after migration) to non-zero,
3481          * we should enable GS here instead of doing the lazy enablement.
3482          */
3483         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3484             test_kvm_facility(vcpu->kvm, 133) &&
3485             gscb->gssm &&
3486             !vcpu->arch.gs_enabled) {
3487                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3488                 vcpu->arch.sie_block->ecb |= ECB_GS;
3489                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3490                 vcpu->arch.gs_enabled = 1;
3491         }
3492         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3493             test_kvm_facility(vcpu->kvm, 82)) {
3494                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3495                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3496         }
3497         save_access_regs(vcpu->arch.host_acrs);
3498         restore_access_regs(vcpu->run->s.regs.acrs);
3499         /* save host (userspace) fprs/vrs */
3500         save_fpu_regs();
3501         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3502         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3503         if (MACHINE_HAS_VX)
3504                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3505         else
3506                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3507         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3508         if (test_fp_ctl(current->thread.fpu.fpc))
3509                 /* User space provided an invalid FPC, let's clear it */
3510                 current->thread.fpu.fpc = 0;
3511         if (MACHINE_HAS_GS) {
3512                 preempt_disable();
3513                 __ctl_set_bit(2, 4);
3514                 if (current->thread.gs_cb) {
3515                         vcpu->arch.host_gscb = current->thread.gs_cb;
3516                         save_gs_cb(vcpu->arch.host_gscb);
3517                 }
3518                 if (vcpu->arch.gs_enabled) {
3519                         current->thread.gs_cb = (struct gs_cb *)
3520                                                 &vcpu->run->s.regs.gscb;
3521                         restore_gs_cb(current->thread.gs_cb);
3522                 }
3523                 preempt_enable();
3524         }
3525
3526         kvm_run->kvm_dirty_regs = 0;
3527 }
3528
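/*
 * Counterpart of sync_regs(): copy the guest register state back into
 * kvm_run after the run loop and restore the host access, FPU/vector and
 * guarded-storage registers that sync_regs() saved.
 */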
3529 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3530 {
3531         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3532         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3533         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3534         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3535         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3536         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3537         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3538         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3539         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3540         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3541         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3542         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3543         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3544         save_access_regs(vcpu->run->s.regs.acrs);
3545         restore_access_regs(vcpu->arch.host_acrs);
3546         /* Save guest register state */
3547         save_fpu_regs();
3548         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3549         /* Restore will be done lazily at return */
3550         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3551         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3552         if (MACHINE_HAS_GS) {
3553                 __ctl_set_bit(2, 4);
3554                 if (vcpu->arch.gs_enabled)
3555                         save_gs_cb(current->thread.gs_cb);
3556                 preempt_disable();
3557                 current->thread.gs_cb = vcpu->arch.host_gscb;
3558                 restore_gs_cb(vcpu->arch.host_gscb);
3559                 preempt_enable();
3560                 if (!vcpu->arch.host_gscb)
3561                         __ctl_clear_bit(2, 4);
3562                 vcpu->arch.host_gscb = NULL;
3563         }
3564
3565 }
3566
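/*
 * The KVM_RUN ioctl: handle pending debug exits, auto-start the vcpu
 * unless userspace controls the cpu state itself, sync the register state
 * in, run the SIE loop and store the state back. -EREMOTE from the loop
 * means kvm_run has already been prepared for userspace and is turned
 * into a normal exit; pending signals become KVM_EXIT_INTR.
 */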
3567 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3568 {
3569         int rc;
3570
3571         if (kvm_run->immediate_exit)
3572                 return -EINTR;
3573
3574         vcpu_load(vcpu);
3575
3576         if (guestdbg_exit_pending(vcpu)) {
3577                 kvm_s390_prepare_debug_exit(vcpu);
3578                 rc = 0;
3579                 goto out;
3580         }
3581
3582         kvm_sigset_activate(vcpu);
3583
3584         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3585                 kvm_s390_vcpu_start(vcpu);
3586         } else if (is_vcpu_stopped(vcpu)) {
3587                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3588                                    vcpu->vcpu_id);
3589                 rc = -EINVAL;
3590                 goto out;
3591         }
3592
3593         sync_regs(vcpu, kvm_run);
3594         enable_cpu_timer_accounting(vcpu);
3595
3596         might_fault();
3597         rc = __vcpu_run(vcpu);
3598
3599         if (signal_pending(current) && !rc) {
3600                 kvm_run->exit_reason = KVM_EXIT_INTR;
3601                 rc = -EINTR;
3602         }
3603
3604         if (guestdbg_exit_pending(vcpu) && !rc)  {
3605                 kvm_s390_prepare_debug_exit(vcpu);
3606                 rc = 0;
3607         }
3608
3609         if (rc == -EREMOTE) {
3610                 /* userspace support is needed, kvm_run has been prepared */
3611                 rc = 0;
3612         }
3613
3614         disable_cpu_timer_accounting(vcpu);
3615         store_regs(vcpu, kvm_run);
3616
3617         kvm_sigset_deactivate(vcpu);
3618
3619         vcpu->stat.exit_userspace++;
3620 out:
3621         vcpu_put(vcpu);
3622         return rc;
3623 }
3624
3625 /*
3626  * store status at address
3627  * we have two special cases:
3628  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3629  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3630  */
3631 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3632 {
3633         unsigned char archmode = 1;
3634         freg_t fprs[NUM_FPRS];
3635         unsigned int px;
3636         u64 clkcomp, cputm;
3637         int rc;
3638
3639         px = kvm_s390_get_prefix(vcpu);
3640         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3641                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3642                         return -EFAULT;
3643                 gpa = 0;
3644         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3645                 if (write_guest_real(vcpu, 163, &archmode, 1))
3646                         return -EFAULT;
3647                 gpa = px;
3648         } else
3649                 gpa -= __LC_FPREGS_SAVE_AREA;
3650
3651         /* manually convert vector registers if necessary */
3652         if (MACHINE_HAS_VX) {
3653                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3654                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3655                                      fprs, 128);
3656         } else {
3657                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3658                                      vcpu->run->s.regs.fprs, 128);
3659         }
3660         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3661                               vcpu->run->s.regs.gprs, 128);
3662         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3663                               &vcpu->arch.sie_block->gpsw, 16);
3664         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3665                               &px, 4);
3666         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3667                               &vcpu->run->s.regs.fpc, 4);
3668         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3669                               &vcpu->arch.sie_block->todpr, 4);
3670         cputm = kvm_s390_get_cpu_timer(vcpu);
3671         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3672                               &cputm, 8);
3673         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3674         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3675                               &clkcomp, 8);
3676         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3677                               &vcpu->run->s.regs.acrs, 64);
3678         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3679                               &vcpu->arch.sie_block->gcr, 128);
3680         return rc ? -EFAULT : 0;
3681 }
3682
3683 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3684 {
3685         /*
3686          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3687          * switch in the run ioctl. Let's update our copies before we save
3688          * them into the save area
3689          */
3690         save_fpu_regs();
3691         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3692         save_access_regs(vcpu->run->s.regs.acrs);
3693
3694         return kvm_s390_store_status_unloaded(vcpu, addr);
3695 }
3696
3697 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3698 {
3699         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3700         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3701 }
3702
3703 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3704 {
3705         unsigned int i;
3706         struct kvm_vcpu *vcpu;
3707
3708         kvm_for_each_vcpu(i, vcpu, kvm) {
3709                 __disable_ibs_on_vcpu(vcpu);
3710         }
3711 }
3712
3713 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3714 {
3715         if (!sclp.has_ibs)
3716                 return;
3717         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3718         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3719 }
3720
3721 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3722 {
3723         int i, online_vcpus, started_vcpus = 0;
3724
3725         if (!is_vcpu_stopped(vcpu))
3726                 return;
3727
3728         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3729         /* Only one cpu at a time may enter/leave the STOPPED state. */
3730         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3731         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3732
3733         for (i = 0; i < online_vcpus; i++) {
3734                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3735                         started_vcpus++;
3736         }
3737
3738         if (started_vcpus == 0) {
3739                 /* we're the only active VCPU -> speed it up */
3740                 __enable_ibs_on_vcpu(vcpu);
3741         } else if (started_vcpus == 1) {
3742                 /*
3743                  * As we are starting a second VCPU, we have to disable
3744                  * the IBS facility on all VCPUs to remove potentially
3745                  * outstanding ENABLE requests.
3746                  */
3747                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3748         }
3749
3750         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3751         /*
3752          * Another VCPU might have used IBS while we were offline.
3753          * Let's play safe and flush the VCPU at startup.
3754          */
3755         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3756         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3757         return;
3758 }
3759
3760 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3761 {
3762         int i, online_vcpus, started_vcpus = 0;
3763         struct kvm_vcpu *started_vcpu = NULL;
3764
3765         if (is_vcpu_stopped(vcpu))
3766                 return;
3767
3768         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3769         /* Only one cpu at a time may enter/leave the STOPPED state. */
3770         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3771         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3772
3773         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3774         kvm_s390_clear_stop_irq(vcpu);
3775
3776         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3777         __disable_ibs_on_vcpu(vcpu);
3778
3779         for (i = 0; i < online_vcpus; i++) {
3780                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3781                         started_vcpus++;
3782                         started_vcpu = vcpu->kvm->vcpus[i];
3783                 }
3784         }
3785
3786         if (started_vcpus == 1) {
3787                 /*
3788                  * As we only have one VCPU left, we want to enable the
3789                  * IBS facility for that VCPU to speed it up.
3790                  */
3791                 __enable_ibs_on_vcpu(started_vcpu);
3792         }
3793
3794         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3795         return;
3796 }
3797
3798 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3799                                      struct kvm_enable_cap *cap)
3800 {
3801         int r;
3802
3803         if (cap->flags)
3804                 return -EINVAL;
3805
3806         switch (cap->cap) {
3807         case KVM_CAP_S390_CSS_SUPPORT:
3808                 if (!vcpu->kvm->arch.css_support) {
3809                         vcpu->kvm->arch.css_support = 1;
3810                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3811                         trace_kvm_s390_enable_css(vcpu->kvm);
3812                 }
3813                 r = 0;
3814                 break;
3815         default:
3816                 r = -EINVAL;
3817                 break;
3818         }
3819         return r;
3820 }
3821
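/*
 * Back end of the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical
 * memory through a temporary kernel buffer, or only check the access when
 * KVM_S390_MEMOP_F_CHECK_ONLY is set. A positive return value is a
 * program interruption code and is injected into the guest if
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION was requested.
 *
 * Illustrative userspace sketch (not part of this file), assuming an open
 * vcpu fd and a hypothetical buffer buf of len bytes:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */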
3822 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3823                                   struct kvm_s390_mem_op *mop)
3824 {
3825         void __user *uaddr = (void __user *)mop->buf;
3826         void *tmpbuf = NULL;
3827         int r, srcu_idx;
3828         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3829                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3830
3831         if (mop->flags & ~supported_flags)
3832                 return -EINVAL;
3833
3834         if (mop->size > MEM_OP_MAX_SIZE)
3835                 return -E2BIG;
3836
3837         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3838                 tmpbuf = vmalloc(mop->size);
3839                 if (!tmpbuf)
3840                         return -ENOMEM;
3841         }
3842
3843         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3844
3845         switch (mop->op) {
3846         case KVM_S390_MEMOP_LOGICAL_READ:
3847                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3848                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3849                                             mop->size, GACC_FETCH);
3850                         break;
3851                 }
3852                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3853                 if (r == 0) {
3854                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3855                                 r = -EFAULT;
3856                 }
3857                 break;
3858         case KVM_S390_MEMOP_LOGICAL_WRITE:
3859                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3860                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3861                                             mop->size, GACC_STORE);
3862                         break;
3863                 }
3864                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3865                         r = -EFAULT;
3866                         break;
3867                 }
3868                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3869                 break;
3870         default:
3871                 r = -EINVAL;
3872         }
3873
3874         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3875
3876         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3877                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3878
3879         vfree(tmpbuf);
3880         return r;
3881 }
3882
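/*
 * Vcpu ioctls that can be handled without taking the vcpu mutex:
 * interrupt injection via KVM_S390_IRQ and the legacy KVM_S390_INTERRUPT
 * (converted to a struct kvm_s390_irq first). Everything else returns
 * -ENOIOCTLCMD and is handled by kvm_arch_vcpu_ioctl() below.
 */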
3883 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3884                                unsigned int ioctl, unsigned long arg)
3885 {
3886         struct kvm_vcpu *vcpu = filp->private_data;
3887         void __user *argp = (void __user *)arg;
3888
3889         switch (ioctl) {
3890         case KVM_S390_IRQ: {
3891                 struct kvm_s390_irq s390irq;
3892
3893                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3894                         return -EFAULT;
3895                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3896         }
3897         case KVM_S390_INTERRUPT: {
3898                 struct kvm_s390_interrupt s390int;
3899                 struct kvm_s390_irq s390irq;
3900
3901                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3902                         return -EFAULT;
3903                 if (s390int_to_s390irq(&s390int, &s390irq))
3904                         return -EINVAL;
3905                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3906         }
3907         }
3908         return -ENOIOCTLCMD;
3909 }
3910
3911 long kvm_arch_vcpu_ioctl(struct file *filp,
3912                          unsigned int ioctl, unsigned long arg)
3913 {
3914         struct kvm_vcpu *vcpu = filp->private_data;
3915         void __user *argp = (void __user *)arg;
3916         int idx;
3917         long r;
3918
3919         vcpu_load(vcpu);
3920
3921         switch (ioctl) {
3922         case KVM_S390_STORE_STATUS:
3923                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3924                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3925                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3926                 break;
3927         case KVM_S390_SET_INITIAL_PSW: {
3928                 psw_t psw;
3929
3930                 r = -EFAULT;
3931                 if (copy_from_user(&psw, argp, sizeof(psw)))
3932                         break;
3933                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3934                 break;
3935         }
3936         case KVM_S390_INITIAL_RESET:
3937                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3938                 break;
3939         case KVM_SET_ONE_REG:
3940         case KVM_GET_ONE_REG: {
3941                 struct kvm_one_reg reg;
3942                 r = -EFAULT;
3943                 if (copy_from_user(&reg, argp, sizeof(reg)))
3944                         break;
3945                 if (ioctl == KVM_SET_ONE_REG)
3946                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3947                 else
3948                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3949                 break;
3950         }
3951 #ifdef CONFIG_KVM_S390_UCONTROL
3952         case KVM_S390_UCAS_MAP: {
3953                 struct kvm_s390_ucas_mapping ucasmap;
3954
3955                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3956                         r = -EFAULT;
3957                         break;
3958                 }
3959
3960                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3961                         r = -EINVAL;
3962                         break;
3963                 }
3964
3965                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3966                                      ucasmap.vcpu_addr, ucasmap.length);
3967                 break;
3968         }
3969         case KVM_S390_UCAS_UNMAP: {
3970                 struct kvm_s390_ucas_mapping ucasmap;
3971
3972                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3973                         r = -EFAULT;
3974                         break;
3975                 }
3976
3977                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3978                         r = -EINVAL;
3979                         break;
3980                 }
3981
3982                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3983                         ucasmap.length);
3984                 break;
3985         }
3986 #endif
3987         case KVM_S390_VCPU_FAULT: {
3988                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3989                 break;
3990         }
3991         case KVM_ENABLE_CAP:
3992         {
3993                 struct kvm_enable_cap cap;
3994                 r = -EFAULT;
3995                 if (copy_from_user(&cap, argp, sizeof(cap)))
3996                         break;
3997                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3998                 break;
3999         }
4000         case KVM_S390_MEM_OP: {
4001                 struct kvm_s390_mem_op mem_op;
4002
4003                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4004                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4005                 else
4006                         r = -EFAULT;
4007                 break;
4008         }
4009         case KVM_S390_SET_IRQ_STATE: {
4010                 struct kvm_s390_irq_state irq_state;
4011
4012                 r = -EFAULT;
4013                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4014                         break;
4015                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4016                     irq_state.len == 0 ||
4017                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4018                         r = -EINVAL;
4019                         break;
4020                 }
4021                 /* do not use irq_state.flags, it will break old QEMUs */
4022                 r = kvm_s390_set_irq_state(vcpu,
4023                                            (void __user *) irq_state.buf,
4024                                            irq_state.len);
4025                 break;
4026         }
4027         case KVM_S390_GET_IRQ_STATE: {
4028                 struct kvm_s390_irq_state irq_state;
4029
4030                 r = -EFAULT;
4031                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4032                         break;
4033                 if (irq_state.len == 0) {
4034                         r = -EINVAL;
4035                         break;
4036                 }
4037                 /* do not use irq_state.flags, it will break old QEMUs */
4038                 r = kvm_s390_get_irq_state(vcpu,
4039                                            (__u8 __user *)  irq_state.buf,
4040                                            irq_state.len);
4041                 break;
4042         }
4043         default:
4044                 r = -ENOTTY;
4045         }
4046
4047         vcpu_put(vcpu);
4048         return r;
4049 }
4050
4051 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4052 {
4053 #ifdef CONFIG_KVM_S390_UCONTROL
4054         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4055                  && (kvm_is_ucontrol(vcpu->kvm))) {
4056                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4057                 get_page(vmf->page);
4058                 return 0;
4059         }
4060 #endif
4061         return VM_FAULT_SIGBUS;
4062 }
4063
4064 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4065                             unsigned long npages)
4066 {
4067         return 0;
4068 }
4069
4070 /* Section: memory related */
4071 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4072                                    struct kvm_memory_slot *memslot,
4073                                    const struct kvm_userspace_memory_region *mem,
4074                                    enum kvm_mr_change change)
4075 {
4076         /* A few sanity checks. Memory slots have to start and end at a
4077            segment boundary (1 MB). The memory in userland may be fragmented
4078            into various different vmas. It is okay to mmap() and munmap()
4079            memory in this slot at any time after this call. */
4080
4081         if (mem->userspace_addr & 0xffffful)
4082                 return -EINVAL;
4083
4084         if (mem->memory_size & 0xffffful)
4085                 return -EINVAL;
4086
4087         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4088                 return -EINVAL;
4089
4090         return 0;
4091 }
4092
4093 void kvm_arch_commit_memory_region(struct kvm *kvm,
4094                                 const struct kvm_userspace_memory_region *mem,
4095                                 const struct kvm_memory_slot *old,
4096                                 const struct kvm_memory_slot *new,
4097                                 enum kvm_mr_change change)
4098 {
4099         int rc;
4100
4101         /* If the basics of the memslot do not change, we do not want
4102          * to update the gmap. Every update causes several unnecessary
4103          * segment translation exceptions. This is usually handled just
4104          * fine by the normal fault handler + gmap, but it will also
4105          * cause faults on the prefix page of running guest CPUs.
4106          */
4107         if (old->userspace_addr == mem->userspace_addr &&
4108             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4109             old->npages * PAGE_SIZE == mem->memory_size)
4110                 return;
4111
4112         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4113                 mem->guest_phys_addr, mem->memory_size);
4114         if (rc)
4115                 pr_warn("failed to commit memory region\n");
4116         return;
4117 }
4118
4119 static inline unsigned long nonhyp_mask(int i)
4120 {
4121         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4122
4123         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4124 }
4125
4126 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4127 {
4128         vcpu->valid_wakeup = false;
4129 }
4130
4131 static int __init kvm_s390_init(void)
4132 {
4133         int i;
4134
4135         if (!sclp.has_sief2) {
4136                 pr_info("SIE not available\n");
4137                 return -ENODEV;
4138         }
4139
4140         for (i = 0; i < 16; i++)
4141                 kvm_s390_fac_base[i] |=
4142                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4143
4144         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4145 }
4146
4147 static void __exit kvm_s390_exit(void)
4148 {
4149         kvm_exit();
4150 }
4151
4152 module_init(kvm_s390_init);
4153 module_exit(kvm_s390_exit);
4154
4155 /*
4156  * Enable autoloading of the kvm module.
4157  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4158  * since x86 takes a different approach.
4159  */
4160 #include <linux/miscdevice.h>
4161 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4162 MODULE_ALIAS("devname:kvm");