arch/s390/kvm/kvm-s390.c
// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))
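/*
 * Illustrative sketch (not part of this file): how userspace might drive
 * KVM_S390_MEM_OP, whose transfer size is bounded by MEM_OP_MAX_SIZE
 * above. The struct layout lives in the uapi headers; this is a hedged
 * example with error handling omitted:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,				// must be <= 65536
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(uintptr_t)buffer,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */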

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_io_request", VCPU_STAT(exit_io_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_ckc", VCPU_STAT(deliver_ckc) },
        { "deliver_cputm", VCPU_STAT(deliver_cputm) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio", VCPU_STAT(deliver_virtio) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program", VCPU_STAT(deliver_program) },
        { "deliver_io", VCPU_STAT(deliver_io) },
        { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "inject_ckc", VCPU_STAT(inject_ckc) },
        { "inject_cputm", VCPU_STAT(inject_cputm) },
        { "inject_external_call", VCPU_STAT(inject_external_call) },
        { "inject_float_mchk", VM_STAT(inject_float_mchk) },
        { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
        { "inject_io", VM_STAT(inject_io) },
        { "inject_mchk", VCPU_STAT(inject_mchk) },
        { "inject_pfault_done", VM_STAT(inject_pfault_done) },
        { "inject_program", VCPU_STAT(inject_program) },
        { "inject_restart", VCPU_STAT(inject_restart) },
        { "inject_service_signal", VM_STAT(inject_service_signal) },
        { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
        { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
        { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
        { "inject_virtio", VM_STAT(inject_virtio) },
        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
        { "instruction_gs", VCPU_STAT(instruction_gs) },
        { "instruction_io_other", VCPU_STAT(instruction_io_other) },
        { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
        { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_ptff", VCPU_STAT(instruction_ptff) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_sck", VCPU_STAT(instruction_sck) },
        { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_iske", VCPU_STAT(instruction_iske) },
        { "instruction_ri", VCPU_STAT(instruction_ri) },
        { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
        { "instruction_sske", VCPU_STAT(instruction_sske) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tb", VCPU_STAT(instruction_tb) },
        { "instruction_tpi", VCPU_STAT(instruction_tpi) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_tsch", VCPU_STAT(instruction_tsch) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "instruction_diag_10", VCPU_STAT(diagnose_10) },
        { "instruction_diag_44", VCPU_STAT(diagnose_44) },
        { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
        { "instruction_diag_258", VCPU_STAT(diagnose_258) },
        { "instruction_diag_308", VCPU_STAT(diagnose_308) },
        { "instruction_diag_500", VCPU_STAT(diagnose_500) },
        { "instruction_diag_other", VCPU_STAT(diagnose_other) },
        { NULL }
};

struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;
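/*
 * Note (illustrative): epoch_idx and tod together behave as one wide
 * value, so a carry out of the 64-bit tod must be propagated into
 * epoch_idx by hand; see kvm_clock_sync_scb() and
 * kvm_s390_get_tod_clock() below for the two places that do this.
 */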

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, it requires changes to the code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16
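/*
 * Worked example of the sizing (illustrative): 16 doublewords * 64 bits
 * cover facility bits 0-1023. Facility bit <n> lives in doubleword
 * n / 64 at big-endian bit position n % 64; e.g. facility 129 (vector)
 * is bit 1 of doubleword 2.
 */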

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                sizeof(S390_lowcore.stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta; we compensate by adding -delta to
         * the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}
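/*
 * Worked example (illustrative, 128-bit arithmetic across epdx:epoch):
 * if the TOD jumps forward by 0x200 while epoch = 0x100, we add
 * -0x200 = 0xfffffffffffffe00, so delta_idx becomes -1 (sign extension)
 * and epoch becomes 0xffffffffffffff00. Since epoch >= delta, no carry
 * correction applies and epdx drops by one: exactly the borrow from the
 * high word that 0x100 - 0x200 requires.
 */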

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
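/*
 * plo_test_bit() follows the PERFORM LOCKED OPERATION query convention:
 * a function code with the test bit (0x100) set performs no operation
 * but sets cc 0 if that PLO function is installed. The loop below uses
 * this to build a 256-bit availability bitmap, one query per function
 * code.
 */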

static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
         * pages to be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                debug_unregister(kvm_s390_dbf);
                return -ENOMEM;
        }

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage)
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        default:
                r = 0;
        }
        return r;
}
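/*
 * Illustrative sketch (not part of this file): userspace probes these
 * capabilities on the VM file descriptor; a hedged example with error
 * handling omitted:
 *
 *	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP) > 0)
 *		// returned value is the maximum transfer size
 *	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M) == 1)
 *		// 1m huge page backing can be enabled
 */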

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}
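/*
 * Illustrative sketch (not part of this file): the matching userspace
 * side fetches, and thereby clears, the log for one memslot. Assumes a
 * caller-allocated bitmap with one bit per page of the slot; hedged
 * example, error handling omitted:
 *
 *	struct kvm_dirty_log log = {
 *		.slot = slot_id,
 *		.dirty_bitmap = bitmap,
 *	};
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */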

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma)
                        r = -EINVAL;
                else {
                        r = 0;
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        /*
                         * We might have to create fake 4k page
                         * tables. To prevent the hardware from
                         * working on stale PGSTEs, we emulate these
                         * instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}
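/*
 * Illustrative sketch (not part of this file): enabling one of the VM
 * capabilities handled above. Several cases return -EBUSY once a VCPU
 * exists, so userspace does this right after VM creation; hedged
 * example, error handling omitted:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */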

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* Not compatible with cmma. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_s390_vcpu_crypto_setup(vcpu);

        kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        if (!test_kvm_facility(kvm, 76))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}
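/*
 * Illustrative sketch (not part of this file): key wrapping is toggled
 * through the VM device-attribute interface; a hedged example, error
 * handling omitted:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */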

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                ms = slots->memslots + slotnr;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}
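/*
 * Illustrative sketch (not part of this file): migration mode is driven
 * through the same device-attribute interface; a hedged example, error
 * handling omitted:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	// later: _STOP
 */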

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
                if (gtod->tod < htod.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}
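/*
 * Worked example (illustrative): guest TOD = host TOD + epoch, with the
 * carry folded into the epoch index. If htod.tod = 0xfffffffffffffff0
 * and kvm->arch.epoch = 0x20, gtod->tod wraps to 0x10 < htod.tod, so
 * epoch_idx is incremented by one to preserve the 128-bit sum.
 */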

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                mutex_unlock(&kvm->lock);
                return -EBUSY;
        }
        bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        mutex_unlock(&kvm->lock);
        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                 kvm->arch.model.fac_list[0],
                 kvm->arch.model.fac_list[1],
                 kvm->arch.model.fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               sizeof(S390_lowcore.stfle_fac_list));
        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_mask[0],
                 mach->fac_mask[1],
                 mach->fac_mask[2]);
        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_list[0],
                 mach->fac_list[1],
                 mach->fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat,
                    kvm_s390_available_cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
                                        struct kvm_device_attr *attr)
{
        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
            sizeof(struct kvm_s390_vm_cpu_subfunc)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_get_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_FEAT:
                ret = kvm_s390_get_machine_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_get_processor_subfunc(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
        }
        return ret;
}
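/*
 * Illustrative sketch (not part of this file): querying the host model
 * so userspace can build a guest CPU model; a hedged example, error
 * handling omitted:
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)(uintptr_t)&mach,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */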

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_set_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_get_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                        ret = sclp.has_cmma ? 0 : -ENXIO;
                        break;
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CPU_MODEL:
                switch (attr->attr) {
                case KVM_S390_VM_CPU_PROCESSOR:
                case KVM_S390_VM_CPU_MACHINE:
                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                        ret = 0;
                        break;
                /* configuring subfunctions is not supported yet */
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CRYPTO:
                switch (attr->attr) {
                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_MIGRATION:
1498                 ret = 0;
1499                 break;
1500         default:
1501                 ret = -ENXIO;
1502                 break;
1503         }
1504
1505         return ret;
1506 }
1507
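/*
 * Read the guest storage keys for args->count pages, starting at
 * args->start_gfn, and copy them to the buffer at args->skeydata_addr.
 * Returns KVM_S390_GET_SKEYS_NONE if the guest never used storage keys,
 * 0 on success, or a negative error code.
 */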
1508 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1509 {
1510         uint8_t *keys;
1511         uint64_t hva;
1512         int srcu_idx, i, r = 0;
1513
1514         if (args->flags != 0)
1515                 return -EINVAL;
1516
1517         /* Is this guest using storage keys? */
1518         if (!mm_uses_skeys(current->mm))
1519                 return KVM_S390_GET_SKEYS_NONE;
1520
1521         /* Enforce sane limit on memory allocation */
1522         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1523                 return -EINVAL;
1524
1525         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1526         if (!keys)
1527                 return -ENOMEM;
1528
1529         down_read(&current->mm->mmap_sem);
1530         srcu_idx = srcu_read_lock(&kvm->srcu);
1531         for (i = 0; i < args->count; i++) {
1532                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1533                 if (kvm_is_error_hva(hva)) {
1534                         r = -EFAULT;
1535                         break;
1536                 }
1537
1538                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1539                 if (r)
1540                         break;
1541         }
1542         srcu_read_unlock(&kvm->srcu, srcu_idx);
1543         up_read(&current->mm->mmap_sem);
1544
1545         if (!r) {
1546                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1547                                  sizeof(uint8_t) * args->count);
1548                 if (r)
1549                         r = -EFAULT;
1550         }
1551
1552         kvfree(keys);
1553         return r;
1554 }
1555
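/*
 * Set the guest storage keys for args->count pages, starting at
 * args->start_gfn, from the buffer at args->skeydata_addr. Storage key
 * handling is enabled for the guest on demand; bit 0 of each key is
 * reserved and must be zero. fixup_user_fault() is used to resolve
 * pages that are not yet mapped writably.
 */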
1556 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1557 {
1558         uint8_t *keys;
1559         uint64_t hva;
1560         int srcu_idx, i, r = 0;
1561         bool unlocked;
1562
1563         if (args->flags != 0)
1564                 return -EINVAL;
1565
1566         /* Enforce sane limit on memory allocation */
1567         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1568                 return -EINVAL;
1569
1570         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1571         if (!keys)
1572                 return -ENOMEM;
1573
1574         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1575                            sizeof(uint8_t) * args->count);
1576         if (r) {
1577                 r = -EFAULT;
1578                 goto out;
1579         }
1580
1581         /* Enable storage key handling for the guest */
1582         r = s390_enable_skey();
1583         if (r)
1584                 goto out;
1585
1586         i = 0;
1587         down_read(&current->mm->mmap_sem);
1588         srcu_idx = srcu_read_lock(&kvm->srcu);
1589         while (i < args->count) {
1590                 unlocked = false;
1591                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1592                 if (kvm_is_error_hva(hva)) {
1593                         r = -EFAULT;
1594                         break;
1595                 }
1596
1597                 /* Lowest order bit is reserved */
1598                 if (keys[i] & 0x01) {
1599                         r = -EINVAL;
1600                         break;
1601                 }
1602
1603                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1604                 if (r) {
1605                         r = fixup_user_fault(current, current->mm, hva,
1606                                              FAULT_FLAG_WRITE, &unlocked);
1607                         if (r)
1608                                 break;
                        /* the page is mapped in now - retry setting its key */
                        continue;
1609                 }
1610                 i++;
1612         }
1613         srcu_read_unlock(&kvm->srcu, srcu_idx);
1614         up_read(&current->mm->mmap_sem);
1615 out:
1616         kvfree(keys);
1617         return r;
1618 }
1619
1620 /*
1621  * The base address and the length must be sent at the start of each block;
1622  * it is therefore cheaper to send some clean data, as long as the run of
1623  * clean data is shorter than the size of two longs.
1624  */
1625 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
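/*
 * Worked example: on s390x, sizeof(void *) == 8, so the maximum distance
 * is 16. Each CMMA value is one byte, so a gap of up to 16 clean values
 * costs at most the 16 bytes that starting a new block (base address plus
 * length, i.e. two longs) would cost anyway.
 */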
1626 /* the same limit as for the storage keys ioctls, for consistency */
1627 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1628
1629 /*
1630  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1631  * address falls in a hole. In that case the index of one of the memslots
1632  * bordering the hole is returned.
1633  */
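/*
 * Note that the memslots array is kept sorted by descending base_gfn
 * (slot 0 covers the highest guest addresses), which is why the binary
 * search below is inverted; slots->lru_slot caches the most recently
 * used slot as a fast path.
 */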
1634 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1635 {
1636         int start = 0, end = slots->used_slots;
1637         int slot = atomic_read(&slots->lru_slot);
1638         struct kvm_memory_slot *memslots = slots->memslots;
1639
1640         if (gfn >= memslots[slot].base_gfn &&
1641             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1642                 return slot;
1643
1644         while (start < end) {
1645                 slot = start + (end - start) / 2;
1646
1647                 if (gfn >= memslots[slot].base_gfn)
1648                         end = slot;
1649                 else
1650                         start = slot + 1;
1651         }
1652
1653         if (gfn >= memslots[start].base_gfn &&
1654             gfn < memslots[start].base_gfn + memslots[start].npages) {
1655                 atomic_set(&slots->lru_slot, start);
1656         }
1657
1658         return start;
1659 }
1660
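/*
 * Peek at the CMMA page states without consuming any dirty bits: one
 * pgste-derived value per page is copied, starting at args->start_gfn.
 * The walk stops at the end of the buffer or at the first gfn outside
 * guest memory; -EFAULT is only returned if not even one value could
 * be copied.
 */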
1661 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1662                               u8 *res, unsigned long bufsize)
1663 {
1664         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1665
1666         args->count = 0;
1667         while (args->count < bufsize) {
1668                 hva = gfn_to_hva(kvm, cur_gfn);
1669                 /*
1670                  * We return an error if the first value was invalid, but we
1671                  * return successfully if at least one value was copied.
1672                  */
1673                 if (kvm_is_error_hva(hva))
1674                         return args->count ? 0 : -EFAULT;
1675                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1676                         pgstev = 0;
1677                 res[args->count++] = (pgstev >> 24) & 0x43;
1678                 cur_gfn++;
1679         }
1680
1681         return 0;
1682 }
1683
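/*
 * Find the guest frame number of the next dirty bit in the CMMA dirty
 * bitmaps, at or after cur_gfn. Since the memslots are sorted by
 * descending base_gfn, decrementing the slot index moves the search to
 * higher guest addresses; if no dirty bit is found at all, a gfn past
 * the end of memory is returned.
 */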
1684 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1685                                               unsigned long cur_gfn)
1686 {
1687         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1688         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1689         unsigned long ofs = cur_gfn - ms->base_gfn;
1690
1691         if (ms->base_gfn + ms->npages <= cur_gfn) {
1692                 slotidx--;
1693                 /* If we are above the highest slot, wrap around */
1694                 if (slotidx < 0)
1695                         slotidx = slots->used_slots - 1;
1696
1697                 ms = slots->memslots + slotidx;
1698                 ofs = 0;
1699         }
1700         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1701         while ((slotidx > 0) && (ofs >= ms->npages)) {
1702                 slotidx--;
1703                 ms = slots->memslots + slotidx;
1704                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1705         }
1706         return ms->base_gfn + ofs;
1707 }
1708
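/*
 * Gather CMMA values starting at the next dirty gfn at or after
 * args->start_gfn, clearing each dirty bit that is consumed and
 * decrementing the global dirty page count. The walk stops when the
 * buffer is full, the end of memory is reached, or the next dirty bit
 * is more than KVM_S390_MAX_BIT_DISTANCE pages away.
 */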
1709 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1710                              u8 *res, unsigned long bufsize)
1711 {
1712         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1713         struct kvm_memslots *slots = kvm_memslots(kvm);
1714         struct kvm_memory_slot *ms;
1715
1716         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1717         ms = gfn_to_memslot(kvm, cur_gfn);
1718         args->count = 0;
1719         args->start_gfn = cur_gfn;
1720         if (!ms)
1721                 return 0;
1722         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1723         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1724
1725         while (args->count < bufsize) {
1726                 hva = gfn_to_hva(kvm, cur_gfn);
1727                 if (kvm_is_error_hva(hva))
1728                         return 0;
1729                 /* Decrement only if we actually flipped the bit to 0 */
1730                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1731                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1732                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1733                         pgstev = 0;
1734                 /* Save the value */
1735                 res[args->count++] = (pgstev >> 24) & 0x43;
1736                 /* If the next bit is too far away, stop. */
1737                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1738                         return 0;
1739                 /* If we reached the previous "next", find the next one */
1740                 if (cur_gfn == next_gfn)
1741                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1742                 /* Reached the end of memory or of the buffer, stop */
1743                 if ((next_gfn >= mem_end) ||
1744                     (next_gfn - args->start_gfn >= bufsize))
1745                         return 0;
1746                 cur_gfn++;
1747                 /* Reached the end of the current memslot, take the next one. */
1748                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1749                         ms = gfn_to_memslot(kvm, cur_gfn);
1750                         if (!ms)
1751                                 return 0;
1752                 }
1753         }
1754         return 0;
1755 }
1756
1757 /*
1758  * This function searches for the next page with dirty CMMA attributes, and
1759  * saves the attributes in the buffer up to either the end of the buffer or
1760  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1761  * no trailing clean bytes are saved.
1762  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1763  * output buffer will indicate a length of zero.
1764  */
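/*
 * A minimal, hypothetical userspace loop for one migration pass could
 * look like the sketch below; vm_fd, buf, buflen and save() are
 * placeholders, not part of this interface, and KVM_S390_CMMA_PEEK can
 * be passed in flags to read values without consuming dirty bits:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buflen,
 *		.flags = 0,
 *		.values = (__u64) buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		save(log.start_gfn, buf, log.count);
 *		log.start_gfn += log.count;
 *		log.count = buflen;
 *	} while (log.remaining);
 */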
1765 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1766                                   struct kvm_s390_cmma_log *args)
1767 {
1768         unsigned long bufsize;
1769         int srcu_idx, peek, ret;
1770         u8 *values;
1771
1772         if (!kvm->arch.use_cmma)
1773                 return -ENXIO;
1774         /* Invalid/unsupported flags were specified */
1775         if (args->flags & ~KVM_S390_CMMA_PEEK)
1776                 return -EINVAL;
1777         /* Migration mode query, and we are not doing a migration */
1778         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1779         if (!peek && !kvm->arch.migration_mode)
1780                 return -EINVAL;
1781         /* CMMA is disabled or was not used, or the buffer has length zero */
1782         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1783         if (!bufsize || !kvm->mm->context.uses_cmm) {
1784                 memset(args, 0, sizeof(*args));
1785                 return 0;
1786         }
1787         /* We are not peeking, and there are no dirty pages */
1788         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1789                 memset(args, 0, sizeof(*args));
1790                 return 0;
1791         }
1792
1793         values = vmalloc(bufsize);
1794         if (!values)
1795                 return -ENOMEM;
1796
1797         down_read(&kvm->mm->mmap_sem);
1798         srcu_idx = srcu_read_lock(&kvm->srcu);
1799         if (peek)
1800                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1801         else
1802                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1803         srcu_read_unlock(&kvm->srcu, srcu_idx);
1804         up_read(&kvm->mm->mmap_sem);
1805
1806         if (kvm->arch.migration_mode)
1807                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1808         else
1809                 args->remaining = 0;
1810
1811         if (copy_to_user((void __user *)args->values, values, args->count))
1812                 ret = -EFAULT;
1813
1814         vfree(values);
1815         return ret;
1816 }
1817
1818 /*
1819  * This function sets the CMMA attributes for the given pages. If the input
1820  * buffer has zero length, no action is taken, otherwise the attributes are
1821  * set and the mm->context.uses_cmm flag is set.
1822  */
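/*
 * The inverse direction is a single call per block: a hypothetical
 * restore loop would fill start_gfn, count, mask and values and issue
 * ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log) for each block saved by
 * the GET sketch above (vm_fd and log again being placeholders).
 */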
1823 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1824                                   const struct kvm_s390_cmma_log *args)
1825 {
1826         unsigned long hva, mask, pgstev, i;
1827         uint8_t *bits;
1828         int srcu_idx, r = 0;
1829
1830         mask = args->mask;
1831
1832         if (!kvm->arch.use_cmma)
1833                 return -ENXIO;
1834         /* invalid/unsupported flags */
1835         if (args->flags != 0)
1836                 return -EINVAL;
1837         /* Enforce sane limit on memory allocation */
1838         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1839                 return -EINVAL;
1840         /* Nothing to do */
1841         if (args->count == 0)
1842                 return 0;
1843
1844         bits = vmalloc(array_size(sizeof(*bits), args->count));
1845         if (!bits)
1846                 return -ENOMEM;
1847
1848         r = copy_from_user(bits, (void __user *)args->values, args->count);
1849         if (r) {
1850                 r = -EFAULT;
1851                 goto out;
1852         }
1853
1854         down_read(&kvm->mm->mmap_sem);
1855         srcu_idx = srcu_read_lock(&kvm->srcu);
1856         for (i = 0; i < args->count; i++) {
1857                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1858                 if (kvm_is_error_hva(hva)) {
1859                         r = -EFAULT;
1860                         break;
1861                 }
1862
1863                 pgstev = bits[i];
1864                 pgstev = pgstev << 24;
1865                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1866                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1867         }
1868         srcu_read_unlock(&kvm->srcu, srcu_idx);
1869         up_read(&kvm->mm->mmap_sem);
1870
1871         if (!kvm->mm->context.uses_cmm) {
1872                 down_write(&kvm->mm->mmap_sem);
1873                 kvm->mm->context.uses_cmm = 1;
1874                 up_write(&kvm->mm->mmap_sem);
1875         }
1876 out:
1877         vfree(bits);
1878         return r;
1879 }
1880
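/*
 * Dispatcher for the VM-scoped ioctls: each case copies its argument
 * structure from userspace before calling the respective handler;
 * unknown ioctls are rejected with -ENOTTY.
 */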
1881 long kvm_arch_vm_ioctl(struct file *filp,
1882                        unsigned int ioctl, unsigned long arg)
1883 {
1884         struct kvm *kvm = filp->private_data;
1885         void __user *argp = (void __user *)arg;
1886         struct kvm_device_attr attr;
1887         int r;
1888
1889         switch (ioctl) {
1890         case KVM_S390_INTERRUPT: {
1891                 struct kvm_s390_interrupt s390int;
1892
1893                 r = -EFAULT;
1894                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1895                         break;
1896                 r = kvm_s390_inject_vm(kvm, &s390int);
1897                 break;
1898         }
1899         case KVM_ENABLE_CAP: {
1900                 struct kvm_enable_cap cap;

1901                 r = -EFAULT;
1902                 if (copy_from_user(&cap, argp, sizeof(cap)))
1903                         break;
1904                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1905                 break;
1906         }
1907         case KVM_CREATE_IRQCHIP: {
1908                 struct kvm_irq_routing_entry routing;
1909
1910                 r = -EINVAL;
1911                 if (kvm->arch.use_irqchip) {
1912                         /* Set up dummy routing. */
1913                         memset(&routing, 0, sizeof(routing));
1914                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1915                 }
1916                 break;
1917         }
1918         case KVM_SET_DEVICE_ATTR: {
1919                 r = -EFAULT;
1920                 if (copy_from_user(&attr, argp, sizeof(attr)))
1921                         break;
1922                 r = kvm_s390_vm_set_attr(kvm, &attr);
1923                 break;
1924         }
1925         case KVM_GET_DEVICE_ATTR: {
1926                 r = -EFAULT;
1927                 if (copy_from_user(&attr, argp, sizeof(attr)))
1928                         break;
1929                 r = kvm_s390_vm_get_attr(kvm, &attr);
1930                 break;
1931         }
1932         case KVM_HAS_DEVICE_ATTR: {
1933                 r = -EFAULT;
1934                 if (copy_from_user(&attr, argp, sizeof(attr)))
1935                         break;
1936                 r = kvm_s390_vm_has_attr(kvm, &attr);
1937                 break;
1938         }
1939         case KVM_S390_GET_SKEYS: {
1940                 struct kvm_s390_skeys args;
1941
1942                 r = -EFAULT;
1943                 if (copy_from_user(&args, argp,
1944                                    sizeof(struct kvm_s390_skeys)))
1945                         break;
1946                 r = kvm_s390_get_skeys(kvm, &args);
1947                 break;
1948         }
1949         case KVM_S390_SET_SKEYS: {
1950                 struct kvm_s390_skeys args;
1951
1952                 r = -EFAULT;
1953                 if (copy_from_user(&args, argp,
1954                                    sizeof(struct kvm_s390_skeys)))
1955                         break;
1956                 r = kvm_s390_set_skeys(kvm, &args);
1957                 break;
1958         }
1959         case KVM_S390_GET_CMMA_BITS: {
1960                 struct kvm_s390_cmma_log args;
1961
1962                 r = -EFAULT;
1963                 if (copy_from_user(&args, argp, sizeof(args)))
1964                         break;
1965                 mutex_lock(&kvm->slots_lock);
1966                 r = kvm_s390_get_cmma_bits(kvm, &args);
1967                 mutex_unlock(&kvm->slots_lock);
1968                 if (!r) {
1969                         r = copy_to_user(argp, &args, sizeof(args));
1970                         if (r)
1971                                 r = -EFAULT;
1972                 }
1973                 break;
1974         }
1975         case KVM_S390_SET_CMMA_BITS: {
1976                 struct kvm_s390_cmma_log args;
1977
1978                 r = -EFAULT;
1979                 if (copy_from_user(&args, argp, sizeof(args)))
1980                         break;
1981                 mutex_lock(&kvm->slots_lock);
1982                 r = kvm_s390_set_cmma_bits(kvm, &args);
1983                 mutex_unlock(&kvm->slots_lock);
1984                 break;
1985         }
1986         default:
1987                 r = -ENOTTY;
1988         }
1989
1990         return r;
1991 }
1992
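/*
 * Query the AP configuration information: PQAP with function code
 * 0x04000000 (QCI) in gr0 stores 128 bytes of configuration data at
 * the address in gr2. The condition code is extracted via ipm/srl;
 * if PQAP raises a program check, the exception table fixup skips
 * the extraction, and the zero-initialized cc and config gracefully
 * report no APXA support.
 */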
1993 static int kvm_s390_query_ap_config(u8 *config)
1994 {
1995         u32 fcn_code = 0x04000000UL;
1996         u32 cc = 0;
1997
1998         memset(config, 0, 128);
1999         asm volatile(
2000                 "lgr 0,%1\n"
2001                 "lgr 2,%2\n"
2002                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
2003                 "0: ipm %0\n"
2004                 "srl %0,28\n"
2005                 "1:\n"
2006                 EX_TABLE(0b, 1b)
2007                 : "+r" (cc)
2008                 : "r" (fcn_code), "r" (config)
2009                 : "cc", "0", "2", "memory"
2010         );
2011
2012         return cc;
2013 }
2014
2015 static int kvm_s390_apxa_installed(void)
2016 {
2017         u8 config[128];
2018         int cc;
2019
2020         if (test_facility(12)) {
2021                 cc = kvm_s390_query_ap_config(config);
2022
2023                 if (cc)
2024                         pr_err("PQAP(QCI) failed with cc=%d\n", cc);
2025                 else
2026                         return config[0] & 0x40;
2027         }
2028
2029         return 0;
2030 }
2031
2032 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2033 {
2034         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2035
2036         if (kvm_s390_apxa_installed())
2037                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2038         else
2039                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2040 }
2041
2042 static u64 kvm_s390_get_initial_cpuid(void)
2043 {
2044         struct cpuid cpuid;
2045
2046         get_cpu_id(&cpuid);
2047         cpuid.version = 0xff;
2048         return *((u64 *) &cpuid);
2049 }
2050
2051 static void kvm_s390_crypto_init(struct kvm *kvm)
2052 {
2053         if (!test_kvm_facility(kvm, 76))
2054                 return;
2055
2056         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2057         kvm_s390_set_crycb_format(kvm);
2058
2059         /* Enable AES/DEA protected key functions by default */
2060         kvm->arch.crypto.aes_kw = 1;
2061         kvm->arch.crypto.dea_kw = 1;
2062         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2063                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2064         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2065                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2066 }
2067
2068 static void sca_dispose(struct kvm *kvm)
2069 {
2070         if (kvm->arch.use_esca)
2071                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2072         else
2073                 free_page((unsigned long)(kvm->arch.sca));
2074         kvm->arch.sca = NULL;
2075 }
2076
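/*
 * Set up the architecture specific parts of a new VM: the SCA (staggered
 * within its page, see below), the debug feature, facility lists and CPU
 * model, crypto state, interrupt lists and - unless this is a ucontrol
 * VM - the guest address space (gmap) with its memory limit.
 */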
2077 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2078 {
2079         gfp_t alloc_flags = GFP_KERNEL;
2080         int i, rc;
2081         char debug_name[16];
2082         static unsigned long sca_offset;
2083
2084         rc = -EINVAL;
2085 #ifdef CONFIG_KVM_S390_UCONTROL
2086         if (type & ~KVM_VM_S390_UCONTROL)
2087                 goto out_err;
2088         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2089                 goto out_err;
2090 #else
2091         if (type)
2092                 goto out_err;
2093 #endif
2094
2095         rc = s390_enable_sie();
2096         if (rc)
2097                 goto out_err;
2098
2099         rc = -ENOMEM;
2100
2101         if (!sclp.has_64bscao)
2102                 alloc_flags |= GFP_DMA;
2103         rwlock_init(&kvm->arch.sca_lock);
2104         /* start with basic SCA */
2105         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2106         if (!kvm->arch.sca)
2107                 goto out_err;
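        /*
         * Stagger the basic SCAs of different VMs at 16-byte steps within
         * the freshly allocated page, presumably to spread cache usage;
         * sca_offset is global and advances under kvm_lock.
         */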
2108         spin_lock(&kvm_lock);
2109         sca_offset += 16;
2110         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2111                 sca_offset = 0;
2112         kvm->arch.sca = (struct bsca_block *)
2113                         ((char *) kvm->arch.sca + sca_offset);
2114         spin_unlock(&kvm_lock);
2115
2116         sprintf(debug_name, "kvm-%u", current->pid);
2117
2118         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2119         if (!kvm->arch.dbf)
2120                 goto out_err;
2121
2122         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2123         kvm->arch.sie_page2 =
2124              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2125         if (!kvm->arch.sie_page2)
2126                 goto out_err;
2127
2128         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2129
2130         for (i = 0; i < kvm_s390_fac_size(); i++) {
2131                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2132                                               (kvm_s390_fac_base[i] |
2133                                                kvm_s390_fac_ext[i]);
2134                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2135                                               kvm_s390_fac_base[i];
2136         }
2137
2138         /* we are always in czam mode - even on pre-z14 machines */
2139         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2140         set_kvm_facility(kvm->arch.model.fac_list, 138);
2141         /* we emulate STHYI in kvm */
2142         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2143         set_kvm_facility(kvm->arch.model.fac_list, 74);
2144         if (MACHINE_HAS_TLB_GUEST) {
2145                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2146                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2147         }
2148
2149         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2150         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2151
2152         kvm_s390_crypto_init(kvm);
2153
2154         mutex_init(&kvm->arch.float_int.ais_lock);
2155         spin_lock_init(&kvm->arch.float_int.lock);
2156         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2157                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2158         init_waitqueue_head(&kvm->arch.ipte_wq);
2159         mutex_init(&kvm->arch.ipte_mutex);
2160
2161         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2162         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2163
2164         if (type & KVM_VM_S390_UCONTROL) {
2165                 kvm->arch.gmap = NULL;
2166                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2167         } else {
2168                 if (sclp.hamax == U64_MAX)
2169                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2170                 else
2171                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2172                                                     sclp.hamax + 1);
2173                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2174                 if (!kvm->arch.gmap)
2175                         goto out_err;
2176                 kvm->arch.gmap->private = kvm;
2177                 kvm->arch.gmap->pfault_enabled = 0;
2178         }
2179
2180         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2181         kvm->arch.use_skf = sclp.has_skey;
2182         spin_lock_init(&kvm->arch.start_stop_lock);
2183         kvm_s390_vsie_init(kvm);
2184         kvm_s390_gisa_init(kvm);
2185         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2186
2187         return 0;
2188 out_err:
2189         free_page((unsigned long)kvm->arch.sie_page2);
2190         debug_unregister(kvm->arch.dbf);
2191         sca_dispose(kvm);
2192         KVM_EVENT(3, "creation of vm failed: %d", rc);
2193         return rc;
2194 }
2195
2196 bool kvm_arch_has_vcpu_debugfs(void)
2197 {
2198         return false;
2199 }
2200
2201 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2202 {
2203         return 0;
2204 }
2205
2206 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2207 {
2208         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2209         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2210         kvm_s390_clear_local_irqs(vcpu);
2211         kvm_clear_async_pf_completion_queue(vcpu);
2212         if (kvm_is_ucontrol(vcpu->kvm))
2213                 gmap_remove(vcpu->arch.gmap);
2214         else
2215                 sca_del_vcpu(vcpu);
2217
2218         if (vcpu->kvm->arch.use_cmma)
2219                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2220         free_page((unsigned long)(vcpu->arch.sie_block));
2221
2222         kvm_vcpu_uninit(vcpu);
2223         kmem_cache_free(kvm_vcpu_cache, vcpu);
2224 }
2225
2226 static void kvm_free_vcpus(struct kvm *kvm)
2227 {
2228         unsigned int i;
2229         struct kvm_vcpu *vcpu;
2230
2231         kvm_for_each_vcpu(i, vcpu, kvm)
2232                 kvm_arch_vcpu_destroy(vcpu);
2233
2234         mutex_lock(&kvm->lock);
2235         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2236                 kvm->vcpus[i] = NULL;
2237
2238         atomic_set(&kvm->online_vcpus, 0);
2239         mutex_unlock(&kvm->lock);
2240 }
2241
2242 void kvm_arch_destroy_vm(struct kvm *kvm)
2243 {
2244         kvm_free_vcpus(kvm);
2245         sca_dispose(kvm);
2246         debug_unregister(kvm->arch.dbf);
2247         kvm_s390_gisa_destroy(kvm);
2248         free_page((unsigned long)kvm->arch.sie_page2);
2249         if (!kvm_is_ucontrol(kvm))
2250                 gmap_remove(kvm->arch.gmap);
2251         kvm_s390_destroy_adapters(kvm);
2252         kvm_s390_clear_float_irqs(kvm);
2253         kvm_s390_vsie_destroy(kvm);
2254         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2255 }
2256
2257 /* Section: vcpu related */
2258 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2259 {
2260         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2261         if (!vcpu->arch.gmap)
2262                 return -ENOMEM;
2263         vcpu->arch.gmap->private = vcpu->kvm;
2264
2265         return 0;
2266 }
2267
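/*
 * SCA bookkeeping, for both the basic and the extended format: a vcpu's
 * entry is valid if its bit is set in the mcn mask and its sda field
 * points at the vcpu's SIE block; everything is done under the sca_lock.
 */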
2268 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2269 {
2270         if (!kvm_s390_use_sca_entries())
2271                 return;
2272         read_lock(&vcpu->kvm->arch.sca_lock);
2273         if (vcpu->kvm->arch.use_esca) {
2274                 struct esca_block *sca = vcpu->kvm->arch.sca;
2275
2276                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2277                 sca->cpu[vcpu->vcpu_id].sda = 0;
2278         } else {
2279                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2280
2281                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2282                 sca->cpu[vcpu->vcpu_id].sda = 0;
2283         }
2284         read_unlock(&vcpu->kvm->arch.sca_lock);
2285 }
2286
2287 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2288 {
2289         if (!kvm_s390_use_sca_entries()) {
2290                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2291
2292                 /* we still need the basic sca for the ipte control */
2293                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2294                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2295                 return;
2296         }
2297         read_lock(&vcpu->kvm->arch.sca_lock);
2298         if (vcpu->kvm->arch.use_esca) {
2299                 struct esca_block *sca = vcpu->kvm->arch.sca;
2300
2301                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2302                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2303                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2304                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2305                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2306         } else {
2307                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2308
2309                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2310                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2311                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2312                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2313         }
2314         read_unlock(&vcpu->kvm->arch.sca_lock);
2315 }
2316
2317 /* Basic SCA to Extended SCA data copy routines */
2318 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2319 {
2320         d->sda = s->sda;
2321         d->sigp_ctrl.c = s->sigp_ctrl.c;
2322         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2323 }
2324
2325 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2326 {
2327         int i;
2328
2329         d->ipte_control = s->ipte_control;
2330         d->mcn[0] = s->mcn;
2331         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2332                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2333 }
2334
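/*
 * Replace the basic SCA with an extended one at runtime: allocate a
 * zeroed ESCA, copy the existing entries, and repoint every vcpu's
 * scaoh/scaol (with ECB2_ESCA set) while all vcpus are blocked out of
 * SIE and the sca_lock is held for writing; the old BSCA is freed last.
 */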
2335 static int sca_switch_to_extended(struct kvm *kvm)
2336 {
2337         struct bsca_block *old_sca = kvm->arch.sca;
2338         struct esca_block *new_sca;
2339         struct kvm_vcpu *vcpu;
2340         unsigned int vcpu_idx;
2341         u32 scaol, scaoh;
2342
2343         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2344         if (!new_sca)
2345                 return -ENOMEM;
2346
2347         scaoh = (u32)((u64)(new_sca) >> 32);
2348         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2349
2350         kvm_s390_vcpu_block_all(kvm);
2351         write_lock(&kvm->arch.sca_lock);
2352
2353         sca_copy_b_to_e(new_sca, old_sca);
2354
2355         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2356                 vcpu->arch.sie_block->scaoh = scaoh;
2357                 vcpu->arch.sie_block->scaol = scaol;
2358                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2359         }
2360         kvm->arch.sca = new_sca;
2361         kvm->arch.use_esca = 1;
2362
2363         write_unlock(&kvm->arch.sca_lock);
2364         kvm_s390_vcpu_unblock_all(kvm);
2365
2366         free_page((unsigned long)old_sca);
2367
2368         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2369                  old_sca, kvm->arch.sca);
2370         return 0;
2371 }
2372
2373 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2374 {
2375         int rc;
2376
2377         if (!kvm_s390_use_sca_entries())
2378                 return id < KVM_MAX_VCPUS;
2382         if (id < KVM_S390_BSCA_CPU_SLOTS)
2383                 return true;
2384         if (!sclp.has_esca || !sclp.has_64bscao)
2385                 return false;
2386
2387         mutex_lock(&kvm->lock);
2388         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2389         mutex_unlock(&kvm->lock);
2390
2391         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2392 }
2393
2394 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2395 {
2396         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2397         kvm_clear_async_pf_completion_queue(vcpu);
2398         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2399                                     KVM_SYNC_GPRS |
2400                                     KVM_SYNC_ACRS |
2401                                     KVM_SYNC_CRS |
2402                                     KVM_SYNC_ARCH0 |
2403                                     KVM_SYNC_PFAULT;
2404         kvm_s390_set_prefix(vcpu, 0);
2405         if (test_kvm_facility(vcpu->kvm, 64))
2406                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2407         if (test_kvm_facility(vcpu->kvm, 82))
2408                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2409         if (test_kvm_facility(vcpu->kvm, 133))
2410                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2411         if (test_kvm_facility(vcpu->kvm, 156))
2412                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2413         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2414          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2415          */
2416         if (MACHINE_HAS_VX)
2417                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2418         else
2419                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2420
2421         if (kvm_is_ucontrol(vcpu->kvm))
2422                 return __kvm_ucontrol_vcpu_init(vcpu);
2423
2424         return 0;
2425 }
2426
2427 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2428 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2429 {
2430         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2431         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2432         vcpu->arch.cputm_start = get_tod_clock_fast();
2433         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2434 }
2435
2436 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2437 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2438 {
2439         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2440         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2441         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2442         vcpu->arch.cputm_start = 0;
2443         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2444 }
2445
2446 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2447 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2448 {
2449         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2450         vcpu->arch.cputm_enabled = true;
2451         __start_cpu_timer_accounting(vcpu);
2452 }
2453
2454 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2455 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2456 {
2457         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2458         __stop_cpu_timer_accounting(vcpu);
2459         vcpu->arch.cputm_enabled = false;
2460 }
2461
2462 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2463 {
2464         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2465         __enable_cpu_timer_accounting(vcpu);
2466         preempt_enable();
2467 }
2468
2469 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2470 {
2471         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2472         __disable_cpu_timer_accounting(vcpu);
2473         preempt_enable();
2474 }
2475
2476 /* set the cpu timer - may only be called from the VCPU thread itself */
2477 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2478 {
2479         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2480         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2481         if (vcpu->arch.cputm_enabled)
2482                 vcpu->arch.cputm_start = get_tod_clock_fast();
2483         vcpu->arch.sie_block->cputm = cputm;
2484         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2485         preempt_enable();
2486 }
2487
2488 /* update and get the cpu timer - can also be called from other VCPU threads */
2489 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2490 {
2491         unsigned int seq;
2492         __u64 value;
2493
2494         if (unlikely(!vcpu->arch.cputm_enabled))
2495                 return vcpu->arch.sie_block->cputm;
2496
2497         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2498         do {
2499                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2500                 /*
2501                  * If the writer would ever execute a read in the critical
2502                  * section, e.g. in irq context, we have a deadlock.
2503                  */
2504                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2505                 value = vcpu->arch.sie_block->cputm;
2506                 /* if cputm_start is 0, accounting is being started/stopped */
2507                 if (likely(vcpu->arch.cputm_start))
2508                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2509         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2510         preempt_enable();
2511         return value;
2512 }
2513
2514 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2515 {
2517         gmap_enable(vcpu->arch.enabled_gmap);
2518         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2519         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2520                 __start_cpu_timer_accounting(vcpu);
2521         vcpu->cpu = cpu;
2522 }
2523
2524 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2525 {
2526         vcpu->cpu = -1;
2527         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2528                 __stop_cpu_timer_accounting(vcpu);
2529         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2530         vcpu->arch.enabled_gmap = gmap_get_enabled();
2531         gmap_disable(vcpu->arch.enabled_gmap);
2533 }
2534
2535 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2536 {
2537         /* this equals initial cpu reset in POP, but we don't switch to ESA */
2538         vcpu->arch.sie_block->gpsw.mask = 0UL;
2539         vcpu->arch.sie_block->gpsw.addr = 0UL;
2540         kvm_s390_set_prefix(vcpu, 0);
2541         kvm_s390_set_cpu_timer(vcpu, 0);
2542         vcpu->arch.sie_block->ckc       = 0UL;
2543         vcpu->arch.sie_block->todpr     = 0;
2544         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2545         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2546                                         CR0_INTERRUPT_KEY_SUBMASK |
2547                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2548         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2549                                         CR14_UNUSED_33 |
2550                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2551         /* make sure the new fpc will be lazily loaded */
2552         save_fpu_regs();
2553         current->thread.fpu.fpc = 0;
2554         vcpu->arch.sie_block->gbea = 1;
2555         vcpu->arch.sie_block->pp = 0;
2556         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2557         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2558         kvm_clear_async_pf_completion_queue(vcpu);
2559         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2560                 kvm_s390_vcpu_stop(vcpu);
2561         kvm_s390_clear_local_irqs(vcpu);
2562 }
2563
2564 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2565 {
2566         mutex_lock(&vcpu->kvm->lock);
2567         preempt_disable();
2568         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2569         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2570         preempt_enable();
2571         mutex_unlock(&vcpu->kvm->lock);
2572         if (!kvm_is_ucontrol(vcpu->kvm)) {
2573                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2574                 sca_add_vcpu(vcpu);
2575         }
2576         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2577                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2578         /* make vcpu_load load the right gmap on the first trigger */
2579         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2580 }
2581
2582 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2583 {
2584         if (!test_kvm_facility(vcpu->kvm, 76))
2585                 return;
2586
2587         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2588
2589         if (vcpu->kvm->arch.crypto.aes_kw)
2590                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2591         if (vcpu->kvm->arch.crypto.dea_kw)
2592                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2593
2594         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2595 }
2596
2597 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2598 {
2599         free_page(vcpu->arch.sie_block->cbrlo);
2600         vcpu->arch.sie_block->cbrlo = 0;
2601 }
2602
2603 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2604 {
2605         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2606         if (!vcpu->arch.sie_block->cbrlo)
2607                 return -ENOMEM;
2608         return 0;
2609 }
2610
2611 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2612 {
2613         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2614
2615         vcpu->arch.sie_block->ibc = model->ibc;
2616         if (test_kvm_facility(vcpu->kvm, 7))
2617                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2618 }
2619
2620 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2621 {
2622         int rc = 0;
2623
2624         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2625                                                     CPUSTAT_SM |
2626                                                     CPUSTAT_STOPPED);
2627
2628         if (test_kvm_facility(vcpu->kvm, 78))
2629                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2630         else if (test_kvm_facility(vcpu->kvm, 8))
2631                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2632
2633         kvm_s390_vcpu_setup_model(vcpu);
2634
2635         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2636         if (MACHINE_HAS_ESOP)
2637                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2638         if (test_kvm_facility(vcpu->kvm, 9))
2639                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2640         if (test_kvm_facility(vcpu->kvm, 73))
2641                 vcpu->arch.sie_block->ecb |= ECB_TE;
2642
2643         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2644                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2645         if (test_kvm_facility(vcpu->kvm, 130))
2646                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2647         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2648         if (sclp.has_cei)
2649                 vcpu->arch.sie_block->eca |= ECA_CEI;
2650         if (sclp.has_ib)
2651                 vcpu->arch.sie_block->eca |= ECA_IB;
2652         if (sclp.has_siif)
2653                 vcpu->arch.sie_block->eca |= ECA_SII;
2654         if (sclp.has_sigpif)
2655                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2656         if (test_kvm_facility(vcpu->kvm, 129)) {
2657                 vcpu->arch.sie_block->eca |= ECA_VX;
2658                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2659         }
2660         if (test_kvm_facility(vcpu->kvm, 139))
2661                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2662         if (test_kvm_facility(vcpu->kvm, 156))
2663                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2664         if (vcpu->arch.sie_block->gd) {
2665                 vcpu->arch.sie_block->eca |= ECA_AIV;
2666                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2667                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2668         }
2669         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2670                                         | SDNXC;
2671         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2672
2673         if (sclp.has_kss)
2674                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2675         else
2676                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2677
2678         if (vcpu->kvm->arch.use_cmma) {
2679                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2680                 if (rc)
2681                         return rc;
2682         }
2683         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2684         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2685
2686         kvm_s390_vcpu_crypto_setup(vcpu);
2687
2688         return rc;
2689 }
2690
2691 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2692                                       unsigned int id)
2693 {
2694         struct kvm_vcpu *vcpu;
2695         struct sie_page *sie_page;
2696         int rc = -EINVAL;
2697
2698         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2699                 goto out;
2700
2701         rc = -ENOMEM;
2702
2703         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2704         if (!vcpu)
2705                 goto out;
2706
2707         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2708         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2709         if (!sie_page)
2710                 goto out_free_cpu;
2711
2712         vcpu->arch.sie_block = &sie_page->sie_block;
2713         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2714
2715         /* the real guest size will always be smaller than msl */
2716         vcpu->arch.sie_block->mso = 0;
2717         vcpu->arch.sie_block->msl = sclp.hamax;
2718
2719         vcpu->arch.sie_block->icpua = id;
2720         spin_lock_init(&vcpu->arch.local_int.lock);
2721         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2722         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2723                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2724         seqcount_init(&vcpu->arch.cputm_seqcount);
2725
2726         rc = kvm_vcpu_init(vcpu, kvm, id);
2727         if (rc)
2728                 goto out_free_sie_block;
2729         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2730                  vcpu->arch.sie_block);
2731         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2732
2733         return vcpu;
2734 out_free_sie_block:
2735         free_page((unsigned long)(vcpu->arch.sie_block));
2736 out_free_cpu:
2737         kmem_cache_free(kvm_vcpu_cache, vcpu);
2738 out:
2739         return ERR_PTR(rc);
2740 }
2741
2742 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2743 {
2744         return kvm_s390_vcpu_has_irq(vcpu, 0);
2745 }
2746
2747 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2748 {
2749         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2750 }
2751
2752 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2753 {
2754         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2755         exit_sie(vcpu);
2756 }
2757
2758 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2759 {
2760         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2761 }
2762
2763 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2764 {
2765         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2766         exit_sie(vcpu);
2767 }
2768
2769 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2770 {
2771         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2772 }
2773
2774 /*
2775  * Kick a guest cpu out of SIE and wait until SIE is not running.
2776  * If the CPU is not running (e.g. waiting as idle) the function will
2777  * return immediately.
 */
2778 void exit_sie(struct kvm_vcpu *vcpu)
2779 {
2780         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2781         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2782                 cpu_relax();
2783 }
2784
2785 /* Kick a guest cpu out of SIE to process a request synchronously */
2786 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2787 {
2788         kvm_make_request(req, vcpu);
2789         kvm_s390_vcpu_request(vcpu);
2790 }
2791
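/*
 * gmap invalidation notifier: if a range that might contain a vcpu's
 * prefix area (two pages, always below 2 GB) is invalidated, ask that
 * vcpu to reload its MMU mappings before re-entering SIE.
 */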
2792 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2793                               unsigned long end)
2794 {
2795         struct kvm *kvm = gmap->private;
2796         struct kvm_vcpu *vcpu;
2797         unsigned long prefix;
2798         int i;
2799
2800         if (gmap_is_shadow(gmap))
2801                 return;
2802         if (start >= 1UL << 31)
2803                 /* We are only interested in prefix pages */
2804                 return;
2805         kvm_for_each_vcpu(i, vcpu, kvm) {
2806                 /* match against both prefix pages */
2807                 prefix = kvm_s390_get_prefix(vcpu);
2808                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2809                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2810                                    start, end);
2811                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2812                 }
2813         }
2814 }
2815
2816 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2817 {
2818         /* kvm common code refers to this, but never calls it */
2819         BUG();
2820         return 0;
2821 }
2822
2823 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2824                                            struct kvm_one_reg *reg)
2825 {
2826         int r = -EINVAL;
2827
2828         switch (reg->id) {
2829         case KVM_REG_S390_TODPR:
2830                 r = put_user(vcpu->arch.sie_block->todpr,
2831                              (u32 __user *)reg->addr);
2832                 break;
2833         case KVM_REG_S390_EPOCHDIFF:
2834                 r = put_user(vcpu->arch.sie_block->epoch,
2835                              (u64 __user *)reg->addr);
2836                 break;
2837         case KVM_REG_S390_CPU_TIMER:
2838                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2839                              (u64 __user *)reg->addr);
2840                 break;
2841         case KVM_REG_S390_CLOCK_COMP:
2842                 r = put_user(vcpu->arch.sie_block->ckc,
2843                              (u64 __user *)reg->addr);
2844                 break;
2845         case KVM_REG_S390_PFTOKEN:
2846                 r = put_user(vcpu->arch.pfault_token,
2847                              (u64 __user *)reg->addr);
2848                 break;
2849         case KVM_REG_S390_PFCOMPARE:
2850                 r = put_user(vcpu->arch.pfault_compare,
2851                              (u64 __user *)reg->addr);
2852                 break;
2853         case KVM_REG_S390_PFSELECT:
2854                 r = put_user(vcpu->arch.pfault_select,
2855                              (u64 __user *)reg->addr);
2856                 break;
2857         case KVM_REG_S390_PP:
2858                 r = put_user(vcpu->arch.sie_block->pp,
2859                              (u64 __user *)reg->addr);
2860                 break;
2861         case KVM_REG_S390_GBEA:
2862                 r = put_user(vcpu->arch.sie_block->gbea,
2863                              (u64 __user *)reg->addr);
2864                 break;
2865         default:
2866                 break;
2867         }
2868
2869         return r;
2870 }
2871
2872 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2873                                            struct kvm_one_reg *reg)
2874 {
2875         int r = -EINVAL;
2876         __u64 val;
2877
2878         switch (reg->id) {
2879         case KVM_REG_S390_TODPR:
2880                 r = get_user(vcpu->arch.sie_block->todpr,
2881                              (u32 __user *)reg->addr);
2882                 break;
2883         case KVM_REG_S390_EPOCHDIFF:
2884                 r = get_user(vcpu->arch.sie_block->epoch,
2885                              (u64 __user *)reg->addr);
2886                 break;
2887         case KVM_REG_S390_CPU_TIMER:
2888                 r = get_user(val, (u64 __user *)reg->addr);
2889                 if (!r)
2890                         kvm_s390_set_cpu_timer(vcpu, val);
2891                 break;
2892         case KVM_REG_S390_CLOCK_COMP:
2893                 r = get_user(vcpu->arch.sie_block->ckc,
2894                              (u64 __user *)reg->addr);
2895                 break;
2896         case KVM_REG_S390_PFTOKEN:
2897                 r = get_user(vcpu->arch.pfault_token,
2898                              (u64 __user *)reg->addr);
2899                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2900                         kvm_clear_async_pf_completion_queue(vcpu);
2901                 break;
2902         case KVM_REG_S390_PFCOMPARE:
2903                 r = get_user(vcpu->arch.pfault_compare,
2904                              (u64 __user *)reg->addr);
2905                 break;
2906         case KVM_REG_S390_PFSELECT:
2907                 r = get_user(vcpu->arch.pfault_select,
2908                              (u64 __user *)reg->addr);
2909                 break;
2910         case KVM_REG_S390_PP:
2911                 r = get_user(vcpu->arch.sie_block->pp,
2912                              (u64 __user *)reg->addr);
2913                 break;
2914         case KVM_REG_S390_GBEA:
2915                 r = get_user(vcpu->arch.sie_block->gbea,
2916                              (u64 __user *)reg->addr);
2917                 break;
2918         default:
2919                 break;
2920         }
2921
2922         return r;
2923 }
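
/*
 * Illustrative userspace sketch (not part of this file): driving the
 * ONE_REG interface handled above. Assumes vcpu_fd is a vCPU fd obtained
 * via KVM_CREATE_VCPU; error handling is omitted for brevity.
 */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int set_cpu_timer(int vcpu_fd, uint64_t value)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_CPU_TIMER,
		.addr = (uint64_t)(uintptr_t)&value,	/* read via get_user() */
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}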
2924
2925 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2926 {
2927         kvm_s390_vcpu_initial_reset(vcpu);
2928         return 0;
2929 }
2930
2931 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2932 {
2933         vcpu_load(vcpu);
2934         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2935         vcpu_put(vcpu);
2936         return 0;
2937 }
2938
2939 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2940 {
2941         vcpu_load(vcpu);
2942         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2943         vcpu_put(vcpu);
2944         return 0;
2945 }
2946
2947 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2948                                   struct kvm_sregs *sregs)
2949 {
2950         vcpu_load(vcpu);
2951
2952         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2953         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2954
2955         vcpu_put(vcpu);
2956         return 0;
2957 }
2958
2959 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2960                                   struct kvm_sregs *sregs)
2961 {
2962         vcpu_load(vcpu);
2963
2964         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2965         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2966
2967         vcpu_put(vcpu);
2968         return 0;
2969 }
2970
2971 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2972 {
2973         int ret = 0;
2974
2975         vcpu_load(vcpu);
2976
2977         if (test_fp_ctl(fpu->fpc)) {
2978                 ret = -EINVAL;
2979                 goto out;
2980         }
2981         vcpu->run->s.regs.fpc = fpu->fpc;
2982         if (MACHINE_HAS_VX)
2983                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2984                                  (freg_t *) fpu->fprs);
2985         else
2986                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2987
2988 out:
2989         vcpu_put(vcpu);
2990         return ret;
2991 }
2992
2993 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2994 {
2995         vcpu_load(vcpu);
2996
2997         /* make sure we have the latest values */
2998         save_fpu_regs();
2999         if (MACHINE_HAS_VX)
3000                 convert_vx_to_fp((freg_t *) fpu->fprs,
3001                                  (__vector128 *) vcpu->run->s.regs.vrs);
3002         else
3003                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3004         fpu->fpc = vcpu->run->s.regs.fpc;
3005
3006         vcpu_put(vcpu);
3007         return 0;
3008 }
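
/*
 * Illustrative userspace sketch (not part of this file): fetching the
 * guest FP state through the handler above. On machines with vector
 * support the 16 fprs are extracted from the vector registers first.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int get_guest_fpu(int vcpu_fd, struct kvm_fpu *fpu)
{
	/* fills in fpu->fpc and fpu->fprs[0..15] */
	return ioctl(vcpu_fd, KVM_GET_FPU, fpu);
}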
3009
3010 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3011 {
3012         int rc = 0;
3013
3014         if (!is_vcpu_stopped(vcpu))
3015                 rc = -EBUSY;
3016         else {
3017                 vcpu->run->psw_mask = psw.mask;
3018                 vcpu->run->psw_addr = psw.addr;
3019         }
3020         return rc;
3021 }
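
/*
 * Illustrative userspace sketch (not part of this file): setting the
 * initial PSW. The mask/addr values are caller-chosen; the ioctl is
 * only accepted while the vcpu is stopped (see -EBUSY above).
 */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int set_initial_psw(int vcpu_fd, uint64_t mask, uint64_t addr)
{
	struct kvm_s390_psw psw = { .mask = mask, .addr = addr };

	return ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw);
}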
3022
3023 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3024                                   struct kvm_translation *tr)
3025 {
3026         return -EINVAL; /* not implemented yet */
3027 }
3028
3029 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3030                               KVM_GUESTDBG_USE_HW_BP | \
3031                               KVM_GUESTDBG_ENABLE)
3032
3033 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3034                                         struct kvm_guest_debug *dbg)
3035 {
3036         int rc = 0;
3037
3038         vcpu_load(vcpu);
3039
3040         vcpu->guest_debug = 0;
3041         kvm_s390_clear_bp_data(vcpu);
3042
3043         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3044                 rc = -EINVAL;
3045                 goto out;
3046         }
3047         if (!sclp.has_gpere) {
3048                 rc = -EINVAL;
3049                 goto out;
3050         }
3051
3052         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3053                 vcpu->guest_debug = dbg->control;
3054                 /* enforce guest PER */
3055                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3056
3057                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3058                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3059         } else {
3060                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3061                 vcpu->arch.guestdbg.last_bp = 0;
3062         }
3063
3064         if (rc) {
3065                 vcpu->guest_debug = 0;
3066                 kvm_s390_clear_bp_data(vcpu);
3067                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3068         }
3069
3070 out:
3071         vcpu_put(vcpu);
3072         return rc;
3073 }
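
/*
 * Illustrative userspace sketch (not part of this file): enabling
 * single-stepping with the flags accepted by VALID_GUESTDBG_FLAGS above.
 * Requires a machine with the guest-PER facility (sclp.has_gpere).
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_singlestep(int vcpu_fd)
{
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	};

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}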
3074
3075 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3076                                     struct kvm_mp_state *mp_state)
3077 {
3078         int ret;
3079
3080         vcpu_load(vcpu);
3081
3082         /* CHECK_STOP and LOAD are not supported yet */
3083         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3084                                       KVM_MP_STATE_OPERATING;
3085
3086         vcpu_put(vcpu);
3087         return ret;
3088 }
3089
3090 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3091                                     struct kvm_mp_state *mp_state)
3092 {
3093         int rc = 0;
3094
3095         vcpu_load(vcpu);
3096
3097         /* user space knows about this interface - let it control the state */
3098         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3099
3100         switch (mp_state->mp_state) {
3101         case KVM_MP_STATE_STOPPED:
3102                 kvm_s390_vcpu_stop(vcpu);
3103                 break;
3104         case KVM_MP_STATE_OPERATING:
3105                 kvm_s390_vcpu_start(vcpu);
3106                 break;
3107         case KVM_MP_STATE_LOAD:
3108         case KVM_MP_STATE_CHECK_STOP:
3109                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3110         default:
3111                 rc = -ENXIO;
3112         }
3113
3114         vcpu_put(vcpu);
3115         return rc;
3116 }
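
/*
 * Illustrative userspace sketch (not part of this file): stopping a vcpu
 * through the mp_state interface. Note that the first KVM_SET_MP_STATE
 * call switches the VM to user-controlled cpu state handling.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int stop_vcpu(int vcpu_fd)
{
	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };

	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
}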
3117
3118 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3119 {
3120         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3121 }
3122
3123 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3124 {
3125 retry:
3126         kvm_s390_vcpu_request_handled(vcpu);
3127         if (!kvm_request_pending(vcpu))
3128                 return 0;
3129         /*
3130          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3131          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3132          * This ensures that the ipte instruction for this request has
3133          * already finished. We might race against a second unmapper that
3134          * wants to set the blocking bit. Lets just retry the request loop.
3135          */
3136         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3137                 int rc;
3138                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3139                                           kvm_s390_get_prefix(vcpu),
3140                                           PAGE_SIZE * 2, PROT_WRITE);
3141                 if (rc) {
3142                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3143                         return rc;
3144                 }
3145                 goto retry;
3146         }
3147
3148         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3149                 vcpu->arch.sie_block->ihcpu = 0xffff;
3150                 goto retry;
3151         }
3152
3153         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3154                 if (!ibs_enabled(vcpu)) {
3155                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3156                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3157                 }
3158                 goto retry;
3159         }
3160
3161         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3162                 if (ibs_enabled(vcpu)) {
3163                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3164                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3165                 }
3166                 goto retry;
3167         }
3168
3169         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3170                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3171                 goto retry;
3172         }
3173
3174         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3175                 /*
3176                  * Disable CMM virtualization; we will emulate the ESSA
3177                  * instruction manually, in order to provide the additional
3178                  * functionality needed for live migration.
3179                  */
3180                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3181                 goto retry;
3182         }
3183
3184         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3185                 /*
3186                  * Re-enable CMM virtualization if CMMA is available and
3187                  * CMM has been used.
3188                  */
3189                 if ((vcpu->kvm->arch.use_cmma) &&
3190                     (vcpu->kvm->mm->context.uses_cmm))
3191                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3192                 goto retry;
3193         }
3194
3195         /* nothing to do, just clear the request */
3196         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3197
3198         return 0;
3199 }
3200
3201 void kvm_s390_set_tod_clock(struct kvm *kvm,
3202                             const struct kvm_s390_vm_tod_clock *gtod)
3203 {
3204         struct kvm_vcpu *vcpu;
3205         struct kvm_s390_tod_clock_ext htod;
3206         int i;
3207
3208         mutex_lock(&kvm->lock);
3209         preempt_disable();
3210
3211         get_tod_clock_ext((char *)&htod);
3212
3213         kvm->arch.epoch = gtod->tod - htod.tod;
3214         kvm->arch.epdx = 0;
3215         if (test_kvm_facility(kvm, 139)) {
3216                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3217                 if (kvm->arch.epoch > gtod->tod)
3218                         kvm->arch.epdx -= 1;
3219         }
3220
3221         kvm_s390_vcpu_block_all(kvm);
3222         kvm_for_each_vcpu(i, vcpu, kvm) {
3223                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3224                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3225         }
3226
3227         kvm_s390_vcpu_unblock_all(kvm);
3228         preempt_enable();
3229         mutex_unlock(&kvm->lock);
3230 }
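
/*
 * Hedged sketch of the epoch arithmetic above: the guest TOD is
 * host TOD + epoch, so epoch = gtod->tod - htod.tod. With the
 * multiple-epoch facility (139) an 8-bit epoch index extends the clock,
 * and a borrow must be propagated when the 64-bit subtraction wraps.
 */
#include <stdint.h>

static void compute_epoch(uint64_t gtod_tod, uint8_t gtod_idx,
			  uint64_t htod_tod, uint8_t htod_idx,
			  uint64_t *epoch, uint8_t *epdx)
{
	*epoch = gtod_tod - htod_tod;
	*epdx  = gtod_idx - htod_idx;
	if (*epoch > gtod_tod)		/* unsigned wrap -> borrow */
		*epdx -= 1;
}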
3231
3232 /**
3233  * kvm_arch_fault_in_page - fault-in guest page if necessary
3234  * @vcpu: The corresponding virtual cpu
3235  * @gpa: Guest physical address
3236  * @writable: Whether the page should be writable or not
3237  *
3238  * Make sure that a guest page has been faulted-in on the host.
3239  *
3240  * Return: Zero on success, negative error code otherwise.
3241  */
3242 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3243 {
3244         return gmap_fault(vcpu->arch.gmap, gpa,
3245                           writable ? FAULT_FLAG_WRITE : 0);
3246 }
3247
3248 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3249                                       unsigned long token)
3250 {
3251         struct kvm_s390_interrupt inti;
3252         struct kvm_s390_irq irq;
3253
3254         if (start_token) {
3255                 irq.u.ext.ext_params2 = token;
3256                 irq.type = KVM_S390_INT_PFAULT_INIT;
3257                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3258         } else {
3259                 inti.type = KVM_S390_INT_PFAULT_DONE;
3260                 inti.parm64 = token;
3261                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3262         }
3263 }
3264
3265 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3266                                      struct kvm_async_pf *work)
3267 {
3268         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3269         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3270 }
3271
3272 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3273                                  struct kvm_async_pf *work)
3274 {
3275         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3276         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3277 }
3278
3279 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3280                                struct kvm_async_pf *work)
3281 {
3282         /* s390 will always inject the page directly */
3283 }
3284
3285 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3286 {
3287         /*
3288          * s390 will always inject the page directly,
3289          * but we still want check_async_completion to clean up
3290          */
3291         return true;
3292 }
3293
3294 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3295 {
3296         hva_t hva;
3297         struct kvm_arch_async_pf arch;
3298         int rc;
3299
3300         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3301                 return 0;
3302         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3303             vcpu->arch.pfault_compare)
3304                 return 0;
3305         if (psw_extint_disabled(vcpu))
3306                 return 0;
3307         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3308                 return 0;
3309         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3310                 return 0;
3311         if (!vcpu->arch.gmap->pfault_enabled)
3312                 return 0;
3313
3314         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3315         hva += current->thread.gmap_addr & ~PAGE_MASK;
3316         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3317                 return 0;
3318
3319         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3320         return rc;
3321 }
3322
3323 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3324 {
3325         int rc, cpuflags;
3326
3327         /*
3328          * On s390, notifications for arriving pages are delivered directly
3329          * to the guest, but the housekeeping for completed pfaults is
3330          * handled outside the worker.
3331          */
3332         kvm_check_async_pf_completion(vcpu);
3333
3334         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3335         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3336
3337         if (need_resched())
3338                 schedule();
3339
3340         if (test_cpu_flag(CIF_MCCK_PENDING))
3341                 s390_handle_mcck();
3342
3343         if (!kvm_is_ucontrol(vcpu->kvm)) {
3344                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3345                 if (rc)
3346                         return rc;
3347         }
3348
3349         rc = kvm_s390_handle_requests(vcpu);
3350         if (rc)
3351                 return rc;
3352
3353         if (guestdbg_enabled(vcpu)) {
3354                 kvm_s390_backup_guest_per_regs(vcpu);
3355                 kvm_s390_patch_guest_per_regs(vcpu);
3356         }
3357
3358         vcpu->arch.sie_block->icptcode = 0;
3359         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3360         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3361         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3362
3363         return 0;
3364 }
3365
3366 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3367 {
3368         struct kvm_s390_pgm_info pgm_info = {
3369                 .code = PGM_ADDRESSING,
3370         };
3371         u8 opcode, ilen;
3372         int rc;
3373
3374         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3375         trace_kvm_s390_sie_fault(vcpu);
3376
3377         /*
3378          * We want to inject an addressing exception, which is defined as a
3379          * suppressing or terminating exception. However, since we came here
3380          * by a DAT access exception, the PSW still points to the faulting
3381          * instruction since DAT exceptions are nullifying. So we've got
3382          * to look up the current opcode to get the length of the instruction
3383          * to be able to forward the PSW.
3384          */
3385         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3386         ilen = insn_length(opcode);
3387         if (rc < 0) {
3388                 return rc;
3389         } else if (rc) {
3390                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3391                  * Forward by arbitrary ilc, injection will take care of
3392                  * nullification if necessary.
3393                  */
3394                 pgm_info = vcpu->arch.pgm;
3395                 ilen = 4;
3396         }
3397         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3398         kvm_s390_forward_psw(vcpu, ilen);
3399         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3400 }
3401
3402 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3403 {
3404         struct mcck_volatile_info *mcck_info;
3405         struct sie_page *sie_page;
3406
3407         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3408                    vcpu->arch.sie_block->icptcode);
3409         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3410
3411         if (guestdbg_enabled(vcpu))
3412                 kvm_s390_restore_guest_per_regs(vcpu);
3413
3414         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3415         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3416
3417         if (exit_reason == -EINTR) {
3418                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3419                 sie_page = container_of(vcpu->arch.sie_block,
3420                                         struct sie_page, sie_block);
3421                 mcck_info = &sie_page->mcck_info;
3422                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3423                 return 0;
3424         }
3425
3426         if (vcpu->arch.sie_block->icptcode > 0) {
3427                 int rc = kvm_handle_sie_intercept(vcpu);
3428
3429                 if (rc != -EOPNOTSUPP)
3430                         return rc;
3431                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3432                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3433                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3434                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3435                 return -EREMOTE;
3436         } else if (exit_reason != -EFAULT) {
3437                 vcpu->stat.exit_null++;
3438                 return 0;
3439         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3440                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3441                 vcpu->run->s390_ucontrol.trans_exc_code =
3442                                                 current->thread.gmap_addr;
3443                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3444                 return -EREMOTE;
3445         } else if (current->thread.gmap_pfault) {
3446                 trace_kvm_s390_major_guest_pfault(vcpu);
3447                 current->thread.gmap_pfault = 0;
3448                 if (kvm_arch_setup_async_pf(vcpu))
3449                         return 0;
3450                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3451         }
3452         return vcpu_post_run_fault_in_sie(vcpu);
3453 }
3454
3455 static int __vcpu_run(struct kvm_vcpu *vcpu)
3456 {
3457         int rc, exit_reason;
3458
3459         /*
3460          * We try to hold kvm->srcu during most of vcpu_run (except when
3461          * running the guest), so that memslots (and other stuff) are protected
3462          */
3463         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3464
3465         do {
3466                 rc = vcpu_pre_run(vcpu);
3467                 if (rc)
3468                         break;
3469
3470                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3471                 /*
3472                  * As PF_VCPU will be used in the fault handler, there must
3473                  * be no uaccess between guest_enter and guest_exit.
3474                  */
3475                 local_irq_disable();
3476                 guest_enter_irqoff();
3477                 __disable_cpu_timer_accounting(vcpu);
3478                 local_irq_enable();
3479                 exit_reason = sie64a(vcpu->arch.sie_block,
3480                                      vcpu->run->s.regs.gprs);
3481                 local_irq_disable();
3482                 __enable_cpu_timer_accounting(vcpu);
3483                 guest_exit_irqoff();
3484                 local_irq_enable();
3485                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3486
3487                 rc = vcpu_post_run(vcpu, exit_reason);
3488         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3489
3490         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3491         return rc;
3492 }
3493
3494 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3495 {
3496         struct runtime_instr_cb *riccb;
3497         struct gs_cb *gscb;
3498
3499         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3500         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3501         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3502         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3503         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3504                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3505         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3506                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3507                 /* some control register changes require a tlb flush */
3508                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3509         }
3510         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3511                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3512                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3513                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3514                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3515                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3516         }
3517         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3518                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3519                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3520                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3521                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3522                         kvm_clear_async_pf_completion_queue(vcpu);
3523         }
3524         /*
3525          * If userspace sets the riccb (e.g. after migration) to a valid state,
3526          * we should enable RI here instead of doing the lazy enablement.
3527          */
3528         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3529             test_kvm_facility(vcpu->kvm, 64) &&
3530             riccb->v &&
3531             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3532                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3533                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3534         }
3535         /*
3536          * If userspace sets the gscb (e.g. after migration) to non-zero,
3537          * we should enable GS here instead of doing the lazy enablement.
3538          */
3539         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3540             test_kvm_facility(vcpu->kvm, 133) &&
3541             gscb->gssm &&
3542             !vcpu->arch.gs_enabled) {
3543                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3544                 vcpu->arch.sie_block->ecb |= ECB_GS;
3545                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3546                 vcpu->arch.gs_enabled = 1;
3547         }
3548         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3549             test_kvm_facility(vcpu->kvm, 82)) {
3550                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3551                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3552         }
3553         save_access_regs(vcpu->arch.host_acrs);
3554         restore_access_regs(vcpu->run->s.regs.acrs);
3555         /* save host (userspace) fprs/vrs */
3556         save_fpu_regs();
3557         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3558         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3559         if (MACHINE_HAS_VX)
3560                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3561         else
3562                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3563         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3564         if (test_fp_ctl(current->thread.fpu.fpc))
3565                 /* User space provided an invalid FPC, let's clear it */
3566                 current->thread.fpu.fpc = 0;
3567         if (MACHINE_HAS_GS) {
3568                 preempt_disable();
3569                 __ctl_set_bit(2, 4);
3570                 if (current->thread.gs_cb) {
3571                         vcpu->arch.host_gscb = current->thread.gs_cb;
3572                         save_gs_cb(vcpu->arch.host_gscb);
3573                 }
3574                 if (vcpu->arch.gs_enabled) {
3575                         current->thread.gs_cb = (struct gs_cb *)
3576                                                 &vcpu->run->s.regs.gscb;
3577                         restore_gs_cb(current->thread.gs_cb);
3578                 }
3579                 preempt_enable();
3580         }
3581         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3582
3583         kvm_run->kvm_dirty_regs = 0;
3584 }
3585
3586 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3587 {
3588         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3589         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3590         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3591         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3592         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3593         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3594         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3595         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3596         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3597         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3598         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3599         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3600         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3601         save_access_regs(vcpu->run->s.regs.acrs);
3602         restore_access_regs(vcpu->arch.host_acrs);
3603         /* Save guest register state */
3604         save_fpu_regs();
3605         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3606         /* Restore will be done lazily at return */
3607         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3608         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3609         if (MACHINE_HAS_GS) {
3610                 __ctl_set_bit(2, 4);
3611                 if (vcpu->arch.gs_enabled)
3612                         save_gs_cb(current->thread.gs_cb);
3613                 preempt_disable();
3614                 current->thread.gs_cb = vcpu->arch.host_gscb;
3615                 restore_gs_cb(vcpu->arch.host_gscb);
3616                 preempt_enable();
3617                 if (!vcpu->arch.host_gscb)
3618                         __ctl_clear_bit(2, 4);
3619                 vcpu->arch.host_gscb = NULL;
3620         }
3621         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3622 }
3623
3624 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3625 {
3626         int rc;
3627
3628         if (kvm_run->immediate_exit)
3629                 return -EINTR;
3630
3631         vcpu_load(vcpu);
3632
3633         if (guestdbg_exit_pending(vcpu)) {
3634                 kvm_s390_prepare_debug_exit(vcpu);
3635                 rc = 0;
3636                 goto out;
3637         }
3638
3639         kvm_sigset_activate(vcpu);
3640
3641         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3642                 kvm_s390_vcpu_start(vcpu);
3643         } else if (is_vcpu_stopped(vcpu)) {
3644                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3645                                    vcpu->vcpu_id);
3646                 rc = -EINVAL;
3647                 goto out;
3648         }
3649
3650         sync_regs(vcpu, kvm_run);
3651         enable_cpu_timer_accounting(vcpu);
3652
3653         might_fault();
3654         rc = __vcpu_run(vcpu);
3655
3656         if (signal_pending(current) && !rc) {
3657                 kvm_run->exit_reason = KVM_EXIT_INTR;
3658                 rc = -EINTR;
3659         }
3660
3661         if (guestdbg_exit_pending(vcpu) && !rc)  {
3662                 kvm_s390_prepare_debug_exit(vcpu);
3663                 rc = 0;
3664         }
3665
3666         if (rc == -EREMOTE) {
3667                 /* userspace support is needed, kvm_run has been prepared */
3668                 rc = 0;
3669         }
3670
3671         disable_cpu_timer_accounting(vcpu);
3672         store_regs(vcpu, kvm_run);
3673
3674         kvm_sigset_deactivate(vcpu);
3675
3676         vcpu->stat.exit_userspace++;
3677 out:
3678         vcpu_put(vcpu);
3679         return rc;
3680 }
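
/*
 * Illustrative userspace sketch (not part of this file): the typical
 * run loop around the ioctl above. Assumes run points to the vcpu's
 * kvm_run structure, previously mmap()ed from vcpu_fd.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;		/* e.g. EINTR on a signal */

		switch (run->exit_reason) {
		case KVM_EXIT_S390_SIEIC:
			/* emulate run->s390_sieic.{icptcode,ipa,ipb}, see
			 * the -EREMOTE path in vcpu_post_run() above */
			break;
		case KVM_EXIT_INTR:
			break;			/* interrupted, re-enter */
		default:
			return 0;		/* let the caller decide */
		}
	}
}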
3681
3682 /*
3683  * store status at address
3684  * we have two special cases:
3685  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3686  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3687  */
3688 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3689 {
3690         unsigned char archmode = 1;
3691         freg_t fprs[NUM_FPRS];
3692         unsigned int px;
3693         u64 clkcomp, cputm;
3694         int rc;
3695
3696         px = kvm_s390_get_prefix(vcpu);
3697         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3698                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3699                         return -EFAULT;
3700                 gpa = 0;
3701         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3702                 if (write_guest_real(vcpu, 163, &archmode, 1))
3703                         return -EFAULT;
3704                 gpa = px;
3705         } else
3706                 gpa -= __LC_FPREGS_SAVE_AREA;
3707
3708         /* manually convert vector registers if necessary */
3709         if (MACHINE_HAS_VX) {
3710                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3711                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3712                                      fprs, 128);
3713         } else {
3714                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3715                                      vcpu->run->s.regs.fprs, 128);
3716         }
3717         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3718                               vcpu->run->s.regs.gprs, 128);
3719         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3720                               &vcpu->arch.sie_block->gpsw, 16);
3721         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3722                               &px, 4);
3723         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3724                               &vcpu->run->s.regs.fpc, 4);
3725         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3726                               &vcpu->arch.sie_block->todpr, 4);
3727         cputm = kvm_s390_get_cpu_timer(vcpu);
3728         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3729                               &cputm, 8);
3730         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3731         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3732                               &clkcomp, 8);
3733         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3734                               &vcpu->run->s.regs.acrs, 64);
3735         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3736                               &vcpu->arch.sie_block->gcr, 128);
3737         return rc ? -EFAULT : 0;
3738 }
3739
3740 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3741 {
3742         /*
3743          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3744          * switch in the run ioctl. Let's update our copies before we save
3745          * it into the save area
3746          */
3747         save_fpu_regs();
3748         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3749         save_access_regs(vcpu->run->s.regs.acrs);
3750
3751         return kvm_s390_store_status_unloaded(vcpu, addr);
3752 }
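
/*
 * Illustrative userspace sketch (not part of this file): requesting a
 * store-status operation. The argument is a guest absolute address, or
 * one of the special markers handled above (the in-kernel constants
 * KVM_S390_STORE_STATUS_NOADDR = -1ul and ..._PREFIXED = -2ul).
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int store_status(int vcpu_fd, unsigned long addr)
{
	return ioctl(vcpu_fd, KVM_S390_STORE_STATUS, addr);
}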
3753
3754 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3755 {
3756         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3757         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3758 }
3759
3760 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3761 {
3762         unsigned int i;
3763         struct kvm_vcpu *vcpu;
3764
3765         kvm_for_each_vcpu(i, vcpu, kvm) {
3766                 __disable_ibs_on_vcpu(vcpu);
3767         }
3768 }
3769
3770 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3771 {
3772         if (!sclp.has_ibs)
3773                 return;
3774         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3775         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3776 }
3777
3778 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3779 {
3780         int i, online_vcpus, started_vcpus = 0;
3781
3782         if (!is_vcpu_stopped(vcpu))
3783                 return;
3784
3785         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3786         /* Only one cpu at a time may enter/leave the STOPPED state. */
3787         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3788         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3789
3790         for (i = 0; i < online_vcpus; i++) {
3791                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3792                         started_vcpus++;
3793         }
3794
3795         if (started_vcpus == 0) {
3796                 /* we're the only active VCPU -> speed it up */
3797                 __enable_ibs_on_vcpu(vcpu);
3798         } else if (started_vcpus == 1) {
3799                 /*
3800                  * As we are starting a second VCPU, we have to disable
3801                  * the IBS facility on all VCPUs to remove potentially
3802                  * outstanding ENABLE requests.
3803                  */
3804                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3805         }
3806
3807         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3808         /*
3809          * Another VCPU might have used IBS while we were offline.
3810          * Let's play safe and flush the VCPU at startup.
3811          */
3812         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3813         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3814         return;
3815 }
3816
3817 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3818 {
3819         int i, online_vcpus, started_vcpus = 0;
3820         struct kvm_vcpu *started_vcpu = NULL;
3821
3822         if (is_vcpu_stopped(vcpu))
3823                 return;
3824
3825         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3826         /* Only one cpu at a time may enter/leave the STOPPED state. */
3827         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3828         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3829
3830         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3831         kvm_s390_clear_stop_irq(vcpu);
3832
3833         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3834         __disable_ibs_on_vcpu(vcpu);
3835
3836         for (i = 0; i < online_vcpus; i++) {
3837                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3838                         started_vcpus++;
3839                         started_vcpu = vcpu->kvm->vcpus[i];
3840                 }
3841         }
3842
3843         if (started_vcpus == 1) {
3844                 /*
3845                  * As we only have one VCPU left, we want to enable the
3846                  * IBS facility for that VCPU to speed it up.
3847                  */
3848                 __enable_ibs_on_vcpu(started_vcpu);
3849         }
3850
3851         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3852         return;
3853 }
3854
3855 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3856                                      struct kvm_enable_cap *cap)
3857 {
3858         int r;
3859
3860         if (cap->flags)
3861                 return -EINVAL;
3862
3863         switch (cap->cap) {
3864         case KVM_CAP_S390_CSS_SUPPORT:
3865                 if (!vcpu->kvm->arch.css_support) {
3866                         vcpu->kvm->arch.css_support = 1;
3867                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3868                         trace_kvm_s390_enable_css(vcpu->kvm);
3869                 }
3870                 r = 0;
3871                 break;
3872         default:
3873                 r = -EINVAL;
3874                 break;
3875         }
3876         return r;
3877 }
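
/*
 * Illustrative userspace sketch (not part of this file): enabling the
 * CSS support capability on a vcpu. flags (and unused args) must be
 * zero, as checked above.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_css_support(int vcpu_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}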
3878
3879 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3880                                   struct kvm_s390_mem_op *mop)
3881 {
3882         void __user *uaddr = (void __user *)mop->buf;
3883         void *tmpbuf = NULL;
3884         int r, srcu_idx;
3885         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3886                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3887
3888         if (mop->flags & ~supported_flags)
3889                 return -EINVAL;
3890
3891         if (mop->size > MEM_OP_MAX_SIZE)
3892                 return -E2BIG;
3893
3894         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3895                 tmpbuf = vmalloc(mop->size);
3896                 if (!tmpbuf)
3897                         return -ENOMEM;
3898         }
3899
3900         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3901
3902         switch (mop->op) {
3903         case KVM_S390_MEMOP_LOGICAL_READ:
3904                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3905                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3906                                             mop->size, GACC_FETCH);
3907                         break;
3908                 }
3909                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3910                 if (r == 0) {
3911                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3912                                 r = -EFAULT;
3913                 }
3914                 break;
3915         case KVM_S390_MEMOP_LOGICAL_WRITE:
3916                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3917                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3918                                             mop->size, GACC_STORE);
3919                         break;
3920                 }
3921                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3922                         r = -EFAULT;
3923                         break;
3924                 }
3925                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3926                 break;
3927         default:
3928                 r = -EINVAL;
3929         }
3930
3931         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3932
3933         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3934                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3935
3936         vfree(tmpbuf);
3937         return r;
3938 }
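
/*
 * Illustrative userspace sketch (not part of this file): a logical read
 * of guest memory via the handler above. size is capped at
 * MEM_OP_MAX_SIZE (64k); ar selects the access register used for
 * address-space resolution.
 */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int read_guest_mem(int vcpu_fd, uint64_t gaddr, void *buf,
			  uint32_t len)
{
	struct kvm_s390_mem_op op = {
		.gaddr = gaddr,
		.size  = len,
		.op    = KVM_S390_MEMOP_LOGICAL_READ,
		.buf   = (uint64_t)(uintptr_t)buf,
		.ar    = 0,
	};

	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}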
3939
3940 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3941                                unsigned int ioctl, unsigned long arg)
3942 {
3943         struct kvm_vcpu *vcpu = filp->private_data;
3944         void __user *argp = (void __user *)arg;
3945
3946         switch (ioctl) {
3947         case KVM_S390_IRQ: {
3948                 struct kvm_s390_irq s390irq;
3949
3950                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3951                         return -EFAULT;
3952                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3953         }
3954         case KVM_S390_INTERRUPT: {
3955                 struct kvm_s390_interrupt s390int;
3956                 struct kvm_s390_irq s390irq;
3957
3958                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3959                         return -EFAULT;
3960                 if (s390int_to_s390irq(&s390int, &s390irq))
3961                         return -EINVAL;
3962                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3963         }
3964         }
3965         return -ENOIOCTLCMD;
3966 }
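
/*
 * Illustrative userspace sketch (not part of this file): injecting an
 * emergency-signal external interrupt through the KVM_S390_IRQ path
 * above; code carries the address of the signalling cpu.
 */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int inject_emergency(int vcpu_fd, uint16_t src_cpu_addr)
{
	struct kvm_s390_irq irq = {
		.type = KVM_S390_INT_EMERGENCY,
		.u.emerg.code = src_cpu_addr,
	};

	return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
}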
3967
3968 long kvm_arch_vcpu_ioctl(struct file *filp,
3969                          unsigned int ioctl, unsigned long arg)
3970 {
3971         struct kvm_vcpu *vcpu = filp->private_data;
3972         void __user *argp = (void __user *)arg;
3973         int idx;
3974         long r;
3975
3976         vcpu_load(vcpu);
3977
3978         switch (ioctl) {
3979         case KVM_S390_STORE_STATUS:
3980                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3981                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3982                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3983                 break;
3984         case KVM_S390_SET_INITIAL_PSW: {
3985                 psw_t psw;
3986
3987                 r = -EFAULT;
3988                 if (copy_from_user(&psw, argp, sizeof(psw)))
3989                         break;
3990                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3991                 break;
3992         }
3993         case KVM_S390_INITIAL_RESET:
3994                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3995                 break;
3996         case KVM_SET_ONE_REG:
3997         case KVM_GET_ONE_REG: {
3998                 struct kvm_one_reg reg;
3999                 r = -EFAULT;
4000                 if (copy_from_user(&reg, argp, sizeof(reg)))
4001                         break;
4002                 if (ioctl == KVM_SET_ONE_REG)
4003                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4004                 else
4005                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4006                 break;
4007         }
4008 #ifdef CONFIG_KVM_S390_UCONTROL
4009         case KVM_S390_UCAS_MAP: {
4010                 struct kvm_s390_ucas_mapping ucasmap;
4011
4012                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4013                         r = -EFAULT;
4014                         break;
4015                 }
4016
4017                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4018                         r = -EINVAL;
4019                         break;
4020                 }
4021
4022                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4023                                      ucasmap.vcpu_addr, ucasmap.length);
4024                 break;
4025         }
4026         case KVM_S390_UCAS_UNMAP: {
4027                 struct kvm_s390_ucas_mapping ucasmap;
4028
4029                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4030                         r = -EFAULT;
4031                         break;
4032                 }
4033
4034                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4035                         r = -EINVAL;
4036                         break;
4037                 }
4038
4039                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4040                         ucasmap.length);
4041                 break;
4042         }
4043 #endif
4044         case KVM_S390_VCPU_FAULT: {
4045                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4046                 break;
4047         }
4048         case KVM_ENABLE_CAP:
4049         {
4050                 struct kvm_enable_cap cap;
4051                 r = -EFAULT;
4052                 if (copy_from_user(&cap, argp, sizeof(cap)))
4053                         break;
4054                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4055                 break;
4056         }
4057         case KVM_S390_MEM_OP: {
4058                 struct kvm_s390_mem_op mem_op;
4059
4060                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4061                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4062                 else
4063                         r = -EFAULT;
4064                 break;
4065         }
4066         case KVM_S390_SET_IRQ_STATE: {
4067                 struct kvm_s390_irq_state irq_state;
4068
4069                 r = -EFAULT;
4070                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4071                         break;
4072                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4073                     irq_state.len == 0 ||
4074                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4075                         r = -EINVAL;
4076                         break;
4077                 }
4078                 /* do not use irq_state.flags, it will break old QEMUs */
4079                 r = kvm_s390_set_irq_state(vcpu,
4080                                            (void __user *) irq_state.buf,
4081                                            irq_state.len);
4082                 break;
4083         }
4084         case KVM_S390_GET_IRQ_STATE: {
4085                 struct kvm_s390_irq_state irq_state;
4086
4087                 r = -EFAULT;
4088                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4089                         break;
4090                 if (irq_state.len == 0) {
4091                         r = -EINVAL;
4092                         break;
4093                 }
4094                 /* do not use irq_state.flags, it will break old QEMUs */
4095                 r = kvm_s390_get_irq_state(vcpu,
4096                                            (__u8 __user *)  irq_state.buf,
4097                                            irq_state.len);
4098                 break;
4099         }
4100         default:
4101                 r = -ENOTTY;
4102         }
4103
4104         vcpu_put(vcpu);
4105         return r;
4106 }
4107
4108 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4109 {
4110 #ifdef CONFIG_KVM_S390_UCONTROL
4111         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4112                  && (kvm_is_ucontrol(vcpu->kvm))) {
4113                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4114                 get_page(vmf->page);
4115                 return 0;
4116         }
4117 #endif
4118         return VM_FAULT_SIGBUS;
4119 }
4120
4121 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4122                             unsigned long npages)
4123 {
4124         return 0;
4125 }
4126
4127 /* Section: memory related */
4128 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4129                                    struct kvm_memory_slot *memslot,
4130                                    const struct kvm_userspace_memory_region *mem,
4131                                    enum kvm_mr_change change)
4132 {
4133         /* A few sanity checks. Memory slots have to start and end at a
4134            segment boundary (1 MB). The memory in userland may be fragmented
4135            across several different vmas, and it is fine to mmap() and
4136            munmap() within this slot at any time after this call. */
4137
4138         if (mem->userspace_addr & 0xffffful)
4139                 return -EINVAL;
4140
4141         if (mem->memory_size & 0xffffful)
4142                 return -EINVAL;
4143
4144         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4145                 return -EINVAL;
4146
4147         return 0;
4148 }
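
/*
 * Illustrative userspace sketch (not part of this file): registering a
 * memslot that satisfies the checks above. Note this is a VM ioctl, so
 * vm_fd (not a vcpu fd) is used; gpa, size and hva must be 1 MB aligned.
 */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int add_slot(int vm_fd, uint32_t slot, uint64_t gpa, uint64_t size,
		    void *hva)
{
	struct kvm_userspace_memory_region mem = {
		.slot            = slot,
		.guest_phys_addr = gpa,
		.memory_size     = size,
		.userspace_addr  = (uint64_t)(uintptr_t)hva,
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
}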
4149
4150 void kvm_arch_commit_memory_region(struct kvm *kvm,
4151                                 const struct kvm_userspace_memory_region *mem,
4152                                 const struct kvm_memory_slot *old,
4153                                 const struct kvm_memory_slot *new,
4154                                 enum kvm_mr_change change)
4155 {
4156         int rc;
4157
4158         /* If the basics of the memslot do not change, we do not want
4159          * to update the gmap. Every update causes several unnecessary
4160          * segment translation exceptions. This is usually handled just
4161          * fine by the normal fault handler + gmap, but it will also
4162          * cause faults on the prefix page of running guest CPUs.
4163          */
4164         if (old->userspace_addr == mem->userspace_addr &&
4165             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4166             old->npages * PAGE_SIZE == mem->memory_size)
4167                 return;
4168
4169         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4170                 mem->guest_phys_addr, mem->memory_size);
4171         if (rc)
4172                 pr_warn("failed to commit memory region\n");
4173         return;
4174 }
4175
4176 static inline unsigned long nonhyp_mask(int i)
4177 {
4178         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4179
4180         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4181 }
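
/*
 * Worked example for nonhyp_mask() (purely mechanical): the 2-bit field
 * i extracted from the top of sclp.hmfai selects how many 16-bit blocks
 * are masked off the facility word:
 *	field 0 -> 0x0000ffffffffffffUL
 *	field 1 -> 0x00000000ffffffffUL
 *	field 2 -> 0x000000000000ffffUL
 *	field 3 -> 0x0000000000000000UL
 */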
4182
4183 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4184 {
4185         vcpu->valid_wakeup = false;
4186 }
4187
4188 static int __init kvm_s390_init(void)
4189 {
4190         int i;
4191
4192         if (!sclp.has_sief2) {
4193                 pr_info("SIE not available\n");
4194                 return -ENODEV;
4195         }
4196
4197         if (nested && hpage) {
4198                 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
4199                 return -EINVAL;
4200         }
4201
4202         for (i = 0; i < 16; i++)
4203                 kvm_s390_fac_base[i] |=
4204                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4205
4206         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4207 }
4208
4209 static void __exit kvm_s390_exit(void)
4210 {
4211         kvm_exit();
4212 }
4213
4214 module_init(kvm_s390_init);
4215 module_exit(kvm_s390_exit);
4216
4217 /*
4218  * Enable autoloading of the kvm module.
4219  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4220  * since x86 takes a different approach.
4221  */
4222 #include <linux/miscdevice.h>
4223 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4224 MODULE_ALIAS("devname:kvm");