]> asedeno.scripts.mit.edu Git - linux.git/blob - arch/s390/kvm/kvm-s390.c
KVM: s390: add debug tracing for cpu features of CPU model
[linux.git] / arch / s390 / kvm / kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2017
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62         { "userspace_handled", VCPU_STAT(exit_userspace) },
63         { "exit_null", VCPU_STAT(exit_null) },
64         { "exit_validity", VCPU_STAT(exit_validity) },
65         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
66         { "exit_external_request", VCPU_STAT(exit_external_request) },
67         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68         { "exit_instruction", VCPU_STAT(exit_instruction) },
69         { "exit_pei", VCPU_STAT(exit_pei) },
70         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
79         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
80         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
81         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
83         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
90         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
91         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
92         { "instruction_spx", VCPU_STAT(instruction_spx) },
93         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
94         { "instruction_stap", VCPU_STAT(instruction_stap) },
95         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
96         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
97         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
98         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
99         { "instruction_essa", VCPU_STAT(instruction_essa) },
100         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
101         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
102         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
103         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
104         { "instruction_sie", VCPU_STAT(instruction_sie) },
105         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
106         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
107         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
108         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
109         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
110         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
111         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
112         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
113         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
114         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
115         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
116         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
117         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
118         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
119         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
120         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
121         { "diagnose_10", VCPU_STAT(diagnose_10) },
122         { "diagnose_44", VCPU_STAT(diagnose_44) },
123         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
124         { "diagnose_258", VCPU_STAT(diagnose_258) },
125         { "diagnose_308", VCPU_STAT(diagnose_308) },
126         { "diagnose_500", VCPU_STAT(diagnose_500) },
127         { NULL }
128 };
129
130 struct kvm_s390_tod_clock_ext {
131         __u8 epoch_idx;
132         __u64 tod;
133         __u8 reserved[7];
134 } __packed;
135
136 /* allow nested virtualization in KVM (if enabled by user space) */
137 static int nested;
138 module_param(nested, int, S_IRUGO);
139 MODULE_PARM_DESC(nested, "Nested virtualization support");
140
141 /* upper facilities limit for kvm */
142 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
143
144 unsigned long kvm_s390_fac_list_mask_size(void)
145 {
146         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
147         return ARRAY_SIZE(kvm_s390_fac_list_mask);
148 }
149
150 /* available cpu features supported by kvm */
151 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
152 /* available subfunctions indicated via query / "test bit" */
153 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
154
155 static struct gmap_notifier gmap_notifier;
156 static struct gmap_notifier vsie_gmap_notifier;
157 debug_info_t *kvm_s390_dbf;
158
159 /* Section: not file related */
160 int kvm_arch_hardware_enable(void)
161 {
162         /* every s390 is virtualization enabled ;-) */
163         return 0;
164 }
165
166 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
167                               unsigned long end);
168
169 /*
170  * This callback is executed during stop_machine(). All CPUs are therefore
171  * temporarily stopped. In order not to change guest behavior, we have to
172  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
173  * so a CPU won't be stopped while calculating with the epoch.
174  */
175 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
176                           void *v)
177 {
178         struct kvm *kvm;
179         struct kvm_vcpu *vcpu;
180         int i;
181         unsigned long long *delta = v;
182
183         list_for_each_entry(kvm, &vm_list, vm_list) {
184                 kvm->arch.epoch -= *delta;
185                 kvm_for_each_vcpu(i, vcpu, kvm) {
186                         vcpu->arch.sie_block->epoch -= *delta;
187                         if (vcpu->arch.cputm_enabled)
188                                 vcpu->arch.cputm_start += *delta;
189                         if (vcpu->arch.vsie_block)
190                                 vcpu->arch.vsie_block->epoch -= *delta;
191                 }
192         }
193         return NOTIFY_OK;
194 }
195
196 static struct notifier_block kvm_clock_notifier = {
197         .notifier_call = kvm_clock_sync,
198 };
199
200 int kvm_arch_hardware_setup(void)
201 {
202         gmap_notifier.notifier_call = kvm_gmap_notifier;
203         gmap_register_pte_notifier(&gmap_notifier);
204         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
205         gmap_register_pte_notifier(&vsie_gmap_notifier);
206         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
207                                        &kvm_clock_notifier);
208         return 0;
209 }
210
211 void kvm_arch_hardware_unsetup(void)
212 {
213         gmap_unregister_pte_notifier(&gmap_notifier);
214         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
215         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
216                                          &kvm_clock_notifier);
217 }
218
219 static void allow_cpu_feat(unsigned long nr)
220 {
221         set_bit_inv(nr, kvm_s390_available_cpu_feat);
222 }
223
224 static inline int plo_test_bit(unsigned char nr)
225 {
226         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
227         int cc;
228
229         asm volatile(
230                 /* Parameter registers are ignored for "test bit" */
231                 "       plo     0,0,0,0(0)\n"
232                 "       ipm     %0\n"
233                 "       srl     %0,28\n"
234                 : "=d" (cc)
235                 : "d" (r0)
236                 : "cc");
237         return cc == 0;
238 }
239
240 static void kvm_s390_cpu_feat_init(void)
241 {
242         int i;
243
244         for (i = 0; i < 256; ++i) {
245                 if (plo_test_bit(i))
246                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
247         }
248
249         if (test_facility(28)) /* TOD-clock steering */
250                 ptff(kvm_s390_available_subfunc.ptff,
251                      sizeof(kvm_s390_available_subfunc.ptff),
252                      PTFF_QAF);
253
254         if (test_facility(17)) { /* MSA */
255                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
256                               kvm_s390_available_subfunc.kmac);
257                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
258                               kvm_s390_available_subfunc.kmc);
259                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
260                               kvm_s390_available_subfunc.km);
261                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
262                               kvm_s390_available_subfunc.kimd);
263                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.klmd);
265         }
266         if (test_facility(76)) /* MSA3 */
267                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.pckmo);
269         if (test_facility(77)) { /* MSA4 */
270                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
271                               kvm_s390_available_subfunc.kmctr);
272                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
273                               kvm_s390_available_subfunc.kmf);
274                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
275                               kvm_s390_available_subfunc.kmo);
276                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
277                               kvm_s390_available_subfunc.pcc);
278         }
279         if (test_facility(57)) /* MSA5 */
280                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
281                               kvm_s390_available_subfunc.ppno);
282
283         if (test_facility(146)) /* MSA8 */
284                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
285                               kvm_s390_available_subfunc.kma);
286
287         if (MACHINE_HAS_ESOP)
288                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
289         /*
290          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
291          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
292          */
293         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
294             !test_facility(3) || !nested)
295                 return;
296         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
297         if (sclp.has_64bscao)
298                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
299         if (sclp.has_siif)
300                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
301         if (sclp.has_gpere)
302                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
303         if (sclp.has_gsls)
304                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
305         if (sclp.has_ib)
306                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
307         if (sclp.has_cei)
308                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
309         if (sclp.has_ibs)
310                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
311         if (sclp.has_kss)
312                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
313         /*
314          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
315          * all skey handling functions read/set the skey from the PGSTE
316          * instead of the real storage key.
317          *
318          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
319          * pages being detected as preserved although they are resident.
320          *
321          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
322          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
323          *
324          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
325          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
326          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
327          *
328          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
329          * cannot easily shadow the SCA because of the ipte lock.
330          */
331 }
332
333 int kvm_arch_init(void *opaque)
334 {
335         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
336         if (!kvm_s390_dbf)
337                 return -ENOMEM;
338
339         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
340                 debug_unregister(kvm_s390_dbf);
341                 return -ENOMEM;
342         }
343
344         kvm_s390_cpu_feat_init();
345
346         /* Register floating interrupt controller interface. */
347         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
348 }
349
350 void kvm_arch_exit(void)
351 {
352         debug_unregister(kvm_s390_dbf);
353 }
354
355 /* Section: device related */
356 long kvm_arch_dev_ioctl(struct file *filp,
357                         unsigned int ioctl, unsigned long arg)
358 {
359         if (ioctl == KVM_S390_ENABLE_SIE)
360                 return s390_enable_sie();
361         return -EINVAL;
362 }
363
364 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
365 {
366         int r;
367
368         switch (ext) {
369         case KVM_CAP_S390_PSW:
370         case KVM_CAP_S390_GMAP:
371         case KVM_CAP_SYNC_MMU:
372 #ifdef CONFIG_KVM_S390_UCONTROL
373         case KVM_CAP_S390_UCONTROL:
374 #endif
375         case KVM_CAP_ASYNC_PF:
376         case KVM_CAP_SYNC_REGS:
377         case KVM_CAP_ONE_REG:
378         case KVM_CAP_ENABLE_CAP:
379         case KVM_CAP_S390_CSS_SUPPORT:
380         case KVM_CAP_IOEVENTFD:
381         case KVM_CAP_DEVICE_CTRL:
382         case KVM_CAP_ENABLE_CAP_VM:
383         case KVM_CAP_S390_IRQCHIP:
384         case KVM_CAP_VM_ATTRIBUTES:
385         case KVM_CAP_MP_STATE:
386         case KVM_CAP_IMMEDIATE_EXIT:
387         case KVM_CAP_S390_INJECT_IRQ:
388         case KVM_CAP_S390_USER_SIGP:
389         case KVM_CAP_S390_USER_STSI:
390         case KVM_CAP_S390_SKEYS:
391         case KVM_CAP_S390_IRQ_STATE:
392         case KVM_CAP_S390_USER_INSTR0:
393         case KVM_CAP_S390_CMMA_MIGRATION:
394         case KVM_CAP_S390_AIS:
395         case KVM_CAP_S390_AIS_MIGRATION:
396                 r = 1;
397                 break;
398         case KVM_CAP_S390_MEM_OP:
399                 r = MEM_OP_MAX_SIZE;
400                 break;
401         case KVM_CAP_NR_VCPUS:
402         case KVM_CAP_MAX_VCPUS:
403                 r = KVM_S390_BSCA_CPU_SLOTS;
404                 if (!kvm_s390_use_sca_entries())
405                         r = KVM_MAX_VCPUS;
406                 else if (sclp.has_esca && sclp.has_64bscao)
407                         r = KVM_S390_ESCA_CPU_SLOTS;
408                 break;
409         case KVM_CAP_NR_MEMSLOTS:
410                 r = KVM_USER_MEM_SLOTS;
411                 break;
412         case KVM_CAP_S390_COW:
413                 r = MACHINE_HAS_ESOP;
414                 break;
415         case KVM_CAP_S390_VECTOR_REGISTERS:
416                 r = MACHINE_HAS_VX;
417                 break;
418         case KVM_CAP_S390_RI:
419                 r = test_facility(64);
420                 break;
421         case KVM_CAP_S390_GS:
422                 r = test_facility(133);
423                 break;
424         default:
425                 r = 0;
426         }
427         return r;
428 }
429
430 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
431                                         struct kvm_memory_slot *memslot)
432 {
433         gfn_t cur_gfn, last_gfn;
434         unsigned long address;
435         struct gmap *gmap = kvm->arch.gmap;
436
437         /* Loop over all guest pages */
438         last_gfn = memslot->base_gfn + memslot->npages;
439         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
440                 address = gfn_to_hva_memslot(memslot, cur_gfn);
441
442                 if (test_and_clear_guest_dirty(gmap->mm, address))
443                         mark_page_dirty(kvm, cur_gfn);
444                 if (fatal_signal_pending(current))
445                         return;
446                 cond_resched();
447         }
448 }
449
450 /* Section: vm related */
451 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
452
453 /*
454  * Get (and clear) the dirty memory log for a memory slot.
455  */
456 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
457                                struct kvm_dirty_log *log)
458 {
459         int r;
460         unsigned long n;
461         struct kvm_memslots *slots;
462         struct kvm_memory_slot *memslot;
463         int is_dirty = 0;
464
465         if (kvm_is_ucontrol(kvm))
466                 return -EINVAL;
467
468         mutex_lock(&kvm->slots_lock);
469
470         r = -EINVAL;
471         if (log->slot >= KVM_USER_MEM_SLOTS)
472                 goto out;
473
474         slots = kvm_memslots(kvm);
475         memslot = id_to_memslot(slots, log->slot);
476         r = -ENOENT;
477         if (!memslot->dirty_bitmap)
478                 goto out;
479
480         kvm_s390_sync_dirty_log(kvm, memslot);
481         r = kvm_get_dirty_log(kvm, log, &is_dirty);
482         if (r)
483                 goto out;
484
485         /* Clear the dirty log */
486         if (is_dirty) {
487                 n = kvm_dirty_bitmap_bytes(memslot);
488                 memset(memslot->dirty_bitmap, 0, n);
489         }
490         r = 0;
491 out:
492         mutex_unlock(&kvm->slots_lock);
493         return r;
494 }
495
496 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
497 {
498         unsigned int i;
499         struct kvm_vcpu *vcpu;
500
501         kvm_for_each_vcpu(i, vcpu, kvm) {
502                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
503         }
504 }
505
506 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
507 {
508         int r;
509
510         if (cap->flags)
511                 return -EINVAL;
512
513         switch (cap->cap) {
514         case KVM_CAP_S390_IRQCHIP:
515                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
516                 kvm->arch.use_irqchip = 1;
517                 r = 0;
518                 break;
519         case KVM_CAP_S390_USER_SIGP:
520                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
521                 kvm->arch.user_sigp = 1;
522                 r = 0;
523                 break;
524         case KVM_CAP_S390_VECTOR_REGISTERS:
525                 mutex_lock(&kvm->lock);
526                 if (kvm->created_vcpus) {
527                         r = -EBUSY;
528                 } else if (MACHINE_HAS_VX) {
529                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
530                         set_kvm_facility(kvm->arch.model.fac_list, 129);
531                         if (test_facility(134)) {
532                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
533                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
534                         }
535                         if (test_facility(135)) {
536                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
537                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
538                         }
539                         r = 0;
540                 } else
541                         r = -EINVAL;
542                 mutex_unlock(&kvm->lock);
543                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
544                          r ? "(not available)" : "(success)");
545                 break;
546         case KVM_CAP_S390_RI:
547                 r = -EINVAL;
548                 mutex_lock(&kvm->lock);
549                 if (kvm->created_vcpus) {
550                         r = -EBUSY;
551                 } else if (test_facility(64)) {
552                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
553                         set_kvm_facility(kvm->arch.model.fac_list, 64);
554                         r = 0;
555                 }
556                 mutex_unlock(&kvm->lock);
557                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
558                          r ? "(not available)" : "(success)");
559                 break;
560         case KVM_CAP_S390_AIS:
561                 mutex_lock(&kvm->lock);
562                 if (kvm->created_vcpus) {
563                         r = -EBUSY;
564                 } else {
565                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
566                         set_kvm_facility(kvm->arch.model.fac_list, 72);
567                         r = 0;
568                 }
569                 mutex_unlock(&kvm->lock);
570                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
571                          r ? "(not available)" : "(success)");
572                 break;
573         case KVM_CAP_S390_GS:
574                 r = -EINVAL;
575                 mutex_lock(&kvm->lock);
576                 if (kvm->created_vcpus) {
577                         r = -EBUSY;
578                 } else if (test_facility(133)) {
579                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
580                         set_kvm_facility(kvm->arch.model.fac_list, 133);
581                         r = 0;
582                 }
583                 mutex_unlock(&kvm->lock);
584                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
585                          r ? "(not available)" : "(success)");
586                 break;
587         case KVM_CAP_S390_USER_STSI:
588                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
589                 kvm->arch.user_stsi = 1;
590                 r = 0;
591                 break;
592         case KVM_CAP_S390_USER_INSTR0:
593                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
594                 kvm->arch.user_instr0 = 1;
595                 icpt_operexc_on_all_vcpus(kvm);
596                 r = 0;
597                 break;
598         default:
599                 r = -EINVAL;
600                 break;
601         }
602         return r;
603 }
604
605 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
606 {
607         int ret;
608
609         switch (attr->attr) {
610         case KVM_S390_VM_MEM_LIMIT_SIZE:
611                 ret = 0;
612                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
613                          kvm->arch.mem_limit);
614                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
615                         ret = -EFAULT;
616                 break;
617         default:
618                 ret = -ENXIO;
619                 break;
620         }
621         return ret;
622 }
623
624 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
625 {
626         int ret;
627         unsigned int idx;
628         switch (attr->attr) {
629         case KVM_S390_VM_MEM_ENABLE_CMMA:
630                 ret = -ENXIO;
631                 if (!sclp.has_cmma)
632                         break;
633
634                 ret = -EBUSY;
635                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
636                 mutex_lock(&kvm->lock);
637                 if (!kvm->created_vcpus) {
638                         kvm->arch.use_cmma = 1;
639                         ret = 0;
640                 }
641                 mutex_unlock(&kvm->lock);
642                 break;
643         case KVM_S390_VM_MEM_CLR_CMMA:
644                 ret = -ENXIO;
645                 if (!sclp.has_cmma)
646                         break;
647                 ret = -EINVAL;
648                 if (!kvm->arch.use_cmma)
649                         break;
650
651                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
652                 mutex_lock(&kvm->lock);
653                 idx = srcu_read_lock(&kvm->srcu);
654                 s390_reset_cmma(kvm->arch.gmap->mm);
655                 srcu_read_unlock(&kvm->srcu, idx);
656                 mutex_unlock(&kvm->lock);
657                 ret = 0;
658                 break;
659         case KVM_S390_VM_MEM_LIMIT_SIZE: {
660                 unsigned long new_limit;
661
662                 if (kvm_is_ucontrol(kvm))
663                         return -EINVAL;
664
665                 if (get_user(new_limit, (u64 __user *)attr->addr))
666                         return -EFAULT;
667
668                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
669                     new_limit > kvm->arch.mem_limit)
670                         return -E2BIG;
671
672                 if (!new_limit)
673                         return -EINVAL;
674
675                 /* gmap_create takes last usable address */
676                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
677                         new_limit -= 1;
678
679                 ret = -EBUSY;
680                 mutex_lock(&kvm->lock);
681                 if (!kvm->created_vcpus) {
682                         /* gmap_create will round the limit up */
683                         struct gmap *new = gmap_create(current->mm, new_limit);
684
685                         if (!new) {
686                                 ret = -ENOMEM;
687                         } else {
688                                 gmap_remove(kvm->arch.gmap);
689                                 new->private = kvm;
690                                 kvm->arch.gmap = new;
691                                 ret = 0;
692                         }
693                 }
694                 mutex_unlock(&kvm->lock);
695                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
696                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
697                          (void *) kvm->arch.gmap->asce);
698                 break;
699         }
700         default:
701                 ret = -ENXIO;
702                 break;
703         }
704         return ret;
705 }
706
707 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
708
709 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
710 {
711         struct kvm_vcpu *vcpu;
712         int i;
713
714         if (!test_kvm_facility(kvm, 76))
715                 return -EINVAL;
716
717         mutex_lock(&kvm->lock);
718         switch (attr->attr) {
719         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
720                 get_random_bytes(
721                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
722                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
723                 kvm->arch.crypto.aes_kw = 1;
724                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
725                 break;
726         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
727                 get_random_bytes(
728                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
729                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
730                 kvm->arch.crypto.dea_kw = 1;
731                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
732                 break;
733         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
734                 kvm->arch.crypto.aes_kw = 0;
735                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
736                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
737                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
738                 break;
739         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
740                 kvm->arch.crypto.dea_kw = 0;
741                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
742                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
743                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
744                 break;
745         default:
746                 mutex_unlock(&kvm->lock);
747                 return -ENXIO;
748         }
749
750         kvm_for_each_vcpu(i, vcpu, kvm) {
751                 kvm_s390_vcpu_crypto_setup(vcpu);
752                 exit_sie(vcpu);
753         }
754         mutex_unlock(&kvm->lock);
755         return 0;
756 }
757
758 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
759 {
760         int cx;
761         struct kvm_vcpu *vcpu;
762
763         kvm_for_each_vcpu(cx, vcpu, kvm)
764                 kvm_s390_sync_request(req, vcpu);
765 }
766
767 /*
768  * Must be called with kvm->srcu held to avoid races on memslots, and with
769  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
770  */
771 static int kvm_s390_vm_start_migration(struct kvm *kvm)
772 {
773         struct kvm_s390_migration_state *mgs;
774         struct kvm_memory_slot *ms;
775         /* should be the only one */
776         struct kvm_memslots *slots;
777         unsigned long ram_pages;
778         int slotnr;
779
780         /* migration mode already enabled */
781         if (kvm->arch.migration_state)
782                 return 0;
783
784         slots = kvm_memslots(kvm);
785         if (!slots || !slots->used_slots)
786                 return -EINVAL;
787
788         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
789         if (!mgs)
790                 return -ENOMEM;
791         kvm->arch.migration_state = mgs;
792
793         if (kvm->arch.use_cmma) {
794                 /*
795                  * Get the last slot. They should be sorted by base_gfn, so the
796                  * last slot is also the one at the end of the address space.
797                  * We have verified above that at least one slot is present.
798                  */
799                 ms = slots->memslots + slots->used_slots - 1;
800                 /* round up so we only use full longs */
801                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
802                 /* allocate enough bytes to store all the bits */
803                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
804                 if (!mgs->pgste_bitmap) {
805                         kfree(mgs);
806                         kvm->arch.migration_state = NULL;
807                         return -ENOMEM;
808                 }
809
810                 mgs->bitmap_size = ram_pages;
811                 atomic64_set(&mgs->dirty_pages, ram_pages);
812                 /* mark all the pages in active slots as dirty */
813                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
814                         ms = slots->memslots + slotnr;
815                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
816                 }
817
818                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
819         }
820         return 0;
821 }
822
823 /*
824  * Must be called with kvm->lock to avoid races with ourselves and
825  * kvm_s390_vm_start_migration.
826  */
827 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
828 {
829         struct kvm_s390_migration_state *mgs;
830
831         /* migration mode already disabled */
832         if (!kvm->arch.migration_state)
833                 return 0;
834         mgs = kvm->arch.migration_state;
835         kvm->arch.migration_state = NULL;
836
837         if (kvm->arch.use_cmma) {
838                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
839                 vfree(mgs->pgste_bitmap);
840         }
841         kfree(mgs);
842         return 0;
843 }
844
845 static int kvm_s390_vm_set_migration(struct kvm *kvm,
846                                      struct kvm_device_attr *attr)
847 {
848         int idx, res = -ENXIO;
849
850         mutex_lock(&kvm->lock);
851         switch (attr->attr) {
852         case KVM_S390_VM_MIGRATION_START:
853                 idx = srcu_read_lock(&kvm->srcu);
854                 res = kvm_s390_vm_start_migration(kvm);
855                 srcu_read_unlock(&kvm->srcu, idx);
856                 break;
857         case KVM_S390_VM_MIGRATION_STOP:
858                 res = kvm_s390_vm_stop_migration(kvm);
859                 break;
860         default:
861                 break;
862         }
863         mutex_unlock(&kvm->lock);
864
865         return res;
866 }
867
868 static int kvm_s390_vm_get_migration(struct kvm *kvm,
869                                      struct kvm_device_attr *attr)
870 {
871         u64 mig = (kvm->arch.migration_state != NULL);
872
873         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
874                 return -ENXIO;
875
876         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
877                 return -EFAULT;
878         return 0;
879 }
880
881 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
882 {
883         struct kvm_s390_vm_tod_clock gtod;
884
885         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
886                 return -EFAULT;
887
888         if (test_kvm_facility(kvm, 139))
889                 kvm_s390_set_tod_clock_ext(kvm, &gtod);
890         else if (gtod.epoch_idx == 0)
891                 kvm_s390_set_tod_clock(kvm, gtod.tod);
892         else
893                 return -EINVAL;
894
895         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
896                 gtod.epoch_idx, gtod.tod);
897
898         return 0;
899 }
900
901 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
902 {
903         u8 gtod_high;
904
905         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
906                                            sizeof(gtod_high)))
907                 return -EFAULT;
908
909         if (gtod_high != 0)
910                 return -EINVAL;
911         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
912
913         return 0;
914 }
915
916 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
917 {
918         u64 gtod;
919
920         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
921                 return -EFAULT;
922
923         kvm_s390_set_tod_clock(kvm, gtod);
924         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
925         return 0;
926 }
927
928 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
929 {
930         int ret;
931
932         if (attr->flags)
933                 return -EINVAL;
934
935         switch (attr->attr) {
936         case KVM_S390_VM_TOD_EXT:
937                 ret = kvm_s390_set_tod_ext(kvm, attr);
938                 break;
939         case KVM_S390_VM_TOD_HIGH:
940                 ret = kvm_s390_set_tod_high(kvm, attr);
941                 break;
942         case KVM_S390_VM_TOD_LOW:
943                 ret = kvm_s390_set_tod_low(kvm, attr);
944                 break;
945         default:
946                 ret = -ENXIO;
947                 break;
948         }
949         return ret;
950 }
951
952 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
953                                         struct kvm_s390_vm_tod_clock *gtod)
954 {
955         struct kvm_s390_tod_clock_ext htod;
956
957         preempt_disable();
958
959         get_tod_clock_ext((char *)&htod);
960
961         gtod->tod = htod.tod + kvm->arch.epoch;
962         gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
963
964         if (gtod->tod < htod.tod)
965                 gtod->epoch_idx += 1;
966
967         preempt_enable();
968 }
969
970 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972         struct kvm_s390_vm_tod_clock gtod;
973
974         memset(&gtod, 0, sizeof(gtod));
975
976         if (test_kvm_facility(kvm, 139))
977                 kvm_s390_get_tod_clock_ext(kvm, &gtod);
978         else
979                 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
980
981         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
982                 return -EFAULT;
983
984         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
985                 gtod.epoch_idx, gtod.tod);
986         return 0;
987 }
988
989 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
990 {
991         u8 gtod_high = 0;
992
993         if (copy_to_user((void __user *)attr->addr, &gtod_high,
994                                          sizeof(gtod_high)))
995                 return -EFAULT;
996         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
997
998         return 0;
999 }
1000
1001 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1002 {
1003         u64 gtod;
1004
1005         gtod = kvm_s390_get_tod_clock_fast(kvm);
1006         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1007                 return -EFAULT;
1008         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1009
1010         return 0;
1011 }
1012
1013 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1014 {
1015         int ret;
1016
1017         if (attr->flags)
1018                 return -EINVAL;
1019
1020         switch (attr->attr) {
1021         case KVM_S390_VM_TOD_EXT:
1022                 ret = kvm_s390_get_tod_ext(kvm, attr);
1023                 break;
1024         case KVM_S390_VM_TOD_HIGH:
1025                 ret = kvm_s390_get_tod_high(kvm, attr);
1026                 break;
1027         case KVM_S390_VM_TOD_LOW:
1028                 ret = kvm_s390_get_tod_low(kvm, attr);
1029                 break;
1030         default:
1031                 ret = -ENXIO;
1032                 break;
1033         }
1034         return ret;
1035 }
1036
1037 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1038 {
1039         struct kvm_s390_vm_cpu_processor *proc;
1040         u16 lowest_ibc, unblocked_ibc;
1041         int ret = 0;
1042
1043         mutex_lock(&kvm->lock);
1044         if (kvm->created_vcpus) {
1045                 ret = -EBUSY;
1046                 goto out;
1047         }
1048         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1049         if (!proc) {
1050                 ret = -ENOMEM;
1051                 goto out;
1052         }
1053         if (!copy_from_user(proc, (void __user *)attr->addr,
1054                             sizeof(*proc))) {
1055                 kvm->arch.model.cpuid = proc->cpuid;
1056                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1057                 unblocked_ibc = sclp.ibc & 0xfff;
1058                 if (lowest_ibc && proc->ibc) {
1059                         if (proc->ibc > unblocked_ibc)
1060                                 kvm->arch.model.ibc = unblocked_ibc;
1061                         else if (proc->ibc < lowest_ibc)
1062                                 kvm->arch.model.ibc = lowest_ibc;
1063                         else
1064                                 kvm->arch.model.ibc = proc->ibc;
1065                 }
1066                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1067                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1068                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1069                          kvm->arch.model.ibc,
1070                          kvm->arch.model.cpuid);
1071                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1072                          kvm->arch.model.fac_list[0],
1073                          kvm->arch.model.fac_list[1],
1074                          kvm->arch.model.fac_list[2]);
1075         } else
1076                 ret = -EFAULT;
1077         kfree(proc);
1078 out:
1079         mutex_unlock(&kvm->lock);
1080         return ret;
1081 }
1082
1083 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1084                                        struct kvm_device_attr *attr)
1085 {
1086         struct kvm_s390_vm_cpu_feat data;
1087
1088         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1089                 return -EFAULT;
1090         if (!bitmap_subset((unsigned long *) data.feat,
1091                            kvm_s390_available_cpu_feat,
1092                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1093                 return -EINVAL;
1094
1095         mutex_lock(&kvm->lock);
1096         if (kvm->created_vcpus) {
1097                 mutex_unlock(&kvm->lock);
1098                 return -EBUSY;
1099         }
1100         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1101                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1102         mutex_unlock(&kvm->lock);
1103         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1104                          data.feat[0],
1105                          data.feat[1],
1106                          data.feat[2]);
1107         return 0;
1108 }
1109
1110 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1111                                           struct kvm_device_attr *attr)
1112 {
1113         /*
1114          * Once supported by kernel + hw, we have to store the subfunctions
1115          * in kvm->arch and remember that user space configured them.
1116          */
1117         return -ENXIO;
1118 }
1119
1120 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1121 {
1122         int ret = -ENXIO;
1123
1124         switch (attr->attr) {
1125         case KVM_S390_VM_CPU_PROCESSOR:
1126                 ret = kvm_s390_set_processor(kvm, attr);
1127                 break;
1128         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1129                 ret = kvm_s390_set_processor_feat(kvm, attr);
1130                 break;
1131         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1132                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1133                 break;
1134         }
1135         return ret;
1136 }
1137
1138 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140         struct kvm_s390_vm_cpu_processor *proc;
1141         int ret = 0;
1142
1143         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1144         if (!proc) {
1145                 ret = -ENOMEM;
1146                 goto out;
1147         }
1148         proc->cpuid = kvm->arch.model.cpuid;
1149         proc->ibc = kvm->arch.model.ibc;
1150         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1151                S390_ARCH_FAC_LIST_SIZE_BYTE);
1152         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153                  kvm->arch.model.ibc,
1154                  kvm->arch.model.cpuid);
1155         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156                  kvm->arch.model.fac_list[0],
1157                  kvm->arch.model.fac_list[1],
1158                  kvm->arch.model.fac_list[2]);
1159         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1160                 ret = -EFAULT;
1161         kfree(proc);
1162 out:
1163         return ret;
1164 }
1165
1166 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168         struct kvm_s390_vm_cpu_machine *mach;
1169         int ret = 0;
1170
1171         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1172         if (!mach) {
1173                 ret = -ENOMEM;
1174                 goto out;
1175         }
1176         get_cpu_id((struct cpuid *) &mach->cpuid);
1177         mach->ibc = sclp.ibc;
1178         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1179                S390_ARCH_FAC_LIST_SIZE_BYTE);
1180         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1181                sizeof(S390_lowcore.stfle_fac_list));
1182         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1183                  kvm->arch.model.ibc,
1184                  kvm->arch.model.cpuid);
1185         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1186                  mach->fac_mask[0],
1187                  mach->fac_mask[1],
1188                  mach->fac_mask[2]);
1189         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1190                  mach->fac_list[0],
1191                  mach->fac_list[1],
1192                  mach->fac_list[2]);
1193         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1194                 ret = -EFAULT;
1195         kfree(mach);
1196 out:
1197         return ret;
1198 }
1199
1200 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1201                                        struct kvm_device_attr *attr)
1202 {
1203         struct kvm_s390_vm_cpu_feat data;
1204
1205         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1206                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1207         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1208                 return -EFAULT;
1209         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1210                          data.feat[0],
1211                          data.feat[1],
1212                          data.feat[2]);
1213         return 0;
1214 }
1215
1216 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1217                                      struct kvm_device_attr *attr)
1218 {
1219         struct kvm_s390_vm_cpu_feat data;
1220
1221         bitmap_copy((unsigned long *) data.feat,
1222                     kvm_s390_available_cpu_feat,
1223                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1224         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1225                 return -EFAULT;
1226         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1227                          data.feat[0],
1228                          data.feat[1],
1229                          data.feat[2]);
1230         return 0;
1231 }
1232
1233 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1234                                           struct kvm_device_attr *attr)
1235 {
1236         /*
1237          * Once we can actually configure subfunctions (kernel + hw support),
1238          * we have to check if they were already set by user space, if so copy
1239          * them from kvm->arch.
1240          */
1241         return -ENXIO;
1242 }
1243
1244 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1245                                         struct kvm_device_attr *attr)
1246 {
1247         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1248             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1249                 return -EFAULT;
1250         return 0;
1251 }
1252 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1253 {
1254         int ret = -ENXIO;
1255
1256         switch (attr->attr) {
1257         case KVM_S390_VM_CPU_PROCESSOR:
1258                 ret = kvm_s390_get_processor(kvm, attr);
1259                 break;
1260         case KVM_S390_VM_CPU_MACHINE:
1261                 ret = kvm_s390_get_machine(kvm, attr);
1262                 break;
1263         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1264                 ret = kvm_s390_get_processor_feat(kvm, attr);
1265                 break;
1266         case KVM_S390_VM_CPU_MACHINE_FEAT:
1267                 ret = kvm_s390_get_machine_feat(kvm, attr);
1268                 break;
1269         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1270                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1271                 break;
1272         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1273                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1274                 break;
1275         }
1276         return ret;
1277 }
1278
1279 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1280 {
1281         int ret;
1282
1283         switch (attr->group) {
1284         case KVM_S390_VM_MEM_CTRL:
1285                 ret = kvm_s390_set_mem_control(kvm, attr);
1286                 break;
1287         case KVM_S390_VM_TOD:
1288                 ret = kvm_s390_set_tod(kvm, attr);
1289                 break;
1290         case KVM_S390_VM_CPU_MODEL:
1291                 ret = kvm_s390_set_cpu_model(kvm, attr);
1292                 break;
1293         case KVM_S390_VM_CRYPTO:
1294                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1295                 break;
1296         case KVM_S390_VM_MIGRATION:
1297                 ret = kvm_s390_vm_set_migration(kvm, attr);
1298                 break;
1299         default:
1300                 ret = -ENXIO;
1301                 break;
1302         }
1303
1304         return ret;
1305 }
1306
1307 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1308 {
1309         int ret;
1310
1311         switch (attr->group) {
1312         case KVM_S390_VM_MEM_CTRL:
1313                 ret = kvm_s390_get_mem_control(kvm, attr);
1314                 break;
1315         case KVM_S390_VM_TOD:
1316                 ret = kvm_s390_get_tod(kvm, attr);
1317                 break;
1318         case KVM_S390_VM_CPU_MODEL:
1319                 ret = kvm_s390_get_cpu_model(kvm, attr);
1320                 break;
1321         case KVM_S390_VM_MIGRATION:
1322                 ret = kvm_s390_vm_get_migration(kvm, attr);
1323                 break;
1324         default:
1325                 ret = -ENXIO;
1326                 break;
1327         }
1328
1329         return ret;
1330 }
1331
1332 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1333 {
1334         int ret;
1335
1336         switch (attr->group) {
1337         case KVM_S390_VM_MEM_CTRL:
1338                 switch (attr->attr) {
1339                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1340                 case KVM_S390_VM_MEM_CLR_CMMA:
1341                         ret = sclp.has_cmma ? 0 : -ENXIO;
1342                         break;
1343                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1344                         ret = 0;
1345                         break;
1346                 default:
1347                         ret = -ENXIO;
1348                         break;
1349                 }
1350                 break;
1351         case KVM_S390_VM_TOD:
1352                 switch (attr->attr) {
1353                 case KVM_S390_VM_TOD_LOW:
1354                 case KVM_S390_VM_TOD_HIGH:
1355                         ret = 0;
1356                         break;
1357                 default:
1358                         ret = -ENXIO;
1359                         break;
1360                 }
1361                 break;
1362         case KVM_S390_VM_CPU_MODEL:
1363                 switch (attr->attr) {
1364                 case KVM_S390_VM_CPU_PROCESSOR:
1365                 case KVM_S390_VM_CPU_MACHINE:
1366                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1367                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1368                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1369                         ret = 0;
1370                         break;
1371                 /* configuring subfunctions is not supported yet */
1372                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1373                 default:
1374                         ret = -ENXIO;
1375                         break;
1376                 }
1377                 break;
1378         case KVM_S390_VM_CRYPTO:
1379                 switch (attr->attr) {
1380                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1381                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1382                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1383                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1384                         ret = 0;
1385                         break;
1386                 default:
1387                         ret = -ENXIO;
1388                         break;
1389                 }
1390                 break;
1391         case KVM_S390_VM_MIGRATION:
1392                 ret = 0;
1393                 break;
1394         default:
1395                 ret = -ENXIO;
1396                 break;
1397         }
1398
1399         return ret;
1400 }
1401
1402 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1403 {
1404         uint8_t *keys;
1405         uint64_t hva;
1406         int srcu_idx, i, r = 0;
1407
1408         if (args->flags != 0)
1409                 return -EINVAL;
1410
1411         /* Is this guest using storage keys? */
1412         if (!mm_use_skey(current->mm))
1413                 return KVM_S390_GET_SKEYS_NONE;
1414
1415         /* Enforce sane limit on memory allocation */
1416         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1417                 return -EINVAL;
1418
1419         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1420         if (!keys)
1421                 return -ENOMEM;
1422
1423         down_read(&current->mm->mmap_sem);
1424         srcu_idx = srcu_read_lock(&kvm->srcu);
1425         for (i = 0; i < args->count; i++) {
1426                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1427                 if (kvm_is_error_hva(hva)) {
1428                         r = -EFAULT;
1429                         break;
1430                 }
1431
1432                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1433                 if (r)
1434                         break;
1435         }
1436         srcu_read_unlock(&kvm->srcu, srcu_idx);
1437         up_read(&current->mm->mmap_sem);
1438
1439         if (!r) {
1440                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1441                                  sizeof(uint8_t) * args->count);
1442                 if (r)
1443                         r = -EFAULT;
1444         }
1445
1446         kvfree(keys);
1447         return r;
1448 }
1449
1450 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1451 {
1452         uint8_t *keys;
1453         uint64_t hva;
1454         int srcu_idx, i, r = 0;
1455
1456         if (args->flags != 0)
1457                 return -EINVAL;
1458
1459         /* Enforce sane limit on memory allocation */
1460         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1461                 return -EINVAL;
1462
1463         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1464         if (!keys)
1465                 return -ENOMEM;
1466
1467         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1468                            sizeof(uint8_t) * args->count);
1469         if (r) {
1470                 r = -EFAULT;
1471                 goto out;
1472         }
1473
1474         /* Enable storage key handling for the guest */
1475         r = s390_enable_skey();
1476         if (r)
1477                 goto out;
1478
1479         down_read(&current->mm->mmap_sem);
1480         srcu_idx = srcu_read_lock(&kvm->srcu);
1481         for (i = 0; i < args->count; i++) {
1482                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1483                 if (kvm_is_error_hva(hva)) {
1484                         r = -EFAULT;
1485                         break;
1486                 }
1487
1488                 /* Lowest order bit is reserved */
1489                 if (keys[i] & 0x01) {
1490                         r = -EINVAL;
1491                         break;
1492                 }
1493
1494                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1495                 if (r)
1496                         break;
1497         }
1498         srcu_read_unlock(&kvm->srcu, srcu_idx);
1499         up_read(&current->mm->mmap_sem);
1500 out:
1501         kvfree(keys);
1502         return r;
1503 }
1504
1505 /*
1506  * Base address and length must be sent at the start of each block, therefore
1507  * it's cheaper to send some clean data, as long as it's less than the size of
1508  * two longs.
1509  */
1510 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1511 /* for consistency */
1512 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1513
1514 /*
1515  * This function searches for the next page with dirty CMMA attributes, and
1516  * saves the attributes in the buffer up to either the end of the buffer or
1517  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1518  * no trailing clean bytes are saved.
1519  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1520  * output buffer will indicate 0 as length.
1521  */
1522 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1523                                   struct kvm_s390_cmma_log *args)
1524 {
1525         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1526         unsigned long bufsize, hva, pgstev, i, next, cur;
1527         int srcu_idx, peek, r = 0, rr;
1528         u8 *res;
1529
1530         cur = args->start_gfn;
1531         i = next = pgstev = 0;
1532
1533         if (unlikely(!kvm->arch.use_cmma))
1534                 return -ENXIO;
1535         /* Invalid/unsupported flags were specified */
1536         if (args->flags & ~KVM_S390_CMMA_PEEK)
1537                 return -EINVAL;
1538         /* Migration mode query, and we are not doing a migration */
1539         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1540         if (!peek && !s)
1541                 return -EINVAL;
1542         /* CMMA is disabled or was not used, or the buffer has length zero */
1543         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1544         if (!bufsize || !kvm->mm->context.use_cmma) {
1545                 memset(args, 0, sizeof(*args));
1546                 return 0;
1547         }
1548
1549         if (!peek) {
1550                 /* We are not peeking, and there are no dirty pages */
1551                 if (!atomic64_read(&s->dirty_pages)) {
1552                         memset(args, 0, sizeof(*args));
1553                         return 0;
1554                 }
1555                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1556                                     args->start_gfn);
1557                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1558                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1559                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1560                         memset(args, 0, sizeof(*args));
1561                         return 0;
1562                 }
1563                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1564         }
1565
1566         res = vmalloc(bufsize);
1567         if (!res)
1568                 return -ENOMEM;
1569
1570         args->start_gfn = cur;
1571
1572         down_read(&kvm->mm->mmap_sem);
1573         srcu_idx = srcu_read_lock(&kvm->srcu);
1574         while (i < bufsize) {
1575                 hva = gfn_to_hva(kvm, cur);
1576                 if (kvm_is_error_hva(hva)) {
1577                         r = -EFAULT;
1578                         break;
1579                 }
1580                 /* decrement only if we actually flipped the bit to 0 */
1581                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1582                         atomic64_dec(&s->dirty_pages);
1583                 r = get_pgste(kvm->mm, hva, &pgstev);
1584                 if (r < 0)
1585                         pgstev = 0;
1586                 /* save the value */
1587                 res[i++] = (pgstev >> 24) & 0x43;
1588                 /*
1589                  * if the next bit is too far away, stop.
1590                  * if we reached the previous "next", find the next one
1591                  */
1592                 if (!peek) {
1593                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1594                                 break;
1595                         if (cur == next)
1596                                 next = find_next_bit(s->pgste_bitmap,
1597                                                      s->bitmap_size, cur + 1);
1598                 /* reached the end of the bitmap or of the buffer, stop */
1599                         if ((next >= s->bitmap_size) ||
1600                             (next >= args->start_gfn + bufsize))
1601                                 break;
1602                 }
1603                 cur++;
1604         }
1605         srcu_read_unlock(&kvm->srcu, srcu_idx);
1606         up_read(&kvm->mm->mmap_sem);
1607         args->count = i;
1608         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1609
1610         rr = copy_to_user((void __user *)args->values, res, args->count);
1611         if (rr)
1612                 r = -EFAULT;
1613
1614         vfree(res);
1615         return r;
1616 }
1617
1618 /*
1619  * This function sets the CMMA attributes for the given pages. If the input
1620  * buffer has zero length, no action is taken, otherwise the attributes are
1621  * set and the mm->context.use_cmma flag is set.
1622  */
1623 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1624                                   const struct kvm_s390_cmma_log *args)
1625 {
1626         unsigned long hva, mask, pgstev, i;
1627         uint8_t *bits;
1628         int srcu_idx, r = 0;
1629
1630         mask = args->mask;
1631
1632         if (!kvm->arch.use_cmma)
1633                 return -ENXIO;
1634         /* invalid/unsupported flags */
1635         if (args->flags != 0)
1636                 return -EINVAL;
1637         /* Enforce sane limit on memory allocation */
1638         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1639                 return -EINVAL;
1640         /* Nothing to do */
1641         if (args->count == 0)
1642                 return 0;
1643
1644         bits = vmalloc(sizeof(*bits) * args->count);
1645         if (!bits)
1646                 return -ENOMEM;
1647
1648         r = copy_from_user(bits, (void __user *)args->values, args->count);
1649         if (r) {
1650                 r = -EFAULT;
1651                 goto out;
1652         }
1653
1654         down_read(&kvm->mm->mmap_sem);
1655         srcu_idx = srcu_read_lock(&kvm->srcu);
1656         for (i = 0; i < args->count; i++) {
1657                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1658                 if (kvm_is_error_hva(hva)) {
1659                         r = -EFAULT;
1660                         break;
1661                 }
1662
1663                 pgstev = bits[i];
1664                 pgstev = pgstev << 24;
1665                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1666                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1667         }
1668         srcu_read_unlock(&kvm->srcu, srcu_idx);
1669         up_read(&kvm->mm->mmap_sem);
1670
1671         if (!kvm->mm->context.use_cmma) {
1672                 down_write(&kvm->mm->mmap_sem);
1673                 kvm->mm->context.use_cmma = 1;
1674                 up_write(&kvm->mm->mmap_sem);
1675         }
1676 out:
1677         vfree(bits);
1678         return r;
1679 }
1680
1681 long kvm_arch_vm_ioctl(struct file *filp,
1682                        unsigned int ioctl, unsigned long arg)
1683 {
1684         struct kvm *kvm = filp->private_data;
1685         void __user *argp = (void __user *)arg;
1686         struct kvm_device_attr attr;
1687         int r;
1688
1689         switch (ioctl) {
1690         case KVM_S390_INTERRUPT: {
1691                 struct kvm_s390_interrupt s390int;
1692
1693                 r = -EFAULT;
1694                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1695                         break;
1696                 r = kvm_s390_inject_vm(kvm, &s390int);
1697                 break;
1698         }
1699         case KVM_ENABLE_CAP: {
1700                 struct kvm_enable_cap cap;
1701                 r = -EFAULT;
1702                 if (copy_from_user(&cap, argp, sizeof(cap)))
1703                         break;
1704                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1705                 break;
1706         }
1707         case KVM_CREATE_IRQCHIP: {
1708                 struct kvm_irq_routing_entry routing;
1709
1710                 r = -EINVAL;
1711                 if (kvm->arch.use_irqchip) {
1712                         /* Set up dummy routing. */
1713                         memset(&routing, 0, sizeof(routing));
1714                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1715                 }
1716                 break;
1717         }
1718         case KVM_SET_DEVICE_ATTR: {
1719                 r = -EFAULT;
1720                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1721                         break;
1722                 r = kvm_s390_vm_set_attr(kvm, &attr);
1723                 break;
1724         }
1725         case KVM_GET_DEVICE_ATTR: {
1726                 r = -EFAULT;
1727                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1728                         break;
1729                 r = kvm_s390_vm_get_attr(kvm, &attr);
1730                 break;
1731         }
1732         case KVM_HAS_DEVICE_ATTR: {
1733                 r = -EFAULT;
1734                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1735                         break;
1736                 r = kvm_s390_vm_has_attr(kvm, &attr);
1737                 break;
1738         }
1739         case KVM_S390_GET_SKEYS: {
1740                 struct kvm_s390_skeys args;
1741
1742                 r = -EFAULT;
1743                 if (copy_from_user(&args, argp,
1744                                    sizeof(struct kvm_s390_skeys)))
1745                         break;
1746                 r = kvm_s390_get_skeys(kvm, &args);
1747                 break;
1748         }
1749         case KVM_S390_SET_SKEYS: {
1750                 struct kvm_s390_skeys args;
1751
1752                 r = -EFAULT;
1753                 if (copy_from_user(&args, argp,
1754                                    sizeof(struct kvm_s390_skeys)))
1755                         break;
1756                 r = kvm_s390_set_skeys(kvm, &args);
1757                 break;
1758         }
1759         case KVM_S390_GET_CMMA_BITS: {
1760                 struct kvm_s390_cmma_log args;
1761
1762                 r = -EFAULT;
1763                 if (copy_from_user(&args, argp, sizeof(args)))
1764                         break;
1765                 r = kvm_s390_get_cmma_bits(kvm, &args);
1766                 if (!r) {
1767                         r = copy_to_user(argp, &args, sizeof(args));
1768                         if (r)
1769                                 r = -EFAULT;
1770                 }
1771                 break;
1772         }
1773         case KVM_S390_SET_CMMA_BITS: {
1774                 struct kvm_s390_cmma_log args;
1775
1776                 r = -EFAULT;
1777                 if (copy_from_user(&args, argp, sizeof(args)))
1778                         break;
1779                 r = kvm_s390_set_cmma_bits(kvm, &args);
1780                 break;
1781         }
1782         default:
1783                 r = -ENOTTY;
1784         }
1785
1786         return r;
1787 }
1788
1789 static int kvm_s390_query_ap_config(u8 *config)
1790 {
1791         u32 fcn_code = 0x04000000UL;
1792         u32 cc = 0;
1793
1794         memset(config, 0, 128);
1795         asm volatile(
1796                 "lgr 0,%1\n"
1797                 "lgr 2,%2\n"
1798                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1799                 "0: ipm %0\n"
1800                 "srl %0,28\n"
1801                 "1:\n"
1802                 EX_TABLE(0b, 1b)
1803                 : "+r" (cc)
1804                 : "r" (fcn_code), "r" (config)
1805                 : "cc", "0", "2", "memory"
1806         );
1807
1808         return cc;
1809 }
1810
1811 static int kvm_s390_apxa_installed(void)
1812 {
1813         u8 config[128];
1814         int cc;
1815
1816         if (test_facility(12)) {
1817                 cc = kvm_s390_query_ap_config(config);
1818
1819                 if (cc)
1820                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1821                 else
1822                         return config[0] & 0x40;
1823         }
1824
1825         return 0;
1826 }
1827
1828 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1829 {
1830         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1831
1832         if (kvm_s390_apxa_installed())
1833                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1834         else
1835                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1836 }
1837
1838 static u64 kvm_s390_get_initial_cpuid(void)
1839 {
1840         struct cpuid cpuid;
1841
1842         get_cpu_id(&cpuid);
1843         cpuid.version = 0xff;
1844         return *((u64 *) &cpuid);
1845 }
1846
1847 static void kvm_s390_crypto_init(struct kvm *kvm)
1848 {
1849         if (!test_kvm_facility(kvm, 76))
1850                 return;
1851
1852         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1853         kvm_s390_set_crycb_format(kvm);
1854
1855         /* Enable AES/DEA protected key functions by default */
1856         kvm->arch.crypto.aes_kw = 1;
1857         kvm->arch.crypto.dea_kw = 1;
1858         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1859                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1860         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1861                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1862 }
1863
1864 static void sca_dispose(struct kvm *kvm)
1865 {
1866         if (kvm->arch.use_esca)
1867                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1868         else
1869                 free_page((unsigned long)(kvm->arch.sca));
1870         kvm->arch.sca = NULL;
1871 }
1872
1873 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1874 {
1875         gfp_t alloc_flags = GFP_KERNEL;
1876         int i, rc;
1877         char debug_name[16];
1878         static unsigned long sca_offset;
1879
1880         rc = -EINVAL;
1881 #ifdef CONFIG_KVM_S390_UCONTROL
1882         if (type & ~KVM_VM_S390_UCONTROL)
1883                 goto out_err;
1884         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1885                 goto out_err;
1886 #else
1887         if (type)
1888                 goto out_err;
1889 #endif
1890
1891         rc = s390_enable_sie();
1892         if (rc)
1893                 goto out_err;
1894
1895         rc = -ENOMEM;
1896
1897         kvm->arch.use_esca = 0; /* start with basic SCA */
1898         if (!sclp.has_64bscao)
1899                 alloc_flags |= GFP_DMA;
1900         rwlock_init(&kvm->arch.sca_lock);
1901         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1902         if (!kvm->arch.sca)
1903                 goto out_err;
1904         spin_lock(&kvm_lock);
1905         sca_offset += 16;
1906         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1907                 sca_offset = 0;
1908         kvm->arch.sca = (struct bsca_block *)
1909                         ((char *) kvm->arch.sca + sca_offset);
1910         spin_unlock(&kvm_lock);
1911
1912         sprintf(debug_name, "kvm-%u", current->pid);
1913
1914         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1915         if (!kvm->arch.dbf)
1916                 goto out_err;
1917
1918         kvm->arch.sie_page2 =
1919              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1920         if (!kvm->arch.sie_page2)
1921                 goto out_err;
1922
1923         /* Populate the facility mask initially. */
1924         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1925                sizeof(S390_lowcore.stfle_fac_list));
1926         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1927                 if (i < kvm_s390_fac_list_mask_size())
1928                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1929                 else
1930                         kvm->arch.model.fac_mask[i] = 0UL;
1931         }
1932
1933         /* Populate the facility list initially. */
1934         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1935         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1936                S390_ARCH_FAC_LIST_SIZE_BYTE);
1937
1938         /* we are always in czam mode - even on pre z14 machines */
1939         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1940         set_kvm_facility(kvm->arch.model.fac_list, 138);
1941         /* we emulate STHYI in kvm */
1942         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1943         set_kvm_facility(kvm->arch.model.fac_list, 74);
1944         if (MACHINE_HAS_TLB_GUEST) {
1945                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1946                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1947         }
1948
1949         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1950         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1951
1952         kvm_s390_crypto_init(kvm);
1953
1954         mutex_init(&kvm->arch.float_int.ais_lock);
1955         kvm->arch.float_int.simm = 0;
1956         kvm->arch.float_int.nimm = 0;
1957         spin_lock_init(&kvm->arch.float_int.lock);
1958         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1959                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1960         init_waitqueue_head(&kvm->arch.ipte_wq);
1961         mutex_init(&kvm->arch.ipte_mutex);
1962
1963         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1964         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1965
1966         if (type & KVM_VM_S390_UCONTROL) {
1967                 kvm->arch.gmap = NULL;
1968                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1969         } else {
1970                 if (sclp.hamax == U64_MAX)
1971                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1972                 else
1973                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1974                                                     sclp.hamax + 1);
1975                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1976                 if (!kvm->arch.gmap)
1977                         goto out_err;
1978                 kvm->arch.gmap->private = kvm;
1979                 kvm->arch.gmap->pfault_enabled = 0;
1980         }
1981
1982         kvm->arch.css_support = 0;
1983         kvm->arch.use_irqchip = 0;
1984         kvm->arch.epoch = 0;
1985
1986         spin_lock_init(&kvm->arch.start_stop_lock);
1987         kvm_s390_vsie_init(kvm);
1988         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1989
1990         return 0;
1991 out_err:
1992         free_page((unsigned long)kvm->arch.sie_page2);
1993         debug_unregister(kvm->arch.dbf);
1994         sca_dispose(kvm);
1995         KVM_EVENT(3, "creation of vm failed: %d", rc);
1996         return rc;
1997 }
1998
1999 bool kvm_arch_has_vcpu_debugfs(void)
2000 {
2001         return false;
2002 }
2003
2004 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2005 {
2006         return 0;
2007 }
2008
2009 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2010 {
2011         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2012         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2013         kvm_s390_clear_local_irqs(vcpu);
2014         kvm_clear_async_pf_completion_queue(vcpu);
2015         if (!kvm_is_ucontrol(vcpu->kvm))
2016                 sca_del_vcpu(vcpu);
2017
2018         if (kvm_is_ucontrol(vcpu->kvm))
2019                 gmap_remove(vcpu->arch.gmap);
2020
2021         if (vcpu->kvm->arch.use_cmma)
2022                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2023         free_page((unsigned long)(vcpu->arch.sie_block));
2024
2025         kvm_vcpu_uninit(vcpu);
2026         kmem_cache_free(kvm_vcpu_cache, vcpu);
2027 }
2028
2029 static void kvm_free_vcpus(struct kvm *kvm)
2030 {
2031         unsigned int i;
2032         struct kvm_vcpu *vcpu;
2033
2034         kvm_for_each_vcpu(i, vcpu, kvm)
2035                 kvm_arch_vcpu_destroy(vcpu);
2036
2037         mutex_lock(&kvm->lock);
2038         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2039                 kvm->vcpus[i] = NULL;
2040
2041         atomic_set(&kvm->online_vcpus, 0);
2042         mutex_unlock(&kvm->lock);
2043 }
2044
2045 void kvm_arch_destroy_vm(struct kvm *kvm)
2046 {
2047         kvm_free_vcpus(kvm);
2048         sca_dispose(kvm);
2049         debug_unregister(kvm->arch.dbf);
2050         free_page((unsigned long)kvm->arch.sie_page2);
2051         if (!kvm_is_ucontrol(kvm))
2052                 gmap_remove(kvm->arch.gmap);
2053         kvm_s390_destroy_adapters(kvm);
2054         kvm_s390_clear_float_irqs(kvm);
2055         kvm_s390_vsie_destroy(kvm);
2056         if (kvm->arch.migration_state) {
2057                 vfree(kvm->arch.migration_state->pgste_bitmap);
2058                 kfree(kvm->arch.migration_state);
2059         }
2060         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2061 }
2062
2063 /* Section: vcpu related */
2064 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2065 {
2066         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2067         if (!vcpu->arch.gmap)
2068                 return -ENOMEM;
2069         vcpu->arch.gmap->private = vcpu->kvm;
2070
2071         return 0;
2072 }
2073
2074 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2075 {
2076         if (!kvm_s390_use_sca_entries())
2077                 return;
2078         read_lock(&vcpu->kvm->arch.sca_lock);
2079         if (vcpu->kvm->arch.use_esca) {
2080                 struct esca_block *sca = vcpu->kvm->arch.sca;
2081
2082                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2083                 sca->cpu[vcpu->vcpu_id].sda = 0;
2084         } else {
2085                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2086
2087                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2088                 sca->cpu[vcpu->vcpu_id].sda = 0;
2089         }
2090         read_unlock(&vcpu->kvm->arch.sca_lock);
2091 }
2092
2093 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2094 {
2095         if (!kvm_s390_use_sca_entries()) {
2096                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2097
2098                 /* we still need the basic sca for the ipte control */
2099                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2101         }
2102         read_lock(&vcpu->kvm->arch.sca_lock);
2103         if (vcpu->kvm->arch.use_esca) {
2104                 struct esca_block *sca = vcpu->kvm->arch.sca;
2105
2106                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2109                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2110                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2111         } else {
2112                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2113
2114                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2115                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2116                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2117                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2118         }
2119         read_unlock(&vcpu->kvm->arch.sca_lock);
2120 }
2121
2122 /* Basic SCA to Extended SCA data copy routines */
2123 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2124 {
2125         d->sda = s->sda;
2126         d->sigp_ctrl.c = s->sigp_ctrl.c;
2127         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2128 }
2129
2130 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2131 {
2132         int i;
2133
2134         d->ipte_control = s->ipte_control;
2135         d->mcn[0] = s->mcn;
2136         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2137                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2138 }
2139
2140 static int sca_switch_to_extended(struct kvm *kvm)
2141 {
2142         struct bsca_block *old_sca = kvm->arch.sca;
2143         struct esca_block *new_sca;
2144         struct kvm_vcpu *vcpu;
2145         unsigned int vcpu_idx;
2146         u32 scaol, scaoh;
2147
2148         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2149         if (!new_sca)
2150                 return -ENOMEM;
2151
2152         scaoh = (u32)((u64)(new_sca) >> 32);
2153         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2154
2155         kvm_s390_vcpu_block_all(kvm);
2156         write_lock(&kvm->arch.sca_lock);
2157
2158         sca_copy_b_to_e(new_sca, old_sca);
2159
2160         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2161                 vcpu->arch.sie_block->scaoh = scaoh;
2162                 vcpu->arch.sie_block->scaol = scaol;
2163                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2164         }
2165         kvm->arch.sca = new_sca;
2166         kvm->arch.use_esca = 1;
2167
2168         write_unlock(&kvm->arch.sca_lock);
2169         kvm_s390_vcpu_unblock_all(kvm);
2170
2171         free_page((unsigned long)old_sca);
2172
2173         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2174                  old_sca, kvm->arch.sca);
2175         return 0;
2176 }
2177
2178 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2179 {
2180         int rc;
2181
2182         if (!kvm_s390_use_sca_entries()) {
2183                 if (id < KVM_MAX_VCPUS)
2184                         return true;
2185                 return false;
2186         }
2187         if (id < KVM_S390_BSCA_CPU_SLOTS)
2188                 return true;
2189         if (!sclp.has_esca || !sclp.has_64bscao)
2190                 return false;
2191
2192         mutex_lock(&kvm->lock);
2193         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2194         mutex_unlock(&kvm->lock);
2195
2196         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2197 }
2198
2199 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2200 {
2201         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2202         kvm_clear_async_pf_completion_queue(vcpu);
2203         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2204                                     KVM_SYNC_GPRS |
2205                                     KVM_SYNC_ACRS |
2206                                     KVM_SYNC_CRS |
2207                                     KVM_SYNC_ARCH0 |
2208                                     KVM_SYNC_PFAULT;
2209         kvm_s390_set_prefix(vcpu, 0);
2210         if (test_kvm_facility(vcpu->kvm, 64))
2211                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2212         if (test_kvm_facility(vcpu->kvm, 133))
2213                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2214         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2215          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2216          */
2217         if (MACHINE_HAS_VX)
2218                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2219         else
2220                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2221
2222         if (kvm_is_ucontrol(vcpu->kvm))
2223                 return __kvm_ucontrol_vcpu_init(vcpu);
2224
2225         return 0;
2226 }
2227
2228 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2229 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2230 {
2231         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2232         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2233         vcpu->arch.cputm_start = get_tod_clock_fast();
2234         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2235 }
2236
2237 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2238 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2239 {
2240         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2241         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2242         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2243         vcpu->arch.cputm_start = 0;
2244         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2245 }
2246
2247 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2248 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2249 {
2250         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2251         vcpu->arch.cputm_enabled = true;
2252         __start_cpu_timer_accounting(vcpu);
2253 }
2254
2255 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2256 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2257 {
2258         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2259         __stop_cpu_timer_accounting(vcpu);
2260         vcpu->arch.cputm_enabled = false;
2261 }
2262
2263 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2264 {
2265         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2266         __enable_cpu_timer_accounting(vcpu);
2267         preempt_enable();
2268 }
2269
2270 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2271 {
2272         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2273         __disable_cpu_timer_accounting(vcpu);
2274         preempt_enable();
2275 }
2276
2277 /* set the cpu timer - may only be called from the VCPU thread itself */
2278 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2279 {
2280         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2281         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2282         if (vcpu->arch.cputm_enabled)
2283                 vcpu->arch.cputm_start = get_tod_clock_fast();
2284         vcpu->arch.sie_block->cputm = cputm;
2285         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2286         preempt_enable();
2287 }
2288
2289 /* update and get the cpu timer - can also be called from other VCPU threads */
2290 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2291 {
2292         unsigned int seq;
2293         __u64 value;
2294
2295         if (unlikely(!vcpu->arch.cputm_enabled))
2296                 return vcpu->arch.sie_block->cputm;
2297
2298         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2299         do {
2300                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2301                 /*
2302                  * If the writer would ever execute a read in the critical
2303                  * section, e.g. in irq context, we have a deadlock.
2304                  */
2305                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2306                 value = vcpu->arch.sie_block->cputm;
2307                 /* if cputm_start is 0, accounting is being started/stopped */
2308                 if (likely(vcpu->arch.cputm_start))
2309                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2310         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2311         preempt_enable();
2312         return value;
2313 }
2314
2315 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2316 {
2317
2318         gmap_enable(vcpu->arch.enabled_gmap);
2319         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2320         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2321                 __start_cpu_timer_accounting(vcpu);
2322         vcpu->cpu = cpu;
2323 }
2324
2325 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2326 {
2327         vcpu->cpu = -1;
2328         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2329                 __stop_cpu_timer_accounting(vcpu);
2330         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2331         vcpu->arch.enabled_gmap = gmap_get_enabled();
2332         gmap_disable(vcpu->arch.enabled_gmap);
2333
2334 }
2335
2336 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2337 {
2338         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2339         vcpu->arch.sie_block->gpsw.mask = 0UL;
2340         vcpu->arch.sie_block->gpsw.addr = 0UL;
2341         kvm_s390_set_prefix(vcpu, 0);
2342         kvm_s390_set_cpu_timer(vcpu, 0);
2343         vcpu->arch.sie_block->ckc       = 0UL;
2344         vcpu->arch.sie_block->todpr     = 0;
2345         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2346         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2347         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2348         /* make sure the new fpc will be lazily loaded */
2349         save_fpu_regs();
2350         current->thread.fpu.fpc = 0;
2351         vcpu->arch.sie_block->gbea = 1;
2352         vcpu->arch.sie_block->pp = 0;
2353         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2354         kvm_clear_async_pf_completion_queue(vcpu);
2355         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2356                 kvm_s390_vcpu_stop(vcpu);
2357         kvm_s390_clear_local_irqs(vcpu);
2358 }
2359
2360 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2361 {
2362         mutex_lock(&vcpu->kvm->lock);
2363         preempt_disable();
2364         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2365         preempt_enable();
2366         mutex_unlock(&vcpu->kvm->lock);
2367         if (!kvm_is_ucontrol(vcpu->kvm)) {
2368                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2369                 sca_add_vcpu(vcpu);
2370         }
2371         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2372                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2373         /* make vcpu_load load the right gmap on the first trigger */
2374         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2375 }
2376
2377 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2378 {
2379         if (!test_kvm_facility(vcpu->kvm, 76))
2380                 return;
2381
2382         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2383
2384         if (vcpu->kvm->arch.crypto.aes_kw)
2385                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2386         if (vcpu->kvm->arch.crypto.dea_kw)
2387                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2388
2389         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2390 }
2391
2392 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2393 {
2394         free_page(vcpu->arch.sie_block->cbrlo);
2395         vcpu->arch.sie_block->cbrlo = 0;
2396 }
2397
2398 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2399 {
2400         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2401         if (!vcpu->arch.sie_block->cbrlo)
2402                 return -ENOMEM;
2403
2404         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2405         return 0;
2406 }
2407
2408 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2409 {
2410         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2411
2412         vcpu->arch.sie_block->ibc = model->ibc;
2413         if (test_kvm_facility(vcpu->kvm, 7))
2414                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2415 }
2416
2417 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2418 {
2419         int rc = 0;
2420
2421         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2422                                                     CPUSTAT_SM |
2423                                                     CPUSTAT_STOPPED);
2424
2425         if (test_kvm_facility(vcpu->kvm, 78))
2426                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2427         else if (test_kvm_facility(vcpu->kvm, 8))
2428                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2429
2430         kvm_s390_vcpu_setup_model(vcpu);
2431
2432         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2433         if (MACHINE_HAS_ESOP)
2434                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2435         if (test_kvm_facility(vcpu->kvm, 9))
2436                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2437         if (test_kvm_facility(vcpu->kvm, 73))
2438                 vcpu->arch.sie_block->ecb |= ECB_TE;
2439
2440         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2441                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2442         if (test_kvm_facility(vcpu->kvm, 130))
2443                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2444         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2445         if (sclp.has_cei)
2446                 vcpu->arch.sie_block->eca |= ECA_CEI;
2447         if (sclp.has_ib)
2448                 vcpu->arch.sie_block->eca |= ECA_IB;
2449         if (sclp.has_siif)
2450                 vcpu->arch.sie_block->eca |= ECA_SII;
2451         if (sclp.has_sigpif)
2452                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2453         if (test_kvm_facility(vcpu->kvm, 129)) {
2454                 vcpu->arch.sie_block->eca |= ECA_VX;
2455                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2456         }
2457         if (test_kvm_facility(vcpu->kvm, 139))
2458                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2459
2460         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2461                                         | SDNXC;
2462         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2463
2464         if (sclp.has_kss)
2465                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2466         else
2467                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2468
2469         if (vcpu->kvm->arch.use_cmma) {
2470                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2471                 if (rc)
2472                         return rc;
2473         }
2474         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2475         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2476
2477         kvm_s390_vcpu_crypto_setup(vcpu);
2478
2479         return rc;
2480 }
2481
2482 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2483                                       unsigned int id)
2484 {
2485         struct kvm_vcpu *vcpu;
2486         struct sie_page *sie_page;
2487         int rc = -EINVAL;
2488
2489         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2490                 goto out;
2491
2492         rc = -ENOMEM;
2493
2494         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2495         if (!vcpu)
2496                 goto out;
2497
2498         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2499         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2500         if (!sie_page)
2501                 goto out_free_cpu;
2502
2503         vcpu->arch.sie_block = &sie_page->sie_block;
2504         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2505
2506         /* the real guest size will always be smaller than msl */
2507         vcpu->arch.sie_block->mso = 0;
2508         vcpu->arch.sie_block->msl = sclp.hamax;
2509
2510         vcpu->arch.sie_block->icpua = id;
2511         spin_lock_init(&vcpu->arch.local_int.lock);
2512         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2513         vcpu->arch.local_int.wq = &vcpu->wq;
2514         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2515         seqcount_init(&vcpu->arch.cputm_seqcount);
2516
2517         rc = kvm_vcpu_init(vcpu, kvm, id);
2518         if (rc)
2519                 goto out_free_sie_block;
2520         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2521                  vcpu->arch.sie_block);
2522         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2523
2524         return vcpu;
2525 out_free_sie_block:
2526         free_page((unsigned long)(vcpu->arch.sie_block));
2527 out_free_cpu:
2528         kmem_cache_free(kvm_vcpu_cache, vcpu);
2529 out:
2530         return ERR_PTR(rc);
2531 }
2532
2533 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2534 {
2535         return kvm_s390_vcpu_has_irq(vcpu, 0);
2536 }
2537
2538 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2539 {
2540         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2541 }
2542
2543 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2544 {
2545         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2546         exit_sie(vcpu);
2547 }
2548
2549 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2550 {
2551         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2552 }
2553
2554 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2555 {
2556         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2557         exit_sie(vcpu);
2558 }
2559
2560 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2561 {
2562         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2563 }
2564
2565 /*
2566  * Kick a guest cpu out of SIE and wait until SIE is not running.
2567  * If the CPU is not running (e.g. waiting as idle) the function will
2568  * return immediately. */
2569 void exit_sie(struct kvm_vcpu *vcpu)
2570 {
2571         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2572         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2573                 cpu_relax();
2574 }
2575
2576 /* Kick a guest cpu out of SIE to process a request synchronously */
2577 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2578 {
2579         kvm_make_request(req, vcpu);
2580         kvm_s390_vcpu_request(vcpu);
2581 }
2582
2583 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2584                               unsigned long end)
2585 {
2586         struct kvm *kvm = gmap->private;
2587         struct kvm_vcpu *vcpu;
2588         unsigned long prefix;
2589         int i;
2590
2591         if (gmap_is_shadow(gmap))
2592                 return;
2593         if (start >= 1UL << 31)
2594                 /* We are only interested in prefix pages */
2595                 return;
2596         kvm_for_each_vcpu(i, vcpu, kvm) {
2597                 /* match against both prefix pages */
2598                 prefix = kvm_s390_get_prefix(vcpu);
2599                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2600                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2601                                    start, end);
2602                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2603                 }
2604         }
2605 }
2606
2607 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2608 {
2609         /* kvm common code refers to this, but never calls it */
2610         BUG();
2611         return 0;
2612 }
2613
2614 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2615                                            struct kvm_one_reg *reg)
2616 {
2617         int r = -EINVAL;
2618
2619         switch (reg->id) {
2620         case KVM_REG_S390_TODPR:
2621                 r = put_user(vcpu->arch.sie_block->todpr,
2622                              (u32 __user *)reg->addr);
2623                 break;
2624         case KVM_REG_S390_EPOCHDIFF:
2625                 r = put_user(vcpu->arch.sie_block->epoch,
2626                              (u64 __user *)reg->addr);
2627                 break;
2628         case KVM_REG_S390_CPU_TIMER:
2629                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2630                              (u64 __user *)reg->addr);
2631                 break;
2632         case KVM_REG_S390_CLOCK_COMP:
2633                 r = put_user(vcpu->arch.sie_block->ckc,
2634                              (u64 __user *)reg->addr);
2635                 break;
2636         case KVM_REG_S390_PFTOKEN:
2637                 r = put_user(vcpu->arch.pfault_token,
2638                              (u64 __user *)reg->addr);
2639                 break;
2640         case KVM_REG_S390_PFCOMPARE:
2641                 r = put_user(vcpu->arch.pfault_compare,
2642                              (u64 __user *)reg->addr);
2643                 break;
2644         case KVM_REG_S390_PFSELECT:
2645                 r = put_user(vcpu->arch.pfault_select,
2646                              (u64 __user *)reg->addr);
2647                 break;
2648         case KVM_REG_S390_PP:
2649                 r = put_user(vcpu->arch.sie_block->pp,
2650                              (u64 __user *)reg->addr);
2651                 break;
2652         case KVM_REG_S390_GBEA:
2653                 r = put_user(vcpu->arch.sie_block->gbea,
2654                              (u64 __user *)reg->addr);
2655                 break;
2656         default:
2657                 break;
2658         }
2659
2660         return r;
2661 }
2662
2663 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2664                                            struct kvm_one_reg *reg)
2665 {
2666         int r = -EINVAL;
2667         __u64 val;
2668
2669         switch (reg->id) {
2670         case KVM_REG_S390_TODPR:
2671                 r = get_user(vcpu->arch.sie_block->todpr,
2672                              (u32 __user *)reg->addr);
2673                 break;
2674         case KVM_REG_S390_EPOCHDIFF:
2675                 r = get_user(vcpu->arch.sie_block->epoch,
2676                              (u64 __user *)reg->addr);
2677                 break;
2678         case KVM_REG_S390_CPU_TIMER:
2679                 r = get_user(val, (u64 __user *)reg->addr);
2680                 if (!r)
2681                         kvm_s390_set_cpu_timer(vcpu, val);
2682                 break;
2683         case KVM_REG_S390_CLOCK_COMP:
2684                 r = get_user(vcpu->arch.sie_block->ckc,
2685                              (u64 __user *)reg->addr);
2686                 break;
2687         case KVM_REG_S390_PFTOKEN:
2688                 r = get_user(vcpu->arch.pfault_token,
2689                              (u64 __user *)reg->addr);
2690                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2691                         kvm_clear_async_pf_completion_queue(vcpu);
2692                 break;
2693         case KVM_REG_S390_PFCOMPARE:
2694                 r = get_user(vcpu->arch.pfault_compare,
2695                              (u64 __user *)reg->addr);
2696                 break;
2697         case KVM_REG_S390_PFSELECT:
2698                 r = get_user(vcpu->arch.pfault_select,
2699                              (u64 __user *)reg->addr);
2700                 break;
2701         case KVM_REG_S390_PP:
2702                 r = get_user(vcpu->arch.sie_block->pp,
2703                              (u64 __user *)reg->addr);
2704                 break;
2705         case KVM_REG_S390_GBEA:
2706                 r = get_user(vcpu->arch.sie_block->gbea,
2707                              (u64 __user *)reg->addr);
2708                 break;
2709         default:
2710                 break;
2711         }
2712
2713         return r;
2714 }
2715
2716 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2717 {
2718         kvm_s390_vcpu_initial_reset(vcpu);
2719         return 0;
2720 }
2721
2722 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2723 {
2724         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2725         return 0;
2726 }
2727
2728 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2729 {
2730         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2731         return 0;
2732 }
2733
2734 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2735                                   struct kvm_sregs *sregs)
2736 {
2737         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2738         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2739         return 0;
2740 }
2741
2742 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2743                                   struct kvm_sregs *sregs)
2744 {
2745         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2746         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2747         return 0;
2748 }
2749
2750 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2751 {
2752         if (test_fp_ctl(fpu->fpc))
2753                 return -EINVAL;
2754         vcpu->run->s.regs.fpc = fpu->fpc;
2755         if (MACHINE_HAS_VX)
2756                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2757                                  (freg_t *) fpu->fprs);
2758         else
2759                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2760         return 0;
2761 }
2762
2763 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2764 {
2765         /* make sure we have the latest values */
2766         save_fpu_regs();
2767         if (MACHINE_HAS_VX)
2768                 convert_vx_to_fp((freg_t *) fpu->fprs,
2769                                  (__vector128 *) vcpu->run->s.regs.vrs);
2770         else
2771                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2772         fpu->fpc = vcpu->run->s.regs.fpc;
2773         return 0;
2774 }
2775
2776 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2777 {
2778         int rc = 0;
2779
2780         if (!is_vcpu_stopped(vcpu))
2781                 rc = -EBUSY;
2782         else {
2783                 vcpu->run->psw_mask = psw.mask;
2784                 vcpu->run->psw_addr = psw.addr;
2785         }
2786         return rc;
2787 }
2788
2789 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2790                                   struct kvm_translation *tr)
2791 {
2792         return -EINVAL; /* not implemented yet */
2793 }
2794
2795 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2796                               KVM_GUESTDBG_USE_HW_BP | \
2797                               KVM_GUESTDBG_ENABLE)
2798
2799 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2800                                         struct kvm_guest_debug *dbg)
2801 {
2802         int rc = 0;
2803
2804         vcpu->guest_debug = 0;
2805         kvm_s390_clear_bp_data(vcpu);
2806
2807         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2808                 return -EINVAL;
2809         if (!sclp.has_gpere)
2810                 return -EINVAL;
2811
2812         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2813                 vcpu->guest_debug = dbg->control;
2814                 /* enforce guest PER */
2815                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2816
2817                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2818                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2819         } else {
2820                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2821                 vcpu->arch.guestdbg.last_bp = 0;
2822         }
2823
2824         if (rc) {
2825                 vcpu->guest_debug = 0;
2826                 kvm_s390_clear_bp_data(vcpu);
2827                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2828         }
2829
2830         return rc;
2831 }
2832
2833 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2834                                     struct kvm_mp_state *mp_state)
2835 {
2836         /* CHECK_STOP and LOAD are not supported yet */
2837         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2838                                        KVM_MP_STATE_OPERATING;
2839 }
2840
2841 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2842                                     struct kvm_mp_state *mp_state)
2843 {
2844         int rc = 0;
2845
2846         /* user space knows about this interface - let it control the state */
2847         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2848
2849         switch (mp_state->mp_state) {
2850         case KVM_MP_STATE_STOPPED:
2851                 kvm_s390_vcpu_stop(vcpu);
2852                 break;
2853         case KVM_MP_STATE_OPERATING:
2854                 kvm_s390_vcpu_start(vcpu);
2855                 break;
2856         case KVM_MP_STATE_LOAD:
2857         case KVM_MP_STATE_CHECK_STOP:
2858                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2859         default:
2860                 rc = -ENXIO;
2861         }
2862
2863         return rc;
2864 }
2865
2866 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2867 {
2868         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2869 }
2870
2871 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2872 {
2873 retry:
2874         kvm_s390_vcpu_request_handled(vcpu);
2875         if (!kvm_request_pending(vcpu))
2876                 return 0;
2877         /*
2878          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2879          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2880          * This ensures that the ipte instruction for this request has
2881          * already finished. We might race against a second unmapper that
2882          * wants to set the blocking bit. Lets just retry the request loop.
2883          */
2884         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2885                 int rc;
2886                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2887                                           kvm_s390_get_prefix(vcpu),
2888                                           PAGE_SIZE * 2, PROT_WRITE);
2889                 if (rc) {
2890                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2891                         return rc;
2892                 }
2893                 goto retry;
2894         }
2895
2896         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2897                 vcpu->arch.sie_block->ihcpu = 0xffff;
2898                 goto retry;
2899         }
2900
2901         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2902                 if (!ibs_enabled(vcpu)) {
2903                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2904                         atomic_or(CPUSTAT_IBS,
2905                                         &vcpu->arch.sie_block->cpuflags);
2906                 }
2907                 goto retry;
2908         }
2909
2910         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2911                 if (ibs_enabled(vcpu)) {
2912                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2913                         atomic_andnot(CPUSTAT_IBS,
2914                                           &vcpu->arch.sie_block->cpuflags);
2915                 }
2916                 goto retry;
2917         }
2918
2919         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2920                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2921                 goto retry;
2922         }
2923
2924         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2925                 /*
2926                  * Disable CMMA virtualization; we will emulate the ESSA
2927                  * instruction manually, in order to provide additional
2928                  * functionalities needed for live migration.
2929                  */
2930                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2931                 goto retry;
2932         }
2933
2934         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2935                 /*
2936                  * Re-enable CMMA virtualization if CMMA is available and
2937                  * was used.
2938                  */
2939                 if ((vcpu->kvm->arch.use_cmma) &&
2940                     (vcpu->kvm->mm->context.use_cmma))
2941                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2942                 goto retry;
2943         }
2944
2945         /* nothing to do, just clear the request */
2946         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2947
2948         return 0;
2949 }
2950
2951 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2952                                  const struct kvm_s390_vm_tod_clock *gtod)
2953 {
2954         struct kvm_vcpu *vcpu;
2955         struct kvm_s390_tod_clock_ext htod;
2956         int i;
2957
2958         mutex_lock(&kvm->lock);
2959         preempt_disable();
2960
2961         get_tod_clock_ext((char *)&htod);
2962
2963         kvm->arch.epoch = gtod->tod - htod.tod;
2964         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2965
2966         if (kvm->arch.epoch > gtod->tod)
2967                 kvm->arch.epdx -= 1;
2968
2969         kvm_s390_vcpu_block_all(kvm);
2970         kvm_for_each_vcpu(i, vcpu, kvm) {
2971                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2972                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2973         }
2974
2975         kvm_s390_vcpu_unblock_all(kvm);
2976         preempt_enable();
2977         mutex_unlock(&kvm->lock);
2978 }
2979
2980 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2981 {
2982         struct kvm_vcpu *vcpu;
2983         int i;
2984
2985         mutex_lock(&kvm->lock);
2986         preempt_disable();
2987         kvm->arch.epoch = tod - get_tod_clock();
2988         kvm_s390_vcpu_block_all(kvm);
2989         kvm_for_each_vcpu(i, vcpu, kvm)
2990                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2991         kvm_s390_vcpu_unblock_all(kvm);
2992         preempt_enable();
2993         mutex_unlock(&kvm->lock);
2994 }
2995
2996 /**
2997  * kvm_arch_fault_in_page - fault-in guest page if necessary
2998  * @vcpu: The corresponding virtual cpu
2999  * @gpa: Guest physical address
3000  * @writable: Whether the page should be writable or not
3001  *
3002  * Make sure that a guest page has been faulted-in on the host.
3003  *
3004  * Return: Zero on success, negative error code otherwise.
3005  */
3006 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3007 {
3008         return gmap_fault(vcpu->arch.gmap, gpa,
3009                           writable ? FAULT_FLAG_WRITE : 0);
3010 }
3011
3012 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3013                                       unsigned long token)
3014 {
3015         struct kvm_s390_interrupt inti;
3016         struct kvm_s390_irq irq;
3017
3018         if (start_token) {
3019                 irq.u.ext.ext_params2 = token;
3020                 irq.type = KVM_S390_INT_PFAULT_INIT;
3021                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3022         } else {
3023                 inti.type = KVM_S390_INT_PFAULT_DONE;
3024                 inti.parm64 = token;
3025                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3026         }
3027 }
3028
3029 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3030                                      struct kvm_async_pf *work)
3031 {
3032         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3033         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3034 }
3035
3036 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3037                                  struct kvm_async_pf *work)
3038 {
3039         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3040         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3041 }
3042
3043 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3044                                struct kvm_async_pf *work)
3045 {
3046         /* s390 will always inject the page directly */
3047 }
3048
3049 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3050 {
3051         /*
3052          * s390 will always inject the page directly,
3053          * but we still want check_async_completion to cleanup
3054          */
3055         return true;
3056 }
3057
3058 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3059 {
3060         hva_t hva;
3061         struct kvm_arch_async_pf arch;
3062         int rc;
3063
3064         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3065                 return 0;
3066         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3067             vcpu->arch.pfault_compare)
3068                 return 0;
3069         if (psw_extint_disabled(vcpu))
3070                 return 0;
3071         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3072                 return 0;
3073         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3074                 return 0;
3075         if (!vcpu->arch.gmap->pfault_enabled)
3076                 return 0;
3077
3078         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3079         hva += current->thread.gmap_addr & ~PAGE_MASK;
3080         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3081                 return 0;
3082
3083         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3084         return rc;
3085 }
3086
3087 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3088 {
3089         int rc, cpuflags;
3090
3091         /*
3092          * On s390 notifications for arriving pages will be delivered directly
3093          * to the guest but the house keeping for completed pfaults is
3094          * handled outside the worker.
3095          */
3096         kvm_check_async_pf_completion(vcpu);
3097
3098         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3099         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3100
3101         if (need_resched())
3102                 schedule();
3103
3104         if (test_cpu_flag(CIF_MCCK_PENDING))
3105                 s390_handle_mcck();
3106
3107         if (!kvm_is_ucontrol(vcpu->kvm)) {
3108                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3109                 if (rc)
3110                         return rc;
3111         }
3112
3113         rc = kvm_s390_handle_requests(vcpu);
3114         if (rc)
3115                 return rc;
3116
3117         if (guestdbg_enabled(vcpu)) {
3118                 kvm_s390_backup_guest_per_regs(vcpu);
3119                 kvm_s390_patch_guest_per_regs(vcpu);
3120         }
3121
3122         vcpu->arch.sie_block->icptcode = 0;
3123         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3124         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3125         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3126
3127         return 0;
3128 }
3129
3130 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3131 {
3132         struct kvm_s390_pgm_info pgm_info = {
3133                 .code = PGM_ADDRESSING,
3134         };
3135         u8 opcode, ilen;
3136         int rc;
3137
3138         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3139         trace_kvm_s390_sie_fault(vcpu);
3140
3141         /*
3142          * We want to inject an addressing exception, which is defined as a
3143          * suppressing or terminating exception. However, since we came here
3144          * by a DAT access exception, the PSW still points to the faulting
3145          * instruction since DAT exceptions are nullifying. So we've got
3146          * to look up the current opcode to get the length of the instruction
3147          * to be able to forward the PSW.
3148          */
3149         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3150         ilen = insn_length(opcode);
3151         if (rc < 0) {
3152                 return rc;
3153         } else if (rc) {
3154                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3155                  * Forward by arbitrary ilc, injection will take care of
3156                  * nullification if necessary.
3157                  */
3158                 pgm_info = vcpu->arch.pgm;
3159                 ilen = 4;
3160         }
3161         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3162         kvm_s390_forward_psw(vcpu, ilen);
3163         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3164 }
3165
3166 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3167 {
3168         struct mcck_volatile_info *mcck_info;
3169         struct sie_page *sie_page;
3170
3171         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3172                    vcpu->arch.sie_block->icptcode);
3173         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3174
3175         if (guestdbg_enabled(vcpu))
3176                 kvm_s390_restore_guest_per_regs(vcpu);
3177
3178         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3179         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3180
3181         if (exit_reason == -EINTR) {
3182                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3183                 sie_page = container_of(vcpu->arch.sie_block,
3184                                         struct sie_page, sie_block);
3185                 mcck_info = &sie_page->mcck_info;
3186                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3187                 return 0;
3188         }
3189
3190         if (vcpu->arch.sie_block->icptcode > 0) {
3191                 int rc = kvm_handle_sie_intercept(vcpu);
3192
3193                 if (rc != -EOPNOTSUPP)
3194                         return rc;
3195                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3196                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3197                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3198                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3199                 return -EREMOTE;
3200         } else if (exit_reason != -EFAULT) {
3201                 vcpu->stat.exit_null++;
3202                 return 0;
3203         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3204                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3205                 vcpu->run->s390_ucontrol.trans_exc_code =
3206                                                 current->thread.gmap_addr;
3207                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3208                 return -EREMOTE;
3209         } else if (current->thread.gmap_pfault) {
3210                 trace_kvm_s390_major_guest_pfault(vcpu);
3211                 current->thread.gmap_pfault = 0;
3212                 if (kvm_arch_setup_async_pf(vcpu))
3213                         return 0;
3214                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3215         }
3216         return vcpu_post_run_fault_in_sie(vcpu);
3217 }
3218
3219 static int __vcpu_run(struct kvm_vcpu *vcpu)
3220 {
3221         int rc, exit_reason;
3222
3223         /*
3224          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3225          * ning the guest), so that memslots (and other stuff) are protected
3226          */
3227         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3228
3229         do {
3230                 rc = vcpu_pre_run(vcpu);
3231                 if (rc)
3232                         break;
3233
3234                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3235                 /*
3236                  * As PF_VCPU will be used in fault handler, between
3237                  * guest_enter and guest_exit should be no uaccess.
3238                  */
3239                 local_irq_disable();
3240                 guest_enter_irqoff();
3241                 __disable_cpu_timer_accounting(vcpu);
3242                 local_irq_enable();
3243                 exit_reason = sie64a(vcpu->arch.sie_block,
3244                                      vcpu->run->s.regs.gprs);
3245                 local_irq_disable();
3246                 __enable_cpu_timer_accounting(vcpu);
3247                 guest_exit_irqoff();
3248                 local_irq_enable();
3249                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3250
3251                 rc = vcpu_post_run(vcpu, exit_reason);
3252         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3253
3254         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3255         return rc;
3256 }
3257
3258 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3259 {
3260         struct runtime_instr_cb *riccb;
3261         struct gs_cb *gscb;
3262
3263         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3264         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3265         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3266         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3267         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3268                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3269         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3270                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3271                 /* some control register changes require a tlb flush */
3272                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3273         }
3274         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3275                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3276                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3277                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3278                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3279                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3280         }
3281         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3282                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3283                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3284                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3285                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3286                         kvm_clear_async_pf_completion_queue(vcpu);
3287         }
3288         /*
3289          * If userspace sets the riccb (e.g. after migration) to a valid state,
3290          * we should enable RI here instead of doing the lazy enablement.
3291          */
3292         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3293             test_kvm_facility(vcpu->kvm, 64) &&
3294             riccb->v &&
3295             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3296                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3297                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3298         }
3299         /*
3300          * If userspace sets the gscb (e.g. after migration) to non-zero,
3301          * we should enable GS here instead of doing the lazy enablement.
3302          */
3303         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3304             test_kvm_facility(vcpu->kvm, 133) &&
3305             gscb->gssm &&
3306             !vcpu->arch.gs_enabled) {
3307                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3308                 vcpu->arch.sie_block->ecb |= ECB_GS;
3309                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3310                 vcpu->arch.gs_enabled = 1;
3311         }
3312         save_access_regs(vcpu->arch.host_acrs);
3313         restore_access_regs(vcpu->run->s.regs.acrs);
3314         /* save host (userspace) fprs/vrs */
3315         save_fpu_regs();
3316         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3317         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3318         if (MACHINE_HAS_VX)
3319                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3320         else
3321                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3322         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3323         if (test_fp_ctl(current->thread.fpu.fpc))
3324                 /* User space provided an invalid FPC, let's clear it */
3325                 current->thread.fpu.fpc = 0;
3326         if (MACHINE_HAS_GS) {
3327                 preempt_disable();
3328                 __ctl_set_bit(2, 4);
3329                 if (current->thread.gs_cb) {
3330                         vcpu->arch.host_gscb = current->thread.gs_cb;
3331                         save_gs_cb(vcpu->arch.host_gscb);
3332                 }
3333                 if (vcpu->arch.gs_enabled) {
3334                         current->thread.gs_cb = (struct gs_cb *)
3335                                                 &vcpu->run->s.regs.gscb;
3336                         restore_gs_cb(current->thread.gs_cb);
3337                 }
3338                 preempt_enable();
3339         }
3340
3341         kvm_run->kvm_dirty_regs = 0;
3342 }
3343
3344 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3345 {
3346         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3347         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3348         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3349         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3350         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3351         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3352         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3353         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3354         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3355         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3356         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3357         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3358         save_access_regs(vcpu->run->s.regs.acrs);
3359         restore_access_regs(vcpu->arch.host_acrs);
3360         /* Save guest register state */
3361         save_fpu_regs();
3362         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3363         /* Restore will be done lazily at return */
3364         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3365         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3366         if (MACHINE_HAS_GS) {
3367                 __ctl_set_bit(2, 4);
3368                 if (vcpu->arch.gs_enabled)
3369                         save_gs_cb(current->thread.gs_cb);
3370                 preempt_disable();
3371                 current->thread.gs_cb = vcpu->arch.host_gscb;
3372                 restore_gs_cb(vcpu->arch.host_gscb);
3373                 preempt_enable();
3374                 if (!vcpu->arch.host_gscb)
3375                         __ctl_clear_bit(2, 4);
3376                 vcpu->arch.host_gscb = NULL;
3377         }
3378
3379 }
3380
3381 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3382 {
3383         int rc;
3384
3385         if (kvm_run->immediate_exit)
3386                 return -EINTR;
3387
3388         if (guestdbg_exit_pending(vcpu)) {
3389                 kvm_s390_prepare_debug_exit(vcpu);
3390                 return 0;
3391         }
3392
3393         kvm_sigset_activate(vcpu);
3394
3395         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3396                 kvm_s390_vcpu_start(vcpu);
3397         } else if (is_vcpu_stopped(vcpu)) {
3398                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3399                                    vcpu->vcpu_id);
3400                 return -EINVAL;
3401         }
3402
3403         sync_regs(vcpu, kvm_run);
3404         enable_cpu_timer_accounting(vcpu);
3405
3406         might_fault();
3407         rc = __vcpu_run(vcpu);
3408
3409         if (signal_pending(current) && !rc) {
3410                 kvm_run->exit_reason = KVM_EXIT_INTR;
3411                 rc = -EINTR;
3412         }
3413
3414         if (guestdbg_exit_pending(vcpu) && !rc)  {
3415                 kvm_s390_prepare_debug_exit(vcpu);
3416                 rc = 0;
3417         }
3418
3419         if (rc == -EREMOTE) {
3420                 /* userspace support is needed, kvm_run has been prepared */
3421                 rc = 0;
3422         }
3423
3424         disable_cpu_timer_accounting(vcpu);
3425         store_regs(vcpu, kvm_run);
3426
3427         kvm_sigset_deactivate(vcpu);
3428
3429         vcpu->stat.exit_userspace++;
3430         return rc;
3431 }
3432
3433 /*
3434  * store status at address
3435  * we use have two special cases:
3436  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3437  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3438  */
3439 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3440 {
3441         unsigned char archmode = 1;
3442         freg_t fprs[NUM_FPRS];
3443         unsigned int px;
3444         u64 clkcomp, cputm;
3445         int rc;
3446
3447         px = kvm_s390_get_prefix(vcpu);
3448         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3449                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3450                         return -EFAULT;
3451                 gpa = 0;
3452         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3453                 if (write_guest_real(vcpu, 163, &archmode, 1))
3454                         return -EFAULT;
3455                 gpa = px;
3456         } else
3457                 gpa -= __LC_FPREGS_SAVE_AREA;
3458
3459         /* manually convert vector registers if necessary */
3460         if (MACHINE_HAS_VX) {
3461                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3462                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3463                                      fprs, 128);
3464         } else {
3465                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3466                                      vcpu->run->s.regs.fprs, 128);
3467         }
3468         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3469                               vcpu->run->s.regs.gprs, 128);
3470         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3471                               &vcpu->arch.sie_block->gpsw, 16);
3472         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3473                               &px, 4);
3474         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3475                               &vcpu->run->s.regs.fpc, 4);
3476         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3477                               &vcpu->arch.sie_block->todpr, 4);
3478         cputm = kvm_s390_get_cpu_timer(vcpu);
3479         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3480                               &cputm, 8);
3481         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3482         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3483                               &clkcomp, 8);
3484         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3485                               &vcpu->run->s.regs.acrs, 64);
3486         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3487                               &vcpu->arch.sie_block->gcr, 128);
3488         return rc ? -EFAULT : 0;
3489 }
3490
3491 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3492 {
3493         /*
3494          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3495          * switch in the run ioctl. Let's update our copies before we save
3496          * it into the save area
3497          */
3498         save_fpu_regs();
3499         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3500         save_access_regs(vcpu->run->s.regs.acrs);
3501
3502         return kvm_s390_store_status_unloaded(vcpu, addr);
3503 }
3504
3505 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3506 {
3507         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3508         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3509 }
3510
3511 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3512 {
3513         unsigned int i;
3514         struct kvm_vcpu *vcpu;
3515
3516         kvm_for_each_vcpu(i, vcpu, kvm) {
3517                 __disable_ibs_on_vcpu(vcpu);
3518         }
3519 }
3520
3521 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3522 {
3523         if (!sclp.has_ibs)
3524                 return;
3525         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3526         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3527 }
3528
3529 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3530 {
3531         int i, online_vcpus, started_vcpus = 0;
3532
3533         if (!is_vcpu_stopped(vcpu))
3534                 return;
3535
3536         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3537         /* Only one cpu at a time may enter/leave the STOPPED state. */
3538         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3539         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3540
3541         for (i = 0; i < online_vcpus; i++) {
3542                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3543                         started_vcpus++;
3544         }
3545
3546         if (started_vcpus == 0) {
3547                 /* we're the only active VCPU -> speed it up */
3548                 __enable_ibs_on_vcpu(vcpu);
3549         } else if (started_vcpus == 1) {
3550                 /*
3551                  * As we are starting a second VCPU, we have to disable
3552                  * the IBS facility on all VCPUs to remove potentially
3553                  * oustanding ENABLE requests.
3554                  */
3555                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3556         }
3557
3558         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3559         /*
3560          * Another VCPU might have used IBS while we were offline.
3561          * Let's play safe and flush the VCPU at startup.
3562          */
3563         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3564         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3565         return;
3566 }
3567
3568 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3569 {
3570         int i, online_vcpus, started_vcpus = 0;
3571         struct kvm_vcpu *started_vcpu = NULL;
3572
3573         if (is_vcpu_stopped(vcpu))
3574                 return;
3575
3576         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3577         /* Only one cpu at a time may enter/leave the STOPPED state. */
3578         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3579         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3580
3581         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3582         kvm_s390_clear_stop_irq(vcpu);
3583
3584         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3585         __disable_ibs_on_vcpu(vcpu);
3586
3587         for (i = 0; i < online_vcpus; i++) {
3588                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3589                         started_vcpus++;
3590                         started_vcpu = vcpu->kvm->vcpus[i];
3591                 }
3592         }
3593
3594         if (started_vcpus == 1) {
3595                 /*
3596                  * As we only have one VCPU left, we want to enable the
3597                  * IBS facility for that VCPU to speed it up.
3598                  */
3599                 __enable_ibs_on_vcpu(started_vcpu);
3600         }
3601
3602         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3603         return;
3604 }
3605
3606 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3607                                      struct kvm_enable_cap *cap)
3608 {
3609         int r;
3610
3611         if (cap->flags)
3612                 return -EINVAL;
3613
3614         switch (cap->cap) {
3615         case KVM_CAP_S390_CSS_SUPPORT:
3616                 if (!vcpu->kvm->arch.css_support) {
3617                         vcpu->kvm->arch.css_support = 1;
3618                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3619                         trace_kvm_s390_enable_css(vcpu->kvm);
3620                 }
3621                 r = 0;
3622                 break;
3623         default:
3624                 r = -EINVAL;
3625                 break;
3626         }
3627         return r;
3628 }
3629
3630 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3631                                   struct kvm_s390_mem_op *mop)
3632 {
3633         void __user *uaddr = (void __user *)mop->buf;
3634         void *tmpbuf = NULL;
3635         int r, srcu_idx;
3636         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3637                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3638
3639         if (mop->flags & ~supported_flags)
3640                 return -EINVAL;
3641
3642         if (mop->size > MEM_OP_MAX_SIZE)
3643                 return -E2BIG;
3644
3645         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3646                 tmpbuf = vmalloc(mop->size);
3647                 if (!tmpbuf)
3648                         return -ENOMEM;
3649         }
3650
3651         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3652
3653         switch (mop->op) {
3654         case KVM_S390_MEMOP_LOGICAL_READ:
3655                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3656                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3657                                             mop->size, GACC_FETCH);
3658                         break;
3659                 }
3660                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3661                 if (r == 0) {
3662                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3663                                 r = -EFAULT;
3664                 }
3665                 break;
3666         case KVM_S390_MEMOP_LOGICAL_WRITE:
3667                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3668                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3669                                             mop->size, GACC_STORE);
3670                         break;
3671                 }
3672                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3673                         r = -EFAULT;
3674                         break;
3675                 }
3676                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3677                 break;
3678         default:
3679                 r = -EINVAL;
3680         }
3681
3682         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3683
3684         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3685                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3686
3687         vfree(tmpbuf);
3688         return r;
3689 }
3690
3691 long kvm_arch_vcpu_ioctl(struct file *filp,
3692                          unsigned int ioctl, unsigned long arg)
3693 {
3694         struct kvm_vcpu *vcpu = filp->private_data;
3695         void __user *argp = (void __user *)arg;
3696         int idx;
3697         long r;
3698
3699         switch (ioctl) {
3700         case KVM_S390_IRQ: {
3701                 struct kvm_s390_irq s390irq;
3702
3703                 r = -EFAULT;
3704                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3705                         break;
3706                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3707                 break;
3708         }
3709         case KVM_S390_INTERRUPT: {
3710                 struct kvm_s390_interrupt s390int;
3711                 struct kvm_s390_irq s390irq;
3712
3713                 r = -EFAULT;
3714                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3715                         break;
3716                 if (s390int_to_s390irq(&s390int, &s390irq))
3717                         return -EINVAL;
3718                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3719                 break;
3720         }
3721         case KVM_S390_STORE_STATUS:
3722                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3723                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3724                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3725                 break;
3726         case KVM_S390_SET_INITIAL_PSW: {
3727                 psw_t psw;
3728
3729                 r = -EFAULT;
3730                 if (copy_from_user(&psw, argp, sizeof(psw)))
3731                         break;
3732                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3733                 break;
3734         }
3735         case KVM_S390_INITIAL_RESET:
3736                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3737                 break;
3738         case KVM_SET_ONE_REG:
3739         case KVM_GET_ONE_REG: {
3740                 struct kvm_one_reg reg;
3741                 r = -EFAULT;
3742                 if (copy_from_user(&reg, argp, sizeof(reg)))
3743                         break;
3744                 if (ioctl == KVM_SET_ONE_REG)
3745                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3746                 else
3747                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3748                 break;
3749         }
3750 #ifdef CONFIG_KVM_S390_UCONTROL
3751         case KVM_S390_UCAS_MAP: {
3752                 struct kvm_s390_ucas_mapping ucasmap;
3753
3754                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3755                         r = -EFAULT;
3756                         break;
3757                 }
3758
3759                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3760                         r = -EINVAL;
3761                         break;
3762                 }
3763
3764                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3765                                      ucasmap.vcpu_addr, ucasmap.length);
3766                 break;
3767         }
3768         case KVM_S390_UCAS_UNMAP: {
3769                 struct kvm_s390_ucas_mapping ucasmap;
3770
3771                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3772                         r = -EFAULT;
3773                         break;
3774                 }
3775
3776                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3777                         r = -EINVAL;
3778                         break;
3779                 }
3780
3781                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3782                         ucasmap.length);
3783                 break;
3784         }
3785 #endif
3786         case KVM_S390_VCPU_FAULT: {
3787                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3788                 break;
3789         }
3790         case KVM_ENABLE_CAP:
3791         {
3792                 struct kvm_enable_cap cap;
3793                 r = -EFAULT;
3794                 if (copy_from_user(&cap, argp, sizeof(cap)))
3795                         break;
3796                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3797                 break;
3798         }
3799         case KVM_S390_MEM_OP: {
3800                 struct kvm_s390_mem_op mem_op;
3801
3802                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3803                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3804                 else
3805                         r = -EFAULT;
3806                 break;
3807         }
3808         case KVM_S390_SET_IRQ_STATE: {
3809                 struct kvm_s390_irq_state irq_state;
3810
3811                 r = -EFAULT;
3812                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3813                         break;
3814                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3815                     irq_state.len == 0 ||
3816                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3817                         r = -EINVAL;
3818                         break;
3819                 }
3820                 /* do not use irq_state.flags, it will break old QEMUs */
3821                 r = kvm_s390_set_irq_state(vcpu,
3822                                            (void __user *) irq_state.buf,
3823                                            irq_state.len);
3824                 break;
3825         }
3826         case KVM_S390_GET_IRQ_STATE: {
3827                 struct kvm_s390_irq_state irq_state;
3828
3829                 r = -EFAULT;
3830                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3831                         break;
3832                 if (irq_state.len == 0) {
3833                         r = -EINVAL;
3834                         break;
3835                 }
3836                 /* do not use irq_state.flags, it will break old QEMUs */
3837                 r = kvm_s390_get_irq_state(vcpu,
3838                                            (__u8 __user *)  irq_state.buf,
3839                                            irq_state.len);
3840                 break;
3841         }
3842         default:
3843                 r = -ENOTTY;
3844         }
3845         return r;
3846 }
3847
3848 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3849 {
3850 #ifdef CONFIG_KVM_S390_UCONTROL
3851         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3852                  && (kvm_is_ucontrol(vcpu->kvm))) {
3853                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3854                 get_page(vmf->page);
3855                 return 0;
3856         }
3857 #endif
3858         return VM_FAULT_SIGBUS;
3859 }
3860
3861 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3862                             unsigned long npages)
3863 {
3864         return 0;
3865 }
3866
3867 /* Section: memory related */
3868 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3869                                    struct kvm_memory_slot *memslot,
3870                                    const struct kvm_userspace_memory_region *mem,
3871                                    enum kvm_mr_change change)
3872 {
3873         /* A few sanity checks. We can have memory slots which have to be
3874            located/ended at a segment boundary (1MB). The memory in userland is
3875            ok to be fragmented into various different vmas. It is okay to mmap()
3876            and munmap() stuff in this slot after doing this call at any time */
3877
3878         if (mem->userspace_addr & 0xffffful)
3879                 return -EINVAL;
3880
3881         if (mem->memory_size & 0xffffful)
3882                 return -EINVAL;
3883
3884         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3885                 return -EINVAL;
3886
3887         return 0;
3888 }
3889
3890 void kvm_arch_commit_memory_region(struct kvm *kvm,
3891                                 const struct kvm_userspace_memory_region *mem,
3892                                 const struct kvm_memory_slot *old,
3893                                 const struct kvm_memory_slot *new,
3894                                 enum kvm_mr_change change)
3895 {
3896         int rc;
3897
3898         /* If the basics of the memslot do not change, we do not want
3899          * to update the gmap. Every update causes several unnecessary
3900          * segment translation exceptions. This is usually handled just
3901          * fine by the normal fault handler + gmap, but it will also
3902          * cause faults on the prefix page of running guest CPUs.
3903          */
3904         if (old->userspace_addr == mem->userspace_addr &&
3905             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3906             old->npages * PAGE_SIZE == mem->memory_size)
3907                 return;
3908
3909         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3910                 mem->guest_phys_addr, mem->memory_size);
3911         if (rc)
3912                 pr_warn("failed to commit memory region\n");
3913         return;
3914 }
3915
3916 static inline unsigned long nonhyp_mask(int i)
3917 {
3918         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3919
3920         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3921 }
3922
3923 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3924 {
3925         vcpu->valid_wakeup = false;
3926 }
3927
3928 static int __init kvm_s390_init(void)
3929 {
3930         int i;
3931
3932         if (!sclp.has_sief2) {
3933                 pr_info("SIE not available\n");
3934                 return -ENODEV;
3935         }
3936
3937         for (i = 0; i < 16; i++)
3938                 kvm_s390_fac_list_mask[i] |=
3939                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3940
3941         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3942 }
3943
3944 static void __exit kvm_s390_exit(void)
3945 {
3946         kvm_exit();
3947 }
3948
3949 module_init(kvm_s390_init);
3950 module_exit(kvm_s390_exit);
3951
3952 /*
3953  * Enable autoloading of the kvm module.
3954  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3955  * since x86 takes a different approach.
3956  */
3957 #include <linux/miscdevice.h>
3958 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3959 MODULE_ALIAS("devname:kvm");