arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2018
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34
  35 #include <asm/asm-offsets.h>
  36 #include <asm/lowcore.h>
  37 #include <asm/stp.h>
  38 #include <asm/pgtable.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include "kvm-s390.h"
  48 #include "gaccess.h"
  49
  50 #define CREATE_TRACE_POINTS
  51 #include "trace.h"
  52 #include "trace-s390.h"
  53
  54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  55 #define LOCAL_IRQS 32
  56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
  59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
  61
  62 struct kvm_stats_debugfs_item debugfs_entries[] = {
  63         { "userspace_handled", VCPU_STAT(exit_userspace) },
  64         { "exit_null", VCPU_STAT(exit_null) },
  65         { "exit_validity", VCPU_STAT(exit_validity) },
  66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  67         { "exit_external_request", VCPU_STAT(exit_external_request) },
  68         { "exit_io_request", VCPU_STAT(exit_io_request) },
  69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  70         { "exit_instruction", VCPU_STAT(exit_instruction) },
  71         { "exit_pei", VCPU_STAT(exit_pei) },
  72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  78         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  79         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  80         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  81         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  82         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  83         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
  84         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
  85         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  86         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  87         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  88         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
  89         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  90         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  91         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  92         { "deliver_program", VCPU_STAT(deliver_program) },
  93         { "deliver_io", VCPU_STAT(deliver_io) },
  94         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
  95         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  96         { "inject_ckc", VCPU_STAT(inject_ckc) },
  97         { "inject_cputm", VCPU_STAT(inject_cputm) },
  98         { "inject_external_call", VCPU_STAT(inject_external_call) },
  99         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
 100         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
 101         { "inject_io", VM_STAT(inject_io) },
 102         { "inject_mchk", VCPU_STAT(inject_mchk) },
 103         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
 104         { "inject_program", VCPU_STAT(inject_program) },
 105         { "inject_restart", VCPU_STAT(inject_restart) },
 106         { "inject_service_signal", VM_STAT(inject_service_signal) },
 107         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
 108         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
 109         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
 110         { "inject_virtio", VM_STAT(inject_virtio) },
 111         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
 112         { "instruction_gs", VCPU_STAT(instruction_gs) },
 113         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
 114         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
 115         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
 116         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 117         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
 118         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
 119         { "instruction_sck", VCPU_STAT(instruction_sck) },
 120         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 121         { "instruction_spx", VCPU_STAT(instruction_spx) },
 122         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
 123         { "instruction_stap", VCPU_STAT(instruction_stap) },
 124         { "instruction_iske", VCPU_STAT(instruction_iske) },
 125         { "instruction_ri", VCPU_STAT(instruction_ri) },
 126         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 127         { "instruction_sske", VCPU_STAT(instruction_sske) },
 128         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 129         { "instruction_essa", VCPU_STAT(instruction_essa) },
 130         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 131         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 132         { "instruction_tb", VCPU_STAT(instruction_tb) },
 133         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
 134         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 135         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
 136         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 137         { "instruction_sie", VCPU_STAT(instruction_sie) },
 138         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 139         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 140         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 141         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 142         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 143         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 144         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 145         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 146         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 147         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 148         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 149         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 150         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 151         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 152         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 153         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 154         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
 155         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
 156         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 157         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
 158         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
 159         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
 160         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
 161         { NULL }
 162 };
 163
 164 struct kvm_s390_tod_clock_ext {
 165         __u8 epoch_idx;
 166         __u64 tod;
 167         __u8 reserved[7];
 168 } __packed;
 169
 170 /* allow nested virtualization in KVM (if enabled by user space) */
 171 static int nested;
 172 module_param(nested, int, S_IRUGO);
 173 MODULE_PARM_DESC(nested, "Nested virtualization support");
 174
 175 /* allow 1m huge page guest backing, if !nested */
 176 static int hpage;
 177 module_param(hpage, int, 0444);
 178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 179
 180 /*
 181  * For now we handle at most 16 double words as this is what the s390 base
 182  * kernel handles and stores in the prefix page. If we ever need to go beyond
 183  * this, this requires changes to code, but the external uapi can stay.
 184  */
 185 #define SIZE_INTERNAL 16
 186
 187 /*
 188  * Base feature mask that defines default mask for facilities. Consists of the
 189  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 190  */
 191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 192 /*
 193  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 194  * and defines the facilities that can be enabled via a cpu model.
 195  */
 196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 197
 198 static unsigned long kvm_s390_fac_size(void)
 199 {
 200         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 201         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 202         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 203                 sizeof(S390_lowcore.stfle_fac_list));
 204
 205         return SIZE_INTERNAL;
 206 }
 207
 208 /* available cpu features supported by kvm */
 209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 210 /* available subfunctions indicated via query / "test bit" */
 211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 212
 213 static struct gmap_notifier gmap_notifier;
 214 static struct gmap_notifier vsie_gmap_notifier;
 215 debug_info_t *kvm_s390_dbf;
 216
 217 /* Section: not file related */
 218 int kvm_arch_hardware_enable(void)
 219 {
 220         /* every s390 is virtualization enabled ;-) */
 221         return 0;
 222 }
 223
 224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 225                               unsigned long end);
 226
 227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 228 {
 229         u8 delta_idx = 0;
 230
 231         /*
 232          * The TOD jumps by delta, we have to compensate this by adding
 233          * -delta to the epoch.
 234          */
 235         delta = -delta;
 236
 237         /* sign-extension - we're adding to signed values below */
 238         if ((s64)delta < 0)
 239                 delta_idx = -1;
 240
 241         scb->epoch += delta;
 242         if (scb->ecd & ECD_MEF) {
 243                 scb->epdx += delta_idx;
 244                 if (scb->epoch < delta)
 245                         scb->epdx += 1;
 246         }
 247 }
 248
 249 /*
 250  * This callback is executed during stop_machine(). All CPUs are therefore
 251  * temporarily stopped. In order not to change guest behavior, we have to
 252  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 253  * so a CPU won't be stopped while calculating with the epoch.
 254  */
 255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 256                           void *v)
 257 {
 258         struct kvm *kvm;
 259         struct kvm_vcpu *vcpu;
 260         int i;
 261         unsigned long long *delta = v;
 262
 263         list_for_each_entry(kvm, &vm_list, vm_list) {
 264                 kvm_for_each_vcpu(i, vcpu, kvm) {
 265                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 266                         if (i == 0) {
 267                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 268                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 269                         }
 270                         if (vcpu->arch.cputm_enabled)
 271                                 vcpu->arch.cputm_start += *delta;
 272                         if (vcpu->arch.vsie_block)
 273                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 274                                                    *delta);
 275                 }
 276         }
 277         return NOTIFY_OK;
 278 }
 279
 280 static struct notifier_block kvm_clock_notifier = {
 281         .notifier_call = kvm_clock_sync,
 282 };
 283
 284 int kvm_arch_hardware_setup(void)
 285 {
 286         gmap_notifier.notifier_call = kvm_gmap_notifier;
 287         gmap_register_pte_notifier(&gmap_notifier);
 288         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 289         gmap_register_pte_notifier(&vsie_gmap_notifier);
 290         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 291                                        &kvm_clock_notifier);
 292         return 0;
 293 }
 294
 295 void kvm_arch_hardware_unsetup(void)
 296 {
 297         gmap_unregister_pte_notifier(&gmap_notifier);
 298         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 299         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 300                                          &kvm_clock_notifier);
 301 }
 302
 303 static void allow_cpu_feat(unsigned long nr)
 304 {
 305         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 306 }
 307
 308 static inline int plo_test_bit(unsigned char nr)
 309 {
 310         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 311         int cc;
 312
 313         asm volatile(
 314                 /* Parameter registers are ignored for "test bit" */
 315                 "       plo     0,0,0,0(0)\n"
 316                 "       ipm     %0\n"
 317                 "       srl     %0,28\n"
 318                 : "=d" (cc)
 319                 : "d" (r0)
 320                 : "cc");
 321         return cc == 0;
 322 }
 323
 324 static void kvm_s390_cpu_feat_init(void)
 325 {
 326         int i;
 327
 328         for (i = 0; i < 256; ++i) {
 329                 if (plo_test_bit(i))
 330                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 331         }
 332
 333         if (test_facility(28)) /* TOD-clock steering */
 334                 ptff(kvm_s390_available_subfunc.ptff,
 335                      sizeof(kvm_s390_available_subfunc.ptff),
 336                      PTFF_QAF);
 337
 338         if (test_facility(17)) { /* MSA */
 339                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 340                               kvm_s390_available_subfunc.kmac);
 341                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 342                               kvm_s390_available_subfunc.kmc);
 343                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 344                               kvm_s390_available_subfunc.km);
 345                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 346                               kvm_s390_available_subfunc.kimd);
 347                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 348                               kvm_s390_available_subfunc.klmd);
 349         }
 350         if (test_facility(76)) /* MSA3 */
 351                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 352                               kvm_s390_available_subfunc.pckmo);
 353         if (test_facility(77)) { /* MSA4 */
 354                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 355                               kvm_s390_available_subfunc.kmctr);
 356                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 357                               kvm_s390_available_subfunc.kmf);
 358                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 359                               kvm_s390_available_subfunc.kmo);
 360                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 361                               kvm_s390_available_subfunc.pcc);
 362         }
 363         if (test_facility(57)) /* MSA5 */
 364                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 365                               kvm_s390_available_subfunc.ppno);
 366
 367         if (test_facility(146)) /* MSA8 */
 368                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 369                               kvm_s390_available_subfunc.kma);
 370
 371         if (MACHINE_HAS_ESOP)
 372                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 373         /*
 374          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 375          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 376          */
 377         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 378             !test_facility(3) || !nested)
 379                 return;
 380         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 381         if (sclp.has_64bscao)
 382                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 383         if (sclp.has_siif)
 384                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 385         if (sclp.has_gpere)
 386                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 387         if (sclp.has_gsls)
 388                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 389         if (sclp.has_ib)
 390                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 391         if (sclp.has_cei)
 392                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 393         if (sclp.has_ibs)
 394                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 395         if (sclp.has_kss)
 396                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 397         /*
 398          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 399          * all skey handling functions read/set the skey from the PGSTE
 400          * instead of the real storage key.
 401          *
 402          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 403          * pages being detected as preserved although they are resident.
 404          *
 405          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 406          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 407          *
 408          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 409          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 410          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 411          *
 412          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 413          * cannot easily shadow the SCA because of the ipte lock.
 414          */
 415 }
 416
 417 int kvm_arch_init(void *opaque)
 418 {
 419         int rc;
 420
 421         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 422         if (!kvm_s390_dbf)
 423                 return -ENOMEM;
 424
 425         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 426                 rc = -ENOMEM;
 427                 goto out_debug_unreg;
 428         }
 429
 430         kvm_s390_cpu_feat_init();
 431
 432         /* Register floating interrupt controller interface. */
 433         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 434         if (rc) {
 435                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 436                 goto out_debug_unreg;
 437         }
 438
 439         rc = kvm_s390_gib_init(GAL_ISC);
 440         if (rc)
 441                 goto out_gib_destroy;
 442
 443         return 0;
 444
 445 out_gib_destroy:
 446         kvm_s390_gib_destroy();
 447 out_debug_unreg:
 448         debug_unregister(kvm_s390_dbf);
 449         return rc;
 450 }
 451
 452 void kvm_arch_exit(void)
 453 {
 454         kvm_s390_gib_destroy();
 455         debug_unregister(kvm_s390_dbf);
 456 }
 457
 458 /* Section: device related */
 459 long kvm_arch_dev_ioctl(struct file *filp,
 460                         unsigned int ioctl, unsigned long arg)
 461 {
 462         if (ioctl == KVM_S390_ENABLE_SIE)
 463                 return s390_enable_sie();
 464         return -EINVAL;
 465 }
 466
 467 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 468 {
 469         int r;
 470
 471         switch (ext) {
 472         case KVM_CAP_S390_PSW:
 473         case KVM_CAP_S390_GMAP:
 474         case KVM_CAP_SYNC_MMU:
 475 #ifdef CONFIG_KVM_S390_UCONTROL
 476         case KVM_CAP_S390_UCONTROL:
 477 #endif
 478         case KVM_CAP_ASYNC_PF:
 479         case KVM_CAP_SYNC_REGS:
 480         case KVM_CAP_ONE_REG:
 481         case KVM_CAP_ENABLE_CAP:
 482         case KVM_CAP_S390_CSS_SUPPORT:
 483         case KVM_CAP_IOEVENTFD:
 484         case KVM_CAP_DEVICE_CTRL:
 485         case KVM_CAP_S390_IRQCHIP:
 486         case KVM_CAP_VM_ATTRIBUTES:
 487         case KVM_CAP_MP_STATE:
 488         case KVM_CAP_IMMEDIATE_EXIT:
 489         case KVM_CAP_S390_INJECT_IRQ:
 490         case KVM_CAP_S390_USER_SIGP:
 491         case KVM_CAP_S390_USER_STSI:
 492         case KVM_CAP_S390_SKEYS:
 493         case KVM_CAP_S390_IRQ_STATE:
 494         case KVM_CAP_S390_USER_INSTR0:
 495         case KVM_CAP_S390_CMMA_MIGRATION:
 496         case KVM_CAP_S390_AIS:
 497         case KVM_CAP_S390_AIS_MIGRATION:
 498                 r = 1;
 499                 break;
 500         case KVM_CAP_S390_HPAGE_1M:
 501                 r = 0;
 502                 if (hpage && !kvm_is_ucontrol(kvm))
 503                         r = 1;
 504                 break;
 505         case KVM_CAP_S390_MEM_OP:
 506                 r = MEM_OP_MAX_SIZE;
 507                 break;
 508         case KVM_CAP_NR_VCPUS:
 509         case KVM_CAP_MAX_VCPUS:
 510                 r = KVM_S390_BSCA_CPU_SLOTS;
 511                 if (!kvm_s390_use_sca_entries())
 512                         r = KVM_MAX_VCPUS;
 513                 else if (sclp.has_esca && sclp.has_64bscao)
 514                         r = KVM_S390_ESCA_CPU_SLOTS;
 515                 break;
 516         case KVM_CAP_NR_MEMSLOTS:
 517                 r = KVM_USER_MEM_SLOTS;
 518                 break;
 519         case KVM_CAP_S390_COW:
 520                 r = MACHINE_HAS_ESOP;
 521                 break;
 522         case KVM_CAP_S390_VECTOR_REGISTERS:
 523                 r = MACHINE_HAS_VX;
 524                 break;
 525         case KVM_CAP_S390_RI:
 526                 r = test_facility(64);
 527                 break;
 528         case KVM_CAP_S390_GS:
 529                 r = test_facility(133);
 530                 break;
 531         case KVM_CAP_S390_BPB:
 532                 r = test_facility(82);
 533                 break;
 534         default:
 535                 r = 0;
 536         }
 537         return r;
 538 }
 539
 540 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 541                                     struct kvm_memory_slot *memslot)
 542 {
 543         int i;
 544         gfn_t cur_gfn, last_gfn;
 545         unsigned long gaddr, vmaddr;
 546         struct gmap *gmap = kvm->arch.gmap;
 547         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 548
 549         /* Loop over all guest segments */
 550         cur_gfn = memslot->base_gfn;
 551         last_gfn = memslot->base_gfn + memslot->npages;
 552         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 553                 gaddr = gfn_to_gpa(cur_gfn);
 554                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 555                 if (kvm_is_error_hva(vmaddr))
 556                         continue;
 557
 558                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 559                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 560                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 561                         if (test_bit(i, bitmap))
 562                                 mark_page_dirty(kvm, cur_gfn + i);
 563                 }
 564
 565                 if (fatal_signal_pending(current))
 566                         return;
 567                 cond_resched();
 568         }
 569 }
 570
 571 /* Section: vm related */
 572 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 573
 574 /*
 575  * Get (and clear) the dirty memory log for a memory slot.
 576  */
 577 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 578                                struct kvm_dirty_log *log)
 579 {
 580         int r;
 581         unsigned long n;
 582         struct kvm_memslots *slots;
 583         struct kvm_memory_slot *memslot;
 584         int is_dirty = 0;
 585
 586         if (kvm_is_ucontrol(kvm))
 587                 return -EINVAL;
 588
 589         mutex_lock(&kvm->slots_lock);
 590
 591         r = -EINVAL;
 592         if (log->slot >= KVM_USER_MEM_SLOTS)
 593                 goto out;
 594
 595         slots = kvm_memslots(kvm);
 596         memslot = id_to_memslot(slots, log->slot);
 597         r = -ENOENT;
 598         if (!memslot->dirty_bitmap)
 599                 goto out;
 600
 601         kvm_s390_sync_dirty_log(kvm, memslot);
 602         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 603         if (r)
 604                 goto out;
 605
 606         /* Clear the dirty log */
 607         if (is_dirty) {
 608                 n = kvm_dirty_bitmap_bytes(memslot);
 609                 memset(memslot->dirty_bitmap, 0, n);
 610         }
 611         r = 0;
 612 out:
 613         mutex_unlock(&kvm->slots_lock);
 614         return r;
 615 }
 616
 617 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 618 {
 619         unsigned int i;
 620         struct kvm_vcpu *vcpu;
 621
 622         kvm_for_each_vcpu(i, vcpu, kvm) {
 623                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 624         }
 625 }
 626
 627 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 628 {
 629         int r;
 630
 631         if (cap->flags)
 632                 return -EINVAL;
 633
 634         switch (cap->cap) {
 635         case KVM_CAP_S390_IRQCHIP:
 636                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 637                 kvm->arch.use_irqchip = 1;
 638                 r = 0;
 639                 break;
 640         case KVM_CAP_S390_USER_SIGP:
 641                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 642                 kvm->arch.user_sigp = 1;
 643                 r = 0;
 644                 break;
 645         case KVM_CAP_S390_VECTOR_REGISTERS:
 646                 mutex_lock(&kvm->lock);
 647                 if (kvm->created_vcpus) {
 648                         r = -EBUSY;
 649                 } else if (MACHINE_HAS_VX) {
 650                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 651                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 652                         if (test_facility(134)) {
 653                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 654                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 655                         }
 656                         if (test_facility(135)) {
 657                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 658                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 659                         }
 660                         r = 0;
 661                 } else
 662                         r = -EINVAL;
 663                 mutex_unlock(&kvm->lock);
 664                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 665                          r ? "(not available)" : "(success)");
 666                 break;
 667         case KVM_CAP_S390_RI:
 668                 r = -EINVAL;
 669                 mutex_lock(&kvm->lock);
 670                 if (kvm->created_vcpus) {
 671                         r = -EBUSY;
 672                 } else if (test_facility(64)) {
 673                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 674                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 675                         r = 0;
 676                 }
 677                 mutex_unlock(&kvm->lock);
 678                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 679                          r ? "(not available)" : "(success)");
 680                 break;
 681         case KVM_CAP_S390_AIS:
 682                 mutex_lock(&kvm->lock);
 683                 if (kvm->created_vcpus) {
 684                         r = -EBUSY;
 685                 } else {
 686                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 687                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 688                         r = 0;
 689                 }
 690                 mutex_unlock(&kvm->lock);
 691                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 692                          r ? "(not available)" : "(success)");
 693                 break;
 694         case KVM_CAP_S390_GS:
 695                 r = -EINVAL;
 696                 mutex_lock(&kvm->lock);
 697                 if (kvm->created_vcpus) {
 698                         r = -EBUSY;
 699                 } else if (test_facility(133)) {
 700                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 701                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 702                         r = 0;
 703                 }
 704                 mutex_unlock(&kvm->lock);
 705                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 706                          r ? "(not available)" : "(success)");
 707                 break;
 708         case KVM_CAP_S390_HPAGE_1M:
 709                 mutex_lock(&kvm->lock);
 710                 if (kvm->created_vcpus)
 711                         r = -EBUSY;
 712                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 713                         r = -EINVAL;
 714                 else {
 715                         r = 0;
 716                         down_write(&kvm->mm->mmap_sem);
 717                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 718                         up_write(&kvm->mm->mmap_sem);
 719                         /*
 720                          * We might have to create fake 4k page
 721                          * tables. To avoid that the hardware works on
 722                          * stale PGSTEs, we emulate these instructions.
 723                          */
 724                         kvm->arch.use_skf = 0;
 725                         kvm->arch.use_pfmfi = 0;
 726                 }
 727                 mutex_unlock(&kvm->lock);
 728                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 729                          r ? "(not available)" : "(success)");
 730                 break;
 731         case KVM_CAP_S390_USER_STSI:
 732                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 733                 kvm->arch.user_stsi = 1;
 734                 r = 0;
 735                 break;
 736         case KVM_CAP_S390_USER_INSTR0:
 737                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 738                 kvm->arch.user_instr0 = 1;
 739                 icpt_operexc_on_all_vcpus(kvm);
 740                 r = 0;
 741                 break;
 742         default:
 743                 r = -EINVAL;
 744                 break;
 745         }
 746         return r;
 747 }
 748
 749 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 750 {
 751         int ret;
 752
 753         switch (attr->attr) {
 754         case KVM_S390_VM_MEM_LIMIT_SIZE:
 755                 ret = 0;
 756                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 757                          kvm->arch.mem_limit);
 758                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 759                         ret = -EFAULT;
 760                 break;
 761         default:
 762                 ret = -ENXIO;
 763                 break;
 764         }
 765         return ret;
 766 }
 767
 768 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 769 {
 770         int ret;
 771         unsigned int idx;
 772         switch (attr->attr) {
 773         case KVM_S390_VM_MEM_ENABLE_CMMA:
 774                 ret = -ENXIO;
 775                 if (!sclp.has_cmma)
 776                         break;
 777
 778                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 779                 mutex_lock(&kvm->lock);
 780                 if (kvm->created_vcpus)
 781                         ret = -EBUSY;
 782                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 783                         ret = -EINVAL;
 784                 else {
 785                         kvm->arch.use_cmma = 1;
 786                         /* Not compatible with cmma. */
 787                         kvm->arch.use_pfmfi = 0;
 788                         ret = 0;
 789                 }
 790                 mutex_unlock(&kvm->lock);
 791                 break;
 792         case KVM_S390_VM_MEM_CLR_CMMA:
 793                 ret = -ENXIO;
 794                 if (!sclp.has_cmma)
 795                         break;
 796                 ret = -EINVAL;
 797                 if (!kvm->arch.use_cmma)
 798                         break;
 799
 800                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 801                 mutex_lock(&kvm->lock);
 802                 idx = srcu_read_lock(&kvm->srcu);
 803                 s390_reset_cmma(kvm->arch.gmap->mm);
 804                 srcu_read_unlock(&kvm->srcu, idx);
 805                 mutex_unlock(&kvm->lock);
 806                 ret = 0;
 807                 break;
 808         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 809                 unsigned long new_limit;
 810
 811                 if (kvm_is_ucontrol(kvm))
 812                         return -EINVAL;
 813
 814                 if (get_user(new_limit, (u64 __user *)attr->addr))
 815                         return -EFAULT;
 816
 817                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 818                     new_limit > kvm->arch.mem_limit)
 819                         return -E2BIG;
 820
 821                 if (!new_limit)
 822                         return -EINVAL;
 823
 824                 /* gmap_create takes last usable address */
 825                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 826                         new_limit -= 1;
 827
 828                 ret = -EBUSY;
 829                 mutex_lock(&kvm->lock);
 830                 if (!kvm->created_vcpus) {
 831                         /* gmap_create will round the limit up */
 832                         struct gmap *new = gmap_create(current->mm, new_limit);
 833
 834                         if (!new) {
 835                                 ret = -ENOMEM;
 836                         } else {
 837                                 gmap_remove(kvm->arch.gmap);
 838                                 new->private = kvm;
 839                                 kvm->arch.gmap = new;
 840                                 ret = 0;
 841                         }
 842                 }
 843                 mutex_unlock(&kvm->lock);
 844                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 845                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 846                          (void *) kvm->arch.gmap->asce);
 847                 break;
 848         }
 849         default:
 850                 ret = -ENXIO;
 851                 break;
 852         }
 853         return ret;
 854 }
 855
 856 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 857
 858 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 859 {
 860         struct kvm_vcpu *vcpu;
 861         int i;
 862
 863         kvm_s390_vcpu_block_all(kvm);
 864
 865         kvm_for_each_vcpu(i, vcpu, kvm) {
 866                 kvm_s390_vcpu_crypto_setup(vcpu);
 867                 /* recreate the shadow crycb by leaving the VSIE handler */
 868                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 869         }
 870
 871         kvm_s390_vcpu_unblock_all(kvm);
 872 }
 873
 874 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 875 {
 876         mutex_lock(&kvm->lock);
 877         switch (attr->attr) {
 878         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 879                 if (!test_kvm_facility(kvm, 76)) {
 880                         mutex_unlock(&kvm->lock);
 881                         return -EINVAL;
 882                 }
 883                 get_random_bytes(
 884                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 885                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 886                 kvm->arch.crypto.aes_kw = 1;
 887                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 888                 break;
 889         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 890                 if (!test_kvm_facility(kvm, 76)) {
 891                         mutex_unlock(&kvm->lock);
 892                         return -EINVAL;
 893                 }
 894                 get_random_bytes(
 895                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 896                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 897                 kvm->arch.crypto.dea_kw = 1;
 898                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 899                 break;
 900         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 901                 if (!test_kvm_facility(kvm, 76)) {
 902                         mutex_unlock(&kvm->lock);
 903                         return -EINVAL;
 904                 }
 905                 kvm->arch.crypto.aes_kw = 0;
 906                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 907                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 908                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 909                 break;
 910         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 911                 if (!test_kvm_facility(kvm, 76)) {
 912                         mutex_unlock(&kvm->lock);
 913                         return -EINVAL;
 914                 }
 915                 kvm->arch.crypto.dea_kw = 0;
 916                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 917                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 918                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 919                 break;
 920         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 921                 if (!ap_instructions_available()) {
 922                         mutex_unlock(&kvm->lock);
 923                         return -EOPNOTSUPP;
 924                 }
 925                 kvm->arch.crypto.apie = 1;
 926                 break;
 927         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
 928                 if (!ap_instructions_available()) {
 929                         mutex_unlock(&kvm->lock);
 930                         return -EOPNOTSUPP;
 931                 }
 932                 kvm->arch.crypto.apie = 0;
 933                 break;
 934         default:
 935                 mutex_unlock(&kvm->lock);
 936                 return -ENXIO;
 937         }
 938
 939         kvm_s390_vcpu_crypto_reset_all(kvm);
 940         mutex_unlock(&kvm->lock);
 941         return 0;
 942 }
 943
 944 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 945 {
 946         int cx;
 947         struct kvm_vcpu *vcpu;
 948
 949         kvm_for_each_vcpu(cx, vcpu, kvm)
 950                 kvm_s390_sync_request(req, vcpu);
 951 }
 952
 953 /*
 954  * Must be called with kvm->srcu held to avoid races on memslots, and with
 955  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 956  */
 957 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 958 {
 959         struct kvm_memory_slot *ms;
 960         struct kvm_memslots *slots;
 961         unsigned long ram_pages = 0;
 962         int slotnr;
 963
 964         /* migration mode already enabled */
 965         if (kvm->arch.migration_mode)
 966                 return 0;
 967         slots = kvm_memslots(kvm);
 968         if (!slots || !slots->used_slots)
 969                 return -EINVAL;
 970
 971         if (!kvm->arch.use_cmma) {
 972                 kvm->arch.migration_mode = 1;
 973                 return 0;
 974         }
 975         /* mark all the pages in active slots as dirty */
 976         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
 977                 ms = slots->memslots + slotnr;
 978                 /*
 979                  * The second half of the bitmap is only used on x86,
 980                  * and would be wasted otherwise, so we put it to good
 981                  * use here to keep track of the state of the storage
 982                  * attributes.
 983                  */
 984                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
 985                 ram_pages += ms->npages;
 986         }
 987         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
 988         kvm->arch.migration_mode = 1;
 989         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 990         return 0;
 991 }
 992
 993 /*
 994  * Must be called with kvm->slots_lock to avoid races with ourselves and
 995  * kvm_s390_vm_start_migration.
 996  */
 997 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 998 {
 999         /* migration mode already disabled */
1000         if (!kvm->arch.migration_mode)
1001                 return 0;
1002         kvm->arch.migration_mode = 0;
1003         if (kvm->arch.use_cmma)
1004                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1005         return 0;
1006 }
1007
1008 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1009                                      struct kvm_device_attr *attr)
1010 {
1011         int res = -ENXIO;
1012
1013         mutex_lock(&kvm->slots_lock);
1014         switch (attr->attr) {
1015         case KVM_S390_VM_MIGRATION_START:
1016                 res = kvm_s390_vm_start_migration(kvm);
1017                 break;
1018         case KVM_S390_VM_MIGRATION_STOP:
1019                 res = kvm_s390_vm_stop_migration(kvm);
1020                 break;
1021         default:
1022                 break;
1023         }
1024         mutex_unlock(&kvm->slots_lock);
1025
1026         return res;
1027 }
1028
1029 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1030                                      struct kvm_device_attr *attr)
1031 {
1032         u64 mig = kvm->arch.migration_mode;
1033
1034         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1035                 return -ENXIO;
1036
1037         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1038                 return -EFAULT;
1039         return 0;
1040 }
1041
1042 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1043 {
1044         struct kvm_s390_vm_tod_clock gtod;
1045
1046         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1047                 return -EFAULT;
1048
1049         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1050                 return -EINVAL;
1051         kvm_s390_set_tod_clock(kvm, &gtod);
1052
1053         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1054                 gtod.epoch_idx, gtod.tod);
1055
1056         return 0;
1057 }
1058
1059 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1060 {
1061         u8 gtod_high;
1062
1063         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1064                                            sizeof(gtod_high)))
1065                 return -EFAULT;
1066
1067         if (gtod_high != 0)
1068                 return -EINVAL;
1069         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1070
1071         return 0;
1072 }
1073
1074 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1075 {
1076         struct kvm_s390_vm_tod_clock gtod = { 0 };
1077
1078         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1079                            sizeof(gtod.tod)))
1080                 return -EFAULT;
1081
1082         kvm_s390_set_tod_clock(kvm, &gtod);
1083         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1084         return 0;
1085 }
1086
1087 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1088 {
1089         int ret;
1090
1091         if (attr->flags)
1092                 return -EINVAL;
1093
1094         switch (attr->attr) {
1095         case KVM_S390_VM_TOD_EXT:
1096                 ret = kvm_s390_set_tod_ext(kvm, attr);
1097                 break;
1098         case KVM_S390_VM_TOD_HIGH:
1099                 ret = kvm_s390_set_tod_high(kvm, attr);
1100                 break;
1101         case KVM_S390_VM_TOD_LOW:
1102                 ret = kvm_s390_set_tod_low(kvm, attr);
1103                 break;
1104         default:
1105                 ret = -ENXIO;
1106                 break;
1107         }
1108         return ret;
1109 }
1110
1111 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1112                                    struct kvm_s390_vm_tod_clock *gtod)
1113 {
1114         struct kvm_s390_tod_clock_ext htod;
1115
1116         preempt_disable();
1117
1118         get_tod_clock_ext((char *)&htod);
1119
1120         gtod->tod = htod.tod + kvm->arch.epoch;
1121         gtod->epoch_idx = 0;
1122         if (test_kvm_facility(kvm, 139)) {
1123                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1124                 if (gtod->tod < htod.tod)
1125                         gtod->epoch_idx += 1;
1126         }
1127
1128         preempt_enable();
1129 }
1130
1131 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1132 {
1133         struct kvm_s390_vm_tod_clock gtod;
1134
1135         memset(&gtod, 0, sizeof(gtod));
1136         kvm_s390_get_tod_clock(kvm, &gtod);
1137         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1138                 return -EFAULT;
1139
1140         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1141                 gtod.epoch_idx, gtod.tod);
1142         return 0;
1143 }
1144
1145 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1146 {
1147         u8 gtod_high = 0;
1148
1149         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1150                                          sizeof(gtod_high)))
1151                 return -EFAULT;
1152         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1153
1154         return 0;
1155 }
1156
1157 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1158 {
1159         u64 gtod;
1160
1161         gtod = kvm_s390_get_tod_clock_fast(kvm);
1162         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1163                 return -EFAULT;
1164         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1165
1166         return 0;
1167 }
1168
1169 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1170 {
1171         int ret;
1172
1173         if (attr->flags)
1174                 return -EINVAL;
1175
1176         switch (attr->attr) {
1177         case KVM_S390_VM_TOD_EXT:
1178                 ret = kvm_s390_get_tod_ext(kvm, attr);
1179                 break;
1180         case KVM_S390_VM_TOD_HIGH:
1181                 ret = kvm_s390_get_tod_high(kvm, attr);
1182                 break;
1183         case KVM_S390_VM_TOD_LOW:
1184                 ret = kvm_s390_get_tod_low(kvm, attr);
1185                 break;
1186         default:
1187                 ret = -ENXIO;
1188                 break;
1189         }
1190         return ret;
1191 }
1192
1193 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1194 {
1195         struct kvm_s390_vm_cpu_processor *proc;
1196         u16 lowest_ibc, unblocked_ibc;
1197         int ret = 0;
1198
1199         mutex_lock(&kvm->lock);
1200         if (kvm->created_vcpus) {
1201                 ret = -EBUSY;
1202                 goto out;
1203         }
1204         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1205         if (!proc) {
1206                 ret = -ENOMEM;
1207                 goto out;
1208         }
1209         if (!copy_from_user(proc, (void __user *)attr->addr,
1210                             sizeof(*proc))) {
1211                 kvm->arch.model.cpuid = proc->cpuid;
1212                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1213                 unblocked_ibc = sclp.ibc & 0xfff;
1214                 if (lowest_ibc && proc->ibc) {
1215                         if (proc->ibc > unblocked_ibc)
1216                                 kvm->arch.model.ibc = unblocked_ibc;
1217                         else if (proc->ibc < lowest_ibc)
1218                                 kvm->arch.model.ibc = lowest_ibc;
1219                         else
1220                                 kvm->arch.model.ibc = proc->ibc;
1221                 }
1222                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1223                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1224                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1225                          kvm->arch.model.ibc,
1226                          kvm->arch.model.cpuid);
1227                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1228                          kvm->arch.model.fac_list[0],
1229                          kvm->arch.model.fac_list[1],
1230                          kvm->arch.model.fac_list[2]);
1231         } else
1232                 ret = -EFAULT;
1233         kfree(proc);
1234 out:
1235         mutex_unlock(&kvm->lock);
1236         return ret;
1237 }
1238
1239 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1240                                        struct kvm_device_attr *attr)
1241 {
1242         struct kvm_s390_vm_cpu_feat data;
1243
1244         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1245                 return -EFAULT;
1246         if (!bitmap_subset((unsigned long *) data.feat,
1247                            kvm_s390_available_cpu_feat,
1248                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1249                 return -EINVAL;
1250
1251         mutex_lock(&kvm->lock);
1252         if (kvm->created_vcpus) {
1253                 mutex_unlock(&kvm->lock);
1254                 return -EBUSY;
1255         }
1256         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1257                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1258         mutex_unlock(&kvm->lock);
1259         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1260                          data.feat[0],
1261                          data.feat[1],
1262                          data.feat[2]);
1263         return 0;
1264 }
1265
1266 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1267                                           struct kvm_device_attr *attr)
1268 {
1269         mutex_lock(&kvm->lock);
1270         if (kvm->created_vcpus) {
1271                 mutex_unlock(&kvm->lock);
1272                 return -EBUSY;
1273         }
1274
1275         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1276                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1277                 mutex_unlock(&kvm->lock);
1278                 return -EFAULT;
1279         }
1280         mutex_unlock(&kvm->lock);
1281
1282         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1283                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1284                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1285                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1286                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1287         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1288                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1289                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1290         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1291                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1292                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1293         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1294                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1295                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1296         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1297                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1298                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1299         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1300                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1301                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1302         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1303                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1304                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1305         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1306                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1307                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1308         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1309                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1310                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1311         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1312                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1313                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1314         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1315                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1316                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1317         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1318                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1319                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1320         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1321                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1322                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1323         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1324                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1325                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1326
1327         return 0;
1328 }
1329
1330 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1331 {
1332         int ret = -ENXIO;
1333
1334         switch (attr->attr) {
1335         case KVM_S390_VM_CPU_PROCESSOR:
1336                 ret = kvm_s390_set_processor(kvm, attr);
1337                 break;
1338         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1339                 ret = kvm_s390_set_processor_feat(kvm, attr);
1340                 break;
1341         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1342                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1343                 break;
1344         }
1345         return ret;
1346 }
1347
1348 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1349 {
1350         struct kvm_s390_vm_cpu_processor *proc;
1351         int ret = 0;
1352
1353         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1354         if (!proc) {
1355                 ret = -ENOMEM;
1356                 goto out;
1357         }
1358         proc->cpuid = kvm->arch.model.cpuid;
1359         proc->ibc = kvm->arch.model.ibc;
1360         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1361                S390_ARCH_FAC_LIST_SIZE_BYTE);
1362         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1363                  kvm->arch.model.ibc,
1364                  kvm->arch.model.cpuid);
1365         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1366                  kvm->arch.model.fac_list[0],
1367                  kvm->arch.model.fac_list[1],
1368                  kvm->arch.model.fac_list[2]);
1369         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1370                 ret = -EFAULT;
1371         kfree(proc);
1372 out:
1373         return ret;
1374 }
1375
1376 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1377 {
1378         struct kvm_s390_vm_cpu_machine *mach;
1379         int ret = 0;
1380
1381         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1382         if (!mach) {
1383                 ret = -ENOMEM;
1384                 goto out;
1385         }
1386         get_cpu_id((struct cpuid *) &mach->cpuid);
1387         mach->ibc = sclp.ibc;
1388         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1389                S390_ARCH_FAC_LIST_SIZE_BYTE);
1390         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1391                sizeof(S390_lowcore.stfle_fac_list));
1392         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1393                  kvm->arch.model.ibc,
1394                  kvm->arch.model.cpuid);
1395         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1396                  mach->fac_mask[0],
1397                  mach->fac_mask[1],
1398                  mach->fac_mask[2]);
1399         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1400                  mach->fac_list[0],
1401                  mach->fac_list[1],
1402                  mach->fac_list[2]);
1403         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1404                 ret = -EFAULT;
1405         kfree(mach);
1406 out:
1407         return ret;
1408 }
1409
1410 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1411                                        struct kvm_device_attr *attr)
1412 {
1413         struct kvm_s390_vm_cpu_feat data;
1414
1415         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1416                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1417         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1418                 return -EFAULT;
1419         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1420                          data.feat[0],
1421                          data.feat[1],
1422                          data.feat[2]);
1423         return 0;
1424 }
1425
1426 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1427                                      struct kvm_device_attr *attr)
1428 {
1429         struct kvm_s390_vm_cpu_feat data;
1430
1431         bitmap_copy((unsigned long *) data.feat,
1432                     kvm_s390_available_cpu_feat,
1433                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1434         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1435                 return -EFAULT;
1436         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1437                          data.feat[0],
1438                          data.feat[1],
1439                          data.feat[2]);
1440         return 0;
1441 }
1442
1443 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1444                                           struct kvm_device_attr *attr)
1445 {
1446         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1447             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1448                 return -EFAULT;
1449
1450         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1451                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1452                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1453                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1454                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1455         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1456                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1457                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1458         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1459                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1460                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1461         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1462                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1463                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1464         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1465                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1466                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1467         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1468                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1469                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1470         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1471                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1472                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1473         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1474                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1475                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1476         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1477                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1478                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1479         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1480                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1481                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1482         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1483                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1484                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1485         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1486                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1487                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1488         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1489                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1490                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1491         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1492                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1493                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1494
1495         return 0;
1496 }
1497
1498 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1499                                         struct kvm_device_attr *attr)
1500 {
1501         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1502             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1503                 return -EFAULT;
1504
1505         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1506                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1507                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1508                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1509                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1510         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1511                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1512                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1513         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1514                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1515                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1516         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1518                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1519         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1520                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1521                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1522         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1523                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1524                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1525         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1526                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1527                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1528         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1529                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1530                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1531         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1532                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1533                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1534         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1535                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1536                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1537         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1538                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1539                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1540         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1541                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1542                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1543         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1545                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1546         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1548                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1549
1550         return 0;
1551 }
1552
1553 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1554 {
1555         int ret = -ENXIO;
1556
1557         switch (attr->attr) {
1558         case KVM_S390_VM_CPU_PROCESSOR:
1559                 ret = kvm_s390_get_processor(kvm, attr);
1560                 break;
1561         case KVM_S390_VM_CPU_MACHINE:
1562                 ret = kvm_s390_get_machine(kvm, attr);
1563                 break;
1564         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1565                 ret = kvm_s390_get_processor_feat(kvm, attr);
1566                 break;
1567         case KVM_S390_VM_CPU_MACHINE_FEAT:
1568                 ret = kvm_s390_get_machine_feat(kvm, attr);
1569                 break;
1570         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1571                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1572                 break;
1573         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1574                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1575                 break;
1576         }
1577         return ret;
1578 }
1579
1580 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1581 {
1582         int ret;
1583
1584         switch (attr->group) {
1585         case KVM_S390_VM_MEM_CTRL:
1586                 ret = kvm_s390_set_mem_control(kvm, attr);
1587                 break;
1588         case KVM_S390_VM_TOD:
1589                 ret = kvm_s390_set_tod(kvm, attr);
1590                 break;
1591         case KVM_S390_VM_CPU_MODEL:
1592                 ret = kvm_s390_set_cpu_model(kvm, attr);
1593                 break;
1594         case KVM_S390_VM_CRYPTO:
1595                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1596                 break;
1597         case KVM_S390_VM_MIGRATION:
1598                 ret = kvm_s390_vm_set_migration(kvm, attr);
1599                 break;
1600         default:
1601                 ret = -ENXIO;
1602                 break;
1603         }
1604
1605         return ret;
1606 }
1607
1608 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1609 {
1610         int ret;
1611
1612         switch (attr->group) {
1613         case KVM_S390_VM_MEM_CTRL:
1614                 ret = kvm_s390_get_mem_control(kvm, attr);
1615                 break;
1616         case KVM_S390_VM_TOD:
1617                 ret = kvm_s390_get_tod(kvm, attr);
1618                 break;
1619         case KVM_S390_VM_CPU_MODEL:
1620                 ret = kvm_s390_get_cpu_model(kvm, attr);
1621                 break;
1622         case KVM_S390_VM_MIGRATION:
1623                 ret = kvm_s390_vm_get_migration(kvm, attr);
1624                 break;
1625         default:
1626                 ret = -ENXIO;
1627                 break;
1628         }
1629
1630         return ret;
1631 }
1632
1633 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1634 {
1635         int ret;
1636
1637         switch (attr->group) {
1638         case KVM_S390_VM_MEM_CTRL:
1639                 switch (attr->attr) {
1640                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1641                 case KVM_S390_VM_MEM_CLR_CMMA:
1642                         ret = sclp.has_cmma ? 0 : -ENXIO;
1643                         break;
1644                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1645                         ret = 0;
1646                         break;
1647                 default:
1648                         ret = -ENXIO;
1649                         break;
1650                 }
1651                 break;
1652         case KVM_S390_VM_TOD:
1653                 switch (attr->attr) {
1654                 case KVM_S390_VM_TOD_LOW:
1655                 case KVM_S390_VM_TOD_HIGH:
1656                         ret = 0;
1657                         break;
1658                 default:
1659                         ret = -ENXIO;
1660                         break;
1661                 }
1662                 break;
1663         case KVM_S390_VM_CPU_MODEL:
1664                 switch (attr->attr) {
1665                 case KVM_S390_VM_CPU_PROCESSOR:
1666                 case KVM_S390_VM_CPU_MACHINE:
1667                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1668                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1669                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1670                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1671                         ret = 0;
1672                         break;
1673                 default:
1674                         ret = -ENXIO;
1675                         break;
1676                 }
1677                 break;
1678         case KVM_S390_VM_CRYPTO:
1679                 switch (attr->attr) {
1680                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1681                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1682                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1683                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1684                         ret = 0;
1685                         break;
1686                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1687                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1688                         ret = ap_instructions_available() ? 0 : -ENXIO;
1689                         break;
1690                 default:
1691                         ret = -ENXIO;
1692                         break;
1693                 }
1694                 break;
1695         case KVM_S390_VM_MIGRATION:
1696                 ret = 0;
1697                 break;
1698         default:
1699                 ret = -ENXIO;
1700                 break;
1701         }
1702
1703         return ret;
1704 }
1705
1706 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1707 {
1708         uint8_t *keys;
1709         uint64_t hva;
1710         int srcu_idx, i, r = 0;
1711
1712         if (args->flags != 0)
1713                 return -EINVAL;
1714
1715         /* Is this guest using storage keys? */
1716         if (!mm_uses_skeys(current->mm))
1717                 return KVM_S390_GET_SKEYS_NONE;
1718
1719         /* Enforce sane limit on memory allocation */
1720         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1721                 return -EINVAL;
1722
1723         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1724         if (!keys)
1725                 return -ENOMEM;
1726
1727         down_read(&current->mm->mmap_sem);
1728         srcu_idx = srcu_read_lock(&kvm->srcu);
1729         for (i = 0; i < args->count; i++) {
1730                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1731                 if (kvm_is_error_hva(hva)) {
1732                         r = -EFAULT;
1733                         break;
1734                 }
1735
1736                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1737                 if (r)
1738                         break;
1739         }
1740         srcu_read_unlock(&kvm->srcu, srcu_idx);
1741         up_read(&current->mm->mmap_sem);
1742
1743         if (!r) {
1744                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1745                                  sizeof(uint8_t) * args->count);
1746                 if (r)
1747                         r = -EFAULT;
1748         }
1749
1750         kvfree(keys);
1751         return r;
1752 }
1753
1754 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1755 {
1756         uint8_t *keys;
1757         uint64_t hva;
1758         int srcu_idx, i, r = 0;
1759         bool unlocked;
1760
1761         if (args->flags != 0)
1762                 return -EINVAL;
1763
1764         /* Enforce sane limit on memory allocation */
1765         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1766                 return -EINVAL;
1767
1768         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1769         if (!keys)
1770                 return -ENOMEM;
1771
1772         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1773                            sizeof(uint8_t) * args->count);
1774         if (r) {
1775                 r = -EFAULT;
1776                 goto out;
1777         }
1778
1779         /* Enable storage key handling for the guest */
1780         r = s390_enable_skey();
1781         if (r)
1782                 goto out;
1783
1784         i = 0;
1785         down_read(&current->mm->mmap_sem);
1786         srcu_idx = srcu_read_lock(&kvm->srcu);
1787         while (i < args->count) {
1788                 unlocked = false;
1789                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1790                 if (kvm_is_error_hva(hva)) {
1791                         r = -EFAULT;
1792                         break;
1793                 }
1794
1795                 /* Lowest order bit is reserved */
1796                 if (keys[i] & 0x01) {
1797                         r = -EINVAL;
1798                         break;
1799                 }
1800
1801                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1802                 if (r) {
1803                         r = fixup_user_fault(current, current->mm, hva,
1804                                              FAULT_FLAG_WRITE, &unlocked);
1805                         if (r)
1806                                 break;
1807                 }
1808                 if (!r)
1809                         i++;
1810         }
1811         srcu_read_unlock(&kvm->srcu, srcu_idx);
1812         up_read(&current->mm->mmap_sem);
1813 out:
1814         kvfree(keys);
1815         return r;
1816 }
1817
1818 /*
1819  * Base address and length must be sent at the start of each block, therefore
1820  * it's cheaper to send some clean data, as long as it's less than the size of
1821  * two longs.
1822  */
1823 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1824 /* for consistency */
1825 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1826
1827 /*
1828  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1829  * address falls in a hole. In that case the index of one of the memslots
1830  * bordering the hole is returned.
1831  */
1832 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1833 {
1834         int start = 0, end = slots->used_slots;
1835         int slot = atomic_read(&slots->lru_slot);
1836         struct kvm_memory_slot *memslots = slots->memslots;
1837
1838         if (gfn >= memslots[slot].base_gfn &&
1839             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1840                 return slot;
1841
1842         while (start < end) {
1843                 slot = start + (end - start) / 2;
1844
1845                 if (gfn >= memslots[slot].base_gfn)
1846                         end = slot;
1847                 else
1848                         start = slot + 1;
1849         }
1850
1851         if (gfn >= memslots[start].base_gfn &&
1852             gfn < memslots[start].base_gfn + memslots[start].npages) {
1853                 atomic_set(&slots->lru_slot, start);
1854         }
1855
1856         return start;
1857 }
1858
1859 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1860                               u8 *res, unsigned long bufsize)
1861 {
1862         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1863
1864         args->count = 0;
1865         while (args->count < bufsize) {
1866                 hva = gfn_to_hva(kvm, cur_gfn);
1867                 /*
1868                  * We return an error if the first value was invalid, but we
1869                  * return successfully if at least one value was copied.
1870                  */
1871                 if (kvm_is_error_hva(hva))
1872                         return args->count ? 0 : -EFAULT;
1873                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1874                         pgstev = 0;
1875                 res[args->count++] = (pgstev >> 24) & 0x43;
1876                 cur_gfn++;
1877         }
1878
1879         return 0;
1880 }
1881
1882 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1883                                               unsigned long cur_gfn)
1884 {
1885         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1886         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1887         unsigned long ofs = cur_gfn - ms->base_gfn;
1888
1889         if (ms->base_gfn + ms->npages <= cur_gfn) {
1890                 slotidx--;
1891                 /* If we are above the highest slot, wrap around */
1892                 if (slotidx < 0)
1893                         slotidx = slots->used_slots - 1;
1894
1895                 ms = slots->memslots + slotidx;
1896                 ofs = 0;
1897         }
1898         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1899         while ((slotidx > 0) && (ofs >= ms->npages)) {
1900                 slotidx--;
1901                 ms = slots->memslots + slotidx;
1902                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1903         }
1904         return ms->base_gfn + ofs;
1905 }
1906
1907 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1908                              u8 *res, unsigned long bufsize)
1909 {
1910         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1911         struct kvm_memslots *slots = kvm_memslots(kvm);
1912         struct kvm_memory_slot *ms;
1913
1914         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1915         ms = gfn_to_memslot(kvm, cur_gfn);
1916         args->count = 0;
1917         args->start_gfn = cur_gfn;
1918         if (!ms)
1919                 return 0;
1920         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1921         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1922
1923         while (args->count < bufsize) {
1924                 hva = gfn_to_hva(kvm, cur_gfn);
1925                 if (kvm_is_error_hva(hva))
1926                         return 0;
1927                 /* Decrement only if we actually flipped the bit to 0 */
1928                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1929                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1930                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1931                         pgstev = 0;
1932                 /* Save the value */
1933                 res[args->count++] = (pgstev >> 24) & 0x43;
1934                 /* If the next bit is too far away, stop. */
1935                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1936                         return 0;
1937                 /* If we reached the previous "next", find the next one */
1938                 if (cur_gfn == next_gfn)
1939                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1940                 /* Reached the end of memory or of the buffer, stop */
1941                 if ((next_gfn >= mem_end) ||
1942                     (next_gfn - args->start_gfn >= bufsize))
1943                         return 0;
1944                 cur_gfn++;
1945                 /* Reached the end of the current memslot, take the next one. */
1946                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1947                         ms = gfn_to_memslot(kvm, cur_gfn);
1948                         if (!ms)
1949                                 return 0;
1950                 }
1951         }
1952         return 0;
1953 }
1954
1955 /*
1956  * This function searches for the next page with dirty CMMA attributes, and
1957  * saves the attributes in the buffer up to either the end of the buffer or
1958  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1959  * no trailing clean bytes are saved.
1960  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1961  * output buffer will indicate 0 as length.
1962  */
1963 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1964                                   struct kvm_s390_cmma_log *args)
1965 {
1966         unsigned long bufsize;
1967         int srcu_idx, peek, ret;
1968         u8 *values;
1969
1970         if (!kvm->arch.use_cmma)
1971                 return -ENXIO;
1972         /* Invalid/unsupported flags were specified */
1973         if (args->flags & ~KVM_S390_CMMA_PEEK)
1974                 return -EINVAL;
1975         /* Migration mode query, and we are not doing a migration */
1976         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1977         if (!peek && !kvm->arch.migration_mode)
1978                 return -EINVAL;
1979         /* CMMA is disabled or was not used, or the buffer has length zero */
1980         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1981         if (!bufsize || !kvm->mm->context.uses_cmm) {
1982                 memset(args, 0, sizeof(*args));
1983                 return 0;
1984         }
1985         /* We are not peeking, and there are no dirty pages */
1986         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1987                 memset(args, 0, sizeof(*args));
1988                 return 0;
1989         }
1990
1991         values = vmalloc(bufsize);
1992         if (!values)
1993                 return -ENOMEM;
1994
1995         down_read(&kvm->mm->mmap_sem);
1996         srcu_idx = srcu_read_lock(&kvm->srcu);
1997         if (peek)
1998                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1999         else
2000                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2001         srcu_read_unlock(&kvm->srcu, srcu_idx);
2002         up_read(&kvm->mm->mmap_sem);
2003
2004         if (kvm->arch.migration_mode)
2005                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2006         else
2007                 args->remaining = 0;
2008
2009         if (copy_to_user((void __user *)args->values, values, args->count))
2010                 ret = -EFAULT;
2011
2012         vfree(values);
2013         return ret;
2014 }
2015
2016 /*
2017  * This function sets the CMMA attributes for the given pages. If the input
2018  * buffer has zero length, no action is taken, otherwise the attributes are
2019  * set and the mm->context.uses_cmm flag is set.
2020  */
2021 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2022                                   const struct kvm_s390_cmma_log *args)
2023 {
2024         unsigned long hva, mask, pgstev, i;
2025         uint8_t *bits;
2026         int srcu_idx, r = 0;
2027
2028         mask = args->mask;
2029
2030         if (!kvm->arch.use_cmma)
2031                 return -ENXIO;
2032         /* invalid/unsupported flags */
2033         if (args->flags != 0)
2034                 return -EINVAL;
2035         /* Enforce sane limit on memory allocation */
2036         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2037                 return -EINVAL;
2038         /* Nothing to do */
2039         if (args->count == 0)
2040                 return 0;
2041
2042         bits = vmalloc(array_size(sizeof(*bits), args->count));
2043         if (!bits)
2044                 return -ENOMEM;
2045
2046         r = copy_from_user(bits, (void __user *)args->values, args->count);
2047         if (r) {
2048                 r = -EFAULT;
2049                 goto out;
2050         }
2051
2052         down_read(&kvm->mm->mmap_sem);
2053         srcu_idx = srcu_read_lock(&kvm->srcu);
2054         for (i = 0; i < args->count; i++) {
2055                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2056                 if (kvm_is_error_hva(hva)) {
2057                         r = -EFAULT;
2058                         break;
2059                 }
2060
2061                 pgstev = bits[i];
2062                 pgstev = pgstev << 24;
2063                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2064                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2065         }
2066         srcu_read_unlock(&kvm->srcu, srcu_idx);
2067         up_read(&kvm->mm->mmap_sem);
2068
2069         if (!kvm->mm->context.uses_cmm) {
2070                 down_write(&kvm->mm->mmap_sem);
2071                 kvm->mm->context.uses_cmm = 1;
2072                 up_write(&kvm->mm->mmap_sem);
2073         }
2074 out:
2075         vfree(bits);
2076         return r;
2077 }
2078
2079 long kvm_arch_vm_ioctl(struct file *filp,
2080                        unsigned int ioctl, unsigned long arg)
2081 {
2082         struct kvm *kvm = filp->private_data;
2083         void __user *argp = (void __user *)arg;
2084         struct kvm_device_attr attr;
2085         int r;
2086
2087         switch (ioctl) {
2088         case KVM_S390_INTERRUPT: {
2089                 struct kvm_s390_interrupt s390int;
2090
2091                 r = -EFAULT;
2092                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2093                         break;
2094                 r = kvm_s390_inject_vm(kvm, &s390int);
2095                 break;
2096         }
2097         case KVM_CREATE_IRQCHIP: {
2098                 struct kvm_irq_routing_entry routing;
2099
2100                 r = -EINVAL;
2101                 if (kvm->arch.use_irqchip) {
2102                         /* Set up dummy routing. */
2103                         memset(&routing, 0, sizeof(routing));
2104                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2105                 }
2106                 break;
2107         }
2108         case KVM_SET_DEVICE_ATTR: {
2109                 r = -EFAULT;
2110                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2111                         break;
2112                 r = kvm_s390_vm_set_attr(kvm, &attr);
2113                 break;
2114         }
2115         case KVM_GET_DEVICE_ATTR: {
2116                 r = -EFAULT;
2117                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2118                         break;
2119                 r = kvm_s390_vm_get_attr(kvm, &attr);
2120                 break;
2121         }
2122         case KVM_HAS_DEVICE_ATTR: {
2123                 r = -EFAULT;
2124                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2125                         break;
2126                 r = kvm_s390_vm_has_attr(kvm, &attr);
2127                 break;
2128         }
2129         case KVM_S390_GET_SKEYS: {
2130                 struct kvm_s390_skeys args;
2131
2132                 r = -EFAULT;
2133                 if (copy_from_user(&args, argp,
2134                                    sizeof(struct kvm_s390_skeys)))
2135                         break;
2136                 r = kvm_s390_get_skeys(kvm, &args);
2137                 break;
2138         }
2139         case KVM_S390_SET_SKEYS: {
2140                 struct kvm_s390_skeys args;
2141
2142                 r = -EFAULT;
2143                 if (copy_from_user(&args, argp,
2144                                    sizeof(struct kvm_s390_skeys)))
2145                         break;
2146                 r = kvm_s390_set_skeys(kvm, &args);
2147                 break;
2148         }
2149         case KVM_S390_GET_CMMA_BITS: {
2150                 struct kvm_s390_cmma_log args;
2151
2152                 r = -EFAULT;
2153                 if (copy_from_user(&args, argp, sizeof(args)))
2154                         break;
2155                 mutex_lock(&kvm->slots_lock);
2156                 r = kvm_s390_get_cmma_bits(kvm, &args);
2157                 mutex_unlock(&kvm->slots_lock);
2158                 if (!r) {
2159                         r = copy_to_user(argp, &args, sizeof(args));
2160                         if (r)
2161                                 r = -EFAULT;
2162                 }
2163                 break;
2164         }
2165         case KVM_S390_SET_CMMA_BITS: {
2166                 struct kvm_s390_cmma_log args;
2167
2168                 r = -EFAULT;
2169                 if (copy_from_user(&args, argp, sizeof(args)))
2170                         break;
2171                 mutex_lock(&kvm->slots_lock);
2172                 r = kvm_s390_set_cmma_bits(kvm, &args);
2173                 mutex_unlock(&kvm->slots_lock);
2174                 break;
2175         }
2176         default:
2177                 r = -ENOTTY;
2178         }
2179
2180         return r;
2181 }
2182
2183 static int kvm_s390_apxa_installed(void)
2184 {
2185         struct ap_config_info info;
2186
2187         if (ap_instructions_available()) {
2188                 if (ap_qci(&info) == 0)
2189                         return info.apxa;
2190         }
2191
2192         return 0;
2193 }
2194
2195 /*
2196  * The format of the crypto control block (CRYCB) is specified in the 3 low
2197  * order bits of the CRYCB designation (CRYCBD) field as follows:
2198  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2199  *           AP extended addressing (APXA) facility are installed.
2200  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2201  * Format 2: Both the APXA and MSAX3 facilities are installed
2202  */
2203 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2204 {
2205         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2206
2207         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2208         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2209
2210         /* Check whether MSAX3 is installed */
2211         if (!test_kvm_facility(kvm, 76))
2212                 return;
2213
2214         if (kvm_s390_apxa_installed())
2215                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2216         else
2217                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2218 }
2219
2220 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2221                                unsigned long *aqm, unsigned long *adm)
2222 {
2223         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2224
2225         mutex_lock(&kvm->lock);
2226         kvm_s390_vcpu_block_all(kvm);
2227
2228         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2229         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2230                 memcpy(crycb->apcb1.apm, apm, 32);
2231                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2232                          apm[0], apm[1], apm[2], apm[3]);
2233                 memcpy(crycb->apcb1.aqm, aqm, 32);
2234                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2235                          aqm[0], aqm[1], aqm[2], aqm[3]);
2236                 memcpy(crycb->apcb1.adm, adm, 32);
2237                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2238                          adm[0], adm[1], adm[2], adm[3]);
2239                 break;
2240         case CRYCB_FORMAT1:
2241         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2242                 memcpy(crycb->apcb0.apm, apm, 8);
2243                 memcpy(crycb->apcb0.aqm, aqm, 2);
2244                 memcpy(crycb->apcb0.adm, adm, 2);
2245                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2246                          apm[0], *((unsigned short *)aqm),
2247                          *((unsigned short *)adm));
2248                 break;
2249         default:        /* Can not happen */
2250                 break;
2251         }
2252
2253         /* recreate the shadow crycb for each vcpu */
2254         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2255         kvm_s390_vcpu_unblock_all(kvm);
2256         mutex_unlock(&kvm->lock);
2257 }
2258 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2259
2260 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2261 {
2262         mutex_lock(&kvm->lock);
2263         kvm_s390_vcpu_block_all(kvm);
2264
2265         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2266                sizeof(kvm->arch.crypto.crycb->apcb0));
2267         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2268                sizeof(kvm->arch.crypto.crycb->apcb1));
2269
2270         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2271         /* recreate the shadow crycb for each vcpu */
2272         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2273         kvm_s390_vcpu_unblock_all(kvm);
2274         mutex_unlock(&kvm->lock);
2275 }
2276 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2277
2278 static u64 kvm_s390_get_initial_cpuid(void)
2279 {
2280         struct cpuid cpuid;
2281
2282         get_cpu_id(&cpuid);
2283         cpuid.version = 0xff;
2284         return *((u64 *) &cpuid);
2285 }
2286
2287 static void kvm_s390_crypto_init(struct kvm *kvm)
2288 {
2289         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2290         kvm_s390_set_crycb_format(kvm);
2291
2292         if (!test_kvm_facility(kvm, 76))
2293                 return;
2294
2295         /* Enable AES/DEA protected key functions by default */
2296         kvm->arch.crypto.aes_kw = 1;
2297         kvm->arch.crypto.dea_kw = 1;
2298         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2299                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2300         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2301                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2302 }
2303
2304 static void sca_dispose(struct kvm *kvm)
2305 {
2306         if (kvm->arch.use_esca)
2307                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2308         else
2309                 free_page((unsigned long)(kvm->arch.sca));
2310         kvm->arch.sca = NULL;
2311 }
2312
2313 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2314 {
2315         gfp_t alloc_flags = GFP_KERNEL;
2316         int i, rc;
2317         char debug_name[16];
2318         static unsigned long sca_offset;
2319
2320         rc = -EINVAL;
2321 #ifdef CONFIG_KVM_S390_UCONTROL
2322         if (type & ~KVM_VM_S390_UCONTROL)
2323                 goto out_err;
2324         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2325                 goto out_err;
2326 #else
2327         if (type)
2328                 goto out_err;
2329 #endif
2330
2331         rc = s390_enable_sie();
2332         if (rc)
2333                 goto out_err;
2334
2335         rc = -ENOMEM;
2336
2337         if (!sclp.has_64bscao)
2338                 alloc_flags |= GFP_DMA;
2339         rwlock_init(&kvm->arch.sca_lock);
2340         /* start with basic SCA */
2341         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2342         if (!kvm->arch.sca)
2343                 goto out_err;
2344         spin_lock(&kvm_lock);
2345         sca_offset += 16;
2346         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2347                 sca_offset = 0;
2348         kvm->arch.sca = (struct bsca_block *)
2349                         ((char *) kvm->arch.sca + sca_offset);
2350         spin_unlock(&kvm_lock);
2351
2352         sprintf(debug_name, "kvm-%u", current->pid);
2353
2354         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2355         if (!kvm->arch.dbf)
2356                 goto out_err;
2357
2358         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2359         kvm->arch.sie_page2 =
2360              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2361         if (!kvm->arch.sie_page2)
2362                 goto out_err;
2363
2364         kvm->arch.sie_page2->kvm = kvm;
2365         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2366
2367         for (i = 0; i < kvm_s390_fac_size(); i++) {
2368                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2369                                               (kvm_s390_fac_base[i] |
2370                                                kvm_s390_fac_ext[i]);
2371                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2372                                               kvm_s390_fac_base[i];
2373         }
2374         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2375
2376         /* we are always in czam mode - even on pre z14 machines */
2377         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2378         set_kvm_facility(kvm->arch.model.fac_list, 138);
2379         /* we emulate STHYI in kvm */
2380         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2381         set_kvm_facility(kvm->arch.model.fac_list, 74);
2382         if (MACHINE_HAS_TLB_GUEST) {
2383                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2384                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2385         }
2386
2387         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2388         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2389
2390         kvm_s390_crypto_init(kvm);
2391
2392         mutex_init(&kvm->arch.float_int.ais_lock);
2393         spin_lock_init(&kvm->arch.float_int.lock);
2394         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2395                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2396         init_waitqueue_head(&kvm->arch.ipte_wq);
2397         mutex_init(&kvm->arch.ipte_mutex);
2398
2399         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2400         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2401
2402         if (type & KVM_VM_S390_UCONTROL) {
2403                 kvm->arch.gmap = NULL;
2404                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2405         } else {
2406                 if (sclp.hamax == U64_MAX)
2407                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2408                 else
2409                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2410                                                     sclp.hamax + 1);
2411                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2412                 if (!kvm->arch.gmap)
2413                         goto out_err;
2414                 kvm->arch.gmap->private = kvm;
2415                 kvm->arch.gmap->pfault_enabled = 0;
2416         }
2417
2418         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2419         kvm->arch.use_skf = sclp.has_skey;
2420         spin_lock_init(&kvm->arch.start_stop_lock);
2421         kvm_s390_vsie_init(kvm);
2422         kvm_s390_gisa_init(kvm);
2423         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2424
2425         return 0;
2426 out_err:
2427         free_page((unsigned long)kvm->arch.sie_page2);
2428         debug_unregister(kvm->arch.dbf);
2429         sca_dispose(kvm);
2430         KVM_EVENT(3, "creation of vm failed: %d", rc);
2431         return rc;
2432 }
2433
2434 bool kvm_arch_has_vcpu_debugfs(void)
2435 {
2436         return false;
2437 }
2438
2439 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2440 {
2441         return 0;
2442 }
2443
2444 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2445 {
2446         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2447         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2448         kvm_s390_clear_local_irqs(vcpu);
2449         kvm_clear_async_pf_completion_queue(vcpu);
2450         if (!kvm_is_ucontrol(vcpu->kvm))
2451                 sca_del_vcpu(vcpu);
2452
2453         if (kvm_is_ucontrol(vcpu->kvm))
2454                 gmap_remove(vcpu->arch.gmap);
2455
2456         if (vcpu->kvm->arch.use_cmma)
2457                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2458         free_page((unsigned long)(vcpu->arch.sie_block));
2459
2460         kvm_vcpu_uninit(vcpu);
2461         kmem_cache_free(kvm_vcpu_cache, vcpu);
2462 }
2463
2464 static void kvm_free_vcpus(struct kvm *kvm)
2465 {
2466         unsigned int i;
2467         struct kvm_vcpu *vcpu;
2468
2469         kvm_for_each_vcpu(i, vcpu, kvm)
2470                 kvm_arch_vcpu_destroy(vcpu);
2471
2472         mutex_lock(&kvm->lock);
2473         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2474                 kvm->vcpus[i] = NULL;
2475
2476         atomic_set(&kvm->online_vcpus, 0);
2477         mutex_unlock(&kvm->lock);
2478 }
2479
2480 void kvm_arch_destroy_vm(struct kvm *kvm)
2481 {
2482         kvm_free_vcpus(kvm);
2483         sca_dispose(kvm);
2484         debug_unregister(kvm->arch.dbf);
2485         kvm_s390_gisa_destroy(kvm);
2486         free_page((unsigned long)kvm->arch.sie_page2);
2487         if (!kvm_is_ucontrol(kvm))
2488                 gmap_remove(kvm->arch.gmap);
2489         kvm_s390_destroy_adapters(kvm);
2490         kvm_s390_clear_float_irqs(kvm);
2491         kvm_s390_vsie_destroy(kvm);
2492         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2493 }
2494
2495 /* Section: vcpu related */
2496 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2497 {
2498         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2499         if (!vcpu->arch.gmap)
2500                 return -ENOMEM;
2501         vcpu->arch.gmap->private = vcpu->kvm;
2502
2503         return 0;
2504 }
2505
2506 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2507 {
2508         if (!kvm_s390_use_sca_entries())
2509                 return;
2510         read_lock(&vcpu->kvm->arch.sca_lock);
2511         if (vcpu->kvm->arch.use_esca) {
2512                 struct esca_block *sca = vcpu->kvm->arch.sca;
2513
2514                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2515                 sca->cpu[vcpu->vcpu_id].sda = 0;
2516         } else {
2517                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2518
2519                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2520                 sca->cpu[vcpu->vcpu_id].sda = 0;
2521         }
2522         read_unlock(&vcpu->kvm->arch.sca_lock);
2523 }
2524
2525 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2526 {
2527         if (!kvm_s390_use_sca_entries()) {
2528                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2529
2530                 /* we still need the basic sca for the ipte control */
2531                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2532                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2533                 return;
2534         }
2535         read_lock(&vcpu->kvm->arch.sca_lock);
2536         if (vcpu->kvm->arch.use_esca) {
2537                 struct esca_block *sca = vcpu->kvm->arch.sca;
2538
2539                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2540                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2541                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2542                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2543                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2544         } else {
2545                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2546
2547                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2548                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2549                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2550                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2551         }
2552         read_unlock(&vcpu->kvm->arch.sca_lock);
2553 }
2554
2555 /* Basic SCA to Extended SCA data copy routines */
2556 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2557 {
2558         d->sda = s->sda;
2559         d->sigp_ctrl.c = s->sigp_ctrl.c;
2560         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2561 }
2562
2563 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2564 {
2565         int i;
2566
2567         d->ipte_control = s->ipte_control;
2568         d->mcn[0] = s->mcn;
2569         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2570                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2571 }
2572
2573 static int sca_switch_to_extended(struct kvm *kvm)
2574 {
2575         struct bsca_block *old_sca = kvm->arch.sca;
2576         struct esca_block *new_sca;
2577         struct kvm_vcpu *vcpu;
2578         unsigned int vcpu_idx;
2579         u32 scaol, scaoh;
2580
2581         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2582         if (!new_sca)
2583                 return -ENOMEM;
2584
2585         scaoh = (u32)((u64)(new_sca) >> 32);
2586         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2587
2588         kvm_s390_vcpu_block_all(kvm);
2589         write_lock(&kvm->arch.sca_lock);
2590
2591         sca_copy_b_to_e(new_sca, old_sca);
2592
2593         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2594                 vcpu->arch.sie_block->scaoh = scaoh;
2595                 vcpu->arch.sie_block->scaol = scaol;
2596                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2597         }
2598         kvm->arch.sca = new_sca;
2599         kvm->arch.use_esca = 1;
2600
2601         write_unlock(&kvm->arch.sca_lock);
2602         kvm_s390_vcpu_unblock_all(kvm);
2603
2604         free_page((unsigned long)old_sca);
2605
2606         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2607                  old_sca, kvm->arch.sca);
2608         return 0;
2609 }
2610
2611 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2612 {
2613         int rc;
2614
2615         if (!kvm_s390_use_sca_entries()) {
2616                 if (id < KVM_MAX_VCPUS)
2617                         return true;
2618                 return false;
2619         }
2620         if (id < KVM_S390_BSCA_CPU_SLOTS)
2621                 return true;
2622         if (!sclp.has_esca || !sclp.has_64bscao)
2623                 return false;
2624
2625         mutex_lock(&kvm->lock);
2626         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2627         mutex_unlock(&kvm->lock);
2628
2629         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2630 }
2631
2632 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2633 {
2634         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2635         kvm_clear_async_pf_completion_queue(vcpu);
2636         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2637                                     KVM_SYNC_GPRS |
2638                                     KVM_SYNC_ACRS |
2639                                     KVM_SYNC_CRS |
2640                                     KVM_SYNC_ARCH0 |
2641                                     KVM_SYNC_PFAULT;
2642         kvm_s390_set_prefix(vcpu, 0);
2643         if (test_kvm_facility(vcpu->kvm, 64))
2644                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2645         if (test_kvm_facility(vcpu->kvm, 82))
2646                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2647         if (test_kvm_facility(vcpu->kvm, 133))
2648                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2649         if (test_kvm_facility(vcpu->kvm, 156))
2650                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2651         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2652          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2653          */
2654         if (MACHINE_HAS_VX)
2655                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2656         else
2657                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2658
2659         if (kvm_is_ucontrol(vcpu->kvm))
2660                 return __kvm_ucontrol_vcpu_init(vcpu);
2661
2662         return 0;
2663 }
2664
2665 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2666 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2667 {
2668         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2669         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2670         vcpu->arch.cputm_start = get_tod_clock_fast();
2671         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2672 }
2673
2674 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2675 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2676 {
2677         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2678         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2679         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2680         vcpu->arch.cputm_start = 0;
2681         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2682 }
2683
2684 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2685 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2686 {
2687         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2688         vcpu->arch.cputm_enabled = true;
2689         __start_cpu_timer_accounting(vcpu);
2690 }
2691
2692 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2693 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2694 {
2695         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2696         __stop_cpu_timer_accounting(vcpu);
2697         vcpu->arch.cputm_enabled = false;
2698 }
2699
2700 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2701 {
2702         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2703         __enable_cpu_timer_accounting(vcpu);
2704         preempt_enable();
2705 }
2706
2707 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2708 {
2709         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2710         __disable_cpu_timer_accounting(vcpu);
2711         preempt_enable();
2712 }
2713
2714 /* set the cpu timer - may only be called from the VCPU thread itself */
2715 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2716 {
2717         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2718         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2719         if (vcpu->arch.cputm_enabled)
2720                 vcpu->arch.cputm_start = get_tod_clock_fast();
2721         vcpu->arch.sie_block->cputm = cputm;
2722         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2723         preempt_enable();
2724 }
2725
2726 /* update and get the cpu timer - can also be called from other VCPU threads */
2727 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2728 {
2729         unsigned int seq;
2730         __u64 value;
2731
2732         if (unlikely(!vcpu->arch.cputm_enabled))
2733                 return vcpu->arch.sie_block->cputm;
2734
2735         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2736         do {
2737                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2738                 /*
2739                  * If the writer would ever execute a read in the critical
2740                  * section, e.g. in irq context, we have a deadlock.
2741                  */
2742                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2743                 value = vcpu->arch.sie_block->cputm;
2744                 /* if cputm_start is 0, accounting is being started/stopped */
2745                 if (likely(vcpu->arch.cputm_start))
2746                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2747         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2748         preempt_enable();
2749         return value;
2750 }
2751
2752 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2753 {
2754
2755         gmap_enable(vcpu->arch.enabled_gmap);
2756         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2757         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2758                 __start_cpu_timer_accounting(vcpu);
2759         vcpu->cpu = cpu;
2760 }
2761
2762 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2763 {
2764         vcpu->cpu = -1;
2765         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2766                 __stop_cpu_timer_accounting(vcpu);
2767         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2768         vcpu->arch.enabled_gmap = gmap_get_enabled();
2769         gmap_disable(vcpu->arch.enabled_gmap);
2770
2771 }
2772
2773 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2774 {
2775         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2776         vcpu->arch.sie_block->gpsw.mask = 0UL;
2777         vcpu->arch.sie_block->gpsw.addr = 0UL;
2778         kvm_s390_set_prefix(vcpu, 0);
2779         kvm_s390_set_cpu_timer(vcpu, 0);
2780         vcpu->arch.sie_block->ckc       = 0UL;
2781         vcpu->arch.sie_block->todpr     = 0;
2782         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2783         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2784                                         CR0_INTERRUPT_KEY_SUBMASK |
2785                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2786         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2787                                         CR14_UNUSED_33 |
2788                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2789         /* make sure the new fpc will be lazily loaded */
2790         save_fpu_regs();
2791         current->thread.fpu.fpc = 0;
2792         vcpu->arch.sie_block->gbea = 1;
2793         vcpu->arch.sie_block->pp = 0;
2794         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2795         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2796         kvm_clear_async_pf_completion_queue(vcpu);
2797         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2798                 kvm_s390_vcpu_stop(vcpu);
2799         kvm_s390_clear_local_irqs(vcpu);
2800 }
2801
2802 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2803 {
2804         mutex_lock(&vcpu->kvm->lock);
2805         preempt_disable();
2806         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2807         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2808         preempt_enable();
2809         mutex_unlock(&vcpu->kvm->lock);
2810         if (!kvm_is_ucontrol(vcpu->kvm)) {
2811                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2812                 sca_add_vcpu(vcpu);
2813         }
2814         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2815                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2816         /* make vcpu_load load the right gmap on the first trigger */
2817         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2818 }
2819
2820 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2821 {
2822         /*
2823          * If the AP instructions are not being interpreted and the MSAX3
2824          * facility is not configured for the guest, there is nothing to set up.
2825          */
2826         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2827                 return;
2828
2829         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2830         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2831         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2832
2833         if (vcpu->kvm->arch.crypto.apie)
2834                 vcpu->arch.sie_block->eca |= ECA_APIE;
2835
2836         /* Set up protected key support */
2837         if (vcpu->kvm->arch.crypto.aes_kw)
2838                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2839         if (vcpu->kvm->arch.crypto.dea_kw)
2840                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2841 }
2842
2843 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2844 {
2845         free_page(vcpu->arch.sie_block->cbrlo);
2846         vcpu->arch.sie_block->cbrlo = 0;
2847 }
2848
2849 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2850 {
2851         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2852         if (!vcpu->arch.sie_block->cbrlo)
2853                 return -ENOMEM;
2854         return 0;
2855 }
2856
2857 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2858 {
2859         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2860
2861         vcpu->arch.sie_block->ibc = model->ibc;
2862         if (test_kvm_facility(vcpu->kvm, 7))
2863                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2864 }
2865
2866 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2867 {
2868         int rc = 0;
2869
2870         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2871                                                     CPUSTAT_SM |
2872                                                     CPUSTAT_STOPPED);
2873
2874         if (test_kvm_facility(vcpu->kvm, 78))
2875                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2876         else if (test_kvm_facility(vcpu->kvm, 8))
2877                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2878
2879         kvm_s390_vcpu_setup_model(vcpu);
2880
2881         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2882         if (MACHINE_HAS_ESOP)
2883                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2884         if (test_kvm_facility(vcpu->kvm, 9))
2885                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2886         if (test_kvm_facility(vcpu->kvm, 73))
2887                 vcpu->arch.sie_block->ecb |= ECB_TE;
2888
2889         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2890                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2891         if (test_kvm_facility(vcpu->kvm, 130))
2892                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2893         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2894         if (sclp.has_cei)
2895                 vcpu->arch.sie_block->eca |= ECA_CEI;
2896         if (sclp.has_ib)
2897                 vcpu->arch.sie_block->eca |= ECA_IB;
2898         if (sclp.has_siif)
2899                 vcpu->arch.sie_block->eca |= ECA_SII;
2900         if (sclp.has_sigpif)
2901                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2902         if (test_kvm_facility(vcpu->kvm, 129)) {
2903                 vcpu->arch.sie_block->eca |= ECA_VX;
2904                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2905         }
2906         if (test_kvm_facility(vcpu->kvm, 139))
2907                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2908         if (test_kvm_facility(vcpu->kvm, 156))
2909                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2910         if (vcpu->arch.sie_block->gd) {
2911                 vcpu->arch.sie_block->eca |= ECA_AIV;
2912                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2913                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2914         }
2915         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2916                                         | SDNXC;
2917         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2918
2919         if (sclp.has_kss)
2920                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2921         else
2922                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2923
2924         if (vcpu->kvm->arch.use_cmma) {
2925                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2926                 if (rc)
2927                         return rc;
2928         }
2929         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2930         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2931
2932         vcpu->arch.sie_block->hpid = HPID_KVM;
2933
2934         kvm_s390_vcpu_crypto_setup(vcpu);
2935
2936         return rc;
2937 }
2938
2939 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2940                                       unsigned int id)
2941 {
2942         struct kvm_vcpu *vcpu;
2943         struct sie_page *sie_page;
2944         int rc = -EINVAL;
2945
2946         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2947                 goto out;
2948
2949         rc = -ENOMEM;
2950
2951         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2952         if (!vcpu)
2953                 goto out;
2954
2955         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2956         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2957         if (!sie_page)
2958                 goto out_free_cpu;
2959
2960         vcpu->arch.sie_block = &sie_page->sie_block;
2961         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2962
2963         /* the real guest size will always be smaller than msl */
2964         vcpu->arch.sie_block->mso = 0;
2965         vcpu->arch.sie_block->msl = sclp.hamax;
2966
2967         vcpu->arch.sie_block->icpua = id;
2968         spin_lock_init(&vcpu->arch.local_int.lock);
2969         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
2970         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2971                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2972         seqcount_init(&vcpu->arch.cputm_seqcount);
2973
2974         rc = kvm_vcpu_init(vcpu, kvm, id);
2975         if (rc)
2976                 goto out_free_sie_block;
2977         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2978                  vcpu->arch.sie_block);
2979         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2980
2981         return vcpu;
2982 out_free_sie_block:
2983         free_page((unsigned long)(vcpu->arch.sie_block));
2984 out_free_cpu:
2985         kmem_cache_free(kvm_vcpu_cache, vcpu);
2986 out:
2987         return ERR_PTR(rc);
2988 }
2989
2990 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2991 {
2992         return kvm_s390_vcpu_has_irq(vcpu, 0);
2993 }
2994
2995 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2996 {
2997         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2998 }
2999
3000 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3001 {
3002         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3003         exit_sie(vcpu);
3004 }
3005
3006 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3007 {
3008         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3009 }
3010
3011 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3012 {
3013         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3014         exit_sie(vcpu);
3015 }
3016
3017 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3018 {
3019         return atomic_read(&vcpu->arch.sie_block->prog20) &
3020                (PROG_BLOCK_SIE | PROG_REQUEST);
3021 }
3022
3023 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3024 {
3025         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3026 }
3027
3028 /*
3029  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3030  * If the CPU is not running (e.g. waiting as idle) the function will
3031  * return immediately. */
3032 void exit_sie(struct kvm_vcpu *vcpu)
3033 {
3034         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3035         kvm_s390_vsie_kick(vcpu);
3036         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3037                 cpu_relax();
3038 }
3039
3040 /* Kick a guest cpu out of SIE to process a request synchronously */
3041 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3042 {
3043         kvm_make_request(req, vcpu);
3044         kvm_s390_vcpu_request(vcpu);
3045 }
3046
3047 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3048                               unsigned long end)
3049 {
3050         struct kvm *kvm = gmap->private;
3051         struct kvm_vcpu *vcpu;
3052         unsigned long prefix;
3053         int i;
3054
3055         if (gmap_is_shadow(gmap))
3056                 return;
3057         if (start >= 1UL << 31)
3058                 /* We are only interested in prefix pages */
3059                 return;
3060         kvm_for_each_vcpu(i, vcpu, kvm) {
3061                 /* match against both prefix pages */
3062                 prefix = kvm_s390_get_prefix(vcpu);
3063                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3064                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3065                                    start, end);
3066                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3067                 }
3068         }
3069 }
3070
3071 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3072 {
3073         /* kvm common code refers to this, but never calls it */
3074         BUG();
3075         return 0;
3076 }
3077
3078 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3079                                            struct kvm_one_reg *reg)
3080 {
3081         int r = -EINVAL;
3082
3083         switch (reg->id) {
3084         case KVM_REG_S390_TODPR:
3085                 r = put_user(vcpu->arch.sie_block->todpr,
3086                              (u32 __user *)reg->addr);
3087                 break;
3088         case KVM_REG_S390_EPOCHDIFF:
3089                 r = put_user(vcpu->arch.sie_block->epoch,
3090                              (u64 __user *)reg->addr);
3091                 break;
3092         case KVM_REG_S390_CPU_TIMER:
3093                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3094                              (u64 __user *)reg->addr);
3095                 break;
3096         case KVM_REG_S390_CLOCK_COMP:
3097                 r = put_user(vcpu->arch.sie_block->ckc,
3098                              (u64 __user *)reg->addr);
3099                 break;
3100         case KVM_REG_S390_PFTOKEN:
3101                 r = put_user(vcpu->arch.pfault_token,
3102                              (u64 __user *)reg->addr);
3103                 break;
3104         case KVM_REG_S390_PFCOMPARE:
3105                 r = put_user(vcpu->arch.pfault_compare,
3106                              (u64 __user *)reg->addr);
3107                 break;
3108         case KVM_REG_S390_PFSELECT:
3109                 r = put_user(vcpu->arch.pfault_select,
3110                              (u64 __user *)reg->addr);
3111                 break;
3112         case KVM_REG_S390_PP:
3113                 r = put_user(vcpu->arch.sie_block->pp,
3114                              (u64 __user *)reg->addr);
3115                 break;
3116         case KVM_REG_S390_GBEA:
3117                 r = put_user(vcpu->arch.sie_block->gbea,
3118                              (u64 __user *)reg->addr);
3119                 break;
3120         default:
3121                 break;
3122         }
3123
3124         return r;
3125 }
3126
3127 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3128                                            struct kvm_one_reg *reg)
3129 {
3130         int r = -EINVAL;
3131         __u64 val;
3132
3133         switch (reg->id) {
3134         case KVM_REG_S390_TODPR:
3135                 r = get_user(vcpu->arch.sie_block->todpr,
3136                              (u32 __user *)reg->addr);
3137                 break;
3138         case KVM_REG_S390_EPOCHDIFF:
3139                 r = get_user(vcpu->arch.sie_block->epoch,
3140                              (u64 __user *)reg->addr);
3141                 break;
3142         case KVM_REG_S390_CPU_TIMER:
3143                 r = get_user(val, (u64 __user *)reg->addr);
3144                 if (!r)
3145                         kvm_s390_set_cpu_timer(vcpu, val);
3146                 break;
3147         case KVM_REG_S390_CLOCK_COMP:
3148                 r = get_user(vcpu->arch.sie_block->ckc,
3149                              (u64 __user *)reg->addr);
3150                 break;
3151         case KVM_REG_S390_PFTOKEN:
3152                 r = get_user(vcpu->arch.pfault_token,
3153                              (u64 __user *)reg->addr);
3154                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3155                         kvm_clear_async_pf_completion_queue(vcpu);
3156                 break;
3157         case KVM_REG_S390_PFCOMPARE:
3158                 r = get_user(vcpu->arch.pfault_compare,
3159                              (u64 __user *)reg->addr);
3160                 break;
3161         case KVM_REG_S390_PFSELECT:
3162                 r = get_user(vcpu->arch.pfault_select,
3163                              (u64 __user *)reg->addr);
3164                 break;
3165         case KVM_REG_S390_PP:
3166                 r = get_user(vcpu->arch.sie_block->pp,
3167                              (u64 __user *)reg->addr);
3168                 break;
3169         case KVM_REG_S390_GBEA:
3170                 r = get_user(vcpu->arch.sie_block->gbea,
3171                              (u64 __user *)reg->addr);
3172                 break;
3173         default:
3174                 break;
3175         }
3176
3177         return r;
3178 }
3179
3180 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3181 {
3182         kvm_s390_vcpu_initial_reset(vcpu);
3183         return 0;
3184 }
3185
3186 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3187 {
3188         vcpu_load(vcpu);
3189         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3190         vcpu_put(vcpu);
3191         return 0;
3192 }
3193
3194 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3195 {
3196         vcpu_load(vcpu);
3197         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3198         vcpu_put(vcpu);
3199         return 0;
3200 }
3201
3202 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3203                                   struct kvm_sregs *sregs)
3204 {
3205         vcpu_load(vcpu);
3206
3207         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3208         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3209
3210         vcpu_put(vcpu);
3211         return 0;
3212 }
3213
3214 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3215                                   struct kvm_sregs *sregs)
3216 {
3217         vcpu_load(vcpu);
3218
3219         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3220         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3221
3222         vcpu_put(vcpu);
3223         return 0;
3224 }
3225
3226 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3227 {
3228         int ret = 0;
3229
3230         vcpu_load(vcpu);
3231
3232         if (test_fp_ctl(fpu->fpc)) {
3233                 ret = -EINVAL;
3234                 goto out;
3235         }
3236         vcpu->run->s.regs.fpc = fpu->fpc;
3237         if (MACHINE_HAS_VX)
3238                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3239                                  (freg_t *) fpu->fprs);
3240         else
3241                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3242
3243 out:
3244         vcpu_put(vcpu);
3245         return ret;
3246 }
3247
3248 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3249 {
3250         vcpu_load(vcpu);
3251
3252         /* make sure we have the latest values */
3253         save_fpu_regs();
3254         if (MACHINE_HAS_VX)
3255                 convert_vx_to_fp((freg_t *) fpu->fprs,
3256                                  (__vector128 *) vcpu->run->s.regs.vrs);
3257         else
3258                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3259         fpu->fpc = vcpu->run->s.regs.fpc;
3260
3261         vcpu_put(vcpu);
3262         return 0;
3263 }
3264
3265 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3266 {
3267         int rc = 0;
3268
3269         if (!is_vcpu_stopped(vcpu))
3270                 rc = -EBUSY;
3271         else {
3272                 vcpu->run->psw_mask = psw.mask;
3273                 vcpu->run->psw_addr = psw.addr;
3274         }
3275         return rc;
3276 }
3277
3278 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3279                                   struct kvm_translation *tr)
3280 {
3281         return -EINVAL; /* not implemented yet */
3282 }
3283
3284 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3285                               KVM_GUESTDBG_USE_HW_BP | \
3286                               KVM_GUESTDBG_ENABLE)
3287
3288 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3289                                         struct kvm_guest_debug *dbg)
3290 {
3291         int rc = 0;
3292
3293         vcpu_load(vcpu);
3294
3295         vcpu->guest_debug = 0;
3296         kvm_s390_clear_bp_data(vcpu);
3297
3298         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3299                 rc = -EINVAL;
3300                 goto out;
3301         }
3302         if (!sclp.has_gpere) {
3303                 rc = -EINVAL;
3304                 goto out;
3305         }
3306
3307         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3308                 vcpu->guest_debug = dbg->control;
3309                 /* enforce guest PER */
3310                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3311
3312                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3313                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3314         } else {
3315                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3316                 vcpu->arch.guestdbg.last_bp = 0;
3317         }
3318
3319         if (rc) {
3320                 vcpu->guest_debug = 0;
3321                 kvm_s390_clear_bp_data(vcpu);
3322                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3323         }
3324
3325 out:
3326         vcpu_put(vcpu);
3327         return rc;
3328 }
3329
3330 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3331                                     struct kvm_mp_state *mp_state)
3332 {
3333         int ret;
3334
3335         vcpu_load(vcpu);
3336
3337         /* CHECK_STOP and LOAD are not supported yet */
3338         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3339                                       KVM_MP_STATE_OPERATING;
3340
3341         vcpu_put(vcpu);
3342         return ret;
3343 }
3344
3345 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3346                                     struct kvm_mp_state *mp_state)
3347 {
3348         int rc = 0;
3349
3350         vcpu_load(vcpu);
3351
3352         /* user space knows about this interface - let it control the state */
3353         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3354
3355         switch (mp_state->mp_state) {
3356         case KVM_MP_STATE_STOPPED:
3357                 kvm_s390_vcpu_stop(vcpu);
3358                 break;
3359         case KVM_MP_STATE_OPERATING:
3360                 kvm_s390_vcpu_start(vcpu);
3361                 break;
3362         case KVM_MP_STATE_LOAD:
3363         case KVM_MP_STATE_CHECK_STOP:
3364                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3365         default:
3366                 rc = -ENXIO;
3367         }
3368
3369         vcpu_put(vcpu);
3370         return rc;
3371 }
3372
3373 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3374 {
3375         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3376 }
3377
3378 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3379 {
3380 retry:
3381         kvm_s390_vcpu_request_handled(vcpu);
3382         if (!kvm_request_pending(vcpu))
3383                 return 0;
3384         /*
3385          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3386          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3387          * This ensures that the ipte instruction for this request has
3388          * already finished. We might race against a second unmapper that
3389          * wants to set the blocking bit. Lets just retry the request loop.
3390          */
3391         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3392                 int rc;
3393                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3394                                           kvm_s390_get_prefix(vcpu),
3395                                           PAGE_SIZE * 2, PROT_WRITE);
3396                 if (rc) {
3397                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3398                         return rc;
3399                 }
3400                 goto retry;
3401         }
3402
3403         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3404                 vcpu->arch.sie_block->ihcpu = 0xffff;
3405                 goto retry;
3406         }
3407
3408         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3409                 if (!ibs_enabled(vcpu)) {
3410                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3411                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3412                 }
3413                 goto retry;
3414         }
3415
3416         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3417                 if (ibs_enabled(vcpu)) {
3418                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3419                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3420                 }
3421                 goto retry;
3422         }
3423
3424         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3425                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3426                 goto retry;
3427         }
3428
3429         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3430                 /*
3431                  * Disable CMM virtualization; we will emulate the ESSA
3432                  * instruction manually, in order to provide additional
3433                  * functionalities needed for live migration.
3434                  */
3435                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3436                 goto retry;
3437         }
3438
3439         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3440                 /*
3441                  * Re-enable CMM virtualization if CMMA is available and
3442                  * CMM has been used.
3443                  */
3444                 if ((vcpu->kvm->arch.use_cmma) &&
3445                     (vcpu->kvm->mm->context.uses_cmm))
3446                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3447                 goto retry;
3448         }
3449
3450         /* nothing to do, just clear the request */
3451         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3452         /* we left the vsie handler, nothing to do, just clear the request */
3453         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3454
3455         return 0;
3456 }
3457
3458 void kvm_s390_set_tod_clock(struct kvm *kvm,
3459                             const struct kvm_s390_vm_tod_clock *gtod)
3460 {
3461         struct kvm_vcpu *vcpu;
3462         struct kvm_s390_tod_clock_ext htod;
3463         int i;
3464
3465         mutex_lock(&kvm->lock);
3466         preempt_disable();
3467
3468         get_tod_clock_ext((char *)&htod);
3469
3470         kvm->arch.epoch = gtod->tod - htod.tod;
3471         kvm->arch.epdx = 0;
3472         if (test_kvm_facility(kvm, 139)) {
3473                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3474                 if (kvm->arch.epoch > gtod->tod)
3475                         kvm->arch.epdx -= 1;
3476         }
3477
3478         kvm_s390_vcpu_block_all(kvm);
3479         kvm_for_each_vcpu(i, vcpu, kvm) {
3480                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3481                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3482         }
3483
3484         kvm_s390_vcpu_unblock_all(kvm);
3485         preempt_enable();
3486         mutex_unlock(&kvm->lock);
3487 }
3488
3489 /**
3490  * kvm_arch_fault_in_page - fault-in guest page if necessary
3491  * @vcpu: The corresponding virtual cpu
3492  * @gpa: Guest physical address
3493  * @writable: Whether the page should be writable or not
3494  *
3495  * Make sure that a guest page has been faulted-in on the host.
3496  *
3497  * Return: Zero on success, negative error code otherwise.
3498  */
3499 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3500 {
3501         return gmap_fault(vcpu->arch.gmap, gpa,
3502                           writable ? FAULT_FLAG_WRITE : 0);
3503 }
3504
3505 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3506                                       unsigned long token)
3507 {
3508         struct kvm_s390_interrupt inti;
3509         struct kvm_s390_irq irq;
3510
3511         if (start_token) {
3512                 irq.u.ext.ext_params2 = token;
3513                 irq.type = KVM_S390_INT_PFAULT_INIT;
3514                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3515         } else {
3516                 inti.type = KVM_S390_INT_PFAULT_DONE;
3517                 inti.parm64 = token;
3518                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3519         }
3520 }
3521
3522 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3523                                      struct kvm_async_pf *work)
3524 {
3525         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3526         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3527 }
3528
3529 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3530                                  struct kvm_async_pf *work)
3531 {
3532         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3533         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3534 }
3535
3536 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3537                                struct kvm_async_pf *work)
3538 {
3539         /* s390 will always inject the page directly */
3540 }
3541
3542 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3543 {
3544         /*
3545          * s390 will always inject the page directly,
3546          * but we still want check_async_completion to cleanup
3547          */
3548         return true;
3549 }
3550
3551 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3552 {
3553         hva_t hva;
3554         struct kvm_arch_async_pf arch;
3555         int rc;
3556
3557         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3558                 return 0;
3559         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3560             vcpu->arch.pfault_compare)
3561                 return 0;
3562         if (psw_extint_disabled(vcpu))
3563                 return 0;
3564         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3565                 return 0;
3566         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3567                 return 0;
3568         if (!vcpu->arch.gmap->pfault_enabled)
3569                 return 0;
3570
3571         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3572         hva += current->thread.gmap_addr & ~PAGE_MASK;
3573         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3574                 return 0;
3575
3576         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3577         return rc;
3578 }
3579
3580 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3581 {
3582         int rc, cpuflags;
3583
3584         /*
3585          * On s390 notifications for arriving pages will be delivered directly
3586          * to the guest but the house keeping for completed pfaults is
3587          * handled outside the worker.
3588          */
3589         kvm_check_async_pf_completion(vcpu);
3590
3591         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3592         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3593
3594         if (need_resched())
3595                 schedule();
3596
3597         if (test_cpu_flag(CIF_MCCK_PENDING))
3598                 s390_handle_mcck();
3599
3600         if (!kvm_is_ucontrol(vcpu->kvm)) {
3601                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3602                 if (rc)
3603                         return rc;
3604         }
3605
3606         rc = kvm_s390_handle_requests(vcpu);
3607         if (rc)
3608                 return rc;
3609
3610         if (guestdbg_enabled(vcpu)) {
3611                 kvm_s390_backup_guest_per_regs(vcpu);
3612                 kvm_s390_patch_guest_per_regs(vcpu);
3613         }
3614
3615         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3616
3617         vcpu->arch.sie_block->icptcode = 0;
3618         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3619         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3620         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3621
3622         return 0;
3623 }
3624
3625 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3626 {
3627         struct kvm_s390_pgm_info pgm_info = {
3628                 .code = PGM_ADDRESSING,
3629         };
3630         u8 opcode, ilen;
3631         int rc;
3632
3633         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3634         trace_kvm_s390_sie_fault(vcpu);
3635
3636         /*
3637          * We want to inject an addressing exception, which is defined as a
3638          * suppressing or terminating exception. However, since we came here
3639          * by a DAT access exception, the PSW still points to the faulting
3640          * instruction since DAT exceptions are nullifying. So we've got
3641          * to look up the current opcode to get the length of the instruction
3642          * to be able to forward the PSW.
3643          */
3644         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3645         ilen = insn_length(opcode);
3646         if (rc < 0) {
3647                 return rc;
3648         } else if (rc) {
3649                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3650                  * Forward by arbitrary ilc, injection will take care of
3651                  * nullification if necessary.
3652                  */
3653                 pgm_info = vcpu->arch.pgm;
3654                 ilen = 4;
3655         }
3656         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3657         kvm_s390_forward_psw(vcpu, ilen);
3658         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3659 }
3660
3661 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3662 {
3663         struct mcck_volatile_info *mcck_info;
3664         struct sie_page *sie_page;
3665
3666         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3667                    vcpu->arch.sie_block->icptcode);
3668         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3669
3670         if (guestdbg_enabled(vcpu))
3671                 kvm_s390_restore_guest_per_regs(vcpu);
3672
3673         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3674         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3675
3676         if (exit_reason == -EINTR) {
3677                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3678                 sie_page = container_of(vcpu->arch.sie_block,
3679                                         struct sie_page, sie_block);
3680                 mcck_info = &sie_page->mcck_info;
3681                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3682                 return 0;
3683         }
3684
3685         if (vcpu->arch.sie_block->icptcode > 0) {
3686                 int rc = kvm_handle_sie_intercept(vcpu);
3687
3688                 if (rc != -EOPNOTSUPP)
3689                         return rc;
3690                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3691                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3692                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3693                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3694                 return -EREMOTE;
3695         } else if (exit_reason != -EFAULT) {
3696                 vcpu->stat.exit_null++;
3697                 return 0;
3698         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3699                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3700                 vcpu->run->s390_ucontrol.trans_exc_code =
3701                                                 current->thread.gmap_addr;
3702                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3703                 return -EREMOTE;
3704         } else if (current->thread.gmap_pfault) {
3705                 trace_kvm_s390_major_guest_pfault(vcpu);
3706                 current->thread.gmap_pfault = 0;
3707                 if (kvm_arch_setup_async_pf(vcpu))
3708                         return 0;
3709                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3710         }
3711         return vcpu_post_run_fault_in_sie(vcpu);
3712 }
3713
3714 static int __vcpu_run(struct kvm_vcpu *vcpu)
3715 {
3716         int rc, exit_reason;
3717
3718         /*
3719          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3720          * ning the guest), so that memslots (and other stuff) are protected
3721          */
3722         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3723
3724         do {
3725                 rc = vcpu_pre_run(vcpu);
3726                 if (rc)
3727                         break;
3728
3729                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3730                 /*
3731                  * As PF_VCPU will be used in fault handler, between
3732                  * guest_enter and guest_exit should be no uaccess.
3733                  */
3734                 local_irq_disable();
3735                 guest_enter_irqoff();
3736                 __disable_cpu_timer_accounting(vcpu);
3737                 local_irq_enable();
3738                 exit_reason = sie64a(vcpu->arch.sie_block,
3739                                      vcpu->run->s.regs.gprs);
3740                 local_irq_disable();
3741                 __enable_cpu_timer_accounting(vcpu);
3742                 guest_exit_irqoff();
3743                 local_irq_enable();
3744                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3745
3746                 rc = vcpu_post_run(vcpu, exit_reason);
3747         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3748
3749         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3750         return rc;
3751 }
3752
3753 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3754 {
3755         struct runtime_instr_cb *riccb;
3756         struct gs_cb *gscb;
3757
3758         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3759         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3760         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3761         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3762         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3763                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3764         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3765                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3766                 /* some control register changes require a tlb flush */
3767                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3768         }
3769         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3770                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3771                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3772                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3773                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3774                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3775         }
3776         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3777                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3778                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3779                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3780                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3781                         kvm_clear_async_pf_completion_queue(vcpu);
3782         }
3783         /*
3784          * If userspace sets the riccb (e.g. after migration) to a valid state,
3785          * we should enable RI here instead of doing the lazy enablement.
3786          */
3787         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3788             test_kvm_facility(vcpu->kvm, 64) &&
3789             riccb->v &&
3790             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3791                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3792                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3793         }
3794         /*
3795          * If userspace sets the gscb (e.g. after migration) to non-zero,
3796          * we should enable GS here instead of doing the lazy enablement.
3797          */
3798         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3799             test_kvm_facility(vcpu->kvm, 133) &&
3800             gscb->gssm &&
3801             !vcpu->arch.gs_enabled) {
3802                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3803                 vcpu->arch.sie_block->ecb |= ECB_GS;
3804                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3805                 vcpu->arch.gs_enabled = 1;
3806         }
3807         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3808             test_kvm_facility(vcpu->kvm, 82)) {
3809                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3810                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3811         }
3812         save_access_regs(vcpu->arch.host_acrs);
3813         restore_access_regs(vcpu->run->s.regs.acrs);
3814         /* save host (userspace) fprs/vrs */
3815         save_fpu_regs();
3816         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3817         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3818         if (MACHINE_HAS_VX)
3819                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3820         else
3821                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3822         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3823         if (test_fp_ctl(current->thread.fpu.fpc))
3824                 /* User space provided an invalid FPC, let's clear it */
3825                 current->thread.fpu.fpc = 0;
3826         if (MACHINE_HAS_GS) {
3827                 preempt_disable();
3828                 __ctl_set_bit(2, 4);
3829                 if (current->thread.gs_cb) {
3830                         vcpu->arch.host_gscb = current->thread.gs_cb;
3831                         save_gs_cb(vcpu->arch.host_gscb);
3832                 }
3833                 if (vcpu->arch.gs_enabled) {
3834                         current->thread.gs_cb = (struct gs_cb *)
3835                                                 &vcpu->run->s.regs.gscb;
3836                         restore_gs_cb(current->thread.gs_cb);
3837                 }
3838                 preempt_enable();
3839         }
3840         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3841
3842         kvm_run->kvm_dirty_regs = 0;
3843 }
3844
3845 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3846 {
3847         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3848         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3849         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3850         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3851         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3852         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3853         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3854         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3855         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3856         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3857         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3858         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3859         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3860         save_access_regs(vcpu->run->s.regs.acrs);
3861         restore_access_regs(vcpu->arch.host_acrs);
3862         /* Save guest register state */
3863         save_fpu_regs();
3864         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3865         /* Restore will be done lazily at return */
3866         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3867         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3868         if (MACHINE_HAS_GS) {
3869                 __ctl_set_bit(2, 4);
3870                 if (vcpu->arch.gs_enabled)
3871                         save_gs_cb(current->thread.gs_cb);
3872                 preempt_disable();
3873                 current->thread.gs_cb = vcpu->arch.host_gscb;
3874                 restore_gs_cb(vcpu->arch.host_gscb);
3875                 preempt_enable();
3876                 if (!vcpu->arch.host_gscb)
3877                         __ctl_clear_bit(2, 4);
3878                 vcpu->arch.host_gscb = NULL;
3879         }
3880         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3881 }
3882
3883 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3884 {
3885         int rc;
3886
3887         if (kvm_run->immediate_exit)
3888                 return -EINTR;
3889
3890         vcpu_load(vcpu);
3891
3892         if (guestdbg_exit_pending(vcpu)) {
3893                 kvm_s390_prepare_debug_exit(vcpu);
3894                 rc = 0;
3895                 goto out;
3896         }
3897
3898         kvm_sigset_activate(vcpu);
3899
3900         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3901                 kvm_s390_vcpu_start(vcpu);
3902         } else if (is_vcpu_stopped(vcpu)) {
3903                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3904                                    vcpu->vcpu_id);
3905                 rc = -EINVAL;
3906                 goto out;
3907         }
3908
3909         sync_regs(vcpu, kvm_run);
3910         enable_cpu_timer_accounting(vcpu);
3911
3912         might_fault();
3913         rc = __vcpu_run(vcpu);
3914
3915         if (signal_pending(current) && !rc) {
3916                 kvm_run->exit_reason = KVM_EXIT_INTR;
3917                 rc = -EINTR;
3918         }
3919
3920         if (guestdbg_exit_pending(vcpu) && !rc)  {
3921                 kvm_s390_prepare_debug_exit(vcpu);
3922                 rc = 0;
3923         }
3924
3925         if (rc == -EREMOTE) {
3926                 /* userspace support is needed, kvm_run has been prepared */
3927                 rc = 0;
3928         }
3929
3930         disable_cpu_timer_accounting(vcpu);
3931         store_regs(vcpu, kvm_run);
3932
3933         kvm_sigset_deactivate(vcpu);
3934
3935         vcpu->stat.exit_userspace++;
3936 out:
3937         vcpu_put(vcpu);
3938         return rc;
3939 }
3940
3941 /*
3942  * store status at address
3943  * we use have two special cases:
3944  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3945  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3946  */
3947 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3948 {
3949         unsigned char archmode = 1;
3950         freg_t fprs[NUM_FPRS];
3951         unsigned int px;
3952         u64 clkcomp, cputm;
3953         int rc;
3954
3955         px = kvm_s390_get_prefix(vcpu);
3956         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3957                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3958                         return -EFAULT;
3959                 gpa = 0;
3960         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3961                 if (write_guest_real(vcpu, 163, &archmode, 1))
3962                         return -EFAULT;
3963                 gpa = px;
3964         } else
3965                 gpa -= __LC_FPREGS_SAVE_AREA;
3966
3967         /* manually convert vector registers if necessary */
3968         if (MACHINE_HAS_VX) {
3969                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3970                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3971                                      fprs, 128);
3972         } else {
3973                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3974                                      vcpu->run->s.regs.fprs, 128);
3975         }
3976         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3977                               vcpu->run->s.regs.gprs, 128);
3978         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3979                               &vcpu->arch.sie_block->gpsw, 16);
3980         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3981                               &px, 4);
3982         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3983                               &vcpu->run->s.regs.fpc, 4);
3984         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3985                               &vcpu->arch.sie_block->todpr, 4);
3986         cputm = kvm_s390_get_cpu_timer(vcpu);
3987         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3988                               &cputm, 8);
3989         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3990         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3991                               &clkcomp, 8);
3992         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3993                               &vcpu->run->s.regs.acrs, 64);
3994         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3995                               &vcpu->arch.sie_block->gcr, 128);
3996         return rc ? -EFAULT : 0;
3997 }
3998
3999 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4000 {
4001         /*
4002          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4003          * switch in the run ioctl. Let's update our copies before we save
4004          * it into the save area
4005          */
4006         save_fpu_regs();
4007         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4008         save_access_regs(vcpu->run->s.regs.acrs);
4009
4010         return kvm_s390_store_status_unloaded(vcpu, addr);
4011 }
4012
4013 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4014 {
4015         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4016         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4017 }
4018
4019 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4020 {
4021         unsigned int i;
4022         struct kvm_vcpu *vcpu;
4023
4024         kvm_for_each_vcpu(i, vcpu, kvm) {
4025                 __disable_ibs_on_vcpu(vcpu);
4026         }
4027 }
4028
4029 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4030 {
4031         if (!sclp.has_ibs)
4032                 return;
4033         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4034         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4035 }
4036
4037 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4038 {
4039         int i, online_vcpus, started_vcpus = 0;
4040
4041         if (!is_vcpu_stopped(vcpu))
4042                 return;
4043
4044         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4045         /* Only one cpu at a time may enter/leave the STOPPED state. */
4046         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4047         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4048
4049         for (i = 0; i < online_vcpus; i++) {
4050                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4051                         started_vcpus++;
4052         }
4053
4054         if (started_vcpus == 0) {
4055                 /* we're the only active VCPU -> speed it up */
4056                 __enable_ibs_on_vcpu(vcpu);
4057         } else if (started_vcpus == 1) {
4058                 /*
4059                  * As we are starting a second VCPU, we have to disable
4060                  * the IBS facility on all VCPUs to remove potentially
4061                  * oustanding ENABLE requests.
4062                  */
4063                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4064         }
4065
4066         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4067         /*
4068          * Another VCPU might have used IBS while we were offline.
4069          * Let's play safe and flush the VCPU at startup.
4070          */
4071         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4072         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4073         return;
4074 }
4075
4076 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4077 {
4078         int i, online_vcpus, started_vcpus = 0;
4079         struct kvm_vcpu *started_vcpu = NULL;
4080
4081         if (is_vcpu_stopped(vcpu))
4082                 return;
4083
4084         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4085         /* Only one cpu at a time may enter/leave the STOPPED state. */
4086         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4087         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4088
4089         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4090         kvm_s390_clear_stop_irq(vcpu);
4091
4092         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4093         __disable_ibs_on_vcpu(vcpu);
4094
4095         for (i = 0; i < online_vcpus; i++) {
4096                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4097                         started_vcpus++;
4098                         started_vcpu = vcpu->kvm->vcpus[i];
4099                 }
4100         }
4101
4102         if (started_vcpus == 1) {
4103                 /*
4104                  * As we only have one VCPU left, we want to enable the
4105                  * IBS facility for that VCPU to speed it up.
4106                  */
4107                 __enable_ibs_on_vcpu(started_vcpu);
4108         }
4109
4110         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4111         return;
4112 }
4113
4114 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4115                                      struct kvm_enable_cap *cap)
4116 {
4117         int r;
4118
4119         if (cap->flags)
4120                 return -EINVAL;
4121
4122         switch (cap->cap) {
4123         case KVM_CAP_S390_CSS_SUPPORT:
4124                 if (!vcpu->kvm->arch.css_support) {
4125                         vcpu->kvm->arch.css_support = 1;
4126                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4127                         trace_kvm_s390_enable_css(vcpu->kvm);
4128                 }
4129                 r = 0;
4130                 break;
4131         default:
4132                 r = -EINVAL;
4133                 break;
4134         }
4135         return r;
4136 }
4137
4138 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4139                                   struct kvm_s390_mem_op *mop)
4140 {
4141         void __user *uaddr = (void __user *)mop->buf;
4142         void *tmpbuf = NULL;
4143         int r, srcu_idx;
4144         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4145                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4146
4147         if (mop->flags & ~supported_flags)
4148                 return -EINVAL;
4149
4150         if (mop->size > MEM_OP_MAX_SIZE)
4151                 return -E2BIG;
4152
4153         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4154                 tmpbuf = vmalloc(mop->size);
4155                 if (!tmpbuf)
4156                         return -ENOMEM;
4157         }
4158
4159         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4160
4161         switch (mop->op) {
4162         case KVM_S390_MEMOP_LOGICAL_READ:
4163                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4164                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4165                                             mop->size, GACC_FETCH);
4166                         break;
4167                 }
4168                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4169                 if (r == 0) {
4170                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4171                                 r = -EFAULT;
4172                 }
4173                 break;
4174         case KVM_S390_MEMOP_LOGICAL_WRITE:
4175                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4176                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4177                                             mop->size, GACC_STORE);
4178                         break;
4179                 }
4180                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4181                         r = -EFAULT;
4182                         break;
4183                 }
4184                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4185                 break;
4186         default:
4187                 r = -EINVAL;
4188         }
4189
4190         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4191
4192         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4193                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4194
4195         vfree(tmpbuf);
4196         return r;
4197 }
4198
4199 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4200                                unsigned int ioctl, unsigned long arg)
4201 {
4202         struct kvm_vcpu *vcpu = filp->private_data;
4203         void __user *argp = (void __user *)arg;
4204
4205         switch (ioctl) {
4206         case KVM_S390_IRQ: {
4207                 struct kvm_s390_irq s390irq;
4208
4209                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4210                         return -EFAULT;
4211                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4212         }
4213         case KVM_S390_INTERRUPT: {
4214                 struct kvm_s390_interrupt s390int;
4215                 struct kvm_s390_irq s390irq;
4216
4217                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4218                         return -EFAULT;
4219                 if (s390int_to_s390irq(&s390int, &s390irq))
4220                         return -EINVAL;
4221                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4222         }
4223         }
4224         return -ENOIOCTLCMD;
4225 }
4226
4227 long kvm_arch_vcpu_ioctl(struct file *filp,
4228                          unsigned int ioctl, unsigned long arg)
4229 {
4230         struct kvm_vcpu *vcpu = filp->private_data;
4231         void __user *argp = (void __user *)arg;
4232         int idx;
4233         long r;
4234
4235         vcpu_load(vcpu);
4236
4237         switch (ioctl) {
4238         case KVM_S390_STORE_STATUS:
4239                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4240                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4241                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4242                 break;
4243         case KVM_S390_SET_INITIAL_PSW: {
4244                 psw_t psw;
4245
4246                 r = -EFAULT;
4247                 if (copy_from_user(&psw, argp, sizeof(psw)))
4248                         break;
4249                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4250                 break;
4251         }
4252         case KVM_S390_INITIAL_RESET:
4253                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4254                 break;
4255         case KVM_SET_ONE_REG:
4256         case KVM_GET_ONE_REG: {
4257                 struct kvm_one_reg reg;
4258                 r = -EFAULT;
4259                 if (copy_from_user(&reg, argp, sizeof(reg)))
4260                         break;
4261                 if (ioctl == KVM_SET_ONE_REG)
4262                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4263                 else
4264                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4265                 break;
4266         }
4267 #ifdef CONFIG_KVM_S390_UCONTROL
4268         case KVM_S390_UCAS_MAP: {
4269                 struct kvm_s390_ucas_mapping ucasmap;
4270
4271                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4272                         r = -EFAULT;
4273                         break;
4274                 }
4275
4276                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4277                         r = -EINVAL;
4278                         break;
4279                 }
4280
4281                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4282                                      ucasmap.vcpu_addr, ucasmap.length);
4283                 break;
4284         }
4285         case KVM_S390_UCAS_UNMAP: {
4286                 struct kvm_s390_ucas_mapping ucasmap;
4287
4288                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4289                         r = -EFAULT;
4290                         break;
4291                 }
4292
4293                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4294                         r = -EINVAL;
4295                         break;
4296                 }
4297
4298                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4299                         ucasmap.length);
4300                 break;
4301         }
4302 #endif
4303         case KVM_S390_VCPU_FAULT: {
4304                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4305                 break;
4306         }
4307         case KVM_ENABLE_CAP:
4308         {
4309                 struct kvm_enable_cap cap;
4310                 r = -EFAULT;
4311                 if (copy_from_user(&cap, argp, sizeof(cap)))
4312                         break;
4313                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4314                 break;
4315         }
4316         case KVM_S390_MEM_OP: {
4317                 struct kvm_s390_mem_op mem_op;
4318
4319                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4320                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4321                 else
4322                         r = -EFAULT;
4323                 break;
4324         }
4325         case KVM_S390_SET_IRQ_STATE: {
4326                 struct kvm_s390_irq_state irq_state;
4327
4328                 r = -EFAULT;
4329                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4330                         break;
4331                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4332                     irq_state.len == 0 ||
4333                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4334                         r = -EINVAL;
4335                         break;
4336                 }
4337                 /* do not use irq_state.flags, it will break old QEMUs */
4338                 r = kvm_s390_set_irq_state(vcpu,
4339                                            (void __user *) irq_state.buf,
4340                                            irq_state.len);
4341                 break;
4342         }
4343         case KVM_S390_GET_IRQ_STATE: {
4344                 struct kvm_s390_irq_state irq_state;
4345
4346                 r = -EFAULT;
4347                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4348                         break;
4349                 if (irq_state.len == 0) {
4350                         r = -EINVAL;
4351                         break;
4352                 }
4353                 /* do not use irq_state.flags, it will break old QEMUs */
4354                 r = kvm_s390_get_irq_state(vcpu,
4355                                            (__u8 __user *)  irq_state.buf,
4356                                            irq_state.len);
4357                 break;
4358         }
4359         default:
4360                 r = -ENOTTY;
4361         }
4362
4363         vcpu_put(vcpu);
4364         return r;
4365 }
4366
4367 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4368 {
4369 #ifdef CONFIG_KVM_S390_UCONTROL
4370         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4371                  && (kvm_is_ucontrol(vcpu->kvm))) {
4372                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4373                 get_page(vmf->page);
4374                 return 0;
4375         }
4376 #endif
4377         return VM_FAULT_SIGBUS;
4378 }
4379
4380 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4381                             unsigned long npages)
4382 {
4383         return 0;
4384 }
4385
4386 /* Section: memory related */
4387 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4388                                    struct kvm_memory_slot *memslot,
4389                                    const struct kvm_userspace_memory_region *mem,
4390                                    enum kvm_mr_change change)
4391 {
4392         /* A few sanity checks. We can have memory slots which have to be
4393            located/ended at a segment boundary (1MB). The memory in userland is
4394            ok to be fragmented into various different vmas. It is okay to mmap()
4395            and munmap() stuff in this slot after doing this call at any time */
4396
4397         if (mem->userspace_addr & 0xffffful)
4398                 return -EINVAL;
4399
4400         if (mem->memory_size & 0xffffful)
4401                 return -EINVAL;
4402
4403         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4404                 return -EINVAL;
4405
4406         return 0;
4407 }
4408
4409 void kvm_arch_commit_memory_region(struct kvm *kvm,
4410                                 const struct kvm_userspace_memory_region *mem,
4411                                 const struct kvm_memory_slot *old,
4412                                 const struct kvm_memory_slot *new,
4413                                 enum kvm_mr_change change)
4414 {
4415         int rc;
4416
4417         /* If the basics of the memslot do not change, we do not want
4418          * to update the gmap. Every update causes several unnecessary
4419          * segment translation exceptions. This is usually handled just
4420          * fine by the normal fault handler + gmap, but it will also
4421          * cause faults on the prefix page of running guest CPUs.
4422          */
4423         if (old->userspace_addr == mem->userspace_addr &&
4424             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4425             old->npages * PAGE_SIZE == mem->memory_size)
4426                 return;
4427
4428         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4429                 mem->guest_phys_addr, mem->memory_size);
4430         if (rc)
4431                 pr_warn("failed to commit memory region\n");
4432         return;
4433 }
4434
4435 static inline unsigned long nonhyp_mask(int i)
4436 {
4437         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4438
4439         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4440 }
4441
4442 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4443 {
4444         vcpu->valid_wakeup = false;
4445 }
4446
4447 static int __init kvm_s390_init(void)
4448 {
4449         int i;
4450
4451         if (!sclp.has_sief2) {
4452                 pr_info("SIE is not available\n");
4453                 return -ENODEV;
4454         }
4455
4456         if (nested && hpage) {
4457                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4458                 return -EINVAL;
4459         }
4460
4461         for (i = 0; i < 16; i++)
4462                 kvm_s390_fac_base[i] |=
4463                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4464
4465         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4466 }
4467
4468 static void __exit kvm_s390_exit(void)
4469 {
4470         kvm_exit();
4471 }
4472
4473 module_init(kvm_s390_init);
4474 module_exit(kvm_s390_exit);
4475
4476 /*
4477  * Enable autoloading of the kvm module.
4478  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4479  * since x86 takes a different approach.
4480  */
4481 #include <linux/miscdevice.h>
4482 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4483 MODULE_ALIAS("devname:kvm");