arch/s390/kvm/kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
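/*
 * Each entry below pairs a debugfs file name with the offset of the
 * corresponding counter in struct kvm_vcpu (VCPU_STAT) or struct kvm
 * (VM_STAT); the generic KVM debugfs code reads the counters through
 * these offsets.
 */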
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
84         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
85         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
87         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
89         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92         { "deliver_program", VCPU_STAT(deliver_program) },
93         { "deliver_io", VCPU_STAT(deliver_io) },
94         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
96         { "inject_ckc", VCPU_STAT(inject_ckc) },
97         { "inject_cputm", VCPU_STAT(inject_cputm) },
98         { "inject_external_call", VCPU_STAT(inject_external_call) },
99         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
100         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101         { "inject_io", VM_STAT(inject_io) },
102         { "inject_mchk", VCPU_STAT(inject_mchk) },
103         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
104         { "inject_program", VCPU_STAT(inject_program) },
105         { "inject_restart", VCPU_STAT(inject_restart) },
106         { "inject_service_signal", VM_STAT(inject_service_signal) },
107         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110         { "inject_virtio", VM_STAT(inject_virtio) },
111         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
112         { "instruction_gs", VCPU_STAT(instruction_gs) },
113         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
114         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
118         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
119         { "instruction_sck", VCPU_STAT(instruction_sck) },
120         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121         { "instruction_spx", VCPU_STAT(instruction_spx) },
122         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
123         { "instruction_stap", VCPU_STAT(instruction_stap) },
124         { "instruction_iske", VCPU_STAT(instruction_iske) },
125         { "instruction_ri", VCPU_STAT(instruction_ri) },
126         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127         { "instruction_sske", VCPU_STAT(instruction_sske) },
128         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129         { "instruction_essa", VCPU_STAT(instruction_essa) },
130         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
131         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
132         { "instruction_tb", VCPU_STAT(instruction_tb) },
133         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
134         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
135         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
136         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137         { "instruction_sie", VCPU_STAT(instruction_sie) },
138         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
155         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
156         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
158         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
159         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
160         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
161         { NULL }
162 };
163
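/*
 * Layout of the extended TOD clock value as stored by STORE CLOCK
 * EXTENDED: an 8-bit epoch index followed by the TOD bits. Only the
 * epoch index and the first 64 TOD bits are used here.
 */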
164 struct kvm_s390_tod_clock_ext {
165         __u8 epoch_idx;
166         __u64 tod;
167         __u8 reserved[7];
168 } __packed;
169
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174
175 /* allow 1m huge page guest backing, if !nested */
176 static int hpage;
177 module_param(hpage, int, 0444);
178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
179
180 /*
181  * For now we handle at most 16 double words as this is what the s390 base
182  * kernel handles and stores in the prefix page. If we ever need to go beyond
183  * this, it will require code changes, but the external uapi can stay.
184  */
185 #define SIZE_INTERNAL 16
186
187 /*
188  * Base feature mask that defines the default mask for facilities. Consists of
189  * the defines in FACILITIES_KVM and the non-hypervisor-managed bits.
190  */
191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
192 /*
193  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
194  * and defines the facilities that can be enabled via a cpu model.
195  */
196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
197
198 static unsigned long kvm_s390_fac_size(void)
199 {
200         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
201         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
202         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
203                 sizeof(S390_lowcore.stfle_fac_list));
204
205         return SIZE_INTERNAL;
206 }
207
208 /* available cpu features supported by kvm */
209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
210 /* available subfunctions indicated via query / "test bit" */
211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
212
213 static struct gmap_notifier gmap_notifier;
214 static struct gmap_notifier vsie_gmap_notifier;
215 debug_info_t *kvm_s390_dbf;
216
217 /* Section: not file related */
218 int kvm_arch_hardware_enable(void)
219 {
220         /* every s390 is virtualization enabled ;-) */
221         return 0;
222 }
223
224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
225                               unsigned long end);
226
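/*
 * Adjust the guest epoch in a SIE control block after the host TOD clock
 * changed by @delta. The epoch index and the epoch act like one wide
 * unsigned value: adding -delta may wrap the 64-bit epoch, in which case
 * a carry is propagated into the epoch index (e.g. an epoch of
 * 0xfffffffffffffffe plus a compensation of 3 wraps to 1 and increments
 * epdx by one).
 */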
227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
228 {
229         u8 delta_idx = 0;
230
231         /*
232          * The TOD jumps by delta; we have to compensate for this by adding
233          * -delta to the epoch.
234          */
235         delta = -delta;
236
237         /* sign-extension - we're adding to signed values below */
238         if ((s64)delta < 0)
239                 delta_idx = -1;
240
241         scb->epoch += delta;
242         if (scb->ecd & ECD_MEF) {
243                 scb->epdx += delta_idx;
244                 if (scb->epoch < delta)
245                         scb->epdx += 1;
246         }
247 }
248
249 /*
250  * This callback is executed during stop_machine(). All CPUs are therefore
251  * temporarily stopped. In order not to change guest behavior, we have to
252  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
253  * so a CPU won't be stopped while calculating with the epoch.
254  */
255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
256                           void *v)
257 {
258         struct kvm *kvm;
259         struct kvm_vcpu *vcpu;
260         int i;
261         unsigned long long *delta = v;
262
263         list_for_each_entry(kvm, &vm_list, vm_list) {
264                 kvm_for_each_vcpu(i, vcpu, kvm) {
265                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
266                         if (i == 0) {
267                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
268                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
269                         }
270                         if (vcpu->arch.cputm_enabled)
271                                 vcpu->arch.cputm_start += *delta;
272                         if (vcpu->arch.vsie_block)
273                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
274                                                    *delta);
275                 }
276         }
277         return NOTIFY_OK;
278 }
279
280 static struct notifier_block kvm_clock_notifier = {
281         .notifier_call = kvm_clock_sync,
282 };
283
284 int kvm_arch_hardware_setup(void)
285 {
286         gmap_notifier.notifier_call = kvm_gmap_notifier;
287         gmap_register_pte_notifier(&gmap_notifier);
288         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
289         gmap_register_pte_notifier(&vsie_gmap_notifier);
290         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
291                                        &kvm_clock_notifier);
292         return 0;
293 }
294
295 void kvm_arch_hardware_unsetup(void)
296 {
297         gmap_unregister_pte_notifier(&gmap_notifier);
298         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
299         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
300                                          &kvm_clock_notifier);
301 }
302
303 static void allow_cpu_feat(unsigned long nr)
304 {
305         set_bit_inv(nr, kvm_s390_available_cpu_feat);
306 }
307
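/*
 * PERFORM LOCKED OPERATION with the test bit (0x100) set in general
 * register 0 only checks whether function code @nr is installed; the
 * operation itself is not performed and condition code 0 means
 * "available".
 */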
308 static inline int plo_test_bit(unsigned char nr)
309 {
310         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
311         int cc;
312
313         asm volatile(
314                 /* Parameter registers are ignored for "test bit" */
315                 "       plo     0,0,0,0(0)\n"
316                 "       ipm     %0\n"
317                 "       srl     %0,28\n"
318                 : "=d" (cc)
319                 : "d" (r0)
320                 : "cc");
321         return cc == 0;
322 }
323
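/*
 * Probe which subfunctions (PLO, PTFF, CPACF crypto queries) and CPU
 * features the host provides for guests. The SIE related features
 * (SIEF2, 64BSCAO, SIIF, ...) are only advertised if nested
 * virtualization is enabled and the required facilities are present.
 */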
324 static void kvm_s390_cpu_feat_init(void)
325 {
326         int i;
327
328         for (i = 0; i < 256; ++i) {
329                 if (plo_test_bit(i))
330                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
331         }
332
333         if (test_facility(28)) /* TOD-clock steering */
334                 ptff(kvm_s390_available_subfunc.ptff,
335                      sizeof(kvm_s390_available_subfunc.ptff),
336                      PTFF_QAF);
337
338         if (test_facility(17)) { /* MSA */
339                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
340                               kvm_s390_available_subfunc.kmac);
341                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
342                               kvm_s390_available_subfunc.kmc);
343                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
344                               kvm_s390_available_subfunc.km);
345                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
346                               kvm_s390_available_subfunc.kimd);
347                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
348                               kvm_s390_available_subfunc.klmd);
349         }
350         if (test_facility(76)) /* MSA3 */
351                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
352                               kvm_s390_available_subfunc.pckmo);
353         if (test_facility(77)) { /* MSA4 */
354                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
355                               kvm_s390_available_subfunc.kmctr);
356                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
357                               kvm_s390_available_subfunc.kmf);
358                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
359                               kvm_s390_available_subfunc.kmo);
360                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
361                               kvm_s390_available_subfunc.pcc);
362         }
363         if (test_facility(57)) /* MSA5 */
364                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
365                               kvm_s390_available_subfunc.ppno);
366
367         if (test_facility(146)) /* MSA8 */
368                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
369                               kvm_s390_available_subfunc.kma);
370
371         if (MACHINE_HAS_ESOP)
372                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
373         /*
374          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
375          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
376          */
377         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
378             !test_facility(3) || !nested)
379                 return;
380         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
381         if (sclp.has_64bscao)
382                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
383         if (sclp.has_siif)
384                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
385         if (sclp.has_gpere)
386                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
387         if (sclp.has_gsls)
388                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
389         if (sclp.has_ib)
390                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
391         if (sclp.has_cei)
392                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
393         if (sclp.has_ibs)
394                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
395         if (sclp.has_kss)
396                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
397         /*
398          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
399          * all skey handling functions read/set the skey from the PGSTE
400          * instead of the real storage key.
401          *
402          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
403          * pages to be detected as preserved although they are resident.
404          *
405          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
406          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
407          *
408          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
409          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
410          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
411          *
412          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
413          * cannot easily shadow the SCA because of the ipte lock.
414          */
415 }
416
417 int kvm_arch_init(void *opaque)
418 {
419         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
420         if (!kvm_s390_dbf)
421                 return -ENOMEM;
422
423         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
424                 debug_unregister(kvm_s390_dbf);
425                 return -ENOMEM;
426         }
427
428         kvm_s390_cpu_feat_init();
429
430         /* Register floating interrupt controller interface. */
431         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
432 }
433
434 void kvm_arch_exit(void)
435 {
436         debug_unregister(kvm_s390_dbf);
437 }
438
439 /* Section: device related */
440 long kvm_arch_dev_ioctl(struct file *filp,
441                         unsigned int ioctl, unsigned long arg)
442 {
443         if (ioctl == KVM_S390_ENABLE_SIE)
444                 return s390_enable_sie();
445         return -EINVAL;
446 }
447
448 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
449 {
450         int r;
451
452         switch (ext) {
453         case KVM_CAP_S390_PSW:
454         case KVM_CAP_S390_GMAP:
455         case KVM_CAP_SYNC_MMU:
456 #ifdef CONFIG_KVM_S390_UCONTROL
457         case KVM_CAP_S390_UCONTROL:
458 #endif
459         case KVM_CAP_ASYNC_PF:
460         case KVM_CAP_SYNC_REGS:
461         case KVM_CAP_ONE_REG:
462         case KVM_CAP_ENABLE_CAP:
463         case KVM_CAP_S390_CSS_SUPPORT:
464         case KVM_CAP_IOEVENTFD:
465         case KVM_CAP_DEVICE_CTRL:
466         case KVM_CAP_ENABLE_CAP_VM:
467         case KVM_CAP_S390_IRQCHIP:
468         case KVM_CAP_VM_ATTRIBUTES:
469         case KVM_CAP_MP_STATE:
470         case KVM_CAP_IMMEDIATE_EXIT:
471         case KVM_CAP_S390_INJECT_IRQ:
472         case KVM_CAP_S390_USER_SIGP:
473         case KVM_CAP_S390_USER_STSI:
474         case KVM_CAP_S390_SKEYS:
475         case KVM_CAP_S390_IRQ_STATE:
476         case KVM_CAP_S390_USER_INSTR0:
477         case KVM_CAP_S390_CMMA_MIGRATION:
478         case KVM_CAP_S390_AIS:
479         case KVM_CAP_S390_AIS_MIGRATION:
480                 r = 1;
481                 break;
482         case KVM_CAP_S390_HPAGE_1M:
483                 r = 0;
484                 if (hpage)
485                         r = 1;
486                 break;
487         case KVM_CAP_S390_MEM_OP:
488                 r = MEM_OP_MAX_SIZE;
489                 break;
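        /*
         * The vCPU limit depends on the SCA format: an extended SCA
         * (ESCA) with 64-bit SCAO provides more CPU slots than the
         * basic SCA; if SCA entries are not used at all, the generic
         * KVM limit applies.
         */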
490         case KVM_CAP_NR_VCPUS:
491         case KVM_CAP_MAX_VCPUS:
492                 r = KVM_S390_BSCA_CPU_SLOTS;
493                 if (!kvm_s390_use_sca_entries())
494                         r = KVM_MAX_VCPUS;
495                 else if (sclp.has_esca && sclp.has_64bscao)
496                         r = KVM_S390_ESCA_CPU_SLOTS;
497                 break;
498         case KVM_CAP_NR_MEMSLOTS:
499                 r = KVM_USER_MEM_SLOTS;
500                 break;
501         case KVM_CAP_S390_COW:
502                 r = MACHINE_HAS_ESOP;
503                 break;
504         case KVM_CAP_S390_VECTOR_REGISTERS:
505                 r = MACHINE_HAS_VX;
506                 break;
507         case KVM_CAP_S390_RI:
508                 r = test_facility(64);
509                 break;
510         case KVM_CAP_S390_GS:
511                 r = test_facility(133);
512                 break;
513         case KVM_CAP_S390_BPB:
514                 r = test_facility(82);
515                 break;
516         default:
517                 r = 0;
518         }
519         return r;
520 }
521
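/*
 * Transfer the dirty state collected in the gmap into KVM's dirty bitmap:
 * walk the memslot in segment (pmd) sized steps, fetch the per-segment
 * dirty bits and mark the corresponding guest pages dirty.
 */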
522 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
523                                     struct kvm_memory_slot *memslot)
524 {
525         int i;
526         gfn_t cur_gfn, last_gfn;
527         unsigned long gaddr, vmaddr;
528         struct gmap *gmap = kvm->arch.gmap;
529         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
530
531         /* Loop over all guest segments */
532         cur_gfn = memslot->base_gfn;
533         last_gfn = memslot->base_gfn + memslot->npages;
534         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
535                 gaddr = gfn_to_gpa(cur_gfn);
536                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
537                 if (kvm_is_error_hva(vmaddr))
538                         continue;
539
540                 bitmap_zero(bitmap, _PAGE_ENTRIES);
541                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
542                 for (i = 0; i < _PAGE_ENTRIES; i++) {
543                         if (test_bit(i, bitmap))
544                                 mark_page_dirty(kvm, cur_gfn + i);
545                 }
546
547                 if (fatal_signal_pending(current))
548                         return;
549                 cond_resched();
550         }
551 }
552
553 /* Section: vm related */
554 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
555
556 /*
557  * Get (and clear) the dirty memory log for a memory slot.
558  */
559 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
560                                struct kvm_dirty_log *log)
561 {
562         int r;
563         unsigned long n;
564         struct kvm_memslots *slots;
565         struct kvm_memory_slot *memslot;
566         int is_dirty = 0;
567
568         if (kvm_is_ucontrol(kvm))
569                 return -EINVAL;
570
571         mutex_lock(&kvm->slots_lock);
572
573         r = -EINVAL;
574         if (log->slot >= KVM_USER_MEM_SLOTS)
575                 goto out;
576
577         slots = kvm_memslots(kvm);
578         memslot = id_to_memslot(slots, log->slot);
579         r = -ENOENT;
580         if (!memslot->dirty_bitmap)
581                 goto out;
582
583         kvm_s390_sync_dirty_log(kvm, memslot);
584         r = kvm_get_dirty_log(kvm, log, &is_dirty);
585         if (r)
586                 goto out;
587
588         /* Clear the dirty log */
589         if (is_dirty) {
590                 n = kvm_dirty_bitmap_bytes(memslot);
591                 memset(memslot->dirty_bitmap, 0, n);
592         }
593         r = 0;
594 out:
595         mutex_unlock(&kvm->slots_lock);
596         return r;
597 }
598
599 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
600 {
601         unsigned int i;
602         struct kvm_vcpu *vcpu;
603
604         kvm_for_each_vcpu(i, vcpu, kvm) {
605                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
606         }
607 }
608
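/*
 * VM scoped capabilities are enabled by user space with the
 * KVM_ENABLE_CAP ioctl on the VM file descriptor, for example
 * (illustrative sketch only, error handling omitted):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */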
609 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
610 {
611         int r;
612
613         if (cap->flags)
614                 return -EINVAL;
615
616         switch (cap->cap) {
617         case KVM_CAP_S390_IRQCHIP:
618                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
619                 kvm->arch.use_irqchip = 1;
620                 r = 0;
621                 break;
622         case KVM_CAP_S390_USER_SIGP:
623                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
624                 kvm->arch.user_sigp = 1;
625                 r = 0;
626                 break;
627         case KVM_CAP_S390_VECTOR_REGISTERS:
628                 mutex_lock(&kvm->lock);
629                 if (kvm->created_vcpus) {
630                         r = -EBUSY;
631                 } else if (MACHINE_HAS_VX) {
632                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
633                         set_kvm_facility(kvm->arch.model.fac_list, 129);
634                         if (test_facility(134)) {
635                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
636                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
637                         }
638                         if (test_facility(135)) {
639                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
640                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
641                         }
642                         r = 0;
643                 } else
644                         r = -EINVAL;
645                 mutex_unlock(&kvm->lock);
646                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
647                          r ? "(not available)" : "(success)");
648                 break;
649         case KVM_CAP_S390_RI:
650                 r = -EINVAL;
651                 mutex_lock(&kvm->lock);
652                 if (kvm->created_vcpus) {
653                         r = -EBUSY;
654                 } else if (test_facility(64)) {
655                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
656                         set_kvm_facility(kvm->arch.model.fac_list, 64);
657                         r = 0;
658                 }
659                 mutex_unlock(&kvm->lock);
660                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
661                          r ? "(not available)" : "(success)");
662                 break;
663         case KVM_CAP_S390_AIS:
664                 mutex_lock(&kvm->lock);
665                 if (kvm->created_vcpus) {
666                         r = -EBUSY;
667                 } else {
668                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
669                         set_kvm_facility(kvm->arch.model.fac_list, 72);
670                         r = 0;
671                 }
672                 mutex_unlock(&kvm->lock);
673                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
674                          r ? "(not available)" : "(success)");
675                 break;
676         case KVM_CAP_S390_GS:
677                 r = -EINVAL;
678                 mutex_lock(&kvm->lock);
679                 if (kvm->created_vcpus) {
680                         r = -EBUSY;
681                 } else if (test_facility(133)) {
682                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
683                         set_kvm_facility(kvm->arch.model.fac_list, 133);
684                         r = 0;
685                 }
686                 mutex_unlock(&kvm->lock);
687                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
688                          r ? "(not available)" : "(success)");
689                 break;
690         case KVM_CAP_S390_HPAGE_1M:
691                 mutex_lock(&kvm->lock);
692                 if (kvm->created_vcpus)
693                         r = -EBUSY;
694                 else if (!hpage || kvm->arch.use_cmma)
695                         r = -EINVAL;
696                 else {
697                         r = 0;
698                         down_write(&kvm->mm->mmap_sem);
699                         kvm->mm->context.allow_gmap_hpage_1m = 1;
700                         up_write(&kvm->mm->mmap_sem);
701                         /*
702                          * We might have to create fake 4k page
703                          * tables. To keep the hardware from working on
704                          * stale PGSTEs, we emulate these instructions.
705                          */
706                         kvm->arch.use_skf = 0;
707                         kvm->arch.use_pfmfi = 0;
708                 }
709                 mutex_unlock(&kvm->lock);
710                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
711                          r ? "(not available)" : "(success)");
712                 break;
713         case KVM_CAP_S390_USER_STSI:
714                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
715                 kvm->arch.user_stsi = 1;
716                 r = 0;
717                 break;
718         case KVM_CAP_S390_USER_INSTR0:
719                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
720                 kvm->arch.user_instr0 = 1;
721                 icpt_operexc_on_all_vcpus(kvm);
722                 r = 0;
723                 break;
724         default:
725                 r = -EINVAL;
726                 break;
727         }
728         return r;
729 }
730
731 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
732 {
733         int ret;
734
735         switch (attr->attr) {
736         case KVM_S390_VM_MEM_LIMIT_SIZE:
737                 ret = 0;
738                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
739                          kvm->arch.mem_limit);
740                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
741                         ret = -EFAULT;
742                 break;
743         default:
744                 ret = -ENXIO;
745                 break;
746         }
747         return ret;
748 }
749
750 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
751 {
752         int ret;
753         unsigned int idx;
754         switch (attr->attr) {
755         case KVM_S390_VM_MEM_ENABLE_CMMA:
756                 ret = -ENXIO;
757                 if (!sclp.has_cmma)
758                         break;
759
760                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
761                 mutex_lock(&kvm->lock);
762                 if (kvm->created_vcpus)
763                         ret = -EBUSY;
764                 else if (kvm->mm->context.allow_gmap_hpage_1m)
765                         ret = -EINVAL;
766                 else {
767                         kvm->arch.use_cmma = 1;
768                         /* Not compatible with cmma. */
769                         kvm->arch.use_pfmfi = 0;
770                         ret = 0;
771                 }
772                 mutex_unlock(&kvm->lock);
773                 break;
774         case KVM_S390_VM_MEM_CLR_CMMA:
775                 ret = -ENXIO;
776                 if (!sclp.has_cmma)
777                         break;
778                 ret = -EINVAL;
779                 if (!kvm->arch.use_cmma)
780                         break;
781
782                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
783                 mutex_lock(&kvm->lock);
784                 idx = srcu_read_lock(&kvm->srcu);
785                 s390_reset_cmma(kvm->arch.gmap->mm);
786                 srcu_read_unlock(&kvm->srcu, idx);
787                 mutex_unlock(&kvm->lock);
788                 ret = 0;
789                 break;
790         case KVM_S390_VM_MEM_LIMIT_SIZE: {
791                 unsigned long new_limit;
792
793                 if (kvm_is_ucontrol(kvm))
794                         return -EINVAL;
795
796                 if (get_user(new_limit, (u64 __user *)attr->addr))
797                         return -EFAULT;
798
799                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
800                     new_limit > kvm->arch.mem_limit)
801                         return -E2BIG;
802
803                 if (!new_limit)
804                         return -EINVAL;
805
806                 /* gmap_create takes last usable address */
807                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
808                         new_limit -= 1;
809
810                 ret = -EBUSY;
811                 mutex_lock(&kvm->lock);
812                 if (!kvm->created_vcpus) {
813                         /* gmap_create will round the limit up */
814                         struct gmap *new = gmap_create(current->mm, new_limit);
815
816                         if (!new) {
817                                 ret = -ENOMEM;
818                         } else {
819                                 gmap_remove(kvm->arch.gmap);
820                                 new->private = kvm;
821                                 kvm->arch.gmap = new;
822                                 ret = 0;
823                         }
824                 }
825                 mutex_unlock(&kvm->lock);
826                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
827                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
828                          (void *) kvm->arch.gmap->asce);
829                 break;
830         }
831         default:
832                 ret = -ENXIO;
833                 break;
834         }
835         return ret;
836 }
837
838 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
839
840 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
841 {
842         struct kvm_vcpu *vcpu;
843         int i;
844
845         kvm_s390_vcpu_block_all(kvm);
846
847         kvm_for_each_vcpu(i, vcpu, kvm)
848                 kvm_s390_vcpu_crypto_setup(vcpu);
849
850         kvm_s390_vcpu_unblock_all(kvm);
851 }
852
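/*
 * Toggle AES/DEA protected key wrapping (requires the MSA3 facility,
 * bit 76). Enabling generates fresh random wrapping key masks and
 * disabling clears them; in both cases every vCPU's crypto control
 * block is refreshed via kvm_s390_vcpu_crypto_reset_all().
 */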
853 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
854 {
855         if (!test_kvm_facility(kvm, 76))
856                 return -EINVAL;
857
858         mutex_lock(&kvm->lock);
859         switch (attr->attr) {
860         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
861                 get_random_bytes(
862                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
863                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
864                 kvm->arch.crypto.aes_kw = 1;
865                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
866                 break;
867         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
868                 get_random_bytes(
869                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
870                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
871                 kvm->arch.crypto.dea_kw = 1;
872                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
873                 break;
874         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
875                 kvm->arch.crypto.aes_kw = 0;
876                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
877                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
878                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
879                 break;
880         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
881                 kvm->arch.crypto.dea_kw = 0;
882                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
883                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
884                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
885                 break;
886         default:
887                 mutex_unlock(&kvm->lock);
888                 return -ENXIO;
889         }
890
891         kvm_s390_vcpu_crypto_reset_all(kvm);
892         mutex_unlock(&kvm->lock);
893         return 0;
894 }
895
896 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
897 {
898         int cx;
899         struct kvm_vcpu *vcpu;
900
901         kvm_for_each_vcpu(cx, vcpu, kvm)
902                 kvm_s390_sync_request(req, vcpu);
903 }
904
905 /*
906  * Must be called with kvm->srcu held to avoid races on memslots, and with
907  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
908  */
909 static int kvm_s390_vm_start_migration(struct kvm *kvm)
910 {
911         struct kvm_memory_slot *ms;
912         struct kvm_memslots *slots;
913         unsigned long ram_pages = 0;
914         int slotnr;
915
916         /* migration mode already enabled */
917         if (kvm->arch.migration_mode)
918                 return 0;
919         slots = kvm_memslots(kvm);
920         if (!slots || !slots->used_slots)
921                 return -EINVAL;
922
923         if (!kvm->arch.use_cmma) {
924                 kvm->arch.migration_mode = 1;
925                 return 0;
926         }
927         /* mark all the pages in active slots as dirty */
928         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
929                 ms = slots->memslots + slotnr;
930                 /*
931                  * The second half of the bitmap is only used on x86,
932                  * and would be wasted otherwise, so we put it to good
933                  * use here to keep track of the state of the storage
934                  * attributes.
935                  */
936                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
937                 ram_pages += ms->npages;
938         }
939         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
940         kvm->arch.migration_mode = 1;
941         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
942         return 0;
943 }
944
945 /*
946  * Must be called with kvm->slots_lock to avoid races with ourselves and
947  * kvm_s390_vm_start_migration.
948  */
949 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
950 {
951         /* migration mode already disabled */
952         if (!kvm->arch.migration_mode)
953                 return 0;
954         kvm->arch.migration_mode = 0;
955         if (kvm->arch.use_cmma)
956                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
957         return 0;
958 }
959
960 static int kvm_s390_vm_set_migration(struct kvm *kvm,
961                                      struct kvm_device_attr *attr)
962 {
963         int res = -ENXIO;
964
965         mutex_lock(&kvm->slots_lock);
966         switch (attr->attr) {
967         case KVM_S390_VM_MIGRATION_START:
968                 res = kvm_s390_vm_start_migration(kvm);
969                 break;
970         case KVM_S390_VM_MIGRATION_STOP:
971                 res = kvm_s390_vm_stop_migration(kvm);
972                 break;
973         default:
974                 break;
975         }
976         mutex_unlock(&kvm->slots_lock);
977
978         return res;
979 }
980
981 static int kvm_s390_vm_get_migration(struct kvm *kvm,
982                                      struct kvm_device_attr *attr)
983 {
984         u64 mig = kvm->arch.migration_mode;
985
986         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
987                 return -ENXIO;
988
989         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
990                 return -EFAULT;
991         return 0;
992 }
993
994 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
995 {
996         struct kvm_s390_vm_tod_clock gtod;
997
998         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
999                 return -EFAULT;
1000
1001         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1002                 return -EINVAL;
1003         kvm_s390_set_tod_clock(kvm, &gtod);
1004
1005         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1006                 gtod.epoch_idx, gtod.tod);
1007
1008         return 0;
1009 }
1010
1011 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1012 {
1013         u8 gtod_high;
1014
1015         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1016                                            sizeof(gtod_high)))
1017                 return -EFAULT;
1018
1019         if (gtod_high != 0)
1020                 return -EINVAL;
1021         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1022
1023         return 0;
1024 }
1025
1026 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1027 {
1028         struct kvm_s390_vm_tod_clock gtod = { 0 };
1029
1030         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1031                            sizeof(gtod.tod)))
1032                 return -EFAULT;
1033
1034         kvm_s390_set_tod_clock(kvm, &gtod);
1035         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1036         return 0;
1037 }
1038
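/*
 * The TOD attributes are set by user space through the
 * KVM_SET_DEVICE_ATTR vm ioctl, roughly (illustrative sketch):
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .tod = ... };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)(unsigned long)&gtod,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */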
1039 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1040 {
1041         int ret;
1042
1043         if (attr->flags)
1044                 return -EINVAL;
1045
1046         switch (attr->attr) {
1047         case KVM_S390_VM_TOD_EXT:
1048                 ret = kvm_s390_set_tod_ext(kvm, attr);
1049                 break;
1050         case KVM_S390_VM_TOD_HIGH:
1051                 ret = kvm_s390_set_tod_high(kvm, attr);
1052                 break;
1053         case KVM_S390_VM_TOD_LOW:
1054                 ret = kvm_s390_set_tod_low(kvm, attr);
1055                 break;
1056         default:
1057                 ret = -ENXIO;
1058                 break;
1059         }
1060         return ret;
1061 }
1062
1063 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1064                                    struct kvm_s390_vm_tod_clock *gtod)
1065 {
1066         struct kvm_s390_tod_clock_ext htod;
1067
1068         preempt_disable();
1069
1070         get_tod_clock_ext((char *)&htod);
1071
1072         gtod->tod = htod.tod + kvm->arch.epoch;
1073         gtod->epoch_idx = 0;
1074         if (test_kvm_facility(kvm, 139)) {
1075                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1076                 if (gtod->tod < htod.tod)
1077                         gtod->epoch_idx += 1;
1078         }
1079
1080         preempt_enable();
1081 }
1082
1083 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1084 {
1085         struct kvm_s390_vm_tod_clock gtod;
1086
1087         memset(&gtod, 0, sizeof(gtod));
1088         kvm_s390_get_tod_clock(kvm, &gtod);
1089         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1090                 return -EFAULT;
1091
1092         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1093                 gtod.epoch_idx, gtod.tod);
1094         return 0;
1095 }
1096
1097 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099         u8 gtod_high = 0;
1100
1101         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1102                                          sizeof(gtod_high)))
1103                 return -EFAULT;
1104         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1105
1106         return 0;
1107 }
1108
1109 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1110 {
1111         u64 gtod;
1112
1113         gtod = kvm_s390_get_tod_clock_fast(kvm);
1114         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1115                 return -EFAULT;
1116         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1117
1118         return 0;
1119 }
1120
1121 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123         int ret;
1124
1125         if (attr->flags)
1126                 return -EINVAL;
1127
1128         switch (attr->attr) {
1129         case KVM_S390_VM_TOD_EXT:
1130                 ret = kvm_s390_get_tod_ext(kvm, attr);
1131                 break;
1132         case KVM_S390_VM_TOD_HIGH:
1133                 ret = kvm_s390_get_tod_high(kvm, attr);
1134                 break;
1135         case KVM_S390_VM_TOD_LOW:
1136                 ret = kvm_s390_get_tod_low(kvm, attr);
1137                 break;
1138         default:
1139                 ret = -ENXIO;
1140                 break;
1141         }
1142         return ret;
1143 }
1144
1145 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1146 {
1147         struct kvm_s390_vm_cpu_processor *proc;
1148         u16 lowest_ibc, unblocked_ibc;
1149         int ret = 0;
1150
1151         mutex_lock(&kvm->lock);
1152         if (kvm->created_vcpus) {
1153                 ret = -EBUSY;
1154                 goto out;
1155         }
1156         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1157         if (!proc) {
1158                 ret = -ENOMEM;
1159                 goto out;
1160         }
1161         if (!copy_from_user(proc, (void __user *)attr->addr,
1162                             sizeof(*proc))) {
1163                 kvm->arch.model.cpuid = proc->cpuid;
1164                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1165                 unblocked_ibc = sclp.ibc & 0xfff;
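                /*
                 * Clamp the requested IBC value to the machine's
                 * supported range: values above the unblocked IBC or
                 * below the lowest supported IBC are capped.
                 */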
1166                 if (lowest_ibc && proc->ibc) {
1167                         if (proc->ibc > unblocked_ibc)
1168                                 kvm->arch.model.ibc = unblocked_ibc;
1169                         else if (proc->ibc < lowest_ibc)
1170                                 kvm->arch.model.ibc = lowest_ibc;
1171                         else
1172                                 kvm->arch.model.ibc = proc->ibc;
1173                 }
1174                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1175                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1176                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1177                          kvm->arch.model.ibc,
1178                          kvm->arch.model.cpuid);
1179                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1180                          kvm->arch.model.fac_list[0],
1181                          kvm->arch.model.fac_list[1],
1182                          kvm->arch.model.fac_list[2]);
1183         } else
1184                 ret = -EFAULT;
1185         kfree(proc);
1186 out:
1187         mutex_unlock(&kvm->lock);
1188         return ret;
1189 }
1190
1191 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1192                                        struct kvm_device_attr *attr)
1193 {
1194         struct kvm_s390_vm_cpu_feat data;
1195
1196         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1197                 return -EFAULT;
1198         if (!bitmap_subset((unsigned long *) data.feat,
1199                            kvm_s390_available_cpu_feat,
1200                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1201                 return -EINVAL;
1202
1203         mutex_lock(&kvm->lock);
1204         if (kvm->created_vcpus) {
1205                 mutex_unlock(&kvm->lock);
1206                 return -EBUSY;
1207         }
1208         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1209                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1210         mutex_unlock(&kvm->lock);
1211         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1212                          data.feat[0],
1213                          data.feat[1],
1214                          data.feat[2]);
1215         return 0;
1216 }
1217
1218 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1219                                           struct kvm_device_attr *attr)
1220 {
1221         /*
1222          * Once supported by kernel + hw, we have to store the subfunctions
1223          * in kvm->arch and remember that user space configured them.
1224          */
1225         return -ENXIO;
1226 }
1227
1228 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230         int ret = -ENXIO;
1231
1232         switch (attr->attr) {
1233         case KVM_S390_VM_CPU_PROCESSOR:
1234                 ret = kvm_s390_set_processor(kvm, attr);
1235                 break;
1236         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1237                 ret = kvm_s390_set_processor_feat(kvm, attr);
1238                 break;
1239         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1240                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1241                 break;
1242         }
1243         return ret;
1244 }
1245
1246 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247 {
1248         struct kvm_s390_vm_cpu_processor *proc;
1249         int ret = 0;
1250
1251         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1252         if (!proc) {
1253                 ret = -ENOMEM;
1254                 goto out;
1255         }
1256         proc->cpuid = kvm->arch.model.cpuid;
1257         proc->ibc = kvm->arch.model.ibc;
1258         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1259                S390_ARCH_FAC_LIST_SIZE_BYTE);
1260         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1261                  kvm->arch.model.ibc,
1262                  kvm->arch.model.cpuid);
1263         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1264                  kvm->arch.model.fac_list[0],
1265                  kvm->arch.model.fac_list[1],
1266                  kvm->arch.model.fac_list[2]);
1267         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1268                 ret = -EFAULT;
1269         kfree(proc);
1270 out:
1271         return ret;
1272 }
1273
1274 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1275 {
1276         struct kvm_s390_vm_cpu_machine *mach;
1277         int ret = 0;
1278
1279         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1280         if (!mach) {
1281                 ret = -ENOMEM;
1282                 goto out;
1283         }
1284         get_cpu_id((struct cpuid *) &mach->cpuid);
1285         mach->ibc = sclp.ibc;
1286         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1287                S390_ARCH_FAC_LIST_SIZE_BYTE);
1288         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1289                sizeof(S390_lowcore.stfle_fac_list));
1290         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1291                  kvm->arch.model.ibc,
1292                  kvm->arch.model.cpuid);
1293         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1294                  mach->fac_mask[0],
1295                  mach->fac_mask[1],
1296                  mach->fac_mask[2]);
1297         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1298                  mach->fac_list[0],
1299                  mach->fac_list[1],
1300                  mach->fac_list[2]);
1301         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1302                 ret = -EFAULT;
1303         kfree(mach);
1304 out:
1305         return ret;
1306 }
1307
1308 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1309                                        struct kvm_device_attr *attr)
1310 {
1311         struct kvm_s390_vm_cpu_feat data;
1312
1313         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1314                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1315         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1316                 return -EFAULT;
1317         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1318                          data.feat[0],
1319                          data.feat[1],
1320                          data.feat[2]);
1321         return 0;
1322 }
1323
1324 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1325                                      struct kvm_device_attr *attr)
1326 {
1327         struct kvm_s390_vm_cpu_feat data;
1328
1329         bitmap_copy((unsigned long *) data.feat,
1330                     kvm_s390_available_cpu_feat,
1331                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1332         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1333                 return -EFAULT;
1334         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1335                          data.feat[0],
1336                          data.feat[1],
1337                          data.feat[2]);
1338         return 0;
1339 }
1340
1341 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1342                                           struct kvm_device_attr *attr)
1343 {
1344         /*
1345          * Once we can actually configure subfunctions (kernel + hw support),
1346          * we have to check if they were already set by user space and, if so,
1347          * copy them from kvm->arch.
1348          */
1349         return -ENXIO;
1350 }
1351
1352 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1353                                         struct kvm_device_attr *attr)
1354 {
1355         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1356             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1357                 return -EFAULT;
1358         return 0;
1359 }
1360 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1361 {
1362         int ret = -ENXIO;
1363
1364         switch (attr->attr) {
1365         case KVM_S390_VM_CPU_PROCESSOR:
1366                 ret = kvm_s390_get_processor(kvm, attr);
1367                 break;
1368         case KVM_S390_VM_CPU_MACHINE:
1369                 ret = kvm_s390_get_machine(kvm, attr);
1370                 break;
1371         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1372                 ret = kvm_s390_get_processor_feat(kvm, attr);
1373                 break;
1374         case KVM_S390_VM_CPU_MACHINE_FEAT:
1375                 ret = kvm_s390_get_machine_feat(kvm, attr);
1376                 break;
1377         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1378                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1379                 break;
1380         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1381                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1382                 break;
1383         }
1384         return ret;
1385 }
1386
1387 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1388 {
1389         int ret;
1390
1391         switch (attr->group) {
1392         case KVM_S390_VM_MEM_CTRL:
1393                 ret = kvm_s390_set_mem_control(kvm, attr);
1394                 break;
1395         case KVM_S390_VM_TOD:
1396                 ret = kvm_s390_set_tod(kvm, attr);
1397                 break;
1398         case KVM_S390_VM_CPU_MODEL:
1399                 ret = kvm_s390_set_cpu_model(kvm, attr);
1400                 break;
1401         case KVM_S390_VM_CRYPTO:
1402                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1403                 break;
1404         case KVM_S390_VM_MIGRATION:
1405                 ret = kvm_s390_vm_set_migration(kvm, attr);
1406                 break;
1407         default:
1408                 ret = -ENXIO;
1409                 break;
1410         }
1411
1412         return ret;
1413 }
1414
1415 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1416 {
1417         int ret;
1418
1419         switch (attr->group) {
1420         case KVM_S390_VM_MEM_CTRL:
1421                 ret = kvm_s390_get_mem_control(kvm, attr);
1422                 break;
1423         case KVM_S390_VM_TOD:
1424                 ret = kvm_s390_get_tod(kvm, attr);
1425                 break;
1426         case KVM_S390_VM_CPU_MODEL:
1427                 ret = kvm_s390_get_cpu_model(kvm, attr);
1428                 break;
1429         case KVM_S390_VM_MIGRATION:
1430                 ret = kvm_s390_vm_get_migration(kvm, attr);
1431                 break;
1432         default:
1433                 ret = -ENXIO;
1434                 break;
1435         }
1436
1437         return ret;
1438 }
1439
1440 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442         int ret;
1443
1444         switch (attr->group) {
1445         case KVM_S390_VM_MEM_CTRL:
1446                 switch (attr->attr) {
1447                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1448                 case KVM_S390_VM_MEM_CLR_CMMA:
1449                         ret = sclp.has_cmma ? 0 : -ENXIO;
1450                         break;
1451                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1452                         ret = 0;
1453                         break;
1454                 default:
1455                         ret = -ENXIO;
1456                         break;
1457                 }
1458                 break;
1459         case KVM_S390_VM_TOD:
1460                 switch (attr->attr) {
1461                 case KVM_S390_VM_TOD_LOW:
1462                 case KVM_S390_VM_TOD_HIGH:
1463                         ret = 0;
1464                         break;
1465                 default:
1466                         ret = -ENXIO;
1467                         break;
1468                 }
1469                 break;
1470         case KVM_S390_VM_CPU_MODEL:
1471                 switch (attr->attr) {
1472                 case KVM_S390_VM_CPU_PROCESSOR:
1473                 case KVM_S390_VM_CPU_MACHINE:
1474                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1475                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1476                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1477                         ret = 0;
1478                         break;
1479                 /* configuring subfunctions is not supported yet */
1480                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1481                 default:
1482                         ret = -ENXIO;
1483                         break;
1484                 }
1485                 break;
1486         case KVM_S390_VM_CRYPTO:
1487                 switch (attr->attr) {
1488                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1489                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1490                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1491                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1492                         ret = 0;
1493                         break;
1494                 default:
1495                         ret = -ENXIO;
1496                         break;
1497                 }
1498                 break;
1499         case KVM_S390_VM_MIGRATION:
1500                 ret = 0;
1501                 break;
1502         default:
1503                 ret = -ENXIO;
1504                 break;
1505         }
1506
1507         return ret;
1508 }
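
/*
 * A minimal userspace sketch (not taken from this file) of how one of the
 * groups handled above can be probed before use.  Assumed: an open VM file
 * descriptor "vm_fd" from KVM_CREATE_VM; the constants come from
 * <linux/kvm.h>.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE_FEAT,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *		... attribute supported, KVM_GET_DEVICE_ATTR may follow ...
 *	else if (errno == ENXIO)
 *		... group or attribute unknown to this kernel ...
 */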
1509
1510 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1511 {
1512         uint8_t *keys;
1513         uint64_t hva;
1514         int srcu_idx, i, r = 0;
1515
1516         if (args->flags != 0)
1517                 return -EINVAL;
1518
1519         /* Is this guest using storage keys? */
1520         if (!mm_uses_skeys(current->mm))
1521                 return KVM_S390_GET_SKEYS_NONE;
1522
1523         /* Enforce sane limit on memory allocation */
1524         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1525                 return -EINVAL;
1526
1527         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1528         if (!keys)
1529                 return -ENOMEM;
1530
1531         down_read(&current->mm->mmap_sem);
1532         srcu_idx = srcu_read_lock(&kvm->srcu);
1533         for (i = 0; i < args->count; i++) {
1534                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1535                 if (kvm_is_error_hva(hva)) {
1536                         r = -EFAULT;
1537                         break;
1538                 }
1539
1540                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1541                 if (r)
1542                         break;
1543         }
1544         srcu_read_unlock(&kvm->srcu, srcu_idx);
1545         up_read(&current->mm->mmap_sem);
1546
1547         if (!r) {
1548                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1549                                  sizeof(uint8_t) * args->count);
1550                 if (r)
1551                         r = -EFAULT;
1552         }
1553
1554         kvfree(keys);
1555         return r;
1556 }
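
/*
 * Illustrative caller (a sketch, assuming an open "vm_fd" and a value of
 * "npages" no larger than KVM_S390_SKEYS_MAX):
 *
 *	uint8_t *buf = malloc(npages);
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn     = 0,
 *		.count         = npages,
 *		.skeydata_addr = (uint64_t)buf,
 *	};
 *	int r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE means the guest never enabled
 * storage keys and nothing was copied; 0 means buf now holds one key byte
 * per guest page starting at start_gfn.
 */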
1557
1558 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1559 {
1560         uint8_t *keys;
1561         uint64_t hva;
1562         int srcu_idx, i, r = 0;
1563         bool unlocked;
1564
1565         if (args->flags != 0)
1566                 return -EINVAL;
1567
1568         /* Enforce sane limit on memory allocation */
1569         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1570                 return -EINVAL;
1571
1572         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1573         if (!keys)
1574                 return -ENOMEM;
1575
1576         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1577                            sizeof(uint8_t) * args->count);
1578         if (r) {
1579                 r = -EFAULT;
1580                 goto out;
1581         }
1582
1583         /* Enable storage key handling for the guest */
1584         r = s390_enable_skey();
1585         if (r)
1586                 goto out;
1587
1588         i = 0;
1589         down_read(&current->mm->mmap_sem);
1590         srcu_idx = srcu_read_lock(&kvm->srcu);
1591         while (i < args->count) {
1592                 unlocked = false;
1593                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1594                 if (kvm_is_error_hva(hva)) {
1595                         r = -EFAULT;
1596                         break;
1597                 }
1598
1599                 /* Lowest order bit is reserved */
1600                 if (keys[i] & 0x01) {
1601                         r = -EINVAL;
1602                         break;
1603                 }
1604
1605                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1606                 if (r) {
1607                         r = fixup_user_fault(current, current->mm, hva,
1608                                              FAULT_FLAG_WRITE, &unlocked);
1609                         if (r)
1610                                 break;
1611                 }
1612                 if (!r)
1613                         i++;
1614         }
1615         srcu_read_unlock(&kvm->srcu, srcu_idx);
1616         up_read(&current->mm->mmap_sem);
1617 out:
1618         kvfree(keys);
1619         return r;
1620 }
1621
1622 /*
1623  * Base address and length must be sent at the start of each block, therefore
1624  * it's cheaper to send some clean data, as long as it's less than the size of
1625  * two longs.
1626  */
1627 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1628 /* use the same limit as for storage keys, for consistency */
1629 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
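
/*
 * Worked example for the constant above, assuming a 64-bit host: starting a
 * new block in the migration stream costs 2 * sizeof(void *) = 16 bytes of
 * header (base address + length), while a clean page costs one zero value
 * byte.  Sending the values of up to 16 clean pages in the current block is
 * therefore never more expensive than closing it and opening a new block
 * after the gap.
 */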
1630
1631 /*
1632  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1633  * address falls in a hole. In that case the index of one of the memslots
1634  * bordering the hole is returned.
1635  */
1636 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1637 {
1638         int start = 0, end = slots->used_slots;
1639         int slot = atomic_read(&slots->lru_slot);
1640         struct kvm_memory_slot *memslots = slots->memslots;
1641
1642         if (gfn >= memslots[slot].base_gfn &&
1643             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1644                 return slot;
1645
1646         while (start < end) {
1647                 slot = start + (end - start) / 2;
1648
1649                 if (gfn >= memslots[slot].base_gfn)
1650                         end = slot;
1651                 else
1652                         start = slot + 1;
1653         }
1654
1655         if (gfn >= memslots[start].base_gfn &&
1656             gfn < memslots[start].base_gfn + memslots[start].npages) {
1657                 atomic_set(&slots->lru_slot, start);
1658         }
1659
1660         return start;
1661 }
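
/*
 * Hypothetical example of the approximate lookup (memslots are kept sorted
 * by descending base_gfn): with slot 0 = [0x3000, 0x4000), slot 1 =
 * [0x1000, 0x1800) and slot 2 = [0x0, 0x400), a gfn of 0x2000 lies in the
 * hole between slots 1 and 0.  The binary search narrows start to 1 and the
 * function returns 1, one of the slots bordering the hole; a caller like
 * kvm_s390_next_dirty_cmma() then notices the gfn is outside that slot and
 * steps to a neighbouring one.
 */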
1662
1663 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1664                               u8 *res, unsigned long bufsize)
1665 {
1666         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1667
1668         args->count = 0;
1669         while (args->count < bufsize) {
1670                 hva = gfn_to_hva(kvm, cur_gfn);
1671                 /*
1672                  * We return an error if the first value was invalid, but we
1673                  * return successfully if at least one value was copied.
1674                  */
1675                 if (kvm_is_error_hva(hva))
1676                         return args->count ? 0 : -EFAULT;
1677                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1678                         pgstev = 0;
1679                 res[args->count++] = (pgstev >> 24) & 0x43;
1680                 cur_gfn++;
1681         }
1682
1683         return 0;
1684 }
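
/*
 * Note on the value encoding above: after the ">> 24", the 0x43 mask keeps
 * the two PGSTE usage-state bits (_PGSTE_GPS_USAGE_MASK, 0x03 after the
 * shift) and the no-DAT bit (_PGSTE_GPS_NODAT, 0x40 after the shift), so
 * every byte handed to userspace encodes "usage state | NODAT" for one guest
 * page and nothing else; kvm_s390_set_cmma_bits() below applies the same
 * layout in the opposite direction.
 */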
1685
1686 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1687                                               unsigned long cur_gfn)
1688 {
1689         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1690         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1691         unsigned long ofs = cur_gfn - ms->base_gfn;
1692
1693         if (ms->base_gfn + ms->npages <= cur_gfn) {
1694                 slotidx--;
1695                 /* If we are above the highest slot, wrap around */
1696                 if (slotidx < 0)
1697                         slotidx = slots->used_slots - 1;
1698
1699                 ms = slots->memslots + slotidx;
1700                 ofs = 0;
1701         }
1702         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1703         while ((slotidx > 0) && (ofs >= ms->npages)) {
1704                 slotidx--;
1705                 ms = slots->memslots + slotidx;
1706                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1707         }
1708         return ms->base_gfn + ofs;
1709 }
1710
1711 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1712                              u8 *res, unsigned long bufsize)
1713 {
1714         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1715         struct kvm_memslots *slots = kvm_memslots(kvm);
1716         struct kvm_memory_slot *ms;
1717
1718         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1719         ms = gfn_to_memslot(kvm, cur_gfn);
1720         args->count = 0;
1721         args->start_gfn = cur_gfn;
1722         if (!ms)
1723                 return 0;
1724         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1725         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1726
1727         while (args->count < bufsize) {
1728                 hva = gfn_to_hva(kvm, cur_gfn);
1729                 if (kvm_is_error_hva(hva))
1730                         return 0;
1731                 /* Decrement only if we actually flipped the bit to 0 */
1732                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1733                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1734                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1735                         pgstev = 0;
1736                 /* Save the value */
1737                 res[args->count++] = (pgstev >> 24) & 0x43;
1738                 /* If the next bit is too far away, stop. */
1739                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1740                         return 0;
1741                 /* If we reached the previous "next", find the next one */
1742                 if (cur_gfn == next_gfn)
1743                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1744                 /* Reached the end of memory or of the buffer, stop */
1745                 if ((next_gfn >= mem_end) ||
1746                     (next_gfn - args->start_gfn >= bufsize))
1747                         return 0;
1748                 cur_gfn++;
1749                 /* Reached the end of the current memslot, take the next one. */
1750                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1751                         ms = gfn_to_memslot(kvm, cur_gfn);
1752                         if (!ms)
1753                                 return 0;
1754                 }
1755         }
1756         return 0;
1757 }
1758
1759 /*
1760  * This function searches for the next page with dirty CMMA attributes, and
1761  * saves the attributes in the buffer up to either the end of the buffer or
1762  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1763  * no trailing clean bytes are saved.
1764  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1765  * output buffer will indicate 0 as length.
1766  */
1767 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1768                                   struct kvm_s390_cmma_log *args)
1769 {
1770         unsigned long bufsize;
1771         int srcu_idx, peek, ret;
1772         u8 *values;
1773
1774         if (!kvm->arch.use_cmma)
1775                 return -ENXIO;
1776         /* Invalid/unsupported flags were specified */
1777         if (args->flags & ~KVM_S390_CMMA_PEEK)
1778                 return -EINVAL;
1779         /* Migration mode query, and we are not doing a migration */
1780         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1781         if (!peek && !kvm->arch.migration_mode)
1782                 return -EINVAL;
1783         /* CMMA is disabled or was not used, or the buffer has length zero */
1784         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1785         if (!bufsize || !kvm->mm->context.uses_cmm) {
1786                 memset(args, 0, sizeof(*args));
1787                 return 0;
1788         }
1789         /* We are not peeking, and there are no dirty pages */
1790         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1791                 memset(args, 0, sizeof(*args));
1792                 return 0;
1793         }
1794
1795         values = vmalloc(bufsize);
1796         if (!values)
1797                 return -ENOMEM;
1798
1799         down_read(&kvm->mm->mmap_sem);
1800         srcu_idx = srcu_read_lock(&kvm->srcu);
1801         if (peek)
1802                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1803         else
1804                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1805         srcu_read_unlock(&kvm->srcu, srcu_idx);
1806         up_read(&kvm->mm->mmap_sem);
1807
1808         if (kvm->arch.migration_mode)
1809                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1810         else
1811                 args->remaining = 0;
1812
1813         if (copy_to_user((void __user *)args->values, values, args->count))
1814                 ret = -EFAULT;
1815
1816         vfree(values);
1817         return ret;
1818 }
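
/*
 * Illustrative userspace loop (a sketch only; it assumes an open "vm_fd",
 * migration mode already enabled through the KVM_S390_VM_MIGRATION
 * attribute, and a buffer "buf" of "bufsize" bytes).  Setting
 * KVM_S390_CMMA_PEEK in flags instead would read the values without
 * consuming dirty bits.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = bufsize,
 *		.flags     = 0,
 *		.values    = (uint64_t)buf,
 *	};
 *
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		send_block(log.start_gfn, log.count, buf);   ... hypothetical helper
 *		log.start_gfn += log.count;
 *		log.count = bufsize;
 *	} while (log.remaining);
 *
 * On return, start_gfn is the first page actually reported, count the number
 * of value bytes written to the buffer and remaining the number of pages
 * still marked dirty.
 */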
1819
1820 /*
1821  * This function sets the CMMA attributes for the given pages. If the input
1822  * buffer has zero length, no action is taken, otherwise the attributes are
1823  * set and the mm->context.uses_cmm flag is set.
1824  */
1825 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1826                                   const struct kvm_s390_cmma_log *args)
1827 {
1828         unsigned long hva, mask, pgstev, i;
1829         uint8_t *bits;
1830         int srcu_idx, r = 0;
1831
1832         mask = args->mask;
1833
1834         if (!kvm->arch.use_cmma)
1835                 return -ENXIO;
1836         /* invalid/unsupported flags */
1837         if (args->flags != 0)
1838                 return -EINVAL;
1839         /* Enforce sane limit on memory allocation */
1840         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1841                 return -EINVAL;
1842         /* Nothing to do */
1843         if (args->count == 0)
1844                 return 0;
1845
1846         bits = vmalloc(array_size(sizeof(*bits), args->count));
1847         if (!bits)
1848                 return -ENOMEM;
1849
1850         r = copy_from_user(bits, (void __user *)args->values, args->count);
1851         if (r) {
1852                 r = -EFAULT;
1853                 goto out;
1854         }
1855
1856         down_read(&kvm->mm->mmap_sem);
1857         srcu_idx = srcu_read_lock(&kvm->srcu);
1858         for (i = 0; i < args->count; i++) {
1859                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1860                 if (kvm_is_error_hva(hva)) {
1861                         r = -EFAULT;
1862                         break;
1863                 }
1864
1865                 pgstev = bits[i];
1866                 pgstev = pgstev << 24;
1867                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1868                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1869         }
1870         srcu_read_unlock(&kvm->srcu, srcu_idx);
1871         up_read(&kvm->mm->mmap_sem);
1872
1873         if (!kvm->mm->context.uses_cmm) {
1874                 down_write(&kvm->mm->mmap_sem);
1875                 kvm->mm->context.uses_cmm = 1;
1876                 up_write(&kvm->mm->mmap_sem);
1877         }
1878 out:
1879         vfree(bits);
1880         return r;
1881 }
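
/*
 * On the destination side the same buffer format is replayed with
 * KVM_S390_SET_CMMA_BITS; reusing the hypothetical "log" from the sketch
 * above, with mask selecting which PGSTE bits to touch (the kernel clamps it
 * to the usage-state and no-DAT bits regardless):
 *
 *	log.mask = ~0ULL;
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */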
1882
1883 long kvm_arch_vm_ioctl(struct file *filp,
1884                        unsigned int ioctl, unsigned long arg)
1885 {
1886         struct kvm *kvm = filp->private_data;
1887         void __user *argp = (void __user *)arg;
1888         struct kvm_device_attr attr;
1889         int r;
1890
1891         switch (ioctl) {
1892         case KVM_S390_INTERRUPT: {
1893                 struct kvm_s390_interrupt s390int;
1894
1895                 r = -EFAULT;
1896                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1897                         break;
1898                 r = kvm_s390_inject_vm(kvm, &s390int);
1899                 break;
1900         }
1901         case KVM_ENABLE_CAP: {
1902                 struct kvm_enable_cap cap;
1903                 r = -EFAULT;
1904                 if (copy_from_user(&cap, argp, sizeof(cap)))
1905                         break;
1906                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1907                 break;
1908         }
1909         case KVM_CREATE_IRQCHIP: {
1910                 struct kvm_irq_routing_entry routing;
1911
1912                 r = -EINVAL;
1913                 if (kvm->arch.use_irqchip) {
1914                         /* Set up dummy routing. */
1915                         memset(&routing, 0, sizeof(routing));
1916                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1917                 }
1918                 break;
1919         }
1920         case KVM_SET_DEVICE_ATTR: {
1921                 r = -EFAULT;
1922                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1923                         break;
1924                 r = kvm_s390_vm_set_attr(kvm, &attr);
1925                 break;
1926         }
1927         case KVM_GET_DEVICE_ATTR: {
1928                 r = -EFAULT;
1929                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1930                         break;
1931                 r = kvm_s390_vm_get_attr(kvm, &attr);
1932                 break;
1933         }
1934         case KVM_HAS_DEVICE_ATTR: {
1935                 r = -EFAULT;
1936                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1937                         break;
1938                 r = kvm_s390_vm_has_attr(kvm, &attr);
1939                 break;
1940         }
1941         case KVM_S390_GET_SKEYS: {
1942                 struct kvm_s390_skeys args;
1943
1944                 r = -EFAULT;
1945                 if (copy_from_user(&args, argp,
1946                                    sizeof(struct kvm_s390_skeys)))
1947                         break;
1948                 r = kvm_s390_get_skeys(kvm, &args);
1949                 break;
1950         }
1951         case KVM_S390_SET_SKEYS: {
1952                 struct kvm_s390_skeys args;
1953
1954                 r = -EFAULT;
1955                 if (copy_from_user(&args, argp,
1956                                    sizeof(struct kvm_s390_skeys)))
1957                         break;
1958                 r = kvm_s390_set_skeys(kvm, &args);
1959                 break;
1960         }
1961         case KVM_S390_GET_CMMA_BITS: {
1962                 struct kvm_s390_cmma_log args;
1963
1964                 r = -EFAULT;
1965                 if (copy_from_user(&args, argp, sizeof(args)))
1966                         break;
1967                 mutex_lock(&kvm->slots_lock);
1968                 r = kvm_s390_get_cmma_bits(kvm, &args);
1969                 mutex_unlock(&kvm->slots_lock);
1970                 if (!r) {
1971                         r = copy_to_user(argp, &args, sizeof(args));
1972                         if (r)
1973                                 r = -EFAULT;
1974                 }
1975                 break;
1976         }
1977         case KVM_S390_SET_CMMA_BITS: {
1978                 struct kvm_s390_cmma_log args;
1979
1980                 r = -EFAULT;
1981                 if (copy_from_user(&args, argp, sizeof(args)))
1982                         break;
1983                 mutex_lock(&kvm->slots_lock);
1984                 r = kvm_s390_set_cmma_bits(kvm, &args);
1985                 mutex_unlock(&kvm->slots_lock);
1986                 break;
1987         }
1988         default:
1989                 r = -ENOTTY;
1990         }
1991
1992         return r;
1993 }
1994
1995 static int kvm_s390_query_ap_config(u8 *config)
1996 {
1997         u32 fcn_code = 0x04000000UL;
1998         u32 cc = 0;
1999
2000         memset(config, 0, 128);
2001         asm volatile(
2002                 "lgr 0,%1\n"
2003                 "lgr 2,%2\n"
2004                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
2005                 "0: ipm %0\n"
2006                 "srl %0,28\n"
2007                 "1:\n"
2008                 EX_TABLE(0b, 1b)
2009                 : "+r" (cc)
2010                 : "r" (fcn_code), "r" (config)
2011                 : "cc", "0", "2", "memory"
2012         );
2013
2014         return cc;
2015 }
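
/*
 * Reading the asm above: general register 0 is loaded with the function code
 * 0x04000000, i.e. PQAP with the QCI (query AP configuration information)
 * function, general register 2 with the address of the 128-byte info block,
 * and the hard-coded opcode 0xb2af0000 is the PQAP instruction itself;
 * ipm/srl then extract the condition code into "cc".  The EX_TABLE entry is
 * the usual exception-table fixup so an unexpected fault from the query does
 * not bring down the host.
 */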
2016
2017 static int kvm_s390_apxa_installed(void)
2018 {
2019         u8 config[128];
2020         int cc;
2021
2022         if (test_facility(12)) {
2023                 cc = kvm_s390_query_ap_config(config);
2024
2025                 if (cc)
2026                         pr_err("PQAP(QCI) failed with cc=%d", cc);
2027                 else
2028                         return config[0] & 0x40;
2029         }
2030
2031         return 0;
2032 }
2033
2034 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2035 {
2036         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2037
2038         if (kvm_s390_apxa_installed())
2039                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2040         else
2041                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2042 }
2043
2044 static u64 kvm_s390_get_initial_cpuid(void)
2045 {
2046         struct cpuid cpuid;
2047
2048         get_cpu_id(&cpuid);
2049         cpuid.version = 0xff;
2050         return *((u64 *) &cpuid);
2051 }
2052
2053 static void kvm_s390_crypto_init(struct kvm *kvm)
2054 {
2055         if (!test_kvm_facility(kvm, 76))
2056                 return;
2057
2058         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2059         kvm_s390_set_crycb_format(kvm);
2060
2061         /* Enable AES/DEA protected key functions by default */
2062         kvm->arch.crypto.aes_kw = 1;
2063         kvm->arch.crypto.dea_kw = 1;
2064         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2065                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2066         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2067                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2068 }
2069
2070 static void sca_dispose(struct kvm *kvm)
2071 {
2072         if (kvm->arch.use_esca)
2073                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2074         else
2075                 free_page((unsigned long)(kvm->arch.sca));
2076         kvm->arch.sca = NULL;
2077 }
2078
2079 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2080 {
2081         gfp_t alloc_flags = GFP_KERNEL;
2082         int i, rc;
2083         char debug_name[16];
2084         static unsigned long sca_offset;
2085
2086         rc = -EINVAL;
2087 #ifdef CONFIG_KVM_S390_UCONTROL
2088         if (type & ~KVM_VM_S390_UCONTROL)
2089                 goto out_err;
2090         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2091                 goto out_err;
2092 #else
2093         if (type)
2094                 goto out_err;
2095 #endif
2096
2097         rc = s390_enable_sie();
2098         if (rc)
2099                 goto out_err;
2100
2101         rc = -ENOMEM;
2102
2103         if (!sclp.has_64bscao)
2104                 alloc_flags |= GFP_DMA;
2105         rwlock_init(&kvm->arch.sca_lock);
2106         /* start with basic SCA */
2107         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2108         if (!kvm->arch.sca)
2109                 goto out_err;
2110         spin_lock(&kvm_lock);
2111         sca_offset += 16;
2112         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2113                 sca_offset = 0;
2114         kvm->arch.sca = (struct bsca_block *)
2115                         ((char *) kvm->arch.sca + sca_offset);
2116         spin_unlock(&kvm_lock);
2117
2118         sprintf(debug_name, "kvm-%u", current->pid);
2119
2120         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2121         if (!kvm->arch.dbf)
2122                 goto out_err;
2123
2124         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2125         kvm->arch.sie_page2 =
2126              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2127         if (!kvm->arch.sie_page2)
2128                 goto out_err;
2129
2130         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2131
2132         for (i = 0; i < kvm_s390_fac_size(); i++) {
2133                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2134                                               (kvm_s390_fac_base[i] |
2135                                                kvm_s390_fac_ext[i]);
2136                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2137                                               kvm_s390_fac_base[i];
2138         }
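
	/*
	 * Rough meaning of the two arrays filled above: fac_mask bounds the
	 * facilities KVM is willing to back for this VM (host facilities
	 * restricted to the known base + extension lists), while fac_list is
	 * what is initially announced to the guest (base facilities only).
	 * Userspace may adjust fac_list via the CPU-model attributes, but
	 * test_kvm_facility() treats a facility as available only when it is
	 * set in both the mask and the list.
	 */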
2139
2140         /* we are always in czam mode - even on pre z14 machines */
2141         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2142         set_kvm_facility(kvm->arch.model.fac_list, 138);
2143         /* we emulate STHYI in kvm */
2144         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2145         set_kvm_facility(kvm->arch.model.fac_list, 74);
2146         if (MACHINE_HAS_TLB_GUEST) {
2147                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2148                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2149         }
2150
2151         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2152         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2153
2154         kvm_s390_crypto_init(kvm);
2155
2156         mutex_init(&kvm->arch.float_int.ais_lock);
2157         spin_lock_init(&kvm->arch.float_int.lock);
2158         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2159                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2160         init_waitqueue_head(&kvm->arch.ipte_wq);
2161         mutex_init(&kvm->arch.ipte_mutex);
2162
2163         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2164         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2165
2166         if (type & KVM_VM_S390_UCONTROL) {
2167                 kvm->arch.gmap = NULL;
2168                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2169         } else {
2170                 if (sclp.hamax == U64_MAX)
2171                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2172                 else
2173                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2174                                                     sclp.hamax + 1);
2175                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2176                 if (!kvm->arch.gmap)
2177                         goto out_err;
2178                 kvm->arch.gmap->private = kvm;
2179                 kvm->arch.gmap->pfault_enabled = 0;
2180         }
2181
2182         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2183         kvm->arch.use_skf = sclp.has_skey;
2184         spin_lock_init(&kvm->arch.start_stop_lock);
2185         kvm_s390_vsie_init(kvm);
2186         kvm_s390_gisa_init(kvm);
2187         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2188
2189         return 0;
2190 out_err:
2191         free_page((unsigned long)kvm->arch.sie_page2);
2192         debug_unregister(kvm->arch.dbf);
2193         sca_dispose(kvm);
2194         KVM_EVENT(3, "creation of vm failed: %d", rc);
2195         return rc;
2196 }
2197
2198 bool kvm_arch_has_vcpu_debugfs(void)
2199 {
2200         return false;
2201 }
2202
2203 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2204 {
2205         return 0;
2206 }
2207
2208 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2209 {
2210         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2211         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2212         kvm_s390_clear_local_irqs(vcpu);
2213         kvm_clear_async_pf_completion_queue(vcpu);
2214         if (!kvm_is_ucontrol(vcpu->kvm))
2215                 sca_del_vcpu(vcpu);
2216
2217         if (kvm_is_ucontrol(vcpu->kvm))
2218                 gmap_remove(vcpu->arch.gmap);
2219
2220         if (vcpu->kvm->arch.use_cmma)
2221                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2222         free_page((unsigned long)(vcpu->arch.sie_block));
2223
2224         kvm_vcpu_uninit(vcpu);
2225         kmem_cache_free(kvm_vcpu_cache, vcpu);
2226 }
2227
2228 static void kvm_free_vcpus(struct kvm *kvm)
2229 {
2230         unsigned int i;
2231         struct kvm_vcpu *vcpu;
2232
2233         kvm_for_each_vcpu(i, vcpu, kvm)
2234                 kvm_arch_vcpu_destroy(vcpu);
2235
2236         mutex_lock(&kvm->lock);
2237         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2238                 kvm->vcpus[i] = NULL;
2239
2240         atomic_set(&kvm->online_vcpus, 0);
2241         mutex_unlock(&kvm->lock);
2242 }
2243
2244 void kvm_arch_destroy_vm(struct kvm *kvm)
2245 {
2246         kvm_free_vcpus(kvm);
2247         sca_dispose(kvm);
2248         debug_unregister(kvm->arch.dbf);
2249         kvm_s390_gisa_destroy(kvm);
2250         free_page((unsigned long)kvm->arch.sie_page2);
2251         if (!kvm_is_ucontrol(kvm))
2252                 gmap_remove(kvm->arch.gmap);
2253         kvm_s390_destroy_adapters(kvm);
2254         kvm_s390_clear_float_irqs(kvm);
2255         kvm_s390_vsie_destroy(kvm);
2256         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2257 }
2258
2259 /* Section: vcpu related */
2260 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2261 {
2262         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2263         if (!vcpu->arch.gmap)
2264                 return -ENOMEM;
2265         vcpu->arch.gmap->private = vcpu->kvm;
2266
2267         return 0;
2268 }
2269
2270 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2271 {
2272         if (!kvm_s390_use_sca_entries())
2273                 return;
2274         read_lock(&vcpu->kvm->arch.sca_lock);
2275         if (vcpu->kvm->arch.use_esca) {
2276                 struct esca_block *sca = vcpu->kvm->arch.sca;
2277
2278                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2279                 sca->cpu[vcpu->vcpu_id].sda = 0;
2280         } else {
2281                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2282
2283                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2284                 sca->cpu[vcpu->vcpu_id].sda = 0;
2285         }
2286         read_unlock(&vcpu->kvm->arch.sca_lock);
2287 }
2288
2289 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2290 {
2291         if (!kvm_s390_use_sca_entries()) {
2292                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2293
2294                 /* we still need the basic sca for the ipte control */
2295                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2296                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2297                 return;
2298         }
2299         read_lock(&vcpu->kvm->arch.sca_lock);
2300         if (vcpu->kvm->arch.use_esca) {
2301                 struct esca_block *sca = vcpu->kvm->arch.sca;
2302
2303                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2304                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2305                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2306                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2307                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2308         } else {
2309                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2310
2311                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2312                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2313                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2314                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2315         }
2316         read_unlock(&vcpu->kvm->arch.sca_lock);
2317 }
2318
2319 /* Basic SCA to Extended SCA data copy routines */
2320 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2321 {
2322         d->sda = s->sda;
2323         d->sigp_ctrl.c = s->sigp_ctrl.c;
2324         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2325 }
2326
2327 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2328 {
2329         int i;
2330
2331         d->ipte_control = s->ipte_control;
2332         d->mcn[0] = s->mcn;
2333         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2334                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2335 }
2336
2337 static int sca_switch_to_extended(struct kvm *kvm)
2338 {
2339         struct bsca_block *old_sca = kvm->arch.sca;
2340         struct esca_block *new_sca;
2341         struct kvm_vcpu *vcpu;
2342         unsigned int vcpu_idx;
2343         u32 scaol, scaoh;
2344
2345         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2346         if (!new_sca)
2347                 return -ENOMEM;
2348
2349         scaoh = (u32)((u64)(new_sca) >> 32);
2350         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2351
2352         kvm_s390_vcpu_block_all(kvm);
2353         write_lock(&kvm->arch.sca_lock);
2354
2355         sca_copy_b_to_e(new_sca, old_sca);
2356
2357         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2358                 vcpu->arch.sie_block->scaoh = scaoh;
2359                 vcpu->arch.sie_block->scaol = scaol;
2360                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2361         }
2362         kvm->arch.sca = new_sca;
2363         kvm->arch.use_esca = 1;
2364
2365         write_unlock(&kvm->arch.sca_lock);
2366         kvm_s390_vcpu_unblock_all(kvm);
2367
2368         free_page((unsigned long)old_sca);
2369
2370         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2371                  old_sca, kvm->arch.sca);
2372         return 0;
2373 }
2374
2375 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2376 {
2377         int rc;
2378
2379         if (!kvm_s390_use_sca_entries()) {
2380                 if (id < KVM_MAX_VCPUS)
2381                         return true;
2382                 return false;
2383         }
2384         if (id < KVM_S390_BSCA_CPU_SLOTS)
2385                 return true;
2386         if (!sclp.has_esca || !sclp.has_64bscao)
2387                 return false;
2388
2389         mutex_lock(&kvm->lock);
2390         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2391         mutex_unlock(&kvm->lock);
2392
2393         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2394 }
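
/*
 * Capacity note (slot counts as defined elsewhere in this tree): a basic SCA
 * has room for KVM_S390_BSCA_CPU_SLOTS (64) vcpus, an extended SCA for
 * KVM_S390_ESCA_CPU_SLOTS (248).  The switch is done lazily here: only when
 * a vcpu id beyond the basic range is first requested, and only if the
 * machine provides both ESCA and the 64-bit SCA origin (sclp.has_esca and
 * sclp.has_64bscao).
 */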
2395
2396 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2397 {
2398         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2399         kvm_clear_async_pf_completion_queue(vcpu);
2400         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2401                                     KVM_SYNC_GPRS |
2402                                     KVM_SYNC_ACRS |
2403                                     KVM_SYNC_CRS |
2404                                     KVM_SYNC_ARCH0 |
2405                                     KVM_SYNC_PFAULT;
2406         kvm_s390_set_prefix(vcpu, 0);
2407         if (test_kvm_facility(vcpu->kvm, 64))
2408                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2409         if (test_kvm_facility(vcpu->kvm, 82))
2410                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2411         if (test_kvm_facility(vcpu->kvm, 133))
2412                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2413         if (test_kvm_facility(vcpu->kvm, 156))
2414                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2415         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2416          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2417          */
2418         if (MACHINE_HAS_VX)
2419                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2420         else
2421                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2422
2423         if (kvm_is_ucontrol(vcpu->kvm))
2424                 return __kvm_ucontrol_vcpu_init(vcpu);
2425
2426         return 0;
2427 }
2428
2429 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2430 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2431 {
2432         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2433         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2434         vcpu->arch.cputm_start = get_tod_clock_fast();
2435         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2436 }
2437
2438 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2439 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2440 {
2441         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2442         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2443         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2444         vcpu->arch.cputm_start = 0;
2445         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2446 }
2447
2448 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2449 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2450 {
2451         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2452         vcpu->arch.cputm_enabled = true;
2453         __start_cpu_timer_accounting(vcpu);
2454 }
2455
2456 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2457 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2458 {
2459         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2460         __stop_cpu_timer_accounting(vcpu);
2461         vcpu->arch.cputm_enabled = false;
2462 }
2463
2464 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2465 {
2466         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2467         __enable_cpu_timer_accounting(vcpu);
2468         preempt_enable();
2469 }
2470
2471 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2472 {
2473         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2474         __disable_cpu_timer_accounting(vcpu);
2475         preempt_enable();
2476 }
2477
2478 /* set the cpu timer - may only be called from the VCPU thread itself */
2479 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2480 {
2481         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2482         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2483         if (vcpu->arch.cputm_enabled)
2484                 vcpu->arch.cputm_start = get_tod_clock_fast();
2485         vcpu->arch.sie_block->cputm = cputm;
2486         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2487         preempt_enable();
2488 }
2489
2490 /* update and get the cpu timer - can also be called from other VCPU threads */
2491 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2492 {
2493         unsigned int seq;
2494         __u64 value;
2495
2496         if (unlikely(!vcpu->arch.cputm_enabled))
2497                 return vcpu->arch.sie_block->cputm;
2498
2499         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2500         do {
2501                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2502                 /*
2503                  * If the writer would ever execute a read in the critical
2504                  * section, e.g. in irq context, we have a deadlock.
2505                  */
2506                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2507                 value = vcpu->arch.sie_block->cputm;
2508                 /* if cputm_start is 0, accounting is being started/stopped */
2509                 if (likely(vcpu->arch.cputm_start))
2510                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2511         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2512         preempt_enable();
2513         return value;
2514 }
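
/*
 * The "seq & ~1" above is the subtle part of this lockless read: if the
 * sequence was sampled while a write was in flight (odd value), clearing the
 * low bit guarantees that read_seqcount_retry() reports a mismatch and the
 * loop runs again, so a half-updated cputm/cputm_start pair is never
 * returned to the caller.
 */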
2515
2516 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2517 {
2518
2519         gmap_enable(vcpu->arch.enabled_gmap);
2520         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2521         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2522                 __start_cpu_timer_accounting(vcpu);
2523         vcpu->cpu = cpu;
2524 }
2525
2526 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2527 {
2528         vcpu->cpu = -1;
2529         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2530                 __stop_cpu_timer_accounting(vcpu);
2531         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2532         vcpu->arch.enabled_gmap = gmap_get_enabled();
2533         gmap_disable(vcpu->arch.enabled_gmap);
2534
2535 }
2536
2537 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2538 {
2539         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2540         vcpu->arch.sie_block->gpsw.mask = 0UL;
2541         vcpu->arch.sie_block->gpsw.addr = 0UL;
2542         kvm_s390_set_prefix(vcpu, 0);
2543         kvm_s390_set_cpu_timer(vcpu, 0);
2544         vcpu->arch.sie_block->ckc       = 0UL;
2545         vcpu->arch.sie_block->todpr     = 0;
2546         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2547         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2548                                         CR0_INTERRUPT_KEY_SUBMASK |
2549                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2550         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2551                                         CR14_UNUSED_33 |
2552                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2553         /* make sure the new fpc will be lazily loaded */
2554         save_fpu_regs();
2555         current->thread.fpu.fpc = 0;
2556         vcpu->arch.sie_block->gbea = 1;
2557         vcpu->arch.sie_block->pp = 0;
2558         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2559         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2560         kvm_clear_async_pf_completion_queue(vcpu);
2561         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2562                 kvm_s390_vcpu_stop(vcpu);
2563         kvm_s390_clear_local_irqs(vcpu);
2564 }
2565
2566 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2567 {
2568         mutex_lock(&vcpu->kvm->lock);
2569         preempt_disable();
2570         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2571         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2572         preempt_enable();
2573         mutex_unlock(&vcpu->kvm->lock);
2574         if (!kvm_is_ucontrol(vcpu->kvm)) {
2575                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2576                 sca_add_vcpu(vcpu);
2577         }
2578         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2579                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2580         /* make vcpu_load load the right gmap on the first trigger */
2581         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2582 }
2583
2584 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2585 {
2586         if (!test_kvm_facility(vcpu->kvm, 76))
2587                 return;
2588
2589         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2590
2591         if (vcpu->kvm->arch.crypto.aes_kw)
2592                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2593         if (vcpu->kvm->arch.crypto.dea_kw)
2594                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2595
2596         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2597 }
2598
2599 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2600 {
2601         free_page(vcpu->arch.sie_block->cbrlo);
2602         vcpu->arch.sie_block->cbrlo = 0;
2603 }
2604
2605 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2606 {
2607         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2608         if (!vcpu->arch.sie_block->cbrlo)
2609                 return -ENOMEM;
2610         return 0;
2611 }
2612
2613 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2614 {
2615         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2616
2617         vcpu->arch.sie_block->ibc = model->ibc;
2618         if (test_kvm_facility(vcpu->kvm, 7))
2619                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2620 }
2621
2622 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2623 {
2624         int rc = 0;
2625
2626         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2627                                                     CPUSTAT_SM |
2628                                                     CPUSTAT_STOPPED);
2629
2630         if (test_kvm_facility(vcpu->kvm, 78))
2631                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2632         else if (test_kvm_facility(vcpu->kvm, 8))
2633                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2634
2635         kvm_s390_vcpu_setup_model(vcpu);
2636
2637         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2638         if (MACHINE_HAS_ESOP)
2639                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2640         if (test_kvm_facility(vcpu->kvm, 9))
2641                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2642         if (test_kvm_facility(vcpu->kvm, 73))
2643                 vcpu->arch.sie_block->ecb |= ECB_TE;
2644
2645         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2646                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2647         if (test_kvm_facility(vcpu->kvm, 130))
2648                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2649         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2650         if (sclp.has_cei)
2651                 vcpu->arch.sie_block->eca |= ECA_CEI;
2652         if (sclp.has_ib)
2653                 vcpu->arch.sie_block->eca |= ECA_IB;
2654         if (sclp.has_siif)
2655                 vcpu->arch.sie_block->eca |= ECA_SII;
2656         if (sclp.has_sigpif)
2657                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2658         if (test_kvm_facility(vcpu->kvm, 129)) {
2659                 vcpu->arch.sie_block->eca |= ECA_VX;
2660                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2661         }
2662         if (test_kvm_facility(vcpu->kvm, 139))
2663                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2664         if (test_kvm_facility(vcpu->kvm, 156))
2665                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2666         if (vcpu->arch.sie_block->gd) {
2667                 vcpu->arch.sie_block->eca |= ECA_AIV;
2668                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2669                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2670         }
2671         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2672                                         | SDNXC;
2673         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2674
2675         if (sclp.has_kss)
2676                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2677         else
2678                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2679
2680         if (vcpu->kvm->arch.use_cmma) {
2681                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2682                 if (rc)
2683                         return rc;
2684         }
2685         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2686         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2687
2688         kvm_s390_vcpu_crypto_setup(vcpu);
2689
2690         return rc;
2691 }
2692
2693 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2694                                       unsigned int id)
2695 {
2696         struct kvm_vcpu *vcpu;
2697         struct sie_page *sie_page;
2698         int rc = -EINVAL;
2699
2700         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2701                 goto out;
2702
2703         rc = -ENOMEM;
2704
2705         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2706         if (!vcpu)
2707                 goto out;
2708
2709         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2710         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2711         if (!sie_page)
2712                 goto out_free_cpu;
2713
2714         vcpu->arch.sie_block = &sie_page->sie_block;
2715         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2716
2717         /* the real guest size will always be smaller than msl */
2718         vcpu->arch.sie_block->mso = 0;
2719         vcpu->arch.sie_block->msl = sclp.hamax;
2720
2721         vcpu->arch.sie_block->icpua = id;
2722         spin_lock_init(&vcpu->arch.local_int.lock);
2723         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2724         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2725                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2726         seqcount_init(&vcpu->arch.cputm_seqcount);
2727
2728         rc = kvm_vcpu_init(vcpu, kvm, id);
2729         if (rc)
2730                 goto out_free_sie_block;
2731         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2732                  vcpu->arch.sie_block);
2733         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2734
2735         return vcpu;
2736 out_free_sie_block:
2737         free_page((unsigned long)(vcpu->arch.sie_block));
2738 out_free_cpu:
2739         kmem_cache_free(kvm_vcpu_cache, vcpu);
2740 out:
2741         return ERR_PTR(rc);
2742 }
2743
2744 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2745 {
2746         return kvm_s390_vcpu_has_irq(vcpu, 0);
2747 }
2748
2749 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2750 {
2751         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2752 }
2753
2754 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2755 {
2756         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2757         exit_sie(vcpu);
2758 }
2759
2760 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2761 {
2762         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2763 }
2764
2765 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2766 {
2767         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2768         exit_sie(vcpu);
2769 }
2770
2771 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2772 {
2773         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2774 }
2775
2776 /*
2777  * Kick a guest cpu out of SIE and wait until SIE is not running.
2778  * If the CPU is not running (e.g. waiting as idle) the function will
2779  * return immediately. */
2780 void exit_sie(struct kvm_vcpu *vcpu)
2781 {
2782         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2783         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2784                 cpu_relax();
2785 }
2786
2787 /* Kick a guest cpu out of SIE to process a request synchronously */
2788 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2789 {
2790         kvm_make_request(req, vcpu);
2791         kvm_s390_vcpu_request(vcpu);
2792 }
2793
2794 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2795                               unsigned long end)
2796 {
2797         struct kvm *kvm = gmap->private;
2798         struct kvm_vcpu *vcpu;
2799         unsigned long prefix;
2800         int i;
2801
2802         if (gmap_is_shadow(gmap))
2803                 return;
2804         if (start >= 1UL << 31)
2805                 /* We are only interested in prefix pages */
2806                 return;
2807         kvm_for_each_vcpu(i, vcpu, kvm) {
2808                 /* match against both prefix pages */
2809                 prefix = kvm_s390_get_prefix(vcpu);
2810                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2811                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2812                                    start, end);
2813                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2814                 }
2815         }
2816 }
2817
2818 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2819 {
2820         /* kvm common code refers to this, but never calls it */
2821         BUG();
2822         return 0;
2823 }
2824
2825 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2826                                            struct kvm_one_reg *reg)
2827 {
2828         int r = -EINVAL;
2829
2830         switch (reg->id) {
2831         case KVM_REG_S390_TODPR:
2832                 r = put_user(vcpu->arch.sie_block->todpr,
2833                              (u32 __user *)reg->addr);
2834                 break;
2835         case KVM_REG_S390_EPOCHDIFF:
2836                 r = put_user(vcpu->arch.sie_block->epoch,
2837                              (u64 __user *)reg->addr);
2838                 break;
2839         case KVM_REG_S390_CPU_TIMER:
2840                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2841                              (u64 __user *)reg->addr);
2842                 break;
2843         case KVM_REG_S390_CLOCK_COMP:
2844                 r = put_user(vcpu->arch.sie_block->ckc,
2845                              (u64 __user *)reg->addr);
2846                 break;
2847         case KVM_REG_S390_PFTOKEN:
2848                 r = put_user(vcpu->arch.pfault_token,
2849                              (u64 __user *)reg->addr);
2850                 break;
2851         case KVM_REG_S390_PFCOMPARE:
2852                 r = put_user(vcpu->arch.pfault_compare,
2853                              (u64 __user *)reg->addr);
2854                 break;
2855         case KVM_REG_S390_PFSELECT:
2856                 r = put_user(vcpu->arch.pfault_select,
2857                              (u64 __user *)reg->addr);
2858                 break;
2859         case KVM_REG_S390_PP:
2860                 r = put_user(vcpu->arch.sie_block->pp,
2861                              (u64 __user *)reg->addr);
2862                 break;
2863         case KVM_REG_S390_GBEA:
2864                 r = put_user(vcpu->arch.sie_block->gbea,
2865                              (u64 __user *)reg->addr);
2866                 break;
2867         default:
2868                 break;
2869         }
2870
2871         return r;
2872 }
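/*
 * Userspace reaches the handler above through the generic KVM_GET_ONE_REG
 * ioctl. A minimal, hedged usage sketch (error handling omitted; vcpu_fd is
 * an assumed open VCPU file descriptor):
 *
 *	__u32 todpr;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_TODPR,
 *		.addr = (__u64)&todpr,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *
 * KVM_SET_ONE_REG takes the same structure and is handled by
 * kvm_arch_vcpu_ioctl_set_one_reg() below.
 */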
2873
2874 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2875                                            struct kvm_one_reg *reg)
2876 {
2877         int r = -EINVAL;
2878         __u64 val;
2879
2880         switch (reg->id) {
2881         case KVM_REG_S390_TODPR:
2882                 r = get_user(vcpu->arch.sie_block->todpr,
2883                              (u32 __user *)reg->addr);
2884                 break;
2885         case KVM_REG_S390_EPOCHDIFF:
2886                 r = get_user(vcpu->arch.sie_block->epoch,
2887                              (u64 __user *)reg->addr);
2888                 break;
2889         case KVM_REG_S390_CPU_TIMER:
2890                 r = get_user(val, (u64 __user *)reg->addr);
2891                 if (!r)
2892                         kvm_s390_set_cpu_timer(vcpu, val);
2893                 break;
2894         case KVM_REG_S390_CLOCK_COMP:
2895                 r = get_user(vcpu->arch.sie_block->ckc,
2896                              (u64 __user *)reg->addr);
2897                 break;
2898         case KVM_REG_S390_PFTOKEN:
2899                 r = get_user(vcpu->arch.pfault_token,
2900                              (u64 __user *)reg->addr);
2901                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2902                         kvm_clear_async_pf_completion_queue(vcpu);
2903                 break;
2904         case KVM_REG_S390_PFCOMPARE:
2905                 r = get_user(vcpu->arch.pfault_compare,
2906                              (u64 __user *)reg->addr);
2907                 break;
2908         case KVM_REG_S390_PFSELECT:
2909                 r = get_user(vcpu->arch.pfault_select,
2910                              (u64 __user *)reg->addr);
2911                 break;
2912         case KVM_REG_S390_PP:
2913                 r = get_user(vcpu->arch.sie_block->pp,
2914                              (u64 __user *)reg->addr);
2915                 break;
2916         case KVM_REG_S390_GBEA:
2917                 r = get_user(vcpu->arch.sie_block->gbea,
2918                              (u64 __user *)reg->addr);
2919                 break;
2920         default:
2921                 break;
2922         }
2923
2924         return r;
2925 }
2926
2927 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2928 {
2929         kvm_s390_vcpu_initial_reset(vcpu);
2930         return 0;
2931 }
2932
2933 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2934 {
2935         vcpu_load(vcpu);
2936         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2937         vcpu_put(vcpu);
2938         return 0;
2939 }
2940
2941 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2942 {
2943         vcpu_load(vcpu);
2944         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2945         vcpu_put(vcpu);
2946         return 0;
2947 }
2948
2949 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2950                                   struct kvm_sregs *sregs)
2951 {
2952         vcpu_load(vcpu);
2953
2954         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2955         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2956
2957         vcpu_put(vcpu);
2958         return 0;
2959 }
2960
2961 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2962                                   struct kvm_sregs *sregs)
2963 {
2964         vcpu_load(vcpu);
2965
2966         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2967         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2968
2969         vcpu_put(vcpu);
2970         return 0;
2971 }
2972
2973 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2974 {
2975         int ret = 0;
2976
2977         vcpu_load(vcpu);
2978
2979         if (test_fp_ctl(fpu->fpc)) {
2980                 ret = -EINVAL;
2981                 goto out;
2982         }
2983         vcpu->run->s.regs.fpc = fpu->fpc;
2984         if (MACHINE_HAS_VX)
2985                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2986                                  (freg_t *) fpu->fprs);
2987         else
2988                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2989
2990 out:
2991         vcpu_put(vcpu);
2992         return ret;
2993 }
2994
2995 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2996 {
2997         vcpu_load(vcpu);
2998
2999         /* make sure we have the latest values */
3000         save_fpu_regs();
3001         if (MACHINE_HAS_VX)
3002                 convert_vx_to_fp((freg_t *) fpu->fprs,
3003                                  (__vector128 *) vcpu->run->s.regs.vrs);
3004         else
3005                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3006         fpu->fpc = vcpu->run->s.regs.fpc;
3007
3008         vcpu_put(vcpu);
3009         return 0;
3010 }
3011
3012 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3013 {
3014         int rc = 0;
3015
3016         if (!is_vcpu_stopped(vcpu)) {
3017                 rc = -EBUSY;
3018         } else {
3019                 vcpu->run->psw_mask = psw.mask;
3020                 vcpu->run->psw_addr = psw.addr;
3021         }
3022         return rc;
3023 }
3024
3025 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3026                                   struct kvm_translation *tr)
3027 {
3028         return -EINVAL; /* not implemented yet */
3029 }
3030
3031 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3032                               KVM_GUESTDBG_USE_HW_BP | \
3033                               KVM_GUESTDBG_ENABLE)
3034
3035 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3036                                         struct kvm_guest_debug *dbg)
3037 {
3038         int rc = 0;
3039
3040         vcpu_load(vcpu);
3041
3042         vcpu->guest_debug = 0;
3043         kvm_s390_clear_bp_data(vcpu);
3044
3045         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3046                 rc = -EINVAL;
3047                 goto out;
3048         }
3049         if (!sclp.has_gpere) {
3050                 rc = -EINVAL;
3051                 goto out;
3052         }
3053
3054         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3055                 vcpu->guest_debug = dbg->control;
3056                 /* enforce guest PER */
3057                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3058
3059                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3060                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3061         } else {
3062                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3063                 vcpu->arch.guestdbg.last_bp = 0;
3064         }
3065
3066         if (rc) {
3067                 vcpu->guest_debug = 0;
3068                 kvm_s390_clear_bp_data(vcpu);
3069                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3070         }
3071
3072 out:
3073         vcpu_put(vcpu);
3074         return rc;
3075 }
3076
3077 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3078                                     struct kvm_mp_state *mp_state)
3079 {
3080         int ret;
3081
3082         vcpu_load(vcpu);
3083
3084         /* CHECK_STOP and LOAD are not supported yet */
3085         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3086                                       KVM_MP_STATE_OPERATING;
3087
3088         vcpu_put(vcpu);
3089         return ret;
3090 }
3091
3092 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3093                                     struct kvm_mp_state *mp_state)
3094 {
3095         int rc = 0;
3096
3097         vcpu_load(vcpu);
3098
3099         /* user space knows about this interface - let it control the state */
3100         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3101
3102         switch (mp_state->mp_state) {
3103         case KVM_MP_STATE_STOPPED:
3104                 kvm_s390_vcpu_stop(vcpu);
3105                 break;
3106         case KVM_MP_STATE_OPERATING:
3107                 kvm_s390_vcpu_start(vcpu);
3108                 break;
3109         case KVM_MP_STATE_LOAD:
3110         case KVM_MP_STATE_CHECK_STOP:
3111                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3112         default:
3113                 rc = -ENXIO;
3114         }
3115
3116         vcpu_put(vcpu);
3117         return rc;
3118 }
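/*
 * Hedged usage sketch: once userspace has used this interface it is in
 * charge of the stopped/operating state, e.g. (vcpu_fd is an assumed open
 * VCPU file descriptor):
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 */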
3119
3120 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3121 {
3122         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3123 }
3124
3125 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3126 {
3127 retry:
3128         kvm_s390_vcpu_request_handled(vcpu);
3129         if (!kvm_request_pending(vcpu))
3130                 return 0;
3131         /*
3132          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3133          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3134          * This ensures that the ipte instruction for this request has
3135          * already finished. We might race against a second unmapper that
3136          * wants to set the blocking bit. Let's just retry the request loop.
3137          */
3138         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3139                 int rc;
3140                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3141                                           kvm_s390_get_prefix(vcpu),
3142                                           PAGE_SIZE * 2, PROT_WRITE);
3143                 if (rc) {
3144                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3145                         return rc;
3146                 }
3147                 goto retry;
3148         }
3149
3150         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3151                 vcpu->arch.sie_block->ihcpu = 0xffff;
3152                 goto retry;
3153         }
3154
3155         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3156                 if (!ibs_enabled(vcpu)) {
3157                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3158                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3159                 }
3160                 goto retry;
3161         }
3162
3163         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3164                 if (ibs_enabled(vcpu)) {
3165                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3166                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3167                 }
3168                 goto retry;
3169         }
3170
3171         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3172                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3173                 goto retry;
3174         }
3175
3176         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3177                 /*
3178                  * Disable CMM virtualization; we will emulate the ESSA
3179                  * instruction manually, in order to provide additional
3180                  * functionalities needed for live migration.
3181                  */
3182                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3183                 goto retry;
3184         }
3185
3186         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3187                 /*
3188                  * Re-enable CMM virtualization if CMMA is available and
3189                  * CMM has been used.
3190                  */
3191                 if ((vcpu->kvm->arch.use_cmma) &&
3192                     (vcpu->kvm->mm->context.uses_cmm))
3193                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3194                 goto retry;
3195         }
3196
3197         /* nothing to do, just clear the request */
3198         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3199
3200         return 0;
3201 }
3202
3203 void kvm_s390_set_tod_clock(struct kvm *kvm,
3204                             const struct kvm_s390_vm_tod_clock *gtod)
3205 {
3206         struct kvm_vcpu *vcpu;
3207         struct kvm_s390_tod_clock_ext htod;
3208         int i;
3209
3210         mutex_lock(&kvm->lock);
3211         preempt_disable();
3212
3213         get_tod_clock_ext((char *)&htod);
3214
3215         kvm->arch.epoch = gtod->tod - htod.tod;
3216         kvm->arch.epdx = 0;
3217         if (test_kvm_facility(kvm, 139)) {
3218                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3219                 if (kvm->arch.epoch > gtod->tod)
3220                         kvm->arch.epdx -= 1;
3221         }
3222
3223         kvm_s390_vcpu_block_all(kvm);
3224         kvm_for_each_vcpu(i, vcpu, kvm) {
3225                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3226                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3227         }
3228
3229         kvm_s390_vcpu_unblock_all(kvm);
3230         preempt_enable();
3231         mutex_unlock(&kvm->lock);
3232 }
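/*
 * The guest TOD is presented as host TOD + epoch (modulo 2^64); with the
 * multiple-epoch facility (139) epdx carries the high word of a 128-bit
 * extended epoch. The code above therefore performs a 128-bit
 * "guest tod - host tod": when the low-word subtraction wraps around
 * (kvm->arch.epoch > gtod->tod), a borrow is propagated by decrementing
 * the epoch index.
 */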
3233
3234 /**
3235  * kvm_arch_fault_in_page - fault-in guest page if necessary
3236  * @vcpu: The corresponding virtual cpu
3237  * @gpa: Guest physical address
3238  * @writable: Whether the page should be writable or not
3239  *
3240  * Make sure that a guest page has been faulted-in on the host.
3241  *
3242  * Return: Zero on success, negative error code otherwise.
3243  */
3244 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3245 {
3246         return gmap_fault(vcpu->arch.gmap, gpa,
3247                           writable ? FAULT_FLAG_WRITE : 0);
3248 }
3249
3250 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3251                                       unsigned long token)
3252 {
3253         struct kvm_s390_interrupt inti;
3254         struct kvm_s390_irq irq;
3255
3256         if (start_token) {
3257                 irq.u.ext.ext_params2 = token;
3258                 irq.type = KVM_S390_INT_PFAULT_INIT;
3259                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3260         } else {
3261                 inti.type = KVM_S390_INT_PFAULT_DONE;
3262                 inti.parm64 = token;
3263                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3264         }
3265 }
3266
3267 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3268                                      struct kvm_async_pf *work)
3269 {
3270         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3271         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3272 }
3273
3274 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3275                                  struct kvm_async_pf *work)
3276 {
3277         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3278         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3279 }
3280
3281 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3282                                struct kvm_async_pf *work)
3283 {
3284         /* s390 will always inject the page directly */
3285 }
3286
3287 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3288 {
3289         /*
3290          * s390 will always inject the page directly,
3291          * but we still want check_async_completion to clean up
3292          */
3293         return true;
3294 }
3295
3296 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3297 {
3298         hva_t hva;
3299         struct kvm_arch_async_pf arch;
3300         int rc;
3301
3302         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3303                 return 0;
3304         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3305             vcpu->arch.pfault_compare)
3306                 return 0;
3307         if (psw_extint_disabled(vcpu))
3308                 return 0;
3309         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3310                 return 0;
3311         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3312                 return 0;
3313         if (!vcpu->arch.gmap->pfault_enabled)
3314                 return 0;
3315
3316         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3317         hva += current->thread.gmap_addr & ~PAGE_MASK;
3318         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3319                 return 0;
3320
3321         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3322         return rc;
3323 }
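/*
 * The early returns above encode the pfault handshake preconditions: a
 * valid pfault token must be configured, the guest PSW must match the
 * select/compare mask set up by the guest, external interrupts and the
 * service-signal subclass (CR0) must be enabled, no interrupt may already
 * be pending, and pfault must be enabled for the gmap. Only then is an
 * asynchronous page fault set up instead of faulting the page in
 * synchronously.
 */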
3324
3325 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3326 {
3327         int rc, cpuflags;
3328
3329         /*
3330          * On s390 notifications for arriving pages will be delivered directly
3331          * to the guest but the housekeeping for completed pfaults is
3332          * handled outside the worker.
3333          */
3334         kvm_check_async_pf_completion(vcpu);
3335
3336         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3337         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3338
3339         if (need_resched())
3340                 schedule();
3341
3342         if (test_cpu_flag(CIF_MCCK_PENDING))
3343                 s390_handle_mcck();
3344
3345         if (!kvm_is_ucontrol(vcpu->kvm)) {
3346                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3347                 if (rc)
3348                         return rc;
3349         }
3350
3351         rc = kvm_s390_handle_requests(vcpu);
3352         if (rc)
3353                 return rc;
3354
3355         if (guestdbg_enabled(vcpu)) {
3356                 kvm_s390_backup_guest_per_regs(vcpu);
3357                 kvm_s390_patch_guest_per_regs(vcpu);
3358         }
3359
3360         vcpu->arch.sie_block->icptcode = 0;
3361         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3362         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3363         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3364
3365         return 0;
3366 }
3367
3368 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3369 {
3370         struct kvm_s390_pgm_info pgm_info = {
3371                 .code = PGM_ADDRESSING,
3372         };
3373         u8 opcode, ilen;
3374         int rc;
3375
3376         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3377         trace_kvm_s390_sie_fault(vcpu);
3378
3379         /*
3380          * We want to inject an addressing exception, which is defined as a
3381          * suppressing or terminating exception. However, since we came here
3382          * by a DAT access exception, the PSW still points to the faulting
3383          * instruction since DAT exceptions are nullifying. So we've got
3384          * to look up the current opcode to get the length of the instruction
3385          * to be able to forward the PSW.
3386          */
3387         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3388         ilen = insn_length(opcode);
3389         if (rc < 0) {
3390                 return rc;
3391         } else if (rc) {
3392                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3393                  * Forward by arbitrary ilc, injection will take care of
3394                  * nullification if necessary.
3395                  */
3396                 pgm_info = vcpu->arch.pgm;
3397                 ilen = 4;
3398         }
3399         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3400         kvm_s390_forward_psw(vcpu, ilen);
3401         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3402 }
3403
3404 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3405 {
3406         struct mcck_volatile_info *mcck_info;
3407         struct sie_page *sie_page;
3408
3409         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3410                    vcpu->arch.sie_block->icptcode);
3411         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3412
3413         if (guestdbg_enabled(vcpu))
3414                 kvm_s390_restore_guest_per_regs(vcpu);
3415
3416         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3417         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3418
3419         if (exit_reason == -EINTR) {
3420                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3421                 sie_page = container_of(vcpu->arch.sie_block,
3422                                         struct sie_page, sie_block);
3423                 mcck_info = &sie_page->mcck_info;
3424                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3425                 return 0;
3426         }
3427
3428         if (vcpu->arch.sie_block->icptcode > 0) {
3429                 int rc = kvm_handle_sie_intercept(vcpu);
3430
3431                 if (rc != -EOPNOTSUPP)
3432                         return rc;
3433                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3434                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3435                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3436                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3437                 return -EREMOTE;
3438         } else if (exit_reason != -EFAULT) {
3439                 vcpu->stat.exit_null++;
3440                 return 0;
3441         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3442                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3443                 vcpu->run->s390_ucontrol.trans_exc_code =
3444                                                 current->thread.gmap_addr;
3445                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3446                 return -EREMOTE;
3447         } else if (current->thread.gmap_pfault) {
3448                 trace_kvm_s390_major_guest_pfault(vcpu);
3449                 current->thread.gmap_pfault = 0;
3450                 if (kvm_arch_setup_async_pf(vcpu))
3451                         return 0;
3452                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3453         }
3454         return vcpu_post_run_fault_in_sie(vcpu);
3455 }
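/*
 * Return convention of vcpu_post_run(), and thus of the __vcpu_run() loop
 * below: 0 means "handled, re-enter SIE", -EREMOTE means "kvm_run has been
 * prepared, exit to userspace", and any other non-zero value terminates
 * the loop and is passed back to the caller.
 */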
3456
3457 static int __vcpu_run(struct kvm_vcpu *vcpu)
3458 {
3459         int rc, exit_reason;
3460
3461         /*
3462          * We try to hold kvm->srcu during most of vcpu_run (except when
3463          * running the guest), so that memslots (and other stuff) are protected.
3464          */
3465         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3466
3467         do {
3468                 rc = vcpu_pre_run(vcpu);
3469                 if (rc)
3470                         break;
3471
3472                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3473                 /*
3474                  * As PF_VCPU will be used in the fault handler, there must
3475                  * be no uaccess between guest_enter and guest_exit.
3476                  */
3477                 local_irq_disable();
3478                 guest_enter_irqoff();
3479                 __disable_cpu_timer_accounting(vcpu);
3480                 local_irq_enable();
3481                 exit_reason = sie64a(vcpu->arch.sie_block,
3482                                      vcpu->run->s.regs.gprs);
3483                 local_irq_disable();
3484                 __enable_cpu_timer_accounting(vcpu);
3485                 guest_exit_irqoff();
3486                 local_irq_enable();
3487                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3488
3489                 rc = vcpu_post_run(vcpu, exit_reason);
3490         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3491
3492         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3493         return rc;
3494 }
3495
3496 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3497 {
3498         struct runtime_instr_cb *riccb;
3499         struct gs_cb *gscb;
3500
3501         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3502         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3503         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3504         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3505         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3506                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3507         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3508                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3509                 /* some control register changes require a tlb flush */
3510                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3511         }
3512         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3513                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3514                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3515                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3516                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3517                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3518         }
3519         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3520                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3521                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3522                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3523                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3524                         kvm_clear_async_pf_completion_queue(vcpu);
3525         }
3526         /*
3527          * If userspace sets the riccb (e.g. after migration) to a valid state,
3528          * we should enable RI here instead of doing the lazy enablement.
3529          */
3530         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3531             test_kvm_facility(vcpu->kvm, 64) &&
3532             riccb->v &&
3533             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3534                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3535                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3536         }
3537         /*
3538          * If userspace sets the gscb (e.g. after migration) to non-zero,
3539          * we should enable GS here instead of doing the lazy enablement.
3540          */
3541         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3542             test_kvm_facility(vcpu->kvm, 133) &&
3543             gscb->gssm &&
3544             !vcpu->arch.gs_enabled) {
3545                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3546                 vcpu->arch.sie_block->ecb |= ECB_GS;
3547                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3548                 vcpu->arch.gs_enabled = 1;
3549         }
3550         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3551             test_kvm_facility(vcpu->kvm, 82)) {
3552                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3553                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3554         }
3555         save_access_regs(vcpu->arch.host_acrs);
3556         restore_access_regs(vcpu->run->s.regs.acrs);
3557         /* save host (userspace) fprs/vrs */
3558         save_fpu_regs();
3559         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3560         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3561         if (MACHINE_HAS_VX)
3562                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3563         else
3564                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3565         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3566         if (test_fp_ctl(current->thread.fpu.fpc))
3567                 /* User space provided an invalid FPC, let's clear it */
3568                 current->thread.fpu.fpc = 0;
3569         if (MACHINE_HAS_GS) {
3570                 preempt_disable();
3571                 __ctl_set_bit(2, 4);
3572                 if (current->thread.gs_cb) {
3573                         vcpu->arch.host_gscb = current->thread.gs_cb;
3574                         save_gs_cb(vcpu->arch.host_gscb);
3575                 }
3576                 if (vcpu->arch.gs_enabled) {
3577                         current->thread.gs_cb = (struct gs_cb *)
3578                                                 &vcpu->run->s.regs.gscb;
3579                         restore_gs_cb(current->thread.gs_cb);
3580                 }
3581                 preempt_enable();
3582         }
3583         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3584
3585         kvm_run->kvm_dirty_regs = 0;
3586 }
3587
3588 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3589 {
3590         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3591         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3592         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3593         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3594         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3595         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3596         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3597         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3598         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3599         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3600         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3601         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3602         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3603         save_access_regs(vcpu->run->s.regs.acrs);
3604         restore_access_regs(vcpu->arch.host_acrs);
3605         /* Save guest register state */
3606         save_fpu_regs();
3607         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3608         /* Restore will be done lazily at return */
3609         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3610         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3611         if (MACHINE_HAS_GS) {
3612                 __ctl_set_bit(2, 4);
3613                 if (vcpu->arch.gs_enabled)
3614                         save_gs_cb(current->thread.gs_cb);
3615                 preempt_disable();
3616                 current->thread.gs_cb = vcpu->arch.host_gscb;
3617                 restore_gs_cb(vcpu->arch.host_gscb);
3618                 preempt_enable();
3619                 if (!vcpu->arch.host_gscb)
3620                         __ctl_clear_bit(2, 4);
3621                 vcpu->arch.host_gscb = NULL;
3622         }
3623         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3624 }
3625
3626 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3627 {
3628         int rc;
3629
3630         if (kvm_run->immediate_exit)
3631                 return -EINTR;
3632
3633         vcpu_load(vcpu);
3634
3635         if (guestdbg_exit_pending(vcpu)) {
3636                 kvm_s390_prepare_debug_exit(vcpu);
3637                 rc = 0;
3638                 goto out;
3639         }
3640
3641         kvm_sigset_activate(vcpu);
3642
3643         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3644                 kvm_s390_vcpu_start(vcpu);
3645         } else if (is_vcpu_stopped(vcpu)) {
3646                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3647                                    vcpu->vcpu_id);
3648                 rc = -EINVAL;
3649                 goto out;
3650         }
3651
3652         sync_regs(vcpu, kvm_run);
3653         enable_cpu_timer_accounting(vcpu);
3654
3655         might_fault();
3656         rc = __vcpu_run(vcpu);
3657
3658         if (signal_pending(current) && !rc) {
3659                 kvm_run->exit_reason = KVM_EXIT_INTR;
3660                 rc = -EINTR;
3661         }
3662
3663         if (guestdbg_exit_pending(vcpu) && !rc)  {
3664                 kvm_s390_prepare_debug_exit(vcpu);
3665                 rc = 0;
3666         }
3667
3668         if (rc == -EREMOTE) {
3669                 /* userspace support is needed, kvm_run has been prepared */
3670                 rc = 0;
3671         }
3672
3673         disable_cpu_timer_accounting(vcpu);
3674         store_regs(vcpu, kvm_run);
3675
3676         kvm_sigset_deactivate(vcpu);
3677
3678         vcpu->stat.exit_userspace++;
3679 out:
3680         vcpu_put(vcpu);
3681         return rc;
3682 }
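/*
 * Hedged sketch of the matching userspace side, i.e. the generic KVM run
 * loop that ends up in the ioctl handler above (vcpu_fd and run are assumed
 * to be the VCPU file descriptor and its mmap()ed struct kvm_run;
 * handle_sieic() is a placeholder):
 *
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			handle_sieic(run);	(uses icptcode/ipa/ipb set above)
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */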
3683
3684 /*
3685  * store status at address
3686  * we have two special cases:
3687  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3688  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3689  */
3690 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3691 {
3692         unsigned char archmode = 1;
3693         freg_t fprs[NUM_FPRS];
3694         unsigned int px;
3695         u64 clkcomp, cputm;
3696         int rc;
3697
3698         px = kvm_s390_get_prefix(vcpu);
3699         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3700                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3701                         return -EFAULT;
3702                 gpa = 0;
3703         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3704                 if (write_guest_real(vcpu, 163, &archmode, 1))
3705                         return -EFAULT;
3706                 gpa = px;
3707         } else
3708                 gpa -= __LC_FPREGS_SAVE_AREA;
3709
3710         /* manually convert vector registers if necessary */
3711         if (MACHINE_HAS_VX) {
3712                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3713                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3714                                      fprs, 128);
3715         } else {
3716                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3717                                      vcpu->run->s.regs.fprs, 128);
3718         }
3719         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3720                               vcpu->run->s.regs.gprs, 128);
3721         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3722                               &vcpu->arch.sie_block->gpsw, 16);
3723         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3724                               &px, 4);
3725         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3726                               &vcpu->run->s.regs.fpc, 4);
3727         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3728                               &vcpu->arch.sie_block->todpr, 4);
3729         cputm = kvm_s390_get_cpu_timer(vcpu);
3730         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3731                               &cputm, 8);
3732         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3733         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3734                               &clkcomp, 8);
3735         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3736                               &vcpu->run->s.regs.acrs, 64);
3737         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3738                               &vcpu->arch.sie_block->gcr, 128);
3739         return rc ? -EFAULT : 0;
3740 }
3741
3742 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3743 {
3744         /*
3745          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3746          * switch in the run ioctl. Let's update our copies before we save
3747          * it into the save area
3748          */
3749         save_fpu_regs();
3750         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3751         save_access_regs(vcpu->run->s.regs.acrs);
3752
3753         return kvm_s390_store_status_unloaded(vcpu, addr);
3754 }
3755
3756 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3757 {
3758         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3759         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3760 }
3761
3762 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3763 {
3764         unsigned int i;
3765         struct kvm_vcpu *vcpu;
3766
3767         kvm_for_each_vcpu(i, vcpu, kvm) {
3768                 __disable_ibs_on_vcpu(vcpu);
3769         }
3770 }
3771
3772 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3773 {
3774         if (!sclp.has_ibs)
3775                 return;
3776         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3777         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3778 }
3779
3780 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3781 {
3782         int i, online_vcpus, started_vcpus = 0;
3783
3784         if (!is_vcpu_stopped(vcpu))
3785                 return;
3786
3787         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3788         /* Only one cpu at a time may enter/leave the STOPPED state. */
3789         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3790         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3791
3792         for (i = 0; i < online_vcpus; i++) {
3793                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3794                         started_vcpus++;
3795         }
3796
3797         if (started_vcpus == 0) {
3798                 /* we're the only active VCPU -> speed it up */
3799                 __enable_ibs_on_vcpu(vcpu);
3800         } else if (started_vcpus == 1) {
3801                 /*
3802                  * As we are starting a second VCPU, we have to disable
3803                  * the IBS facility on all VCPUs to remove potentially
3804                  * outstanding ENABLE requests.
3805                  */
3806                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3807         }
3808
3809         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3810         /*
3811          * Another VCPU might have used IBS while we were offline.
3812          * Let's play safe and flush the VCPU at startup.
3813          */
3814         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3815         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3816         return;
3817 }
3818
3819 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3820 {
3821         int i, online_vcpus, started_vcpus = 0;
3822         struct kvm_vcpu *started_vcpu = NULL;
3823
3824         if (is_vcpu_stopped(vcpu))
3825                 return;
3826
3827         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3828         /* Only one cpu at a time may enter/leave the STOPPED state. */
3829         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3830         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3831
3832         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3833         kvm_s390_clear_stop_irq(vcpu);
3834
3835         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3836         __disable_ibs_on_vcpu(vcpu);
3837
3838         for (i = 0; i < online_vcpus; i++) {
3839                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3840                         started_vcpus++;
3841                         started_vcpu = vcpu->kvm->vcpus[i];
3842                 }
3843         }
3844
3845         if (started_vcpus == 1) {
3846                 /*
3847                  * As we only have one VCPU left, we want to enable the
3848                  * IBS facility for that VCPU to speed it up.
3849                  */
3850                 __enable_ibs_on_vcpu(started_vcpu);
3851         }
3852
3853         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3854         return;
3855 }
3856
3857 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3858                                      struct kvm_enable_cap *cap)
3859 {
3860         int r;
3861
3862         if (cap->flags)
3863                 return -EINVAL;
3864
3865         switch (cap->cap) {
3866         case KVM_CAP_S390_CSS_SUPPORT:
3867                 if (!vcpu->kvm->arch.css_support) {
3868                         vcpu->kvm->arch.css_support = 1;
3869                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3870                         trace_kvm_s390_enable_css(vcpu->kvm);
3871                 }
3872                 r = 0;
3873                 break;
3874         default:
3875                 r = -EINVAL;
3876                 break;
3877         }
3878         return r;
3879 }
3880
3881 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3882                                   struct kvm_s390_mem_op *mop)
3883 {
3884         void __user *uaddr = (void __user *)mop->buf;
3885         void *tmpbuf = NULL;
3886         int r, srcu_idx;
3887         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3888                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3889
3890         if (mop->flags & ~supported_flags)
3891                 return -EINVAL;
3892
3893         if (mop->size > MEM_OP_MAX_SIZE)
3894                 return -E2BIG;
3895
3896         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3897                 tmpbuf = vmalloc(mop->size);
3898                 if (!tmpbuf)
3899                         return -ENOMEM;
3900         }
3901
3902         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3903
3904         switch (mop->op) {
3905         case KVM_S390_MEMOP_LOGICAL_READ:
3906                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3907                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3908                                             mop->size, GACC_FETCH);
3909                         break;
3910                 }
3911                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3912                 if (r == 0) {
3913                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3914                                 r = -EFAULT;
3915                 }
3916                 break;
3917         case KVM_S390_MEMOP_LOGICAL_WRITE:
3918                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3919                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3920                                             mop->size, GACC_STORE);
3921                         break;
3922                 }
3923                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3924                         r = -EFAULT;
3925                         break;
3926                 }
3927                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3928                 break;
3929         default:
3930                 r = -EINVAL;
3931         }
3932
3933         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3934
3935         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3936                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3937
3938         vfree(tmpbuf);
3939         return r;
3940 }
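/*
 * Hedged usage sketch for the KVM_S390_MEM_OP ioctl handled above
 * (vcpu_fd, guest_addr, buf and len are assumed):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY in .flags only the access check is done
 * and no data is copied; with KVM_S390_MEMOP_F_INJECT_EXCEPTION a guest
 * access error additionally injects the resulting program interruption.
 */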
3941
3942 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3943                                unsigned int ioctl, unsigned long arg)
3944 {
3945         struct kvm_vcpu *vcpu = filp->private_data;
3946         void __user *argp = (void __user *)arg;
3947
3948         switch (ioctl) {
3949         case KVM_S390_IRQ: {
3950                 struct kvm_s390_irq s390irq;
3951
3952                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3953                         return -EFAULT;
3954                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3955         }
3956         case KVM_S390_INTERRUPT: {
3957                 struct kvm_s390_interrupt s390int;
3958                 struct kvm_s390_irq s390irq;
3959
3960                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3961                         return -EFAULT;
3962                 if (s390int_to_s390irq(&s390int, &s390irq))
3963                         return -EINVAL;
3964                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3965         }
3966         }
3967         return -ENOIOCTLCMD;
3968 }
3969
3970 long kvm_arch_vcpu_ioctl(struct file *filp,
3971                          unsigned int ioctl, unsigned long arg)
3972 {
3973         struct kvm_vcpu *vcpu = filp->private_data;
3974         void __user *argp = (void __user *)arg;
3975         int idx;
3976         long r;
3977
3978         vcpu_load(vcpu);
3979
3980         switch (ioctl) {
3981         case KVM_S390_STORE_STATUS:
3982                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3983                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3984                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3985                 break;
3986         case KVM_S390_SET_INITIAL_PSW: {
3987                 psw_t psw;
3988
3989                 r = -EFAULT;
3990                 if (copy_from_user(&psw, argp, sizeof(psw)))
3991                         break;
3992                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3993                 break;
3994         }
3995         case KVM_S390_INITIAL_RESET:
3996                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3997                 break;
3998         case KVM_SET_ONE_REG:
3999         case KVM_GET_ONE_REG: {
4000                 struct kvm_one_reg reg;
4001                 r = -EFAULT;
4002                 if (copy_from_user(&reg, argp, sizeof(reg)))
4003                         break;
4004                 if (ioctl == KVM_SET_ONE_REG)
4005                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4006                 else
4007                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4008                 break;
4009         }
4010 #ifdef CONFIG_KVM_S390_UCONTROL
4011         case KVM_S390_UCAS_MAP: {
4012                 struct kvm_s390_ucas_mapping ucasmap;
4013
4014                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4015                         r = -EFAULT;
4016                         break;
4017                 }
4018
4019                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4020                         r = -EINVAL;
4021                         break;
4022                 }
4023
4024                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4025                                      ucasmap.vcpu_addr, ucasmap.length);
4026                 break;
4027         }
4028         case KVM_S390_UCAS_UNMAP: {
4029                 struct kvm_s390_ucas_mapping ucasmap;
4030
4031                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4032                         r = -EFAULT;
4033                         break;
4034                 }
4035
4036                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4037                         r = -EINVAL;
4038                         break;
4039                 }
4040
4041                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4042                         ucasmap.length);
4043                 break;
4044         }
4045 #endif
4046         case KVM_S390_VCPU_FAULT: {
4047                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4048                 break;
4049         }
4050         case KVM_ENABLE_CAP:
4051         {
4052                 struct kvm_enable_cap cap;
4053                 r = -EFAULT;
4054                 if (copy_from_user(&cap, argp, sizeof(cap)))
4055                         break;
4056                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4057                 break;
4058         }
4059         case KVM_S390_MEM_OP: {
4060                 struct kvm_s390_mem_op mem_op;
4061
4062                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4063                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4064                 else
4065                         r = -EFAULT;
4066                 break;
4067         }
4068         case KVM_S390_SET_IRQ_STATE: {
4069                 struct kvm_s390_irq_state irq_state;
4070
4071                 r = -EFAULT;
4072                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4073                         break;
4074                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4075                     irq_state.len == 0 ||
4076                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4077                         r = -EINVAL;
4078                         break;
4079                 }
4080                 /* do not use irq_state.flags, it will break old QEMUs */
4081                 r = kvm_s390_set_irq_state(vcpu,
4082                                            (void __user *) irq_state.buf,
4083                                            irq_state.len);
4084                 break;
4085         }
4086         case KVM_S390_GET_IRQ_STATE: {
4087                 struct kvm_s390_irq_state irq_state;
4088
4089                 r = -EFAULT;
4090                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4091                         break;
4092                 if (irq_state.len == 0) {
4093                         r = -EINVAL;
4094                         break;
4095                 }
4096                 /* do not use irq_state.flags, it will break old QEMUs */
4097                 r = kvm_s390_get_irq_state(vcpu,
4098                                            (__u8 __user *)  irq_state.buf,
4099                                            irq_state.len);
4100                 break;
4101         }
4102         default:
4103                 r = -ENOTTY;
4104         }
4105
4106         vcpu_put(vcpu);
4107         return r;
4108 }
4109
4110 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4111 {
4112 #ifdef CONFIG_KVM_S390_UCONTROL
4113         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4114                  && (kvm_is_ucontrol(vcpu->kvm))) {
4115                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4116                 get_page(vmf->page);
4117                 return 0;
4118         }
4119 #endif
4120         return VM_FAULT_SIGBUS;
4121 }
4122
4123 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4124                             unsigned long npages)
4125 {
4126         return 0;
4127 }
4128
4129 /* Section: memory related */
4130 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4131                                    struct kvm_memory_slot *memslot,
4132                                    const struct kvm_userspace_memory_region *mem,
4133                                    enum kvm_mr_change change)
4134 {
4135         /* A few sanity checks. Memory slots have to start and end on a
4136            segment boundary (1 MB). The memory in userland may be fragmented
4137            into various different vmas. It is okay to mmap() and munmap()
4138            stuff in this slot at any time after this call. */
4139
4140         if (mem->userspace_addr & 0xffffful)
4141                 return -EINVAL;
4142
4143         if (mem->memory_size & 0xffffful)
4144                 return -EINVAL;
4145
4146         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4147                 return -EINVAL;
4148
4149         return 0;
4150 }
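/*
 * Example of the alignment rule enforced above: 0xffffful masks the low
 * 20 bits, so both userspace_addr and memory_size must be 1 MB aligned
 * (the s390 segment size). A slot of 0x100000 bytes at userspace address
 * 0x200000 is accepted, while a size of 0x180000 bytes would be rejected
 * with -EINVAL.
 */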
4151
4152 void kvm_arch_commit_memory_region(struct kvm *kvm,
4153                                 const struct kvm_userspace_memory_region *mem,
4154                                 const struct kvm_memory_slot *old,
4155                                 const struct kvm_memory_slot *new,
4156                                 enum kvm_mr_change change)
4157 {
4158         int rc;
4159
4160         /* If the basics of the memslot do not change, we do not want
4161          * to update the gmap. Every update causes several unnecessary
4162          * segment translation exceptions. This is usually handled just
4163          * fine by the normal fault handler + gmap, but it will also
4164          * cause faults on the prefix page of running guest CPUs.
4165          */
4166         if (old->userspace_addr == mem->userspace_addr &&
4167             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4168             old->npages * PAGE_SIZE == mem->memory_size)
4169                 return;
4170
4171         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4172                 mem->guest_phys_addr, mem->memory_size);
4173         if (rc)
4174                 pr_warn("failed to commit memory region\n");
4175         return;
4176 }
4177
4178 static inline unsigned long nonhyp_mask(int i)
4179 {
4180         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4181
4182         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4183 }
4184
4185 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4186 {
4187         vcpu->valid_wakeup = false;
4188 }
4189
4190 static int __init kvm_s390_init(void)
4191 {
4192         int i;
4193
4194         if (!sclp.has_sief2) {
4195                 pr_info("SIE not available\n");
4196                 return -ENODEV;
4197         }
4198
4199         if (nested && hpage) {
4200                 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
4201                 return -EINVAL;
4202         }
4203
4204         for (i = 0; i < 16; i++)
4205                 kvm_s390_fac_base[i] |=
4206                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4207
4208         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4209 }
4210
4211 static void __exit kvm_s390_exit(void)
4212 {
4213         kvm_exit();
4214 }
4215
4216 module_init(kvm_s390_init);
4217 module_exit(kvm_s390_exit);
4218
4219 /*
4220  * Enable autoloading of the kvm module.
4221  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4222  * since x86 takes a different approach.
4223  */
4224 #include <linux/miscdevice.h>
4225 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4226 MODULE_ALIAS("devname:kvm");