arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2018
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34
  35 #include <asm/asm-offsets.h>
  36 #include <asm/lowcore.h>
  37 #include <asm/stp.h>
  38 #include <asm/pgtable.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include "kvm-s390.h"
  48 #include "gaccess.h"
  49
  50 #define CREATE_TRACE_POINTS
  51 #include "trace.h"
  52 #include "trace-s390.h"
  53
  54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  55 #define LOCAL_IRQS 32
  56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
  59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
  61
  62 struct kvm_stats_debugfs_item debugfs_entries[] = {
  63         { "userspace_handled", VCPU_STAT(exit_userspace) },
  64         { "exit_null", VCPU_STAT(exit_null) },
  65         { "exit_validity", VCPU_STAT(exit_validity) },
  66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  67         { "exit_external_request", VCPU_STAT(exit_external_request) },
  68         { "exit_io_request", VCPU_STAT(exit_io_request) },
  69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  70         { "exit_instruction", VCPU_STAT(exit_instruction) },
  71         { "exit_pei", VCPU_STAT(exit_pei) },
  72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
  79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
  85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
  86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
  90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  93         { "deliver_program", VCPU_STAT(deliver_program) },
  94         { "deliver_io", VCPU_STAT(deliver_io) },
  95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
  96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  97         { "inject_ckc", VCPU_STAT(inject_ckc) },
  98         { "inject_cputm", VCPU_STAT(inject_cputm) },
  99         { "inject_external_call", VCPU_STAT(inject_external_call) },
 100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
 101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
 102         { "inject_io", VM_STAT(inject_io) },
 103         { "inject_mchk", VCPU_STAT(inject_mchk) },
 104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
 105         { "inject_program", VCPU_STAT(inject_program) },
 106         { "inject_restart", VCPU_STAT(inject_restart) },
 107         { "inject_service_signal", VM_STAT(inject_service_signal) },
 108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
 109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
 110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
 111         { "inject_virtio", VM_STAT(inject_virtio) },
 112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
 113         { "instruction_gs", VCPU_STAT(instruction_gs) },
 114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
 115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
 116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
 117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
 119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
 120         { "instruction_sck", VCPU_STAT(instruction_sck) },
 121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 122         { "instruction_spx", VCPU_STAT(instruction_spx) },
 123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
 124         { "instruction_stap", VCPU_STAT(instruction_stap) },
 125         { "instruction_iske", VCPU_STAT(instruction_iske) },
 126         { "instruction_ri", VCPU_STAT(instruction_ri) },
 127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 128         { "instruction_sske", VCPU_STAT(instruction_sske) },
 129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 130         { "instruction_essa", VCPU_STAT(instruction_essa) },
 131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 133         { "instruction_tb", VCPU_STAT(instruction_tb) },
 134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
 135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
 137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 138         { "instruction_sie", VCPU_STAT(instruction_sie) },
 139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
 156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
 157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 158         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
 159         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
 160         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
 161         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
 162         { NULL }
 163 };
 164
 165 struct kvm_s390_tod_clock_ext {
 166         __u8 epoch_idx;
 167         __u64 tod;
 168         __u8 reserved[7];
 169 } __packed;
 170
 171 /* allow nested virtualization in KVM (if enabled by user space) */
 172 static int nested;
 173 module_param(nested, int, S_IRUGO);
 174 MODULE_PARM_DESC(nested, "Nested virtualization support");
 175
 176 /* allow 1m huge page guest backing, if !nested */
 177 static int hpage;
 178 module_param(hpage, int, 0444);
 179 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 180
 181 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
 182 static u8 halt_poll_max_steal = 10;
 183 module_param(halt_poll_max_steal, byte, 0644);
 184 MODULE_PARM_DESC(hpage, "Maximum percentage of steal time to allow polling");
 185
 186 /*
 187  * For now we handle at most 16 double words as this is what the s390 base
 188  * kernel handles and stores in the prefix page. If we ever need to go beyond
 189  * this, this requires changes to code, but the external uapi can stay.
 190  */
 191 #define SIZE_INTERNAL 16
 192
 193 /*
 194  * Base feature mask that defines default mask for facilities. Consists of the
 195  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 196  */
 197 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 198 /*
 199  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 200  * and defines the facilities that can be enabled via a cpu model.
 201  */
 202 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 203
 204 static unsigned long kvm_s390_fac_size(void)
 205 {
 206         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 208         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 209                 sizeof(S390_lowcore.stfle_fac_list));
 210
 211         return SIZE_INTERNAL;
 212 }
 213
 214 /* available cpu features supported by kvm */
 215 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 216 /* available subfunctions indicated via query / "test bit" */
 217 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 218
 219 static struct gmap_notifier gmap_notifier;
 220 static struct gmap_notifier vsie_gmap_notifier;
 221 debug_info_t *kvm_s390_dbf;
 222
 223 /* Section: not file related */
 224 int kvm_arch_hardware_enable(void)
 225 {
 226         /* every s390 is virtualization enabled ;-) */
 227         return 0;
 228 }
 229
 230 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 231                               unsigned long end);
 232
 233 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 234 {
 235         u8 delta_idx = 0;
 236
 237         /*
 238          * The TOD jumps by delta, we have to compensate this by adding
 239          * -delta to the epoch.
 240          */
 241         delta = -delta;
 242
 243         /* sign-extension - we're adding to signed values below */
 244         if ((s64)delta < 0)
 245                 delta_idx = -1;
 246
 247         scb->epoch += delta;
 248         if (scb->ecd & ECD_MEF) {
 249                 scb->epdx += delta_idx;
 250                 if (scb->epoch < delta)
 251                         scb->epdx += 1;
 252         }
 253 }
 254
 255 /*
 256  * This callback is executed during stop_machine(). All CPUs are therefore
 257  * temporarily stopped. In order not to change guest behavior, we have to
 258  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 259  * so a CPU won't be stopped while calculating with the epoch.
 260  */
 261 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 262                           void *v)
 263 {
 264         struct kvm *kvm;
 265         struct kvm_vcpu *vcpu;
 266         int i;
 267         unsigned long long *delta = v;
 268
 269         list_for_each_entry(kvm, &vm_list, vm_list) {
 270                 kvm_for_each_vcpu(i, vcpu, kvm) {
 271                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 272                         if (i == 0) {
 273                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 274                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 275                         }
 276                         if (vcpu->arch.cputm_enabled)
 277                                 vcpu->arch.cputm_start += *delta;
 278                         if (vcpu->arch.vsie_block)
 279                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 280                                                    *delta);
 281                 }
 282         }
 283         return NOTIFY_OK;
 284 }
 285
 286 static struct notifier_block kvm_clock_notifier = {
 287         .notifier_call = kvm_clock_sync,
 288 };
 289
 290 int kvm_arch_hardware_setup(void)
 291 {
 292         gmap_notifier.notifier_call = kvm_gmap_notifier;
 293         gmap_register_pte_notifier(&gmap_notifier);
 294         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 295         gmap_register_pte_notifier(&vsie_gmap_notifier);
 296         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 297                                        &kvm_clock_notifier);
 298         return 0;
 299 }
 300
 301 void kvm_arch_hardware_unsetup(void)
 302 {
 303         gmap_unregister_pte_notifier(&gmap_notifier);
 304         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 305         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 306                                          &kvm_clock_notifier);
 307 }
 308
 309 static void allow_cpu_feat(unsigned long nr)
 310 {
 311         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 312 }
 313
 314 static inline int plo_test_bit(unsigned char nr)
 315 {
 316         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 317         int cc;
 318
 319         asm volatile(
 320                 /* Parameter registers are ignored for "test bit" */
 321                 "       plo     0,0,0,0(0)\n"
 322                 "       ipm     %0\n"
 323                 "       srl     %0,28\n"
 324                 : "=d" (cc)
 325                 : "d" (r0)
 326                 : "cc");
 327         return cc == 0;
 328 }
 329
 330 static inline void __insn32_query(unsigned int opcode, u8 query[32])
 331 {
 332         register unsigned long r0 asm("0") = 0; /* query function */
 333         register unsigned long r1 asm("1") = (unsigned long) query;
 334
 335         asm volatile(
 336                 /* Parameter regs are ignored */
 337                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 338                 : "=m" (*query)
 339                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
 340                 : "cc");
 341 }
 342
 343 #define INSN_SORTL 0xb938
 344 #define INSN_DFLTCC 0xb939
 345
 346 static void kvm_s390_cpu_feat_init(void)
 347 {
 348         int i;
 349
 350         for (i = 0; i < 256; ++i) {
 351                 if (plo_test_bit(i))
 352                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 353         }
 354
 355         if (test_facility(28)) /* TOD-clock steering */
 356                 ptff(kvm_s390_available_subfunc.ptff,
 357                      sizeof(kvm_s390_available_subfunc.ptff),
 358                      PTFF_QAF);
 359
 360         if (test_facility(17)) { /* MSA */
 361                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 362                               kvm_s390_available_subfunc.kmac);
 363                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 364                               kvm_s390_available_subfunc.kmc);
 365                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 366                               kvm_s390_available_subfunc.km);
 367                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 368                               kvm_s390_available_subfunc.kimd);
 369                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 370                               kvm_s390_available_subfunc.klmd);
 371         }
 372         if (test_facility(76)) /* MSA3 */
 373                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 374                               kvm_s390_available_subfunc.pckmo);
 375         if (test_facility(77)) { /* MSA4 */
 376                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 377                               kvm_s390_available_subfunc.kmctr);
 378                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 379                               kvm_s390_available_subfunc.kmf);
 380                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 381                               kvm_s390_available_subfunc.kmo);
 382                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 383                               kvm_s390_available_subfunc.pcc);
 384         }
 385         if (test_facility(57)) /* MSA5 */
 386                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 387                               kvm_s390_available_subfunc.ppno);
 388
 389         if (test_facility(146)) /* MSA8 */
 390                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 391                               kvm_s390_available_subfunc.kma);
 392
 393         if (test_facility(155)) /* MSA9 */
 394                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 395                               kvm_s390_available_subfunc.kdsa);
 396
 397         if (test_facility(150)) /* SORTL */
 398                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 399
 400         if (test_facility(151)) /* DFLTCC */
 401                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 402
 403         if (MACHINE_HAS_ESOP)
 404                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 405         /*
 406          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 407          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 408          */
 409         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 410             !test_facility(3) || !nested)
 411                 return;
 412         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 413         if (sclp.has_64bscao)
 414                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 415         if (sclp.has_siif)
 416                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 417         if (sclp.has_gpere)
 418                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 419         if (sclp.has_gsls)
 420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 421         if (sclp.has_ib)
 422                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 423         if (sclp.has_cei)
 424                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 425         if (sclp.has_ibs)
 426                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 427         if (sclp.has_kss)
 428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 429         /*
 430          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 431          * all skey handling functions read/set the skey from the PGSTE
 432          * instead of the real storage key.
 433          *
 434          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 435          * pages being detected as preserved although they are resident.
 436          *
 437          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 438          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 439          *
 440          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 441          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 442          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 443          *
 444          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 445          * cannot easily shadow the SCA because of the ipte lock.
 446          */
 447 }
 448
 449 int kvm_arch_init(void *opaque)
 450 {
 451         int rc;
 452
 453         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 454         if (!kvm_s390_dbf)
 455                 return -ENOMEM;
 456
 457         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 458                 rc = -ENOMEM;
 459                 goto out_debug_unreg;
 460         }
 461
 462         kvm_s390_cpu_feat_init();
 463
 464         /* Register floating interrupt controller interface. */
 465         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 466         if (rc) {
 467                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 468                 goto out_debug_unreg;
 469         }
 470
 471         rc = kvm_s390_gib_init(GAL_ISC);
 472         if (rc)
 473                 goto out_gib_destroy;
 474
 475         return 0;
 476
 477 out_gib_destroy:
 478         kvm_s390_gib_destroy();
 479 out_debug_unreg:
 480         debug_unregister(kvm_s390_dbf);
 481         return rc;
 482 }
 483
 484 void kvm_arch_exit(void)
 485 {
 486         kvm_s390_gib_destroy();
 487         debug_unregister(kvm_s390_dbf);
 488 }
 489
 490 /* Section: device related */
 491 long kvm_arch_dev_ioctl(struct file *filp,
 492                         unsigned int ioctl, unsigned long arg)
 493 {
 494         if (ioctl == KVM_S390_ENABLE_SIE)
 495                 return s390_enable_sie();
 496         return -EINVAL;
 497 }
 498
 499 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 500 {
 501         int r;
 502
 503         switch (ext) {
 504         case KVM_CAP_S390_PSW:
 505         case KVM_CAP_S390_GMAP:
 506         case KVM_CAP_SYNC_MMU:
 507 #ifdef CONFIG_KVM_S390_UCONTROL
 508         case KVM_CAP_S390_UCONTROL:
 509 #endif
 510         case KVM_CAP_ASYNC_PF:
 511         case KVM_CAP_SYNC_REGS:
 512         case KVM_CAP_ONE_REG:
 513         case KVM_CAP_ENABLE_CAP:
 514         case KVM_CAP_S390_CSS_SUPPORT:
 515         case KVM_CAP_IOEVENTFD:
 516         case KVM_CAP_DEVICE_CTRL:
 517         case KVM_CAP_S390_IRQCHIP:
 518         case KVM_CAP_VM_ATTRIBUTES:
 519         case KVM_CAP_MP_STATE:
 520         case KVM_CAP_IMMEDIATE_EXIT:
 521         case KVM_CAP_S390_INJECT_IRQ:
 522         case KVM_CAP_S390_USER_SIGP:
 523         case KVM_CAP_S390_USER_STSI:
 524         case KVM_CAP_S390_SKEYS:
 525         case KVM_CAP_S390_IRQ_STATE:
 526         case KVM_CAP_S390_USER_INSTR0:
 527         case KVM_CAP_S390_CMMA_MIGRATION:
 528         case KVM_CAP_S390_AIS:
 529         case KVM_CAP_S390_AIS_MIGRATION:
 530                 r = 1;
 531                 break;
 532         case KVM_CAP_S390_HPAGE_1M:
 533                 r = 0;
 534                 if (hpage && !kvm_is_ucontrol(kvm))
 535                         r = 1;
 536                 break;
 537         case KVM_CAP_S390_MEM_OP:
 538                 r = MEM_OP_MAX_SIZE;
 539                 break;
 540         case KVM_CAP_NR_VCPUS:
 541         case KVM_CAP_MAX_VCPUS:
 542                 r = KVM_S390_BSCA_CPU_SLOTS;
 543                 if (!kvm_s390_use_sca_entries())
 544                         r = KVM_MAX_VCPUS;
 545                 else if (sclp.has_esca && sclp.has_64bscao)
 546                         r = KVM_S390_ESCA_CPU_SLOTS;
 547                 break;
 548         case KVM_CAP_S390_COW:
 549                 r = MACHINE_HAS_ESOP;
 550                 break;
 551         case KVM_CAP_S390_VECTOR_REGISTERS:
 552                 r = MACHINE_HAS_VX;
 553                 break;
 554         case KVM_CAP_S390_RI:
 555                 r = test_facility(64);
 556                 break;
 557         case KVM_CAP_S390_GS:
 558                 r = test_facility(133);
 559                 break;
 560         case KVM_CAP_S390_BPB:
 561                 r = test_facility(82);
 562                 break;
 563         default:
 564                 r = 0;
 565         }
 566         return r;
 567 }
 568
 569 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 570                                     struct kvm_memory_slot *memslot)
 571 {
 572         int i;
 573         gfn_t cur_gfn, last_gfn;
 574         unsigned long gaddr, vmaddr;
 575         struct gmap *gmap = kvm->arch.gmap;
 576         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 577
 578         /* Loop over all guest segments */
 579         cur_gfn = memslot->base_gfn;
 580         last_gfn = memslot->base_gfn + memslot->npages;
 581         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 582                 gaddr = gfn_to_gpa(cur_gfn);
 583                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 584                 if (kvm_is_error_hva(vmaddr))
 585                         continue;
 586
 587                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 588                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 589                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 590                         if (test_bit(i, bitmap))
 591                                 mark_page_dirty(kvm, cur_gfn + i);
 592                 }
 593
 594                 if (fatal_signal_pending(current))
 595                         return;
 596                 cond_resched();
 597         }
 598 }
 599
 600 /* Section: vm related */
 601 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 602
 603 /*
 604  * Get (and clear) the dirty memory log for a memory slot.
 605  */
 606 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 607                                struct kvm_dirty_log *log)
 608 {
 609         int r;
 610         unsigned long n;
 611         struct kvm_memslots *slots;
 612         struct kvm_memory_slot *memslot;
 613         int is_dirty = 0;
 614
 615         if (kvm_is_ucontrol(kvm))
 616                 return -EINVAL;
 617
 618         mutex_lock(&kvm->slots_lock);
 619
 620         r = -EINVAL;
 621         if (log->slot >= KVM_USER_MEM_SLOTS)
 622                 goto out;
 623
 624         slots = kvm_memslots(kvm);
 625         memslot = id_to_memslot(slots, log->slot);
 626         r = -ENOENT;
 627         if (!memslot->dirty_bitmap)
 628                 goto out;
 629
 630         kvm_s390_sync_dirty_log(kvm, memslot);
 631         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 632         if (r)
 633                 goto out;
 634
 635         /* Clear the dirty log */
 636         if (is_dirty) {
 637                 n = kvm_dirty_bitmap_bytes(memslot);
 638                 memset(memslot->dirty_bitmap, 0, n);
 639         }
 640         r = 0;
 641 out:
 642         mutex_unlock(&kvm->slots_lock);
 643         return r;
 644 }
 645
 646 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 647 {
 648         unsigned int i;
 649         struct kvm_vcpu *vcpu;
 650
 651         kvm_for_each_vcpu(i, vcpu, kvm) {
 652                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 653         }
 654 }
 655
 656 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 657 {
 658         int r;
 659
 660         if (cap->flags)
 661                 return -EINVAL;
 662
 663         switch (cap->cap) {
 664         case KVM_CAP_S390_IRQCHIP:
 665                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 666                 kvm->arch.use_irqchip = 1;
 667                 r = 0;
 668                 break;
 669         case KVM_CAP_S390_USER_SIGP:
 670                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 671                 kvm->arch.user_sigp = 1;
 672                 r = 0;
 673                 break;
 674         case KVM_CAP_S390_VECTOR_REGISTERS:
 675                 mutex_lock(&kvm->lock);
 676                 if (kvm->created_vcpus) {
 677                         r = -EBUSY;
 678                 } else if (MACHINE_HAS_VX) {
 679                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 680                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 681                         if (test_facility(134)) {
 682                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 683                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 684                         }
 685                         if (test_facility(135)) {
 686                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 687                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 688                         }
 689                         if (test_facility(148)) {
 690                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 691                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 692                         }
 693                         if (test_facility(152)) {
 694                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 695                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 696                         }
 697                         r = 0;
 698                 } else
 699                         r = -EINVAL;
 700                 mutex_unlock(&kvm->lock);
 701                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 702                          r ? "(not available)" : "(success)");
 703                 break;
 704         case KVM_CAP_S390_RI:
 705                 r = -EINVAL;
 706                 mutex_lock(&kvm->lock);
 707                 if (kvm->created_vcpus) {
 708                         r = -EBUSY;
 709                 } else if (test_facility(64)) {
 710                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 711                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 712                         r = 0;
 713                 }
 714                 mutex_unlock(&kvm->lock);
 715                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 716                          r ? "(not available)" : "(success)");
 717                 break;
 718         case KVM_CAP_S390_AIS:
 719                 mutex_lock(&kvm->lock);
 720                 if (kvm->created_vcpus) {
 721                         r = -EBUSY;
 722                 } else {
 723                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 724                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 725                         r = 0;
 726                 }
 727                 mutex_unlock(&kvm->lock);
 728                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 729                          r ? "(not available)" : "(success)");
 730                 break;
 731         case KVM_CAP_S390_GS:
 732                 r = -EINVAL;
 733                 mutex_lock(&kvm->lock);
 734                 if (kvm->created_vcpus) {
 735                         r = -EBUSY;
 736                 } else if (test_facility(133)) {
 737                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 738                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 739                         r = 0;
 740                 }
 741                 mutex_unlock(&kvm->lock);
 742                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 743                          r ? "(not available)" : "(success)");
 744                 break;
 745         case KVM_CAP_S390_HPAGE_1M:
 746                 mutex_lock(&kvm->lock);
 747                 if (kvm->created_vcpus)
 748                         r = -EBUSY;
 749                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 750                         r = -EINVAL;
 751                 else {
 752                         r = 0;
 753                         down_write(&kvm->mm->mmap_sem);
 754                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 755                         up_write(&kvm->mm->mmap_sem);
 756                         /*
 757                          * We might have to create fake 4k page
 758                          * tables. To avoid that the hardware works on
 759                          * stale PGSTEs, we emulate these instructions.
 760                          */
 761                         kvm->arch.use_skf = 0;
 762                         kvm->arch.use_pfmfi = 0;
 763                 }
 764                 mutex_unlock(&kvm->lock);
 765                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 766                          r ? "(not available)" : "(success)");
 767                 break;
 768         case KVM_CAP_S390_USER_STSI:
 769                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 770                 kvm->arch.user_stsi = 1;
 771                 r = 0;
 772                 break;
 773         case KVM_CAP_S390_USER_INSTR0:
 774                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 775                 kvm->arch.user_instr0 = 1;
 776                 icpt_operexc_on_all_vcpus(kvm);
 777                 r = 0;
 778                 break;
 779         default:
 780                 r = -EINVAL;
 781                 break;
 782         }
 783         return r;
 784 }
 785
 786 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 787 {
 788         int ret;
 789
 790         switch (attr->attr) {
 791         case KVM_S390_VM_MEM_LIMIT_SIZE:
 792                 ret = 0;
 793                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 794                          kvm->arch.mem_limit);
 795                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 796                         ret = -EFAULT;
 797                 break;
 798         default:
 799                 ret = -ENXIO;
 800                 break;
 801         }
 802         return ret;
 803 }
 804
 805 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 806 {
 807         int ret;
 808         unsigned int idx;
 809         switch (attr->attr) {
 810         case KVM_S390_VM_MEM_ENABLE_CMMA:
 811                 ret = -ENXIO;
 812                 if (!sclp.has_cmma)
 813                         break;
 814
 815                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 816                 mutex_lock(&kvm->lock);
 817                 if (kvm->created_vcpus)
 818                         ret = -EBUSY;
 819                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 820                         ret = -EINVAL;
 821                 else {
 822                         kvm->arch.use_cmma = 1;
 823                         /* Not compatible with cmma. */
 824                         kvm->arch.use_pfmfi = 0;
 825                         ret = 0;
 826                 }
 827                 mutex_unlock(&kvm->lock);
 828                 break;
 829         case KVM_S390_VM_MEM_CLR_CMMA:
 830                 ret = -ENXIO;
 831                 if (!sclp.has_cmma)
 832                         break;
 833                 ret = -EINVAL;
 834                 if (!kvm->arch.use_cmma)
 835                         break;
 836
 837                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 838                 mutex_lock(&kvm->lock);
 839                 idx = srcu_read_lock(&kvm->srcu);
 840                 s390_reset_cmma(kvm->arch.gmap->mm);
 841                 srcu_read_unlock(&kvm->srcu, idx);
 842                 mutex_unlock(&kvm->lock);
 843                 ret = 0;
 844                 break;
 845         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 846                 unsigned long new_limit;
 847
 848                 if (kvm_is_ucontrol(kvm))
 849                         return -EINVAL;
 850
 851                 if (get_user(new_limit, (u64 __user *)attr->addr))
 852                         return -EFAULT;
 853
 854                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 855                     new_limit > kvm->arch.mem_limit)
 856                         return -E2BIG;
 857
 858                 if (!new_limit)
 859                         return -EINVAL;
 860
 861                 /* gmap_create takes last usable address */
 862                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 863                         new_limit -= 1;
 864
 865                 ret = -EBUSY;
 866                 mutex_lock(&kvm->lock);
 867                 if (!kvm->created_vcpus) {
 868                         /* gmap_create will round the limit up */
 869                         struct gmap *new = gmap_create(current->mm, new_limit);
 870
 871                         if (!new) {
 872                                 ret = -ENOMEM;
 873                         } else {
 874                                 gmap_remove(kvm->arch.gmap);
 875                                 new->private = kvm;
 876                                 kvm->arch.gmap = new;
 877                                 ret = 0;
 878                         }
 879                 }
 880                 mutex_unlock(&kvm->lock);
 881                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 882                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 883                          (void *) kvm->arch.gmap->asce);
 884                 break;
 885         }
 886         default:
 887                 ret = -ENXIO;
 888                 break;
 889         }
 890         return ret;
 891 }
 892
 893 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 894
 895 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 896 {
 897         struct kvm_vcpu *vcpu;
 898         int i;
 899
 900         kvm_s390_vcpu_block_all(kvm);
 901
 902         kvm_for_each_vcpu(i, vcpu, kvm) {
 903                 kvm_s390_vcpu_crypto_setup(vcpu);
 904                 /* recreate the shadow crycb by leaving the VSIE handler */
 905                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 906         }
 907
 908         kvm_s390_vcpu_unblock_all(kvm);
 909 }
 910
 911 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 912 {
 913         mutex_lock(&kvm->lock);
 914         switch (attr->attr) {
 915         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 916                 if (!test_kvm_facility(kvm, 76)) {
 917                         mutex_unlock(&kvm->lock);
 918                         return -EINVAL;
 919                 }
 920                 get_random_bytes(
 921                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 922                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 923                 kvm->arch.crypto.aes_kw = 1;
 924                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 925                 break;
 926         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 927                 if (!test_kvm_facility(kvm, 76)) {
 928                         mutex_unlock(&kvm->lock);
 929                         return -EINVAL;
 930                 }
 931                 get_random_bytes(
 932                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 933                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 934                 kvm->arch.crypto.dea_kw = 1;
 935                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 936                 break;
 937         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 938                 if (!test_kvm_facility(kvm, 76)) {
 939                         mutex_unlock(&kvm->lock);
 940                         return -EINVAL;
 941                 }
 942                 kvm->arch.crypto.aes_kw = 0;
 943                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 944                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 945                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 946                 break;
 947         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 948                 if (!test_kvm_facility(kvm, 76)) {
 949                         mutex_unlock(&kvm->lock);
 950                         return -EINVAL;
 951                 }
 952                 kvm->arch.crypto.dea_kw = 0;
 953                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 954                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 955                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 956                 break;
 957         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 958                 if (!ap_instructions_available()) {
 959                         mutex_unlock(&kvm->lock);
 960                         return -EOPNOTSUPP;
 961                 }
 962                 kvm->arch.crypto.apie = 1;
 963                 break;
 964         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
 965                 if (!ap_instructions_available()) {
 966                         mutex_unlock(&kvm->lock);
 967                         return -EOPNOTSUPP;
 968                 }
 969                 kvm->arch.crypto.apie = 0;
 970                 break;
 971         default:
 972                 mutex_unlock(&kvm->lock);
 973                 return -ENXIO;
 974         }
 975
 976         kvm_s390_vcpu_crypto_reset_all(kvm);
 977         mutex_unlock(&kvm->lock);
 978         return 0;
 979 }
 980
 981 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 982 {
 983         int cx;
 984         struct kvm_vcpu *vcpu;
 985
 986         kvm_for_each_vcpu(cx, vcpu, kvm)
 987                 kvm_s390_sync_request(req, vcpu);
 988 }
 989
 990 /*
 991  * Must be called with kvm->srcu held to avoid races on memslots, and with
 992  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 993  */
 994 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 995 {
 996         struct kvm_memory_slot *ms;
 997         struct kvm_memslots *slots;
 998         unsigned long ram_pages = 0;
 999         int slotnr;
1000
1001         /* migration mode already enabled */
1002         if (kvm->arch.migration_mode)
1003                 return 0;
1004         slots = kvm_memslots(kvm);
1005         if (!slots || !slots->used_slots)
1006                 return -EINVAL;
1007
1008         if (!kvm->arch.use_cmma) {
1009                 kvm->arch.migration_mode = 1;
1010                 return 0;
1011         }
1012         /* mark all the pages in active slots as dirty */
1013         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1014                 ms = slots->memslots + slotnr;
1015                 /*
1016                  * The second half of the bitmap is only used on x86,
1017                  * and would be wasted otherwise, so we put it to good
1018                  * use here to keep track of the state of the storage
1019                  * attributes.
1020                  */
1021                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1022                 ram_pages += ms->npages;
1023         }
1024         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1025         kvm->arch.migration_mode = 1;
1026         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1027         return 0;
1028 }
1029
1030 /*
1031  * Must be called with kvm->slots_lock to avoid races with ourselves and
1032  * kvm_s390_vm_start_migration.
1033  */
1034 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1035 {
1036         /* migration mode already disabled */
1037         if (!kvm->arch.migration_mode)
1038                 return 0;
1039         kvm->arch.migration_mode = 0;
1040         if (kvm->arch.use_cmma)
1041                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1042         return 0;
1043 }
1044
1045 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1046                                      struct kvm_device_attr *attr)
1047 {
1048         int res = -ENXIO;
1049
1050         mutex_lock(&kvm->slots_lock);
1051         switch (attr->attr) {
1052         case KVM_S390_VM_MIGRATION_START:
1053                 res = kvm_s390_vm_start_migration(kvm);
1054                 break;
1055         case KVM_S390_VM_MIGRATION_STOP:
1056                 res = kvm_s390_vm_stop_migration(kvm);
1057                 break;
1058         default:
1059                 break;
1060         }
1061         mutex_unlock(&kvm->slots_lock);
1062
1063         return res;
1064 }
1065
1066 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1067                                      struct kvm_device_attr *attr)
1068 {
1069         u64 mig = kvm->arch.migration_mode;
1070
1071         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1072                 return -ENXIO;
1073
1074         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1075                 return -EFAULT;
1076         return 0;
1077 }
1078
1079 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1080 {
1081         struct kvm_s390_vm_tod_clock gtod;
1082
1083         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1084                 return -EFAULT;
1085
1086         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1087                 return -EINVAL;
1088         kvm_s390_set_tod_clock(kvm, &gtod);
1089
1090         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1091                 gtod.epoch_idx, gtod.tod);
1092
1093         return 0;
1094 }
1095
1096 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1097 {
1098         u8 gtod_high;
1099
1100         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1101                                            sizeof(gtod_high)))
1102                 return -EFAULT;
1103
1104         if (gtod_high != 0)
1105                 return -EINVAL;
1106         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1107
1108         return 0;
1109 }
1110
1111 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1112 {
1113         struct kvm_s390_vm_tod_clock gtod = { 0 };
1114
1115         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1116                            sizeof(gtod.tod)))
1117                 return -EFAULT;
1118
1119         kvm_s390_set_tod_clock(kvm, &gtod);
1120         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1121         return 0;
1122 }
1123
1124 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1125 {
1126         int ret;
1127
1128         if (attr->flags)
1129                 return -EINVAL;
1130
1131         switch (attr->attr) {
1132         case KVM_S390_VM_TOD_EXT:
1133                 ret = kvm_s390_set_tod_ext(kvm, attr);
1134                 break;
1135         case KVM_S390_VM_TOD_HIGH:
1136                 ret = kvm_s390_set_tod_high(kvm, attr);
1137                 break;
1138         case KVM_S390_VM_TOD_LOW:
1139                 ret = kvm_s390_set_tod_low(kvm, attr);
1140                 break;
1141         default:
1142                 ret = -ENXIO;
1143                 break;
1144         }
1145         return ret;
1146 }
1147
1148 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1149                                    struct kvm_s390_vm_tod_clock *gtod)
1150 {
1151         struct kvm_s390_tod_clock_ext htod;
1152
1153         preempt_disable();
1154
1155         get_tod_clock_ext((char *)&htod);
1156
1157         gtod->tod = htod.tod + kvm->arch.epoch;
1158         gtod->epoch_idx = 0;
1159         if (test_kvm_facility(kvm, 139)) {
1160                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1161                 if (gtod->tod < htod.tod)
1162                         gtod->epoch_idx += 1;
1163         }
1164
1165         preempt_enable();
1166 }
1167
1168 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1169 {
1170         struct kvm_s390_vm_tod_clock gtod;
1171
1172         memset(&gtod, 0, sizeof(gtod));
1173         kvm_s390_get_tod_clock(kvm, &gtod);
1174         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1175                 return -EFAULT;
1176
1177         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1178                 gtod.epoch_idx, gtod.tod);
1179         return 0;
1180 }
1181
1182 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1183 {
1184         u8 gtod_high = 0;
1185
1186         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1187                                          sizeof(gtod_high)))
1188                 return -EFAULT;
1189         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1190
1191         return 0;
1192 }
1193
1194 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1195 {
1196         u64 gtod;
1197
1198         gtod = kvm_s390_get_tod_clock_fast(kvm);
1199         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1200                 return -EFAULT;
1201         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1202
1203         return 0;
1204 }
1205
1206 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1207 {
1208         int ret;
1209
1210         if (attr->flags)
1211                 return -EINVAL;
1212
1213         switch (attr->attr) {
1214         case KVM_S390_VM_TOD_EXT:
1215                 ret = kvm_s390_get_tod_ext(kvm, attr);
1216                 break;
1217         case KVM_S390_VM_TOD_HIGH:
1218                 ret = kvm_s390_get_tod_high(kvm, attr);
1219                 break;
1220         case KVM_S390_VM_TOD_LOW:
1221                 ret = kvm_s390_get_tod_low(kvm, attr);
1222                 break;
1223         default:
1224                 ret = -ENXIO;
1225                 break;
1226         }
1227         return ret;
1228 }
1229
1230 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1231 {
1232         struct kvm_s390_vm_cpu_processor *proc;
1233         u16 lowest_ibc, unblocked_ibc;
1234         int ret = 0;
1235
1236         mutex_lock(&kvm->lock);
1237         if (kvm->created_vcpus) {
1238                 ret = -EBUSY;
1239                 goto out;
1240         }
1241         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1242         if (!proc) {
1243                 ret = -ENOMEM;
1244                 goto out;
1245         }
1246         if (!copy_from_user(proc, (void __user *)attr->addr,
1247                             sizeof(*proc))) {
1248                 kvm->arch.model.cpuid = proc->cpuid;
1249                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1250                 unblocked_ibc = sclp.ibc & 0xfff;
1251                 if (lowest_ibc && proc->ibc) {
1252                         if (proc->ibc > unblocked_ibc)
1253                                 kvm->arch.model.ibc = unblocked_ibc;
1254                         else if (proc->ibc < lowest_ibc)
1255                                 kvm->arch.model.ibc = lowest_ibc;
1256                         else
1257                                 kvm->arch.model.ibc = proc->ibc;
1258                 }
1259                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1260                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1261                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1262                          kvm->arch.model.ibc,
1263                          kvm->arch.model.cpuid);
1264                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1265                          kvm->arch.model.fac_list[0],
1266                          kvm->arch.model.fac_list[1],
1267                          kvm->arch.model.fac_list[2]);
1268         } else
1269                 ret = -EFAULT;
1270         kfree(proc);
1271 out:
1272         mutex_unlock(&kvm->lock);
1273         return ret;
1274 }
1275
1276 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1277                                        struct kvm_device_attr *attr)
1278 {
1279         struct kvm_s390_vm_cpu_feat data;
1280
1281         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1282                 return -EFAULT;
1283         if (!bitmap_subset((unsigned long *) data.feat,
1284                            kvm_s390_available_cpu_feat,
1285                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1286                 return -EINVAL;
1287
1288         mutex_lock(&kvm->lock);
1289         if (kvm->created_vcpus) {
1290                 mutex_unlock(&kvm->lock);
1291                 return -EBUSY;
1292         }
1293         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1294                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1295         mutex_unlock(&kvm->lock);
1296         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1297                          data.feat[0],
1298                          data.feat[1],
1299                          data.feat[2]);
1300         return 0;
1301 }
1302
1303 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1304                                           struct kvm_device_attr *attr)
1305 {
1306         mutex_lock(&kvm->lock);
1307         if (kvm->created_vcpus) {
1308                 mutex_unlock(&kvm->lock);
1309                 return -EBUSY;
1310         }
1311
1312         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1313                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1314                 mutex_unlock(&kvm->lock);
1315                 return -EFAULT;
1316         }
1317         mutex_unlock(&kvm->lock);
1318
1319         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1320                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1321                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1322                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1323                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1324         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1325                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1327         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1328                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1329                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1330         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1331                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1332                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1333         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1334                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1335                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1336         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1338                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1339         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1342         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1345         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1348         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1351         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1354         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1357         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1360         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1363         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1366         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1371         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1376
1377         return 0;
1378 }
1379
1380 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1381 {
1382         int ret = -ENXIO;
1383
1384         switch (attr->attr) {
1385         case KVM_S390_VM_CPU_PROCESSOR:
1386                 ret = kvm_s390_set_processor(kvm, attr);
1387                 break;
1388         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1389                 ret = kvm_s390_set_processor_feat(kvm, attr);
1390                 break;
1391         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1392                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1393                 break;
1394         }
1395         return ret;
1396 }
1397
1398 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1399 {
1400         struct kvm_s390_vm_cpu_processor *proc;
1401         int ret = 0;
1402
1403         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1404         if (!proc) {
1405                 ret = -ENOMEM;
1406                 goto out;
1407         }
1408         proc->cpuid = kvm->arch.model.cpuid;
1409         proc->ibc = kvm->arch.model.ibc;
1410         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1411                S390_ARCH_FAC_LIST_SIZE_BYTE);
1412         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1413                  kvm->arch.model.ibc,
1414                  kvm->arch.model.cpuid);
1415         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1416                  kvm->arch.model.fac_list[0],
1417                  kvm->arch.model.fac_list[1],
1418                  kvm->arch.model.fac_list[2]);
1419         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1420                 ret = -EFAULT;
1421         kfree(proc);
1422 out:
1423         return ret;
1424 }
1425
1426 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1427 {
1428         struct kvm_s390_vm_cpu_machine *mach;
1429         int ret = 0;
1430
1431         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1432         if (!mach) {
1433                 ret = -ENOMEM;
1434                 goto out;
1435         }
1436         get_cpu_id((struct cpuid *) &mach->cpuid);
1437         mach->ibc = sclp.ibc;
1438         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1439                S390_ARCH_FAC_LIST_SIZE_BYTE);
1440         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1441                sizeof(S390_lowcore.stfle_fac_list));
1442         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1443                  kvm->arch.model.ibc,
1444                  kvm->arch.model.cpuid);
1445         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1446                  mach->fac_mask[0],
1447                  mach->fac_mask[1],
1448                  mach->fac_mask[2]);
1449         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1450                  mach->fac_list[0],
1451                  mach->fac_list[1],
1452                  mach->fac_list[2]);
1453         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1454                 ret = -EFAULT;
1455         kfree(mach);
1456 out:
1457         return ret;
1458 }
1459
1460 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1461                                        struct kvm_device_attr *attr)
1462 {
1463         struct kvm_s390_vm_cpu_feat data;
1464
1465         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1466                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1467         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1468                 return -EFAULT;
1469         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1470                          data.feat[0],
1471                          data.feat[1],
1472                          data.feat[2]);
1473         return 0;
1474 }
1475
1476 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1477                                      struct kvm_device_attr *attr)
1478 {
1479         struct kvm_s390_vm_cpu_feat data;
1480
1481         bitmap_copy((unsigned long *) data.feat,
1482                     kvm_s390_available_cpu_feat,
1483                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1484         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1485                 return -EFAULT;
1486         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1487                          data.feat[0],
1488                          data.feat[1],
1489                          data.feat[2]);
1490         return 0;
1491 }
1492
1493 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1494                                           struct kvm_device_attr *attr)
1495 {
1496         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1497             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1498                 return -EFAULT;
1499
1500         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1501                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1502                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1503                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1504                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1505         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1506                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1508         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1510                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1511         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1513                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1514         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1516                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1517         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1520         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1523         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1526         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1529         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1532         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1535         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1538         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1541         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1544         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1547         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1552         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1557
1558         return 0;
1559 }
1560
1561 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1562                                         struct kvm_device_attr *attr)
1563 {
1564         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1565             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1566                 return -EFAULT;
1567
1568         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1570                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1571                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1572                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1573         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1575                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1576         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1577                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1578                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1579         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1580                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1581                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1582         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1583                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1584                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1585         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1586                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1587                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1588         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1589                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1590                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1591         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1592                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1593                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1594         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1596                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1597         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1598                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1599                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1600         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1603         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1604                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1605                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1606         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1607                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1608                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1609         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1610                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1611                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1612         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1613                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1615         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1618                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1620         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1625
1626         return 0;
1627 }
1628
1629 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1630 {
1631         int ret = -ENXIO;
1632
1633         switch (attr->attr) {
1634         case KVM_S390_VM_CPU_PROCESSOR:
1635                 ret = kvm_s390_get_processor(kvm, attr);
1636                 break;
1637         case KVM_S390_VM_CPU_MACHINE:
1638                 ret = kvm_s390_get_machine(kvm, attr);
1639                 break;
1640         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1641                 ret = kvm_s390_get_processor_feat(kvm, attr);
1642                 break;
1643         case KVM_S390_VM_CPU_MACHINE_FEAT:
1644                 ret = kvm_s390_get_machine_feat(kvm, attr);
1645                 break;
1646         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1647                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1648                 break;
1649         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1650                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1651                 break;
1652         }
1653         return ret;
1654 }
1655
1656 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1657 {
1658         int ret;
1659
1660         switch (attr->group) {
1661         case KVM_S390_VM_MEM_CTRL:
1662                 ret = kvm_s390_set_mem_control(kvm, attr);
1663                 break;
1664         case KVM_S390_VM_TOD:
1665                 ret = kvm_s390_set_tod(kvm, attr);
1666                 break;
1667         case KVM_S390_VM_CPU_MODEL:
1668                 ret = kvm_s390_set_cpu_model(kvm, attr);
1669                 break;
1670         case KVM_S390_VM_CRYPTO:
1671                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1672                 break;
1673         case KVM_S390_VM_MIGRATION:
1674                 ret = kvm_s390_vm_set_migration(kvm, attr);
1675                 break;
1676         default:
1677                 ret = -ENXIO;
1678                 break;
1679         }
1680
1681         return ret;
1682 }
1683
1684 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1685 {
1686         int ret;
1687
1688         switch (attr->group) {
1689         case KVM_S390_VM_MEM_CTRL:
1690                 ret = kvm_s390_get_mem_control(kvm, attr);
1691                 break;
1692         case KVM_S390_VM_TOD:
1693                 ret = kvm_s390_get_tod(kvm, attr);
1694                 break;
1695         case KVM_S390_VM_CPU_MODEL:
1696                 ret = kvm_s390_get_cpu_model(kvm, attr);
1697                 break;
1698         case KVM_S390_VM_MIGRATION:
1699                 ret = kvm_s390_vm_get_migration(kvm, attr);
1700                 break;
1701         default:
1702                 ret = -ENXIO;
1703                 break;
1704         }
1705
1706         return ret;
1707 }
1708
1709 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1710 {
1711         int ret;
1712
1713         switch (attr->group) {
1714         case KVM_S390_VM_MEM_CTRL:
1715                 switch (attr->attr) {
1716                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1717                 case KVM_S390_VM_MEM_CLR_CMMA:
1718                         ret = sclp.has_cmma ? 0 : -ENXIO;
1719                         break;
1720                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1721                         ret = 0;
1722                         break;
1723                 default:
1724                         ret = -ENXIO;
1725                         break;
1726                 }
1727                 break;
1728         case KVM_S390_VM_TOD:
1729                 switch (attr->attr) {
1730                 case KVM_S390_VM_TOD_LOW:
1731                 case KVM_S390_VM_TOD_HIGH:
1732                         ret = 0;
1733                         break;
1734                 default:
1735                         ret = -ENXIO;
1736                         break;
1737                 }
1738                 break;
1739         case KVM_S390_VM_CPU_MODEL:
1740                 switch (attr->attr) {
1741                 case KVM_S390_VM_CPU_PROCESSOR:
1742                 case KVM_S390_VM_CPU_MACHINE:
1743                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1744                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1745                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1746                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1747                         ret = 0;
1748                         break;
1749                 default:
1750                         ret = -ENXIO;
1751                         break;
1752                 }
1753                 break;
1754         case KVM_S390_VM_CRYPTO:
1755                 switch (attr->attr) {
1756                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1757                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1758                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1759                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1760                         ret = 0;
1761                         break;
1762                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1763                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1764                         ret = ap_instructions_available() ? 0 : -ENXIO;
1765                         break;
1766                 default:
1767                         ret = -ENXIO;
1768                         break;
1769                 }
1770                 break;
1771         case KVM_S390_VM_MIGRATION:
1772                 ret = 0;
1773                 break;
1774         default:
1775                 ret = -ENXIO;
1776                 break;
1777         }
1778
1779         return ret;
1780 }
1781
1782 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1783 {
1784         uint8_t *keys;
1785         uint64_t hva;
1786         int srcu_idx, i, r = 0;
1787
1788         if (args->flags != 0)
1789                 return -EINVAL;
1790
1791         /* Is this guest using storage keys? */
1792         if (!mm_uses_skeys(current->mm))
1793                 return KVM_S390_GET_SKEYS_NONE;
1794
1795         /* Enforce sane limit on memory allocation */
1796         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1797                 return -EINVAL;
1798
1799         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1800         if (!keys)
1801                 return -ENOMEM;
1802
1803         down_read(&current->mm->mmap_sem);
1804         srcu_idx = srcu_read_lock(&kvm->srcu);
1805         for (i = 0; i < args->count; i++) {
1806                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1807                 if (kvm_is_error_hva(hva)) {
1808                         r = -EFAULT;
1809                         break;
1810                 }
1811
1812                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1813                 if (r)
1814                         break;
1815         }
1816         srcu_read_unlock(&kvm->srcu, srcu_idx);
1817         up_read(&current->mm->mmap_sem);
1818
1819         if (!r) {
1820                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1821                                  sizeof(uint8_t) * args->count);
1822                 if (r)
1823                         r = -EFAULT;
1824         }
1825
1826         kvfree(keys);
1827         return r;
1828 }
1829
1830 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1831 {
1832         uint8_t *keys;
1833         uint64_t hva;
1834         int srcu_idx, i, r = 0;
1835         bool unlocked;
1836
1837         if (args->flags != 0)
1838                 return -EINVAL;
1839
1840         /* Enforce sane limit on memory allocation */
1841         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1842                 return -EINVAL;
1843
1844         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1845         if (!keys)
1846                 return -ENOMEM;
1847
1848         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1849                            sizeof(uint8_t) * args->count);
1850         if (r) {
1851                 r = -EFAULT;
1852                 goto out;
1853         }
1854
1855         /* Enable storage key handling for the guest */
1856         r = s390_enable_skey();
1857         if (r)
1858                 goto out;
1859
1860         i = 0;
1861         down_read(&current->mm->mmap_sem);
1862         srcu_idx = srcu_read_lock(&kvm->srcu);
1863         while (i < args->count) {
1864                 unlocked = false;
1865                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1866                 if (kvm_is_error_hva(hva)) {
1867                         r = -EFAULT;
1868                         break;
1869                 }
1870
1871                 /* Lowest order bit is reserved */
1872                 if (keys[i] & 0x01) {
1873                         r = -EINVAL;
1874                         break;
1875                 }
1876
1877                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1878                 if (r) {
1879                         r = fixup_user_fault(current, current->mm, hva,
1880                                              FAULT_FLAG_WRITE, &unlocked);
1881                         if (r)
1882                                 break;
1883                 }
1884                 if (!r)
1885                         i++;
1886         }
1887         srcu_read_unlock(&kvm->srcu, srcu_idx);
1888         up_read(&current->mm->mmap_sem);
1889 out:
1890         kvfree(keys);
1891         return r;
1892 }
1893
1894 /*
1895  * Base address and length must be sent at the start of each block, therefore
1896  * it's cheaper to send some clean data, as long as it's less than the size of
1897  * two longs.
1898  */
1899 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1900 /* for consistency */
1901 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1902
1903 /*
1904  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1905  * address falls in a hole. In that case the index of one of the memslots
1906  * bordering the hole is returned.
1907  */
1908 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1909 {
1910         int start = 0, end = slots->used_slots;
1911         int slot = atomic_read(&slots->lru_slot);
1912         struct kvm_memory_slot *memslots = slots->memslots;
1913
1914         if (gfn >= memslots[slot].base_gfn &&
1915             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1916                 return slot;
1917
1918         while (start < end) {
1919                 slot = start + (end - start) / 2;
1920
1921                 if (gfn >= memslots[slot].base_gfn)
1922                         end = slot;
1923                 else
1924                         start = slot + 1;
1925         }
1926
1927         if (gfn >= memslots[start].base_gfn &&
1928             gfn < memslots[start].base_gfn + memslots[start].npages) {
1929                 atomic_set(&slots->lru_slot, start);
1930         }
1931
1932         return start;
1933 }
1934
1935 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1936                               u8 *res, unsigned long bufsize)
1937 {
1938         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1939
1940         args->count = 0;
1941         while (args->count < bufsize) {
1942                 hva = gfn_to_hva(kvm, cur_gfn);
1943                 /*
1944                  * We return an error if the first value was invalid, but we
1945                  * return successfully if at least one value was copied.
1946                  */
1947                 if (kvm_is_error_hva(hva))
1948                         return args->count ? 0 : -EFAULT;
1949                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1950                         pgstev = 0;
1951                 res[args->count++] = (pgstev >> 24) & 0x43;
1952                 cur_gfn++;
1953         }
1954
1955         return 0;
1956 }
1957
1958 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1959                                               unsigned long cur_gfn)
1960 {
1961         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1962         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1963         unsigned long ofs = cur_gfn - ms->base_gfn;
1964
1965         if (ms->base_gfn + ms->npages <= cur_gfn) {
1966                 slotidx--;
1967                 /* If we are above the highest slot, wrap around */
1968                 if (slotidx < 0)
1969                         slotidx = slots->used_slots - 1;
1970
1971                 ms = slots->memslots + slotidx;
1972                 ofs = 0;
1973         }
1974         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1975         while ((slotidx > 0) && (ofs >= ms->npages)) {
1976                 slotidx--;
1977                 ms = slots->memslots + slotidx;
1978                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1979         }
1980         return ms->base_gfn + ofs;
1981 }
1982
1983 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1984                              u8 *res, unsigned long bufsize)
1985 {
1986         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1987         struct kvm_memslots *slots = kvm_memslots(kvm);
1988         struct kvm_memory_slot *ms;
1989
1990         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1991         ms = gfn_to_memslot(kvm, cur_gfn);
1992         args->count = 0;
1993         args->start_gfn = cur_gfn;
1994         if (!ms)
1995                 return 0;
1996         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1997         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1998
1999         while (args->count < bufsize) {
2000                 hva = gfn_to_hva(kvm, cur_gfn);
2001                 if (kvm_is_error_hva(hva))
2002                         return 0;
2003                 /* Decrement only if we actually flipped the bit to 0 */
2004                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2005                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2006                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2007                         pgstev = 0;
2008                 /* Save the value */
2009                 res[args->count++] = (pgstev >> 24) & 0x43;
2010                 /* If the next bit is too far away, stop. */
2011                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2012                         return 0;
2013                 /* If we reached the previous "next", find the next one */
2014                 if (cur_gfn == next_gfn)
2015                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2016                 /* Reached the end of memory or of the buffer, stop */
2017                 if ((next_gfn >= mem_end) ||
2018                     (next_gfn - args->start_gfn >= bufsize))
2019                         return 0;
2020                 cur_gfn++;
2021                 /* Reached the end of the current memslot, take the next one. */
2022                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2023                         ms = gfn_to_memslot(kvm, cur_gfn);
2024                         if (!ms)
2025                                 return 0;
2026                 }
2027         }
2028         return 0;
2029 }
2030
2031 /*
2032  * This function searches for the next page with dirty CMMA attributes, and
2033  * saves the attributes in the buffer up to either the end of the buffer or
2034  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2035  * no trailing clean bytes are saved.
2036  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2037  * output buffer will indicate 0 as length.
2038  */
2039 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2040                                   struct kvm_s390_cmma_log *args)
2041 {
2042         unsigned long bufsize;
2043         int srcu_idx, peek, ret;
2044         u8 *values;
2045
2046         if (!kvm->arch.use_cmma)
2047                 return -ENXIO;
2048         /* Invalid/unsupported flags were specified */
2049         if (args->flags & ~KVM_S390_CMMA_PEEK)
2050                 return -EINVAL;
2051         /* Migration mode query, and we are not doing a migration */
2052         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2053         if (!peek && !kvm->arch.migration_mode)
2054                 return -EINVAL;
2055         /* CMMA is disabled or was not used, or the buffer has length zero */
2056         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2057         if (!bufsize || !kvm->mm->context.uses_cmm) {
2058                 memset(args, 0, sizeof(*args));
2059                 return 0;
2060         }
2061         /* We are not peeking, and there are no dirty pages */
2062         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2063                 memset(args, 0, sizeof(*args));
2064                 return 0;
2065         }
2066
2067         values = vmalloc(bufsize);
2068         if (!values)
2069                 return -ENOMEM;
2070
2071         down_read(&kvm->mm->mmap_sem);
2072         srcu_idx = srcu_read_lock(&kvm->srcu);
2073         if (peek)
2074                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2075         else
2076                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2077         srcu_read_unlock(&kvm->srcu, srcu_idx);
2078         up_read(&kvm->mm->mmap_sem);
2079
2080         if (kvm->arch.migration_mode)
2081                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2082         else
2083                 args->remaining = 0;
2084
2085         if (copy_to_user((void __user *)args->values, values, args->count))
2086                 ret = -EFAULT;
2087
2088         vfree(values);
2089         return ret;
2090 }
2091
2092 /*
2093  * This function sets the CMMA attributes for the given pages. If the input
2094  * buffer has zero length, no action is taken, otherwise the attributes are
2095  * set and the mm->context.uses_cmm flag is set.
2096  */
2097 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2098                                   const struct kvm_s390_cmma_log *args)
2099 {
2100         unsigned long hva, mask, pgstev, i;
2101         uint8_t *bits;
2102         int srcu_idx, r = 0;
2103
2104         mask = args->mask;
2105
2106         if (!kvm->arch.use_cmma)
2107                 return -ENXIO;
2108         /* invalid/unsupported flags */
2109         if (args->flags != 0)
2110                 return -EINVAL;
2111         /* Enforce sane limit on memory allocation */
2112         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2113                 return -EINVAL;
2114         /* Nothing to do */
2115         if (args->count == 0)
2116                 return 0;
2117
2118         bits = vmalloc(array_size(sizeof(*bits), args->count));
2119         if (!bits)
2120                 return -ENOMEM;
2121
2122         r = copy_from_user(bits, (void __user *)args->values, args->count);
2123         if (r) {
2124                 r = -EFAULT;
2125                 goto out;
2126         }
2127
2128         down_read(&kvm->mm->mmap_sem);
2129         srcu_idx = srcu_read_lock(&kvm->srcu);
2130         for (i = 0; i < args->count; i++) {
2131                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2132                 if (kvm_is_error_hva(hva)) {
2133                         r = -EFAULT;
2134                         break;
2135                 }
2136
2137                 pgstev = bits[i];
2138                 pgstev = pgstev << 24;
2139                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2140                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2141         }
2142         srcu_read_unlock(&kvm->srcu, srcu_idx);
2143         up_read(&kvm->mm->mmap_sem);
2144
2145         if (!kvm->mm->context.uses_cmm) {
2146                 down_write(&kvm->mm->mmap_sem);
2147                 kvm->mm->context.uses_cmm = 1;
2148                 up_write(&kvm->mm->mmap_sem);
2149         }
2150 out:
2151         vfree(bits);
2152         return r;
2153 }
2154
2155 long kvm_arch_vm_ioctl(struct file *filp,
2156                        unsigned int ioctl, unsigned long arg)
2157 {
2158         struct kvm *kvm = filp->private_data;
2159         void __user *argp = (void __user *)arg;
2160         struct kvm_device_attr attr;
2161         int r;
2162
2163         switch (ioctl) {
2164         case KVM_S390_INTERRUPT: {
2165                 struct kvm_s390_interrupt s390int;
2166
2167                 r = -EFAULT;
2168                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2169                         break;
2170                 r = kvm_s390_inject_vm(kvm, &s390int);
2171                 break;
2172         }
2173         case KVM_CREATE_IRQCHIP: {
2174                 struct kvm_irq_routing_entry routing;
2175
2176                 r = -EINVAL;
2177                 if (kvm->arch.use_irqchip) {
2178                         /* Set up dummy routing. */
2179                         memset(&routing, 0, sizeof(routing));
2180                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2181                 }
2182                 break;
2183         }
2184         case KVM_SET_DEVICE_ATTR: {
2185                 r = -EFAULT;
2186                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2187                         break;
2188                 r = kvm_s390_vm_set_attr(kvm, &attr);
2189                 break;
2190         }
2191         case KVM_GET_DEVICE_ATTR: {
2192                 r = -EFAULT;
2193                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2194                         break;
2195                 r = kvm_s390_vm_get_attr(kvm, &attr);
2196                 break;
2197         }
2198         case KVM_HAS_DEVICE_ATTR: {
2199                 r = -EFAULT;
2200                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2201                         break;
2202                 r = kvm_s390_vm_has_attr(kvm, &attr);
2203                 break;
2204         }
2205         case KVM_S390_GET_SKEYS: {
2206                 struct kvm_s390_skeys args;
2207
2208                 r = -EFAULT;
2209                 if (copy_from_user(&args, argp,
2210                                    sizeof(struct kvm_s390_skeys)))
2211                         break;
2212                 r = kvm_s390_get_skeys(kvm, &args);
2213                 break;
2214         }
2215         case KVM_S390_SET_SKEYS: {
2216                 struct kvm_s390_skeys args;
2217
2218                 r = -EFAULT;
2219                 if (copy_from_user(&args, argp,
2220                                    sizeof(struct kvm_s390_skeys)))
2221                         break;
2222                 r = kvm_s390_set_skeys(kvm, &args);
2223                 break;
2224         }
2225         case KVM_S390_GET_CMMA_BITS: {
2226                 struct kvm_s390_cmma_log args;
2227
2228                 r = -EFAULT;
2229                 if (copy_from_user(&args, argp, sizeof(args)))
2230                         break;
2231                 mutex_lock(&kvm->slots_lock);
2232                 r = kvm_s390_get_cmma_bits(kvm, &args);
2233                 mutex_unlock(&kvm->slots_lock);
2234                 if (!r) {
2235                         r = copy_to_user(argp, &args, sizeof(args));
2236                         if (r)
2237                                 r = -EFAULT;
2238                 }
2239                 break;
2240         }
2241         case KVM_S390_SET_CMMA_BITS: {
2242                 struct kvm_s390_cmma_log args;
2243
2244                 r = -EFAULT;
2245                 if (copy_from_user(&args, argp, sizeof(args)))
2246                         break;
2247                 mutex_lock(&kvm->slots_lock);
2248                 r = kvm_s390_set_cmma_bits(kvm, &args);
2249                 mutex_unlock(&kvm->slots_lock);
2250                 break;
2251         }
2252         default:
2253                 r = -ENOTTY;
2254         }
2255
2256         return r;
2257 }
2258
2259 static int kvm_s390_apxa_installed(void)
2260 {
2261         struct ap_config_info info;
2262
2263         if (ap_instructions_available()) {
2264                 if (ap_qci(&info) == 0)
2265                         return info.apxa;
2266         }
2267
2268         return 0;
2269 }
2270
2271 /*
2272  * The format of the crypto control block (CRYCB) is specified in the 3 low
2273  * order bits of the CRYCB designation (CRYCBD) field as follows:
2274  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2275  *           AP extended addressing (APXA) facility are installed.
2276  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2277  * Format 2: Both the APXA and MSAX3 facilities are installed
2278  */
2279 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2280 {
2281         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2282
2283         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2284         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2285
2286         /* Check whether MSAX3 is installed */
2287         if (!test_kvm_facility(kvm, 76))
2288                 return;
2289
2290         if (kvm_s390_apxa_installed())
2291                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2292         else
2293                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2294 }
2295
2296 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2297                                unsigned long *aqm, unsigned long *adm)
2298 {
2299         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2300
2301         mutex_lock(&kvm->lock);
2302         kvm_s390_vcpu_block_all(kvm);
2303
2304         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2305         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2306                 memcpy(crycb->apcb1.apm, apm, 32);
2307                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2308                          apm[0], apm[1], apm[2], apm[3]);
2309                 memcpy(crycb->apcb1.aqm, aqm, 32);
2310                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2311                          aqm[0], aqm[1], aqm[2], aqm[3]);
2312                 memcpy(crycb->apcb1.adm, adm, 32);
2313                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2314                          adm[0], adm[1], adm[2], adm[3]);
2315                 break;
2316         case CRYCB_FORMAT1:
2317         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2318                 memcpy(crycb->apcb0.apm, apm, 8);
2319                 memcpy(crycb->apcb0.aqm, aqm, 2);
2320                 memcpy(crycb->apcb0.adm, adm, 2);
2321                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2322                          apm[0], *((unsigned short *)aqm),
2323                          *((unsigned short *)adm));
2324                 break;
2325         default:        /* Can not happen */
2326                 break;
2327         }
2328
2329         /* recreate the shadow crycb for each vcpu */
2330         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2331         kvm_s390_vcpu_unblock_all(kvm);
2332         mutex_unlock(&kvm->lock);
2333 }
2334 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2335
2336 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2337 {
2338         mutex_lock(&kvm->lock);
2339         kvm_s390_vcpu_block_all(kvm);
2340
2341         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2342                sizeof(kvm->arch.crypto.crycb->apcb0));
2343         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2344                sizeof(kvm->arch.crypto.crycb->apcb1));
2345
2346         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2347         /* recreate the shadow crycb for each vcpu */
2348         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2349         kvm_s390_vcpu_unblock_all(kvm);
2350         mutex_unlock(&kvm->lock);
2351 }
2352 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2353
2354 static u64 kvm_s390_get_initial_cpuid(void)
2355 {
2356         struct cpuid cpuid;
2357
2358         get_cpu_id(&cpuid);
2359         cpuid.version = 0xff;
2360         return *((u64 *) &cpuid);
2361 }
2362
2363 static void kvm_s390_crypto_init(struct kvm *kvm)
2364 {
2365         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2366         kvm_s390_set_crycb_format(kvm);
2367
2368         if (!test_kvm_facility(kvm, 76))
2369                 return;
2370
2371         /* Enable AES/DEA protected key functions by default */
2372         kvm->arch.crypto.aes_kw = 1;
2373         kvm->arch.crypto.dea_kw = 1;
2374         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2375                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2376         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2377                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2378 }
2379
2380 static void sca_dispose(struct kvm *kvm)
2381 {
2382         if (kvm->arch.use_esca)
2383                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2384         else
2385                 free_page((unsigned long)(kvm->arch.sca));
2386         kvm->arch.sca = NULL;
2387 }
2388
2389 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2390 {
2391         gfp_t alloc_flags = GFP_KERNEL;
2392         int i, rc;
2393         char debug_name[16];
2394         static unsigned long sca_offset;
2395
2396         rc = -EINVAL;
2397 #ifdef CONFIG_KVM_S390_UCONTROL
2398         if (type & ~KVM_VM_S390_UCONTROL)
2399                 goto out_err;
2400         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2401                 goto out_err;
2402 #else
2403         if (type)
2404                 goto out_err;
2405 #endif
2406
2407         rc = s390_enable_sie();
2408         if (rc)
2409                 goto out_err;
2410
2411         rc = -ENOMEM;
2412
2413         if (!sclp.has_64bscao)
2414                 alloc_flags |= GFP_DMA;
2415         rwlock_init(&kvm->arch.sca_lock);
2416         /* start with basic SCA */
2417         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2418         if (!kvm->arch.sca)
2419                 goto out_err;
2420         spin_lock(&kvm_lock);
2421         sca_offset += 16;
2422         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2423                 sca_offset = 0;
2424         kvm->arch.sca = (struct bsca_block *)
2425                         ((char *) kvm->arch.sca + sca_offset);
2426         spin_unlock(&kvm_lock);
2427
2428         sprintf(debug_name, "kvm-%u", current->pid);
2429
2430         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2431         if (!kvm->arch.dbf)
2432                 goto out_err;
2433
2434         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2435         kvm->arch.sie_page2 =
2436              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2437         if (!kvm->arch.sie_page2)
2438                 goto out_err;
2439
2440         kvm->arch.sie_page2->kvm = kvm;
2441         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2442
2443         for (i = 0; i < kvm_s390_fac_size(); i++) {
2444                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2445                                               (kvm_s390_fac_base[i] |
2446                                                kvm_s390_fac_ext[i]);
2447                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2448                                               kvm_s390_fac_base[i];
2449         }
2450         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2451
2452         /* we are always in czam mode - even on pre z14 machines */
2453         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2454         set_kvm_facility(kvm->arch.model.fac_list, 138);
2455         /* we emulate STHYI in kvm */
2456         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2457         set_kvm_facility(kvm->arch.model.fac_list, 74);
2458         if (MACHINE_HAS_TLB_GUEST) {
2459                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2460                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2461         }
2462
2463         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2464         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2465
2466         kvm_s390_crypto_init(kvm);
2467
2468         mutex_init(&kvm->arch.float_int.ais_lock);
2469         spin_lock_init(&kvm->arch.float_int.lock);
2470         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2471                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2472         init_waitqueue_head(&kvm->arch.ipte_wq);
2473         mutex_init(&kvm->arch.ipte_mutex);
2474
2475         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2476         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2477
2478         if (type & KVM_VM_S390_UCONTROL) {
2479                 kvm->arch.gmap = NULL;
2480                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2481         } else {
2482                 if (sclp.hamax == U64_MAX)
2483                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2484                 else
2485                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2486                                                     sclp.hamax + 1);
2487                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2488                 if (!kvm->arch.gmap)
2489                         goto out_err;
2490                 kvm->arch.gmap->private = kvm;
2491                 kvm->arch.gmap->pfault_enabled = 0;
2492         }
2493
2494         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2495         kvm->arch.use_skf = sclp.has_skey;
2496         spin_lock_init(&kvm->arch.start_stop_lock);
2497         kvm_s390_vsie_init(kvm);
2498         kvm_s390_gisa_init(kvm);
2499         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2500
2501         return 0;
2502 out_err:
2503         free_page((unsigned long)kvm->arch.sie_page2);
2504         debug_unregister(kvm->arch.dbf);
2505         sca_dispose(kvm);
2506         KVM_EVENT(3, "creation of vm failed: %d", rc);
2507         return rc;
2508 }
2509
2510 bool kvm_arch_has_vcpu_debugfs(void)
2511 {
2512         return false;
2513 }
2514
2515 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2516 {
2517         return 0;
2518 }
2519
2520 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2521 {
2522         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2523         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2524         kvm_s390_clear_local_irqs(vcpu);
2525         kvm_clear_async_pf_completion_queue(vcpu);
2526         if (!kvm_is_ucontrol(vcpu->kvm))
2527                 sca_del_vcpu(vcpu);
2528
2529         if (kvm_is_ucontrol(vcpu->kvm))
2530                 gmap_remove(vcpu->arch.gmap);
2531
2532         if (vcpu->kvm->arch.use_cmma)
2533                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2534         free_page((unsigned long)(vcpu->arch.sie_block));
2535
2536         kvm_vcpu_uninit(vcpu);
2537         kmem_cache_free(kvm_vcpu_cache, vcpu);
2538 }
2539
2540 static void kvm_free_vcpus(struct kvm *kvm)
2541 {
2542         unsigned int i;
2543         struct kvm_vcpu *vcpu;
2544
2545         kvm_for_each_vcpu(i, vcpu, kvm)
2546                 kvm_arch_vcpu_destroy(vcpu);
2547
2548         mutex_lock(&kvm->lock);
2549         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2550                 kvm->vcpus[i] = NULL;
2551
2552         atomic_set(&kvm->online_vcpus, 0);
2553         mutex_unlock(&kvm->lock);
2554 }
2555
2556 void kvm_arch_destroy_vm(struct kvm *kvm)
2557 {
2558         kvm_free_vcpus(kvm);
2559         sca_dispose(kvm);
2560         debug_unregister(kvm->arch.dbf);
2561         kvm_s390_gisa_destroy(kvm);
2562         free_page((unsigned long)kvm->arch.sie_page2);
2563         if (!kvm_is_ucontrol(kvm))
2564                 gmap_remove(kvm->arch.gmap);
2565         kvm_s390_destroy_adapters(kvm);
2566         kvm_s390_clear_float_irqs(kvm);
2567         kvm_s390_vsie_destroy(kvm);
2568         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2569 }
2570
2571 /* Section: vcpu related */
2572 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2573 {
2574         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2575         if (!vcpu->arch.gmap)
2576                 return -ENOMEM;
2577         vcpu->arch.gmap->private = vcpu->kvm;
2578
2579         return 0;
2580 }
2581
2582 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2583 {
2584         if (!kvm_s390_use_sca_entries())
2585                 return;
2586         read_lock(&vcpu->kvm->arch.sca_lock);
2587         if (vcpu->kvm->arch.use_esca) {
2588                 struct esca_block *sca = vcpu->kvm->arch.sca;
2589
2590                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2591                 sca->cpu[vcpu->vcpu_id].sda = 0;
2592         } else {
2593                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2594
2595                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2596                 sca->cpu[vcpu->vcpu_id].sda = 0;
2597         }
2598         read_unlock(&vcpu->kvm->arch.sca_lock);
2599 }
2600
2601 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2602 {
2603         if (!kvm_s390_use_sca_entries()) {
2604                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2605
2606                 /* we still need the basic sca for the ipte control */
2607                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2608                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2609                 return;
2610         }
2611         read_lock(&vcpu->kvm->arch.sca_lock);
2612         if (vcpu->kvm->arch.use_esca) {
2613                 struct esca_block *sca = vcpu->kvm->arch.sca;
2614
2615                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2616                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2617                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2618                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2619                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2620         } else {
2621                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2622
2623                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2624                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2625                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2626                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2627         }
2628         read_unlock(&vcpu->kvm->arch.sca_lock);
2629 }
2630
2631 /* Basic SCA to Extended SCA data copy routines */
2632 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2633 {
2634         d->sda = s->sda;
2635         d->sigp_ctrl.c = s->sigp_ctrl.c;
2636         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2637 }
2638
2639 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2640 {
2641         int i;
2642
2643         d->ipte_control = s->ipte_control;
2644         d->mcn[0] = s->mcn;
2645         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2646                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2647 }
2648
2649 static int sca_switch_to_extended(struct kvm *kvm)
2650 {
2651         struct bsca_block *old_sca = kvm->arch.sca;
2652         struct esca_block *new_sca;
2653         struct kvm_vcpu *vcpu;
2654         unsigned int vcpu_idx;
2655         u32 scaol, scaoh;
2656
2657         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2658         if (!new_sca)
2659                 return -ENOMEM;
2660
2661         scaoh = (u32)((u64)(new_sca) >> 32);
2662         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2663
2664         kvm_s390_vcpu_block_all(kvm);
2665         write_lock(&kvm->arch.sca_lock);
2666
2667         sca_copy_b_to_e(new_sca, old_sca);
2668
2669         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2670                 vcpu->arch.sie_block->scaoh = scaoh;
2671                 vcpu->arch.sie_block->scaol = scaol;
2672                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2673         }
2674         kvm->arch.sca = new_sca;
2675         kvm->arch.use_esca = 1;
2676
2677         write_unlock(&kvm->arch.sca_lock);
2678         kvm_s390_vcpu_unblock_all(kvm);
2679
2680         free_page((unsigned long)old_sca);
2681
2682         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2683                  old_sca, kvm->arch.sca);
2684         return 0;
2685 }
2686
2687 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2688 {
2689         int rc;
2690
2691         if (!kvm_s390_use_sca_entries()) {
2692                 if (id < KVM_MAX_VCPUS)
2693                         return true;
2694                 return false;
2695         }
2696         if (id < KVM_S390_BSCA_CPU_SLOTS)
2697                 return true;
2698         if (!sclp.has_esca || !sclp.has_64bscao)
2699                 return false;
2700
2701         mutex_lock(&kvm->lock);
2702         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2703         mutex_unlock(&kvm->lock);
2704
2705         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2706 }
2707
2708 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2709 {
2710         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2711         kvm_clear_async_pf_completion_queue(vcpu);
2712         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2713                                     KVM_SYNC_GPRS |
2714                                     KVM_SYNC_ACRS |
2715                                     KVM_SYNC_CRS |
2716                                     KVM_SYNC_ARCH0 |
2717                                     KVM_SYNC_PFAULT;
2718         kvm_s390_set_prefix(vcpu, 0);
2719         if (test_kvm_facility(vcpu->kvm, 64))
2720                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2721         if (test_kvm_facility(vcpu->kvm, 82))
2722                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2723         if (test_kvm_facility(vcpu->kvm, 133))
2724                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2725         if (test_kvm_facility(vcpu->kvm, 156))
2726                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2727         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2728          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2729          */
2730         if (MACHINE_HAS_VX)
2731                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2732         else
2733                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2734
2735         if (kvm_is_ucontrol(vcpu->kvm))
2736                 return __kvm_ucontrol_vcpu_init(vcpu);
2737
2738         return 0;
2739 }
2740
2741 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2742 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2743 {
2744         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2745         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2746         vcpu->arch.cputm_start = get_tod_clock_fast();
2747         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2748 }
2749
2750 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2751 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2752 {
2753         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2754         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2755         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2756         vcpu->arch.cputm_start = 0;
2757         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2758 }
2759
2760 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2761 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2762 {
2763         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2764         vcpu->arch.cputm_enabled = true;
2765         __start_cpu_timer_accounting(vcpu);
2766 }
2767
2768 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2769 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2770 {
2771         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2772         __stop_cpu_timer_accounting(vcpu);
2773         vcpu->arch.cputm_enabled = false;
2774 }
2775
2776 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2777 {
2778         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2779         __enable_cpu_timer_accounting(vcpu);
2780         preempt_enable();
2781 }
2782
2783 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2784 {
2785         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2786         __disable_cpu_timer_accounting(vcpu);
2787         preempt_enable();
2788 }
2789
2790 /* set the cpu timer - may only be called from the VCPU thread itself */
2791 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2792 {
2793         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2794         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2795         if (vcpu->arch.cputm_enabled)
2796                 vcpu->arch.cputm_start = get_tod_clock_fast();
2797         vcpu->arch.sie_block->cputm = cputm;
2798         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2799         preempt_enable();
2800 }
2801
2802 /* update and get the cpu timer - can also be called from other VCPU threads */
2803 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2804 {
2805         unsigned int seq;
2806         __u64 value;
2807
2808         if (unlikely(!vcpu->arch.cputm_enabled))
2809                 return vcpu->arch.sie_block->cputm;
2810
2811         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2812         do {
2813                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2814                 /*
2815                  * If the writer would ever execute a read in the critical
2816                  * section, e.g. in irq context, we have a deadlock.
2817                  */
2818                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2819                 value = vcpu->arch.sie_block->cputm;
2820                 /* if cputm_start is 0, accounting is being started/stopped */
2821                 if (likely(vcpu->arch.cputm_start))
2822                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2823         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2824         preempt_enable();
2825         return value;
2826 }
2827
2828 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2829 {
2830
2831         gmap_enable(vcpu->arch.enabled_gmap);
2832         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2833         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2834                 __start_cpu_timer_accounting(vcpu);
2835         vcpu->cpu = cpu;
2836 }
2837
2838 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2839 {
2840         vcpu->cpu = -1;
2841         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2842                 __stop_cpu_timer_accounting(vcpu);
2843         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2844         vcpu->arch.enabled_gmap = gmap_get_enabled();
2845         gmap_disable(vcpu->arch.enabled_gmap);
2846
2847 }
2848
2849 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2850 {
2851         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2852         vcpu->arch.sie_block->gpsw.mask = 0UL;
2853         vcpu->arch.sie_block->gpsw.addr = 0UL;
2854         kvm_s390_set_prefix(vcpu, 0);
2855         kvm_s390_set_cpu_timer(vcpu, 0);
2856         vcpu->arch.sie_block->ckc       = 0UL;
2857         vcpu->arch.sie_block->todpr     = 0;
2858         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2859         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2860                                         CR0_INTERRUPT_KEY_SUBMASK |
2861                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2862         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2863                                         CR14_UNUSED_33 |
2864                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2865         /* make sure the new fpc will be lazily loaded */
2866         save_fpu_regs();
2867         current->thread.fpu.fpc = 0;
2868         vcpu->arch.sie_block->gbea = 1;
2869         vcpu->arch.sie_block->pp = 0;
2870         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2871         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2872         kvm_clear_async_pf_completion_queue(vcpu);
2873         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2874                 kvm_s390_vcpu_stop(vcpu);
2875         kvm_s390_clear_local_irqs(vcpu);
2876 }
2877
2878 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2879 {
2880         mutex_lock(&vcpu->kvm->lock);
2881         preempt_disable();
2882         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2883         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2884         preempt_enable();
2885         mutex_unlock(&vcpu->kvm->lock);
2886         if (!kvm_is_ucontrol(vcpu->kvm)) {
2887                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2888                 sca_add_vcpu(vcpu);
2889         }
2890         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2891                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2892         /* make vcpu_load load the right gmap on the first trigger */
2893         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2894 }
2895
2896 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2897 {
2898         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2899             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2900                 return true;
2901         return false;
2902 }
2903
2904 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2905 {
2906         /* At least one ECC subfunction must be present */
2907         return kvm_has_pckmo_subfunc(kvm, 32) ||
2908                kvm_has_pckmo_subfunc(kvm, 33) ||
2909                kvm_has_pckmo_subfunc(kvm, 34) ||
2910                kvm_has_pckmo_subfunc(kvm, 40) ||
2911                kvm_has_pckmo_subfunc(kvm, 41);
2912
2913 }
2914
2915 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2916 {
2917         /*
2918          * If the AP instructions are not being interpreted and the MSAX3
2919          * facility is not configured for the guest, there is nothing to set up.
2920          */
2921         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2922                 return;
2923
2924         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2925         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2926         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2927         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2928
2929         if (vcpu->kvm->arch.crypto.apie)
2930                 vcpu->arch.sie_block->eca |= ECA_APIE;
2931
2932         /* Set up protected key support */
2933         if (vcpu->kvm->arch.crypto.aes_kw) {
2934                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2935                 /* ecc is also wrapped with AES key */
2936                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2937                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2938         }
2939
2940         if (vcpu->kvm->arch.crypto.dea_kw)
2941                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2942 }
2943
2944 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2945 {
2946         free_page(vcpu->arch.sie_block->cbrlo);
2947         vcpu->arch.sie_block->cbrlo = 0;
2948 }
2949
2950 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2951 {
2952         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2953         if (!vcpu->arch.sie_block->cbrlo)
2954                 return -ENOMEM;
2955         return 0;
2956 }
2957
2958 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2959 {
2960         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2961
2962         vcpu->arch.sie_block->ibc = model->ibc;
2963         if (test_kvm_facility(vcpu->kvm, 7))
2964                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2965 }
2966
2967 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2968 {
2969         int rc = 0;
2970
2971         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2972                                                     CPUSTAT_SM |
2973                                                     CPUSTAT_STOPPED);
2974
2975         if (test_kvm_facility(vcpu->kvm, 78))
2976                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2977         else if (test_kvm_facility(vcpu->kvm, 8))
2978                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2979
2980         kvm_s390_vcpu_setup_model(vcpu);
2981
2982         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2983         if (MACHINE_HAS_ESOP)
2984                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2985         if (test_kvm_facility(vcpu->kvm, 9))
2986                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2987         if (test_kvm_facility(vcpu->kvm, 73))
2988                 vcpu->arch.sie_block->ecb |= ECB_TE;
2989
2990         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2991                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2992         if (test_kvm_facility(vcpu->kvm, 130))
2993                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2994         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2995         if (sclp.has_cei)
2996                 vcpu->arch.sie_block->eca |= ECA_CEI;
2997         if (sclp.has_ib)
2998                 vcpu->arch.sie_block->eca |= ECA_IB;
2999         if (sclp.has_siif)
3000                 vcpu->arch.sie_block->eca |= ECA_SII;
3001         if (sclp.has_sigpif)
3002                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3003         if (test_kvm_facility(vcpu->kvm, 129)) {
3004                 vcpu->arch.sie_block->eca |= ECA_VX;
3005                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3006         }
3007         if (test_kvm_facility(vcpu->kvm, 139))
3008                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3009         if (test_kvm_facility(vcpu->kvm, 156))
3010                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3011         if (vcpu->arch.sie_block->gd) {
3012                 vcpu->arch.sie_block->eca |= ECA_AIV;
3013                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3014                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3015         }
3016         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3017                                         | SDNXC;
3018         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3019
3020         if (sclp.has_kss)
3021                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3022         else
3023                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3024
3025         if (vcpu->kvm->arch.use_cmma) {
3026                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3027                 if (rc)
3028                         return rc;
3029         }
3030         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3031         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3032
3033         vcpu->arch.sie_block->hpid = HPID_KVM;
3034
3035         kvm_s390_vcpu_crypto_setup(vcpu);
3036
3037         return rc;
3038 }
3039
3040 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3041                                       unsigned int id)
3042 {
3043         struct kvm_vcpu *vcpu;
3044         struct sie_page *sie_page;
3045         int rc = -EINVAL;
3046
3047         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3048                 goto out;
3049
3050         rc = -ENOMEM;
3051
3052         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3053         if (!vcpu)
3054                 goto out;
3055
3056         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3057         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3058         if (!sie_page)
3059                 goto out_free_cpu;
3060
3061         vcpu->arch.sie_block = &sie_page->sie_block;
3062         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3063
3064         /* the real guest size will always be smaller than msl */
3065         vcpu->arch.sie_block->mso = 0;
3066         vcpu->arch.sie_block->msl = sclp.hamax;
3067
3068         vcpu->arch.sie_block->icpua = id;
3069         spin_lock_init(&vcpu->arch.local_int.lock);
3070         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3071         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3072                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3073         seqcount_init(&vcpu->arch.cputm_seqcount);
3074
3075         rc = kvm_vcpu_init(vcpu, kvm, id);
3076         if (rc)
3077                 goto out_free_sie_block;
3078         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3079                  vcpu->arch.sie_block);
3080         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3081
3082         return vcpu;
3083 out_free_sie_block:
3084         free_page((unsigned long)(vcpu->arch.sie_block));
3085 out_free_cpu:
3086         kmem_cache_free(kvm_vcpu_cache, vcpu);
3087 out:
3088         return ERR_PTR(rc);
3089 }
3090
3091 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3092 {
3093         return kvm_s390_vcpu_has_irq(vcpu, 0);
3094 }
3095
3096 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3097 {
3098         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3099 }
3100
3101 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3102 {
3103         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3104         exit_sie(vcpu);
3105 }
3106
3107 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3108 {
3109         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3110 }
3111
3112 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3113 {
3114         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3115         exit_sie(vcpu);
3116 }
3117
3118 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3119 {
3120         return atomic_read(&vcpu->arch.sie_block->prog20) &
3121                (PROG_BLOCK_SIE | PROG_REQUEST);
3122 }
3123
3124 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3125 {
3126         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3127 }
3128
3129 /*
3130  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3131  * If the CPU is not running (e.g. waiting as idle) the function will
3132  * return immediately. */
3133 void exit_sie(struct kvm_vcpu *vcpu)
3134 {
3135         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3136         kvm_s390_vsie_kick(vcpu);
3137         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3138                 cpu_relax();
3139 }
3140
3141 /* Kick a guest cpu out of SIE to process a request synchronously */
3142 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3143 {
3144         kvm_make_request(req, vcpu);
3145         kvm_s390_vcpu_request(vcpu);
3146 }
3147
3148 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3149                               unsigned long end)
3150 {
3151         struct kvm *kvm = gmap->private;
3152         struct kvm_vcpu *vcpu;
3153         unsigned long prefix;
3154         int i;
3155
3156         if (gmap_is_shadow(gmap))
3157                 return;
3158         if (start >= 1UL << 31)
3159                 /* We are only interested in prefix pages */
3160                 return;
3161         kvm_for_each_vcpu(i, vcpu, kvm) {
3162                 /* match against both prefix pages */
3163                 prefix = kvm_s390_get_prefix(vcpu);
3164                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3165                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3166                                    start, end);
3167                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3168                 }
3169         }
3170 }
3171
3172 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3173 {
3174         /* do not poll with more than halt_poll_max_steal percent of steal time */
3175         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3176             halt_poll_max_steal) {
3177                 vcpu->stat.halt_no_poll_steal++;
3178                 return true;
3179         }
3180         return false;
3181 }
3182
3183 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3184 {
3185         /* kvm common code refers to this, but never calls it */
3186         BUG();
3187         return 0;
3188 }
3189
3190 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3191                                            struct kvm_one_reg *reg)
3192 {
3193         int r = -EINVAL;
3194
3195         switch (reg->id) {
3196         case KVM_REG_S390_TODPR:
3197                 r = put_user(vcpu->arch.sie_block->todpr,
3198                              (u32 __user *)reg->addr);
3199                 break;
3200         case KVM_REG_S390_EPOCHDIFF:
3201                 r = put_user(vcpu->arch.sie_block->epoch,
3202                              (u64 __user *)reg->addr);
3203                 break;
3204         case KVM_REG_S390_CPU_TIMER:
3205                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3206                              (u64 __user *)reg->addr);
3207                 break;
3208         case KVM_REG_S390_CLOCK_COMP:
3209                 r = put_user(vcpu->arch.sie_block->ckc,
3210                              (u64 __user *)reg->addr);
3211                 break;
3212         case KVM_REG_S390_PFTOKEN:
3213                 r = put_user(vcpu->arch.pfault_token,
3214                              (u64 __user *)reg->addr);
3215                 break;
3216         case KVM_REG_S390_PFCOMPARE:
3217                 r = put_user(vcpu->arch.pfault_compare,
3218                              (u64 __user *)reg->addr);
3219                 break;
3220         case KVM_REG_S390_PFSELECT:
3221                 r = put_user(vcpu->arch.pfault_select,
3222                              (u64 __user *)reg->addr);
3223                 break;
3224         case KVM_REG_S390_PP:
3225                 r = put_user(vcpu->arch.sie_block->pp,
3226                              (u64 __user *)reg->addr);
3227                 break;
3228         case KVM_REG_S390_GBEA:
3229                 r = put_user(vcpu->arch.sie_block->gbea,
3230                              (u64 __user *)reg->addr);
3231                 break;
3232         default:
3233                 break;
3234         }
3235
3236         return r;
3237 }
3238
3239 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3240                                            struct kvm_one_reg *reg)
3241 {
3242         int r = -EINVAL;
3243         __u64 val;
3244
3245         switch (reg->id) {
3246         case KVM_REG_S390_TODPR:
3247                 r = get_user(vcpu->arch.sie_block->todpr,
3248                              (u32 __user *)reg->addr);
3249                 break;
3250         case KVM_REG_S390_EPOCHDIFF:
3251                 r = get_user(vcpu->arch.sie_block->epoch,
3252                              (u64 __user *)reg->addr);
3253                 break;
3254         case KVM_REG_S390_CPU_TIMER:
3255                 r = get_user(val, (u64 __user *)reg->addr);
3256                 if (!r)
3257                         kvm_s390_set_cpu_timer(vcpu, val);
3258                 break;
3259         case KVM_REG_S390_CLOCK_COMP:
3260                 r = get_user(vcpu->arch.sie_block->ckc,
3261                              (u64 __user *)reg->addr);
3262                 break;
3263         case KVM_REG_S390_PFTOKEN:
3264                 r = get_user(vcpu->arch.pfault_token,
3265                              (u64 __user *)reg->addr);
3266                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3267                         kvm_clear_async_pf_completion_queue(vcpu);
3268                 break;
3269         case KVM_REG_S390_PFCOMPARE:
3270                 r = get_user(vcpu->arch.pfault_compare,
3271                              (u64 __user *)reg->addr);
3272                 break;
3273         case KVM_REG_S390_PFSELECT:
3274                 r = get_user(vcpu->arch.pfault_select,
3275                              (u64 __user *)reg->addr);
3276                 break;
3277         case KVM_REG_S390_PP:
3278                 r = get_user(vcpu->arch.sie_block->pp,
3279                              (u64 __user *)reg->addr);
3280                 break;
3281         case KVM_REG_S390_GBEA:
3282                 r = get_user(vcpu->arch.sie_block->gbea,
3283                              (u64 __user *)reg->addr);
3284                 break;
3285         default:
3286                 break;
3287         }
3288
3289         return r;
3290 }
3291
3292 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3293 {
3294         kvm_s390_vcpu_initial_reset(vcpu);
3295         return 0;
3296 }
3297
3298 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3299 {
3300         vcpu_load(vcpu);
3301         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3302         vcpu_put(vcpu);
3303         return 0;
3304 }
3305
3306 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3307 {
3308         vcpu_load(vcpu);
3309         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3310         vcpu_put(vcpu);
3311         return 0;
3312 }
3313
3314 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3315                                   struct kvm_sregs *sregs)
3316 {
3317         vcpu_load(vcpu);
3318
3319         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3320         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3321
3322         vcpu_put(vcpu);
3323         return 0;
3324 }
3325
3326 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3327                                   struct kvm_sregs *sregs)
3328 {
3329         vcpu_load(vcpu);
3330
3331         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3332         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3333
3334         vcpu_put(vcpu);
3335         return 0;
3336 }
3337
3338 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3339 {
3340         int ret = 0;
3341
3342         vcpu_load(vcpu);
3343
3344         if (test_fp_ctl(fpu->fpc)) {
3345                 ret = -EINVAL;
3346                 goto out;
3347         }
3348         vcpu->run->s.regs.fpc = fpu->fpc;
3349         if (MACHINE_HAS_VX)
3350                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3351                                  (freg_t *) fpu->fprs);
3352         else
3353                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3354
3355 out:
3356         vcpu_put(vcpu);
3357         return ret;
3358 }
3359
3360 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3361 {
3362         vcpu_load(vcpu);
3363
3364         /* make sure we have the latest values */
3365         save_fpu_regs();
3366         if (MACHINE_HAS_VX)
3367                 convert_vx_to_fp((freg_t *) fpu->fprs,
3368                                  (__vector128 *) vcpu->run->s.regs.vrs);
3369         else
3370                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3371         fpu->fpc = vcpu->run->s.regs.fpc;
3372
3373         vcpu_put(vcpu);
3374         return 0;
3375 }
3376
3377 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3378 {
3379         int rc = 0;
3380
3381         if (!is_vcpu_stopped(vcpu))
3382                 rc = -EBUSY;
3383         else {
3384                 vcpu->run->psw_mask = psw.mask;
3385                 vcpu->run->psw_addr = psw.addr;
3386         }
3387         return rc;
3388 }
3389
3390 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3391                                   struct kvm_translation *tr)
3392 {
3393         return -EINVAL; /* not implemented yet */
3394 }
3395
3396 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3397                               KVM_GUESTDBG_USE_HW_BP | \
3398                               KVM_GUESTDBG_ENABLE)
3399
3400 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3401                                         struct kvm_guest_debug *dbg)
3402 {
3403         int rc = 0;
3404
3405         vcpu_load(vcpu);
3406
3407         vcpu->guest_debug = 0;
3408         kvm_s390_clear_bp_data(vcpu);
3409
3410         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3411                 rc = -EINVAL;
3412                 goto out;
3413         }
3414         if (!sclp.has_gpere) {
3415                 rc = -EINVAL;
3416                 goto out;
3417         }
3418
3419         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3420                 vcpu->guest_debug = dbg->control;
3421                 /* enforce guest PER */
3422                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3423
3424                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3425                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3426         } else {
3427                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3428                 vcpu->arch.guestdbg.last_bp = 0;
3429         }
3430
3431         if (rc) {
3432                 vcpu->guest_debug = 0;
3433                 kvm_s390_clear_bp_data(vcpu);
3434                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3435         }
3436
3437 out:
3438         vcpu_put(vcpu);
3439         return rc;
3440 }
3441
3442 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3443                                     struct kvm_mp_state *mp_state)
3444 {
3445         int ret;
3446
3447         vcpu_load(vcpu);
3448
3449         /* CHECK_STOP and LOAD are not supported yet */
3450         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3451                                       KVM_MP_STATE_OPERATING;
3452
3453         vcpu_put(vcpu);
3454         return ret;
3455 }
3456
3457 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3458                                     struct kvm_mp_state *mp_state)
3459 {
3460         int rc = 0;
3461
3462         vcpu_load(vcpu);
3463
3464         /* user space knows about this interface - let it control the state */
3465         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3466
3467         switch (mp_state->mp_state) {
3468         case KVM_MP_STATE_STOPPED:
3469                 kvm_s390_vcpu_stop(vcpu);
3470                 break;
3471         case KVM_MP_STATE_OPERATING:
3472                 kvm_s390_vcpu_start(vcpu);
3473                 break;
3474         case KVM_MP_STATE_LOAD:
3475         case KVM_MP_STATE_CHECK_STOP:
3476                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3477         default:
3478                 rc = -ENXIO;
3479         }
3480
3481         vcpu_put(vcpu);
3482         return rc;
3483 }
3484
3485 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3486 {
3487         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3488 }
3489
3490 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3491 {
3492 retry:
3493         kvm_s390_vcpu_request_handled(vcpu);
3494         if (!kvm_request_pending(vcpu))
3495                 return 0;
3496         /*
3497          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3498          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3499          * This ensures that the ipte instruction for this request has
3500          * already finished. We might race against a second unmapper that
3501          * wants to set the blocking bit. Lets just retry the request loop.
3502          */
3503         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3504                 int rc;
3505                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3506                                           kvm_s390_get_prefix(vcpu),
3507                                           PAGE_SIZE * 2, PROT_WRITE);
3508                 if (rc) {
3509                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3510                         return rc;
3511                 }
3512                 goto retry;
3513         }
3514
3515         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3516                 vcpu->arch.sie_block->ihcpu = 0xffff;
3517                 goto retry;
3518         }
3519
3520         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3521                 if (!ibs_enabled(vcpu)) {
3522                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3523                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3524                 }
3525                 goto retry;
3526         }
3527
3528         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3529                 if (ibs_enabled(vcpu)) {
3530                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3531                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3532                 }
3533                 goto retry;
3534         }
3535
3536         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3537                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3538                 goto retry;
3539         }
3540
3541         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3542                 /*
3543                  * Disable CMM virtualization; we will emulate the ESSA
3544                  * instruction manually, in order to provide additional
3545                  * functionalities needed for live migration.
3546                  */
3547                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3548                 goto retry;
3549         }
3550
3551         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3552                 /*
3553                  * Re-enable CMM virtualization if CMMA is available and
3554                  * CMM has been used.
3555                  */
3556                 if ((vcpu->kvm->arch.use_cmma) &&
3557                     (vcpu->kvm->mm->context.uses_cmm))
3558                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3559                 goto retry;
3560         }
3561
3562         /* nothing to do, just clear the request */
3563         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3564         /* we left the vsie handler, nothing to do, just clear the request */
3565         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3566
3567         return 0;
3568 }
3569
3570 void kvm_s390_set_tod_clock(struct kvm *kvm,
3571                             const struct kvm_s390_vm_tod_clock *gtod)
3572 {
3573         struct kvm_vcpu *vcpu;
3574         struct kvm_s390_tod_clock_ext htod;
3575         int i;
3576
3577         mutex_lock(&kvm->lock);
3578         preempt_disable();
3579
3580         get_tod_clock_ext((char *)&htod);
3581
3582         kvm->arch.epoch = gtod->tod - htod.tod;
3583         kvm->arch.epdx = 0;
3584         if (test_kvm_facility(kvm, 139)) {
3585                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3586                 if (kvm->arch.epoch > gtod->tod)
3587                         kvm->arch.epdx -= 1;
3588         }
3589
3590         kvm_s390_vcpu_block_all(kvm);
3591         kvm_for_each_vcpu(i, vcpu, kvm) {
3592                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3593                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3594         }
3595
3596         kvm_s390_vcpu_unblock_all(kvm);
3597         preempt_enable();
3598         mutex_unlock(&kvm->lock);
3599 }
3600
3601 /**
3602  * kvm_arch_fault_in_page - fault-in guest page if necessary
3603  * @vcpu: The corresponding virtual cpu
3604  * @gpa: Guest physical address
3605  * @writable: Whether the page should be writable or not
3606  *
3607  * Make sure that a guest page has been faulted-in on the host.
3608  *
3609  * Return: Zero on success, negative error code otherwise.
3610  */
3611 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3612 {
3613         return gmap_fault(vcpu->arch.gmap, gpa,
3614                           writable ? FAULT_FLAG_WRITE : 0);
3615 }
3616
3617 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3618                                       unsigned long token)
3619 {
3620         struct kvm_s390_interrupt inti;
3621         struct kvm_s390_irq irq;
3622
3623         if (start_token) {
3624                 irq.u.ext.ext_params2 = token;
3625                 irq.type = KVM_S390_INT_PFAULT_INIT;
3626                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3627         } else {
3628                 inti.type = KVM_S390_INT_PFAULT_DONE;
3629                 inti.parm64 = token;
3630                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3631         }
3632 }
3633
3634 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3635                                      struct kvm_async_pf *work)
3636 {
3637         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3638         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3639 }
3640
3641 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3642                                  struct kvm_async_pf *work)
3643 {
3644         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3645         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3646 }
3647
3648 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3649                                struct kvm_async_pf *work)
3650 {
3651         /* s390 will always inject the page directly */
3652 }
3653
3654 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3655 {
3656         /*
3657          * s390 will always inject the page directly,
3658          * but we still want check_async_completion to cleanup
3659          */
3660         return true;
3661 }
3662
3663 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3664 {
3665         hva_t hva;
3666         struct kvm_arch_async_pf arch;
3667         int rc;
3668
3669         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3670                 return 0;
3671         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3672             vcpu->arch.pfault_compare)
3673                 return 0;
3674         if (psw_extint_disabled(vcpu))
3675                 return 0;
3676         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3677                 return 0;
3678         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3679                 return 0;
3680         if (!vcpu->arch.gmap->pfault_enabled)
3681                 return 0;
3682
3683         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3684         hva += current->thread.gmap_addr & ~PAGE_MASK;
3685         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3686                 return 0;
3687
3688         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3689         return rc;
3690 }
3691
3692 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3693 {
3694         int rc, cpuflags;
3695
3696         /*
3697          * On s390 notifications for arriving pages will be delivered directly
3698          * to the guest but the house keeping for completed pfaults is
3699          * handled outside the worker.
3700          */
3701         kvm_check_async_pf_completion(vcpu);
3702
3703         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3704         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3705
3706         if (need_resched())
3707                 schedule();
3708
3709         if (test_cpu_flag(CIF_MCCK_PENDING))
3710                 s390_handle_mcck();
3711
3712         if (!kvm_is_ucontrol(vcpu->kvm)) {
3713                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3714                 if (rc)
3715                         return rc;
3716         }
3717
3718         rc = kvm_s390_handle_requests(vcpu);
3719         if (rc)
3720                 return rc;
3721
3722         if (guestdbg_enabled(vcpu)) {
3723                 kvm_s390_backup_guest_per_regs(vcpu);
3724                 kvm_s390_patch_guest_per_regs(vcpu);
3725         }
3726
3727         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3728
3729         vcpu->arch.sie_block->icptcode = 0;
3730         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3731         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3732         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3733
3734         return 0;
3735 }
3736
3737 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3738 {
3739         struct kvm_s390_pgm_info pgm_info = {
3740                 .code = PGM_ADDRESSING,
3741         };
3742         u8 opcode, ilen;
3743         int rc;
3744
3745         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3746         trace_kvm_s390_sie_fault(vcpu);
3747
3748         /*
3749          * We want to inject an addressing exception, which is defined as a
3750          * suppressing or terminating exception. However, since we came here
3751          * by a DAT access exception, the PSW still points to the faulting
3752          * instruction since DAT exceptions are nullifying. So we've got
3753          * to look up the current opcode to get the length of the instruction
3754          * to be able to forward the PSW.
3755          */
3756         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3757         ilen = insn_length(opcode);
3758         if (rc < 0) {
3759                 return rc;
3760         } else if (rc) {
3761                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3762                  * Forward by arbitrary ilc, injection will take care of
3763                  * nullification if necessary.
3764                  */
3765                 pgm_info = vcpu->arch.pgm;
3766                 ilen = 4;
3767         }
3768         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3769         kvm_s390_forward_psw(vcpu, ilen);
3770         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3771 }
3772
3773 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3774 {
3775         struct mcck_volatile_info *mcck_info;
3776         struct sie_page *sie_page;
3777
3778         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3779                    vcpu->arch.sie_block->icptcode);
3780         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3781
3782         if (guestdbg_enabled(vcpu))
3783                 kvm_s390_restore_guest_per_regs(vcpu);
3784
3785         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3786         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3787
3788         if (exit_reason == -EINTR) {
3789                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3790                 sie_page = container_of(vcpu->arch.sie_block,
3791                                         struct sie_page, sie_block);
3792                 mcck_info = &sie_page->mcck_info;
3793                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3794                 return 0;
3795         }
3796
3797         if (vcpu->arch.sie_block->icptcode > 0) {
3798                 int rc = kvm_handle_sie_intercept(vcpu);
3799
3800                 if (rc != -EOPNOTSUPP)
3801                         return rc;
3802                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3803                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3804                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3805                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3806                 return -EREMOTE;
3807         } else if (exit_reason != -EFAULT) {
3808                 vcpu->stat.exit_null++;
3809                 return 0;
3810         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3811                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3812                 vcpu->run->s390_ucontrol.trans_exc_code =
3813                                                 current->thread.gmap_addr;
3814                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3815                 return -EREMOTE;
3816         } else if (current->thread.gmap_pfault) {
3817                 trace_kvm_s390_major_guest_pfault(vcpu);
3818                 current->thread.gmap_pfault = 0;
3819                 if (kvm_arch_setup_async_pf(vcpu))
3820                         return 0;
3821                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3822         }
3823         return vcpu_post_run_fault_in_sie(vcpu);
3824 }
3825
3826 static int __vcpu_run(struct kvm_vcpu *vcpu)
3827 {
3828         int rc, exit_reason;
3829
3830         /*
3831          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3832          * ning the guest), so that memslots (and other stuff) are protected
3833          */
3834         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3835
3836         do {
3837                 rc = vcpu_pre_run(vcpu);
3838                 if (rc)
3839                         break;
3840
3841                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3842                 /*
3843                  * As PF_VCPU will be used in fault handler, between
3844                  * guest_enter and guest_exit should be no uaccess.
3845                  */
3846                 local_irq_disable();
3847                 guest_enter_irqoff();
3848                 __disable_cpu_timer_accounting(vcpu);
3849                 local_irq_enable();
3850                 exit_reason = sie64a(vcpu->arch.sie_block,
3851                                      vcpu->run->s.regs.gprs);
3852                 local_irq_disable();
3853                 __enable_cpu_timer_accounting(vcpu);
3854                 guest_exit_irqoff();
3855                 local_irq_enable();
3856                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3857
3858                 rc = vcpu_post_run(vcpu, exit_reason);
3859         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3860
3861         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3862         return rc;
3863 }
3864
3865 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3866 {
3867         struct runtime_instr_cb *riccb;
3868         struct gs_cb *gscb;
3869
3870         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3871         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3872         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3873         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3874         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3875                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3876         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3877                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3878                 /* some control register changes require a tlb flush */
3879                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3880         }
3881         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3882                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3883                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3884                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3885                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3886                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3887         }
3888         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3889                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3890                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3891                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3892                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3893                         kvm_clear_async_pf_completion_queue(vcpu);
3894         }
3895         /*
3896          * If userspace sets the riccb (e.g. after migration) to a valid state,
3897          * we should enable RI here instead of doing the lazy enablement.
3898          */
3899         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3900             test_kvm_facility(vcpu->kvm, 64) &&
3901             riccb->v &&
3902             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3903                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3904                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3905         }
3906         /*
3907          * If userspace sets the gscb (e.g. after migration) to non-zero,
3908          * we should enable GS here instead of doing the lazy enablement.
3909          */
3910         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3911             test_kvm_facility(vcpu->kvm, 133) &&
3912             gscb->gssm &&
3913             !vcpu->arch.gs_enabled) {
3914                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3915                 vcpu->arch.sie_block->ecb |= ECB_GS;
3916                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3917                 vcpu->arch.gs_enabled = 1;
3918         }
3919         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3920             test_kvm_facility(vcpu->kvm, 82)) {
3921                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3922                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3923         }
3924         save_access_regs(vcpu->arch.host_acrs);
3925         restore_access_regs(vcpu->run->s.regs.acrs);
3926         /* save host (userspace) fprs/vrs */
3927         save_fpu_regs();
3928         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3929         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3930         if (MACHINE_HAS_VX)
3931                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3932         else
3933                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3934         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3935         if (test_fp_ctl(current->thread.fpu.fpc))
3936                 /* User space provided an invalid FPC, let's clear it */
3937                 current->thread.fpu.fpc = 0;
3938         if (MACHINE_HAS_GS) {
3939                 preempt_disable();
3940                 __ctl_set_bit(2, 4);
3941                 if (current->thread.gs_cb) {
3942                         vcpu->arch.host_gscb = current->thread.gs_cb;
3943                         save_gs_cb(vcpu->arch.host_gscb);
3944                 }
3945                 if (vcpu->arch.gs_enabled) {
3946                         current->thread.gs_cb = (struct gs_cb *)
3947                                                 &vcpu->run->s.regs.gscb;
3948                         restore_gs_cb(current->thread.gs_cb);
3949                 }
3950                 preempt_enable();
3951         }
3952         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3953
3954         kvm_run->kvm_dirty_regs = 0;
3955 }
3956
3957 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3958 {
3959         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3960         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3961         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3962         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3963         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3964         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3965         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3966         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3967         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3968         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3969         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3970         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3971         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3972         save_access_regs(vcpu->run->s.regs.acrs);
3973         restore_access_regs(vcpu->arch.host_acrs);
3974         /* Save guest register state */
3975         save_fpu_regs();
3976         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3977         /* Restore will be done lazily at return */
3978         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3979         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3980         if (MACHINE_HAS_GS) {
3981                 __ctl_set_bit(2, 4);
3982                 if (vcpu->arch.gs_enabled)
3983                         save_gs_cb(current->thread.gs_cb);
3984                 preempt_disable();
3985                 current->thread.gs_cb = vcpu->arch.host_gscb;
3986                 restore_gs_cb(vcpu->arch.host_gscb);
3987                 preempt_enable();
3988                 if (!vcpu->arch.host_gscb)
3989                         __ctl_clear_bit(2, 4);
3990                 vcpu->arch.host_gscb = NULL;
3991         }
3992         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3993 }
3994
3995 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3996 {
3997         int rc;
3998
3999         if (kvm_run->immediate_exit)
4000                 return -EINTR;
4001
4002         vcpu_load(vcpu);
4003
4004         if (guestdbg_exit_pending(vcpu)) {
4005                 kvm_s390_prepare_debug_exit(vcpu);
4006                 rc = 0;
4007                 goto out;
4008         }
4009
4010         kvm_sigset_activate(vcpu);
4011
4012         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4013                 kvm_s390_vcpu_start(vcpu);
4014         } else if (is_vcpu_stopped(vcpu)) {
4015                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4016                                    vcpu->vcpu_id);
4017                 rc = -EINVAL;
4018                 goto out;
4019         }
4020
4021         sync_regs(vcpu, kvm_run);
4022         enable_cpu_timer_accounting(vcpu);
4023
4024         might_fault();
4025         rc = __vcpu_run(vcpu);
4026
4027         if (signal_pending(current) && !rc) {
4028                 kvm_run->exit_reason = KVM_EXIT_INTR;
4029                 rc = -EINTR;
4030         }
4031
4032         if (guestdbg_exit_pending(vcpu) && !rc)  {
4033                 kvm_s390_prepare_debug_exit(vcpu);
4034                 rc = 0;
4035         }
4036
4037         if (rc == -EREMOTE) {
4038                 /* userspace support is needed, kvm_run has been prepared */
4039                 rc = 0;
4040         }
4041
4042         disable_cpu_timer_accounting(vcpu);
4043         store_regs(vcpu, kvm_run);
4044
4045         kvm_sigset_deactivate(vcpu);
4046
4047         vcpu->stat.exit_userspace++;
4048 out:
4049         vcpu_put(vcpu);
4050         return rc;
4051 }
4052
4053 /*
4054  * store status at address
4055  * we use have two special cases:
4056  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4057  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4058  */
4059 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4060 {
4061         unsigned char archmode = 1;
4062         freg_t fprs[NUM_FPRS];
4063         unsigned int px;
4064         u64 clkcomp, cputm;
4065         int rc;
4066
4067         px = kvm_s390_get_prefix(vcpu);
4068         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4069                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4070                         return -EFAULT;
4071                 gpa = 0;
4072         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4073                 if (write_guest_real(vcpu, 163, &archmode, 1))
4074                         return -EFAULT;
4075                 gpa = px;
4076         } else
4077                 gpa -= __LC_FPREGS_SAVE_AREA;
4078
4079         /* manually convert vector registers if necessary */
4080         if (MACHINE_HAS_VX) {
4081                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4082                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4083                                      fprs, 128);
4084         } else {
4085                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4086                                      vcpu->run->s.regs.fprs, 128);
4087         }
4088         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4089                               vcpu->run->s.regs.gprs, 128);
4090         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4091                               &vcpu->arch.sie_block->gpsw, 16);
4092         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4093                               &px, 4);
4094         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4095                               &vcpu->run->s.regs.fpc, 4);
4096         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4097                               &vcpu->arch.sie_block->todpr, 4);
4098         cputm = kvm_s390_get_cpu_timer(vcpu);
4099         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4100                               &cputm, 8);
4101         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4102         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4103                               &clkcomp, 8);
4104         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4105                               &vcpu->run->s.regs.acrs, 64);
4106         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4107                               &vcpu->arch.sie_block->gcr, 128);
4108         return rc ? -EFAULT : 0;
4109 }
4110
4111 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4112 {
4113         /*
4114          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4115          * switch in the run ioctl. Let's update our copies before we save
4116          * it into the save area
4117          */
4118         save_fpu_regs();
4119         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4120         save_access_regs(vcpu->run->s.regs.acrs);
4121
4122         return kvm_s390_store_status_unloaded(vcpu, addr);
4123 }
4124
4125 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4126 {
4127         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4128         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4129 }
4130
4131 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4132 {
4133         unsigned int i;
4134         struct kvm_vcpu *vcpu;
4135
4136         kvm_for_each_vcpu(i, vcpu, kvm) {
4137                 __disable_ibs_on_vcpu(vcpu);
4138         }
4139 }
4140
4141 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4142 {
4143         if (!sclp.has_ibs)
4144                 return;
4145         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4146         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4147 }
4148
4149 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4150 {
4151         int i, online_vcpus, started_vcpus = 0;
4152
4153         if (!is_vcpu_stopped(vcpu))
4154                 return;
4155
4156         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4157         /* Only one cpu at a time may enter/leave the STOPPED state. */
4158         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4159         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4160
4161         for (i = 0; i < online_vcpus; i++) {
4162                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4163                         started_vcpus++;
4164         }
4165
4166         if (started_vcpus == 0) {
4167                 /* we're the only active VCPU -> speed it up */
4168                 __enable_ibs_on_vcpu(vcpu);
4169         } else if (started_vcpus == 1) {
4170                 /*
4171                  * As we are starting a second VCPU, we have to disable
4172                  * the IBS facility on all VCPUs to remove potentially
4173                  * oustanding ENABLE requests.
4174                  */
4175                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4176         }
4177
4178         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4179         /*
4180          * Another VCPU might have used IBS while we were offline.
4181          * Let's play safe and flush the VCPU at startup.
4182          */
4183         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4184         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4185         return;
4186 }
4187
4188 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4189 {
4190         int i, online_vcpus, started_vcpus = 0;
4191         struct kvm_vcpu *started_vcpu = NULL;
4192
4193         if (is_vcpu_stopped(vcpu))
4194                 return;
4195
4196         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4197         /* Only one cpu at a time may enter/leave the STOPPED state. */
4198         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4199         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4200
4201         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4202         kvm_s390_clear_stop_irq(vcpu);
4203
4204         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4205         __disable_ibs_on_vcpu(vcpu);
4206
4207         for (i = 0; i < online_vcpus; i++) {
4208                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4209                         started_vcpus++;
4210                         started_vcpu = vcpu->kvm->vcpus[i];
4211                 }
4212         }
4213
4214         if (started_vcpus == 1) {
4215                 /*
4216                  * As we only have one VCPU left, we want to enable the
4217                  * IBS facility for that VCPU to speed it up.
4218                  */
4219                 __enable_ibs_on_vcpu(started_vcpu);
4220         }
4221
4222         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4223         return;
4224 }
4225
4226 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4227                                      struct kvm_enable_cap *cap)
4228 {
4229         int r;
4230
4231         if (cap->flags)
4232                 return -EINVAL;
4233
4234         switch (cap->cap) {
4235         case KVM_CAP_S390_CSS_SUPPORT:
4236                 if (!vcpu->kvm->arch.css_support) {
4237                         vcpu->kvm->arch.css_support = 1;
4238                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4239                         trace_kvm_s390_enable_css(vcpu->kvm);
4240                 }
4241                 r = 0;
4242                 break;
4243         default:
4244                 r = -EINVAL;
4245                 break;
4246         }
4247         return r;
4248 }
4249
4250 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4251                                   struct kvm_s390_mem_op *mop)
4252 {
4253         void __user *uaddr = (void __user *)mop->buf;
4254         void *tmpbuf = NULL;
4255         int r, srcu_idx;
4256         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4257                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4258
4259         if (mop->flags & ~supported_flags)
4260                 return -EINVAL;
4261
4262         if (mop->size > MEM_OP_MAX_SIZE)
4263                 return -E2BIG;
4264
4265         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4266                 tmpbuf = vmalloc(mop->size);
4267                 if (!tmpbuf)
4268                         return -ENOMEM;
4269         }
4270
4271         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4272
4273         switch (mop->op) {
4274         case KVM_S390_MEMOP_LOGICAL_READ:
4275                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4276                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4277                                             mop->size, GACC_FETCH);
4278                         break;
4279                 }
4280                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4281                 if (r == 0) {
4282                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4283                                 r = -EFAULT;
4284                 }
4285                 break;
4286         case KVM_S390_MEMOP_LOGICAL_WRITE:
4287                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4288                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4289                                             mop->size, GACC_STORE);
4290                         break;
4291                 }
4292                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4293                         r = -EFAULT;
4294                         break;
4295                 }
4296                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4297                 break;
4298         default:
4299                 r = -EINVAL;
4300         }
4301
4302         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4303
4304         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4305                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4306
4307         vfree(tmpbuf);
4308         return r;
4309 }
4310
4311 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4312                                unsigned int ioctl, unsigned long arg)
4313 {
4314         struct kvm_vcpu *vcpu = filp->private_data;
4315         void __user *argp = (void __user *)arg;
4316
4317         switch (ioctl) {
4318         case KVM_S390_IRQ: {
4319                 struct kvm_s390_irq s390irq;
4320
4321                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4322                         return -EFAULT;
4323                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4324         }
4325         case KVM_S390_INTERRUPT: {
4326                 struct kvm_s390_interrupt s390int;
4327                 struct kvm_s390_irq s390irq;
4328
4329                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4330                         return -EFAULT;
4331                 if (s390int_to_s390irq(&s390int, &s390irq))
4332                         return -EINVAL;
4333                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4334         }
4335         }
4336         return -ENOIOCTLCMD;
4337 }
4338
4339 long kvm_arch_vcpu_ioctl(struct file *filp,
4340                          unsigned int ioctl, unsigned long arg)
4341 {
4342         struct kvm_vcpu *vcpu = filp->private_data;
4343         void __user *argp = (void __user *)arg;
4344         int idx;
4345         long r;
4346
4347         vcpu_load(vcpu);
4348
4349         switch (ioctl) {
4350         case KVM_S390_STORE_STATUS:
4351                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4352                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4353                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4354                 break;
4355         case KVM_S390_SET_INITIAL_PSW: {
4356                 psw_t psw;
4357
4358                 r = -EFAULT;
4359                 if (copy_from_user(&psw, argp, sizeof(psw)))
4360                         break;
4361                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4362                 break;
4363         }
4364         case KVM_S390_INITIAL_RESET:
4365                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4366                 break;
4367         case KVM_SET_ONE_REG:
4368         case KVM_GET_ONE_REG: {
4369                 struct kvm_one_reg reg;
4370                 r = -EFAULT;
4371                 if (copy_from_user(&reg, argp, sizeof(reg)))
4372                         break;
4373                 if (ioctl == KVM_SET_ONE_REG)
4374                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4375                 else
4376                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4377                 break;
4378         }
4379 #ifdef CONFIG_KVM_S390_UCONTROL
4380         case KVM_S390_UCAS_MAP: {
4381                 struct kvm_s390_ucas_mapping ucasmap;
4382
4383                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4384                         r = -EFAULT;
4385                         break;
4386                 }
4387
4388                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4389                         r = -EINVAL;
4390                         break;
4391                 }
4392
4393                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4394                                      ucasmap.vcpu_addr, ucasmap.length);
4395                 break;
4396         }
4397         case KVM_S390_UCAS_UNMAP: {
4398                 struct kvm_s390_ucas_mapping ucasmap;
4399
4400                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4401                         r = -EFAULT;
4402                         break;
4403                 }
4404
4405                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4406                         r = -EINVAL;
4407                         break;
4408                 }
4409
4410                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4411                         ucasmap.length);
4412                 break;
4413         }
4414 #endif
4415         case KVM_S390_VCPU_FAULT: {
4416                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4417                 break;
4418         }
4419         case KVM_ENABLE_CAP:
4420         {
4421                 struct kvm_enable_cap cap;
4422                 r = -EFAULT;
4423                 if (copy_from_user(&cap, argp, sizeof(cap)))
4424                         break;
4425                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4426                 break;
4427         }
4428         case KVM_S390_MEM_OP: {
4429                 struct kvm_s390_mem_op mem_op;
4430
4431                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4432                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4433                 else
4434                         r = -EFAULT;
4435                 break;
4436         }
4437         case KVM_S390_SET_IRQ_STATE: {
4438                 struct kvm_s390_irq_state irq_state;
4439
4440                 r = -EFAULT;
4441                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4442                         break;
4443                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4444                     irq_state.len == 0 ||
4445                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4446                         r = -EINVAL;
4447                         break;
4448                 }
4449                 /* do not use irq_state.flags, it will break old QEMUs */
4450                 r = kvm_s390_set_irq_state(vcpu,
4451                                            (void __user *) irq_state.buf,
4452                                            irq_state.len);
4453                 break;
4454         }
4455         case KVM_S390_GET_IRQ_STATE: {
4456                 struct kvm_s390_irq_state irq_state;
4457
4458                 r = -EFAULT;
4459                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4460                         break;
4461                 if (irq_state.len == 0) {
4462                         r = -EINVAL;
4463                         break;
4464                 }
4465                 /* do not use irq_state.flags, it will break old QEMUs */
4466                 r = kvm_s390_get_irq_state(vcpu,
4467                                            (__u8 __user *)  irq_state.buf,
4468                                            irq_state.len);
4469                 break;
4470         }
4471         default:
4472                 r = -ENOTTY;
4473         }
4474
4475         vcpu_put(vcpu);
4476         return r;
4477 }
4478
4479 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4480 {
4481 #ifdef CONFIG_KVM_S390_UCONTROL
4482         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4483                  && (kvm_is_ucontrol(vcpu->kvm))) {
4484                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4485                 get_page(vmf->page);
4486                 return 0;
4487         }
4488 #endif
4489         return VM_FAULT_SIGBUS;
4490 }
4491
4492 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4493                             unsigned long npages)
4494 {
4495         return 0;
4496 }
4497
4498 /* Section: memory related */
4499 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4500                                    struct kvm_memory_slot *memslot,
4501                                    const struct kvm_userspace_memory_region *mem,
4502                                    enum kvm_mr_change change)
4503 {
4504         /* A few sanity checks. We can have memory slots which have to be
4505            located/ended at a segment boundary (1MB). The memory in userland is
4506            ok to be fragmented into various different vmas. It is okay to mmap()
4507            and munmap() stuff in this slot after doing this call at any time */
4508
4509         if (mem->userspace_addr & 0xffffful)
4510                 return -EINVAL;
4511
4512         if (mem->memory_size & 0xffffful)
4513                 return -EINVAL;
4514
4515         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4516                 return -EINVAL;
4517
4518         return 0;
4519 }
4520
4521 void kvm_arch_commit_memory_region(struct kvm *kvm,
4522                                 const struct kvm_userspace_memory_region *mem,
4523                                 const struct kvm_memory_slot *old,
4524                                 const struct kvm_memory_slot *new,
4525                                 enum kvm_mr_change change)
4526 {
4527         int rc;
4528
4529         /* If the basics of the memslot do not change, we do not want
4530          * to update the gmap. Every update causes several unnecessary
4531          * segment translation exceptions. This is usually handled just
4532          * fine by the normal fault handler + gmap, but it will also
4533          * cause faults on the prefix page of running guest CPUs.
4534          */
4535         if (old->userspace_addr == mem->userspace_addr &&
4536             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4537             old->npages * PAGE_SIZE == mem->memory_size)
4538                 return;
4539
4540         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4541                 mem->guest_phys_addr, mem->memory_size);
4542         if (rc)
4543                 pr_warn("failed to commit memory region\n");
4544         return;
4545 }
4546
4547 static inline unsigned long nonhyp_mask(int i)
4548 {
4549         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4550
4551         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4552 }
4553
4554 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4555 {
4556         vcpu->valid_wakeup = false;
4557 }
4558
4559 static int __init kvm_s390_init(void)
4560 {
4561         int i;
4562
4563         if (!sclp.has_sief2) {
4564                 pr_info("SIE is not available\n");
4565                 return -ENODEV;
4566         }
4567
4568         if (nested && hpage) {
4569                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4570                 return -EINVAL;
4571         }
4572
4573         for (i = 0; i < 16; i++)
4574                 kvm_s390_fac_base[i] |=
4575                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4576
4577         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4578 }
4579
4580 static void __exit kvm_s390_exit(void)
4581 {
4582         kvm_exit();
4583 }
4584
4585 module_init(kvm_s390_init);
4586 module_exit(kvm_s390_exit);
4587
4588 /*
4589  * Enable autoloading of the kvm module.
4590  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4591  * since x86 takes a different approach.
4592  */
4593 #include <linux/miscdevice.h>
4594 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4595 MODULE_ALIAS("devname:kvm");