arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2018
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34
  35 #include <asm/asm-offsets.h>
  36 #include <asm/lowcore.h>
  37 #include <asm/stp.h>
  38 #include <asm/pgtable.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include "kvm-s390.h"
  48 #include "gaccess.h"
  49
  50 #define CREATE_TRACE_POINTS
  51 #include "trace.h"
  52 #include "trace-s390.h"
  53
  54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  55 #define LOCAL_IRQS 32
  56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
  59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
  61
  62 struct kvm_stats_debugfs_item debugfs_entries[] = {
  63         { "userspace_handled", VCPU_STAT(exit_userspace) },
  64         { "exit_null", VCPU_STAT(exit_null) },
  65         { "exit_validity", VCPU_STAT(exit_validity) },
  66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  67         { "exit_external_request", VCPU_STAT(exit_external_request) },
  68         { "exit_io_request", VCPU_STAT(exit_io_request) },
  69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  70         { "exit_instruction", VCPU_STAT(exit_instruction) },
  71         { "exit_pei", VCPU_STAT(exit_pei) },
  72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
  79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
  85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
  86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
  90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  93         { "deliver_program", VCPU_STAT(deliver_program) },
  94         { "deliver_io", VCPU_STAT(deliver_io) },
  95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
  96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  97         { "inject_ckc", VCPU_STAT(inject_ckc) },
  98         { "inject_cputm", VCPU_STAT(inject_cputm) },
  99         { "inject_external_call", VCPU_STAT(inject_external_call) },
 100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
 101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
 102         { "inject_io", VM_STAT(inject_io) },
 103         { "inject_mchk", VCPU_STAT(inject_mchk) },
 104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
 105         { "inject_program", VCPU_STAT(inject_program) },
 106         { "inject_restart", VCPU_STAT(inject_restart) },
 107         { "inject_service_signal", VM_STAT(inject_service_signal) },
 108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
 109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
 110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
 111         { "inject_virtio", VM_STAT(inject_virtio) },
 112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
 113         { "instruction_gs", VCPU_STAT(instruction_gs) },
 114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
 115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
 116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
 117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
 119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
 120         { "instruction_sck", VCPU_STAT(instruction_sck) },
 121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 122         { "instruction_spx", VCPU_STAT(instruction_spx) },
 123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
 124         { "instruction_stap", VCPU_STAT(instruction_stap) },
 125         { "instruction_iske", VCPU_STAT(instruction_iske) },
 126         { "instruction_ri", VCPU_STAT(instruction_ri) },
 127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 128         { "instruction_sske", VCPU_STAT(instruction_sske) },
 129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 130         { "instruction_essa", VCPU_STAT(instruction_essa) },
 131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 133         { "instruction_tb", VCPU_STAT(instruction_tb) },
 134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
 135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
 137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 138         { "instruction_sie", VCPU_STAT(instruction_sie) },
 139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
 156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
 157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 158         { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
 159         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
 160         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
 161         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
 162         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
 163         { NULL }
 164 };
 165
 166 struct kvm_s390_tod_clock_ext {
 167         __u8 epoch_idx;
 168         __u64 tod;
 169         __u8 reserved[7];
 170 } __packed;
 171
 172 /* allow nested virtualization in KVM (if enabled by user space) */
 173 static int nested;
 174 module_param(nested, int, S_IRUGO);
 175 MODULE_PARM_DESC(nested, "Nested virtualization support");
 176
 177 /* allow 1m huge page guest backing, if !nested */
 178 static int hpage;
 179 module_param(hpage, int, 0444);
 180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 181
 182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
 183 static u8 halt_poll_max_steal = 10;
 184 module_param(halt_poll_max_steal, byte, 0644);
 185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 186
 187 /*
 188  * For now we handle at most 16 double words as this is what the s390 base
 189  * kernel handles and stores in the prefix page. If we ever need to go beyond
 190  * this, this requires changes to code, but the external uapi can stay.
 191  */
 192 #define SIZE_INTERNAL 16
 193
 194 /*
 195  * Base feature mask that defines default mask for facilities. Consists of the
 196  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 197  */
 198 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 199 /*
 200  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 201  * and defines the facilities that can be enabled via a cpu model.
 202  */
 203 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 204
 205 static unsigned long kvm_s390_fac_size(void)
 206 {
 207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 208         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 209         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 210                 sizeof(S390_lowcore.stfle_fac_list));
 211
 212         return SIZE_INTERNAL;
 213 }
 214
 215 /* available cpu features supported by kvm */
 216 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 217 /* available subfunctions indicated via query / "test bit" */
 218 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 219
 220 static struct gmap_notifier gmap_notifier;
 221 static struct gmap_notifier vsie_gmap_notifier;
 222 debug_info_t *kvm_s390_dbf;
 223
 224 /* Section: not file related */
 225 int kvm_arch_hardware_enable(void)
 226 {
 227         /* every s390 is virtualization enabled ;-) */
 228         return 0;
 229 }
 230
 231 int kvm_arch_check_processor_compat(void)
 232 {
 233         return 0;
 234 }
 235
 236 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 237                               unsigned long end);
 238
 239 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 240 {
 241         u8 delta_idx = 0;
 242
 243         /*
 244          * The TOD jumps by delta, we have to compensate this by adding
 245          * -delta to the epoch.
 246          */
 247         delta = -delta;
 248
 249         /* sign-extension - we're adding to signed values below */
 250         if ((s64)delta < 0)
 251                 delta_idx = -1;
 252
 253         scb->epoch += delta;
 254         if (scb->ecd & ECD_MEF) {
 255                 scb->epdx += delta_idx;
 256                 if (scb->epoch < delta)
 257                         scb->epdx += 1;
 258         }
 259 }
 260
 261 /*
 262  * This callback is executed during stop_machine(). All CPUs are therefore
 263  * temporarily stopped. In order not to change guest behavior, we have to
 264  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 265  * so a CPU won't be stopped while calculating with the epoch.
 266  */
 267 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 268                           void *v)
 269 {
 270         struct kvm *kvm;
 271         struct kvm_vcpu *vcpu;
 272         int i;
 273         unsigned long long *delta = v;
 274
 275         list_for_each_entry(kvm, &vm_list, vm_list) {
 276                 kvm_for_each_vcpu(i, vcpu, kvm) {
 277                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 278                         if (i == 0) {
 279                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 280                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 281                         }
 282                         if (vcpu->arch.cputm_enabled)
 283                                 vcpu->arch.cputm_start += *delta;
 284                         if (vcpu->arch.vsie_block)
 285                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 286                                                    *delta);
 287                 }
 288         }
 289         return NOTIFY_OK;
 290 }
 291
 292 static struct notifier_block kvm_clock_notifier = {
 293         .notifier_call = kvm_clock_sync,
 294 };
 295
 296 int kvm_arch_hardware_setup(void)
 297 {
 298         gmap_notifier.notifier_call = kvm_gmap_notifier;
 299         gmap_register_pte_notifier(&gmap_notifier);
 300         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 301         gmap_register_pte_notifier(&vsie_gmap_notifier);
 302         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 303                                        &kvm_clock_notifier);
 304         return 0;
 305 }
 306
 307 void kvm_arch_hardware_unsetup(void)
 308 {
 309         gmap_unregister_pte_notifier(&gmap_notifier);
 310         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 311         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 312                                          &kvm_clock_notifier);
 313 }
 314
 315 static void allow_cpu_feat(unsigned long nr)
 316 {
 317         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 318 }
 319
 320 static inline int plo_test_bit(unsigned char nr)
 321 {
 322         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 323         int cc;
 324
 325         asm volatile(
 326                 /* Parameter registers are ignored for "test bit" */
 327                 "       plo     0,0,0,0(0)\n"
 328                 "       ipm     %0\n"
 329                 "       srl     %0,28\n"
 330                 : "=d" (cc)
 331                 : "d" (r0)
 332                 : "cc");
 333         return cc == 0;
 334 }
 335
 336 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 337 {
 338         register unsigned long r0 asm("0") = 0; /* query function */
 339         register unsigned long r1 asm("1") = (unsigned long) query;
 340
 341         asm volatile(
 342                 /* Parameter regs are ignored */
 343                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 344                 :
 345                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
 346                 : "cc", "memory");
 347 }
 348
 349 #define INSN_SORTL 0xb938
 350 #define INSN_DFLTCC 0xb939
 351
 352 static void kvm_s390_cpu_feat_init(void)
 353 {
 354         int i;
 355
 356         for (i = 0; i < 256; ++i) {
 357                 if (plo_test_bit(i))
 358                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 359         }
 360
 361         if (test_facility(28)) /* TOD-clock steering */
 362                 ptff(kvm_s390_available_subfunc.ptff,
 363                      sizeof(kvm_s390_available_subfunc.ptff),
 364                      PTFF_QAF);
 365
 366         if (test_facility(17)) { /* MSA */
 367                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 368                               kvm_s390_available_subfunc.kmac);
 369                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 370                               kvm_s390_available_subfunc.kmc);
 371                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 372                               kvm_s390_available_subfunc.km);
 373                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 374                               kvm_s390_available_subfunc.kimd);
 375                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 376                               kvm_s390_available_subfunc.klmd);
 377         }
 378         if (test_facility(76)) /* MSA3 */
 379                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 380                               kvm_s390_available_subfunc.pckmo);
 381         if (test_facility(77)) { /* MSA4 */
 382                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 383                               kvm_s390_available_subfunc.kmctr);
 384                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 385                               kvm_s390_available_subfunc.kmf);
 386                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 387                               kvm_s390_available_subfunc.kmo);
 388                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 389                               kvm_s390_available_subfunc.pcc);
 390         }
 391         if (test_facility(57)) /* MSA5 */
 392                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 393                               kvm_s390_available_subfunc.ppno);
 394
 395         if (test_facility(146)) /* MSA8 */
 396                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 397                               kvm_s390_available_subfunc.kma);
 398
 399         if (test_facility(155)) /* MSA9 */
 400                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 401                               kvm_s390_available_subfunc.kdsa);
 402
 403         if (test_facility(150)) /* SORTL */
 404                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 405
 406         if (test_facility(151)) /* DFLTCC */
 407                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 408
 409         if (MACHINE_HAS_ESOP)
 410                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 411         /*
 412          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 413          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 414          */
 415         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 416             !test_facility(3) || !nested)
 417                 return;
 418         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 419         if (sclp.has_64bscao)
 420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 421         if (sclp.has_siif)
 422                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 423         if (sclp.has_gpere)
 424                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 425         if (sclp.has_gsls)
 426                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 427         if (sclp.has_ib)
 428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 429         if (sclp.has_cei)
 430                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 431         if (sclp.has_ibs)
 432                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 433         if (sclp.has_kss)
 434                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 435         /*
 436          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 437          * all skey handling functions read/set the skey from the PGSTE
 438          * instead of the real storage key.
 439          *
 440          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 441          * pages being detected as preserved although they are resident.
 442          *
 443          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 444          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 445          *
 446          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 447          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 448          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 449          *
 450          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 451          * cannot easily shadow the SCA because of the ipte lock.
 452          */
 453 }
 454
 455 int kvm_arch_init(void *opaque)
 456 {
 457         int rc = -ENOMEM;
 458
 459         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 460         if (!kvm_s390_dbf)
 461                 return -ENOMEM;
 462
 463         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
 464                 goto out;
 465
 466         kvm_s390_cpu_feat_init();
 467
 468         /* Register floating interrupt controller interface. */
 469         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 470         if (rc) {
 471                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 472                 goto out;
 473         }
 474
 475         rc = kvm_s390_gib_init(GAL_ISC);
 476         if (rc)
 477                 goto out;
 478
 479         return 0;
 480
 481 out:
 482         kvm_arch_exit();
 483         return rc;
 484 }
 485
 486 void kvm_arch_exit(void)
 487 {
 488         kvm_s390_gib_destroy();
 489         debug_unregister(kvm_s390_dbf);
 490 }
 491
 492 /* Section: device related */
 493 long kvm_arch_dev_ioctl(struct file *filp,
 494                         unsigned int ioctl, unsigned long arg)
 495 {
 496         if (ioctl == KVM_S390_ENABLE_SIE)
 497                 return s390_enable_sie();
 498         return -EINVAL;
 499 }
 500
 501 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 502 {
 503         int r;
 504
 505         switch (ext) {
 506         case KVM_CAP_S390_PSW:
 507         case KVM_CAP_S390_GMAP:
 508         case KVM_CAP_SYNC_MMU:
 509 #ifdef CONFIG_KVM_S390_UCONTROL
 510         case KVM_CAP_S390_UCONTROL:
 511 #endif
 512         case KVM_CAP_ASYNC_PF:
 513         case KVM_CAP_SYNC_REGS:
 514         case KVM_CAP_ONE_REG:
 515         case KVM_CAP_ENABLE_CAP:
 516         case KVM_CAP_S390_CSS_SUPPORT:
 517         case KVM_CAP_IOEVENTFD:
 518         case KVM_CAP_DEVICE_CTRL:
 519         case KVM_CAP_S390_IRQCHIP:
 520         case KVM_CAP_VM_ATTRIBUTES:
 521         case KVM_CAP_MP_STATE:
 522         case KVM_CAP_IMMEDIATE_EXIT:
 523         case KVM_CAP_S390_INJECT_IRQ:
 524         case KVM_CAP_S390_USER_SIGP:
 525         case KVM_CAP_S390_USER_STSI:
 526         case KVM_CAP_S390_SKEYS:
 527         case KVM_CAP_S390_IRQ_STATE:
 528         case KVM_CAP_S390_USER_INSTR0:
 529         case KVM_CAP_S390_CMMA_MIGRATION:
 530         case KVM_CAP_S390_AIS:
 531         case KVM_CAP_S390_AIS_MIGRATION:
 532                 r = 1;
 533                 break;
 534         case KVM_CAP_S390_HPAGE_1M:
 535                 r = 0;
 536                 if (hpage && !kvm_is_ucontrol(kvm))
 537                         r = 1;
 538                 break;
 539         case KVM_CAP_S390_MEM_OP:
 540                 r = MEM_OP_MAX_SIZE;
 541                 break;
 542         case KVM_CAP_NR_VCPUS:
 543         case KVM_CAP_MAX_VCPUS:
 544         case KVM_CAP_MAX_VCPU_ID:
 545                 r = KVM_S390_BSCA_CPU_SLOTS;
 546                 if (!kvm_s390_use_sca_entries())
 547                         r = KVM_MAX_VCPUS;
 548                 else if (sclp.has_esca && sclp.has_64bscao)
 549                         r = KVM_S390_ESCA_CPU_SLOTS;
 550                 break;
 551         case KVM_CAP_S390_COW:
 552                 r = MACHINE_HAS_ESOP;
 553                 break;
 554         case KVM_CAP_S390_VECTOR_REGISTERS:
 555                 r = MACHINE_HAS_VX;
 556                 break;
 557         case KVM_CAP_S390_RI:
 558                 r = test_facility(64);
 559                 break;
 560         case KVM_CAP_S390_GS:
 561                 r = test_facility(133);
 562                 break;
 563         case KVM_CAP_S390_BPB:
 564                 r = test_facility(82);
 565                 break;
 566         default:
 567                 r = 0;
 568         }
 569         return r;
 570 }
 571
 572 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 573                                     struct kvm_memory_slot *memslot)
 574 {
 575         int i;
 576         gfn_t cur_gfn, last_gfn;
 577         unsigned long gaddr, vmaddr;
 578         struct gmap *gmap = kvm->arch.gmap;
 579         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 580
 581         /* Loop over all guest segments */
 582         cur_gfn = memslot->base_gfn;
 583         last_gfn = memslot->base_gfn + memslot->npages;
 584         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 585                 gaddr = gfn_to_gpa(cur_gfn);
 586                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 587                 if (kvm_is_error_hva(vmaddr))
 588                         continue;
 589
 590                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 591                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 592                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 593                         if (test_bit(i, bitmap))
 594                                 mark_page_dirty(kvm, cur_gfn + i);
 595                 }
 596
 597                 if (fatal_signal_pending(current))
 598                         return;
 599                 cond_resched();
 600         }
 601 }
 602
 603 /* Section: vm related */
 604 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 605
 606 /*
 607  * Get (and clear) the dirty memory log for a memory slot.
 608  */
 609 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 610                                struct kvm_dirty_log *log)
 611 {
 612         int r;
 613         unsigned long n;
 614         struct kvm_memslots *slots;
 615         struct kvm_memory_slot *memslot;
 616         int is_dirty = 0;
 617
 618         if (kvm_is_ucontrol(kvm))
 619                 return -EINVAL;
 620
 621         mutex_lock(&kvm->slots_lock);
 622
 623         r = -EINVAL;
 624         if (log->slot >= KVM_USER_MEM_SLOTS)
 625                 goto out;
 626
 627         slots = kvm_memslots(kvm);
 628         memslot = id_to_memslot(slots, log->slot);
 629         r = -ENOENT;
 630         if (!memslot->dirty_bitmap)
 631                 goto out;
 632
 633         kvm_s390_sync_dirty_log(kvm, memslot);
 634         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 635         if (r)
 636                 goto out;
 637
 638         /* Clear the dirty log */
 639         if (is_dirty) {
 640                 n = kvm_dirty_bitmap_bytes(memslot);
 641                 memset(memslot->dirty_bitmap, 0, n);
 642         }
 643         r = 0;
 644 out:
 645         mutex_unlock(&kvm->slots_lock);
 646         return r;
 647 }
 648
 649 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 650 {
 651         unsigned int i;
 652         struct kvm_vcpu *vcpu;
 653
 654         kvm_for_each_vcpu(i, vcpu, kvm) {
 655                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 656         }
 657 }
 658
 659 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 660 {
 661         int r;
 662
 663         if (cap->flags)
 664                 return -EINVAL;
 665
 666         switch (cap->cap) {
 667         case KVM_CAP_S390_IRQCHIP:
 668                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 669                 kvm->arch.use_irqchip = 1;
 670                 r = 0;
 671                 break;
 672         case KVM_CAP_S390_USER_SIGP:
 673                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 674                 kvm->arch.user_sigp = 1;
 675                 r = 0;
 676                 break;
 677         case KVM_CAP_S390_VECTOR_REGISTERS:
 678                 mutex_lock(&kvm->lock);
 679                 if (kvm->created_vcpus) {
 680                         r = -EBUSY;
 681                 } else if (MACHINE_HAS_VX) {
 682                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 683                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 684                         if (test_facility(134)) {
 685                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 686                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 687                         }
 688                         if (test_facility(135)) {
 689                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 690                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 691                         }
 692                         if (test_facility(148)) {
 693                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 694                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 695                         }
 696                         if (test_facility(152)) {
 697                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 698                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 699                         }
 700                         r = 0;
 701                 } else
 702                         r = -EINVAL;
 703                 mutex_unlock(&kvm->lock);
 704                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 705                          r ? "(not available)" : "(success)");
 706                 break;
 707         case KVM_CAP_S390_RI:
 708                 r = -EINVAL;
 709                 mutex_lock(&kvm->lock);
 710                 if (kvm->created_vcpus) {
 711                         r = -EBUSY;
 712                 } else if (test_facility(64)) {
 713                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 714                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 715                         r = 0;
 716                 }
 717                 mutex_unlock(&kvm->lock);
 718                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 719                          r ? "(not available)" : "(success)");
 720                 break;
 721         case KVM_CAP_S390_AIS:
 722                 mutex_lock(&kvm->lock);
 723                 if (kvm->created_vcpus) {
 724                         r = -EBUSY;
 725                 } else {
 726                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 727                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 728                         r = 0;
 729                 }
 730                 mutex_unlock(&kvm->lock);
 731                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 732                          r ? "(not available)" : "(success)");
 733                 break;
 734         case KVM_CAP_S390_GS:
 735                 r = -EINVAL;
 736                 mutex_lock(&kvm->lock);
 737                 if (kvm->created_vcpus) {
 738                         r = -EBUSY;
 739                 } else if (test_facility(133)) {
 740                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 741                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 742                         r = 0;
 743                 }
 744                 mutex_unlock(&kvm->lock);
 745                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 746                          r ? "(not available)" : "(success)");
 747                 break;
 748         case KVM_CAP_S390_HPAGE_1M:
 749                 mutex_lock(&kvm->lock);
 750                 if (kvm->created_vcpus)
 751                         r = -EBUSY;
 752                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 753                         r = -EINVAL;
 754                 else {
 755                         r = 0;
 756                         down_write(&kvm->mm->mmap_sem);
 757                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 758                         up_write(&kvm->mm->mmap_sem);
 759                         /*
 760                          * We might have to create fake 4k page
 761                          * tables. To avoid that the hardware works on
 762                          * stale PGSTEs, we emulate these instructions.
 763                          */
 764                         kvm->arch.use_skf = 0;
 765                         kvm->arch.use_pfmfi = 0;
 766                 }
 767                 mutex_unlock(&kvm->lock);
 768                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 769                          r ? "(not available)" : "(success)");
 770                 break;
 771         case KVM_CAP_S390_USER_STSI:
 772                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 773                 kvm->arch.user_stsi = 1;
 774                 r = 0;
 775                 break;
 776         case KVM_CAP_S390_USER_INSTR0:
 777                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 778                 kvm->arch.user_instr0 = 1;
 779                 icpt_operexc_on_all_vcpus(kvm);
 780                 r = 0;
 781                 break;
 782         default:
 783                 r = -EINVAL;
 784                 break;
 785         }
 786         return r;
 787 }
 788
 789 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 790 {
 791         int ret;
 792
 793         switch (attr->attr) {
 794         case KVM_S390_VM_MEM_LIMIT_SIZE:
 795                 ret = 0;
 796                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 797                          kvm->arch.mem_limit);
 798                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 799                         ret = -EFAULT;
 800                 break;
 801         default:
 802                 ret = -ENXIO;
 803                 break;
 804         }
 805         return ret;
 806 }
 807
 808 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 809 {
 810         int ret;
 811         unsigned int idx;
 812         switch (attr->attr) {
 813         case KVM_S390_VM_MEM_ENABLE_CMMA:
 814                 ret = -ENXIO;
 815                 if (!sclp.has_cmma)
 816                         break;
 817
 818                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 819                 mutex_lock(&kvm->lock);
 820                 if (kvm->created_vcpus)
 821                         ret = -EBUSY;
 822                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 823                         ret = -EINVAL;
 824                 else {
 825                         kvm->arch.use_cmma = 1;
 826                         /* Not compatible with cmma. */
 827                         kvm->arch.use_pfmfi = 0;
 828                         ret = 0;
 829                 }
 830                 mutex_unlock(&kvm->lock);
 831                 break;
 832         case KVM_S390_VM_MEM_CLR_CMMA:
 833                 ret = -ENXIO;
 834                 if (!sclp.has_cmma)
 835                         break;
 836                 ret = -EINVAL;
 837                 if (!kvm->arch.use_cmma)
 838                         break;
 839
 840                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 841                 mutex_lock(&kvm->lock);
 842                 idx = srcu_read_lock(&kvm->srcu);
 843                 s390_reset_cmma(kvm->arch.gmap->mm);
 844                 srcu_read_unlock(&kvm->srcu, idx);
 845                 mutex_unlock(&kvm->lock);
 846                 ret = 0;
 847                 break;
 848         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 849                 unsigned long new_limit;
 850
 851                 if (kvm_is_ucontrol(kvm))
 852                         return -EINVAL;
 853
 854                 if (get_user(new_limit, (u64 __user *)attr->addr))
 855                         return -EFAULT;
 856
 857                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 858                     new_limit > kvm->arch.mem_limit)
 859                         return -E2BIG;
 860
 861                 if (!new_limit)
 862                         return -EINVAL;
 863
 864                 /* gmap_create takes last usable address */
 865                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 866                         new_limit -= 1;
 867
 868                 ret = -EBUSY;
 869                 mutex_lock(&kvm->lock);
 870                 if (!kvm->created_vcpus) {
 871                         /* gmap_create will round the limit up */
 872                         struct gmap *new = gmap_create(current->mm, new_limit);
 873
 874                         if (!new) {
 875                                 ret = -ENOMEM;
 876                         } else {
 877                                 gmap_remove(kvm->arch.gmap);
 878                                 new->private = kvm;
 879                                 kvm->arch.gmap = new;
 880                                 ret = 0;
 881                         }
 882                 }
 883                 mutex_unlock(&kvm->lock);
 884                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 885                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 886                          (void *) kvm->arch.gmap->asce);
 887                 break;
 888         }
 889         default:
 890                 ret = -ENXIO;
 891                 break;
 892         }
 893         return ret;
 894 }
 895
 896 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 897
 898 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 899 {
 900         struct kvm_vcpu *vcpu;
 901         int i;
 902
 903         kvm_s390_vcpu_block_all(kvm);
 904
 905         kvm_for_each_vcpu(i, vcpu, kvm) {
 906                 kvm_s390_vcpu_crypto_setup(vcpu);
 907                 /* recreate the shadow crycb by leaving the VSIE handler */
 908                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 909         }
 910
 911         kvm_s390_vcpu_unblock_all(kvm);
 912 }
 913
 914 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 915 {
 916         mutex_lock(&kvm->lock);
 917         switch (attr->attr) {
 918         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 919                 if (!test_kvm_facility(kvm, 76)) {
 920                         mutex_unlock(&kvm->lock);
 921                         return -EINVAL;
 922                 }
 923                 get_random_bytes(
 924                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 925                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 926                 kvm->arch.crypto.aes_kw = 1;
 927                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 928                 break;
 929         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 930                 if (!test_kvm_facility(kvm, 76)) {
 931                         mutex_unlock(&kvm->lock);
 932                         return -EINVAL;
 933                 }
 934                 get_random_bytes(
 935                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 936                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 937                 kvm->arch.crypto.dea_kw = 1;
 938                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 939                 break;
 940         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 941                 if (!test_kvm_facility(kvm, 76)) {
 942                         mutex_unlock(&kvm->lock);
 943                         return -EINVAL;
 944                 }
 945                 kvm->arch.crypto.aes_kw = 0;
 946                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 947                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 948                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 949                 break;
 950         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 951                 if (!test_kvm_facility(kvm, 76)) {
 952                         mutex_unlock(&kvm->lock);
 953                         return -EINVAL;
 954                 }
 955                 kvm->arch.crypto.dea_kw = 0;
 956                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 957                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 958                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 959                 break;
 960         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 961                 if (!ap_instructions_available()) {
 962                         mutex_unlock(&kvm->lock);
 963                         return -EOPNOTSUPP;
 964                 }
 965                 kvm->arch.crypto.apie = 1;
 966                 break;
 967         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
 968                 if (!ap_instructions_available()) {
 969                         mutex_unlock(&kvm->lock);
 970                         return -EOPNOTSUPP;
 971                 }
 972                 kvm->arch.crypto.apie = 0;
 973                 break;
 974         default:
 975                 mutex_unlock(&kvm->lock);
 976                 return -ENXIO;
 977         }
 978
 979         kvm_s390_vcpu_crypto_reset_all(kvm);
 980         mutex_unlock(&kvm->lock);
 981         return 0;
 982 }
 983
 984 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 985 {
 986         int cx;
 987         struct kvm_vcpu *vcpu;
 988
 989         kvm_for_each_vcpu(cx, vcpu, kvm)
 990                 kvm_s390_sync_request(req, vcpu);
 991 }
 992
 993 /*
 994  * Must be called with kvm->srcu held to avoid races on memslots, and with
 995  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 996  */
 997 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 998 {
 999         struct kvm_memory_slot *ms;
1000         struct kvm_memslots *slots;
1001         unsigned long ram_pages = 0;
1002         int slotnr;
1003
1004         /* migration mode already enabled */
1005         if (kvm->arch.migration_mode)
1006                 return 0;
1007         slots = kvm_memslots(kvm);
1008         if (!slots || !slots->used_slots)
1009                 return -EINVAL;
1010
1011         if (!kvm->arch.use_cmma) {
1012                 kvm->arch.migration_mode = 1;
1013                 return 0;
1014         }
1015         /* mark all the pages in active slots as dirty */
1016         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1017                 ms = slots->memslots + slotnr;
1018                 if (!ms->dirty_bitmap)
1019                         return -EINVAL;
1020                 /*
1021                  * The second half of the bitmap is only used on x86,
1022                  * and would be wasted otherwise, so we put it to good
1023                  * use here to keep track of the state of the storage
1024                  * attributes.
1025                  */
1026                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1027                 ram_pages += ms->npages;
1028         }
1029         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1030         kvm->arch.migration_mode = 1;
1031         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1032         return 0;
1033 }
1034
1035 /*
1036  * Must be called with kvm->slots_lock to avoid races with ourselves and
1037  * kvm_s390_vm_start_migration.
1038  */
1039 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1040 {
1041         /* migration mode already disabled */
1042         if (!kvm->arch.migration_mode)
1043                 return 0;
1044         kvm->arch.migration_mode = 0;
1045         if (kvm->arch.use_cmma)
1046                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1047         return 0;
1048 }
1049
1050 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1051                                      struct kvm_device_attr *attr)
1052 {
1053         int res = -ENXIO;
1054
1055         mutex_lock(&kvm->slots_lock);
1056         switch (attr->attr) {
1057         case KVM_S390_VM_MIGRATION_START:
1058                 res = kvm_s390_vm_start_migration(kvm);
1059                 break;
1060         case KVM_S390_VM_MIGRATION_STOP:
1061                 res = kvm_s390_vm_stop_migration(kvm);
1062                 break;
1063         default:
1064                 break;
1065         }
1066         mutex_unlock(&kvm->slots_lock);
1067
1068         return res;
1069 }
1070
1071 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1072                                      struct kvm_device_attr *attr)
1073 {
1074         u64 mig = kvm->arch.migration_mode;
1075
1076         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1077                 return -ENXIO;
1078
1079         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1080                 return -EFAULT;
1081         return 0;
1082 }
1083
1084 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1085 {
1086         struct kvm_s390_vm_tod_clock gtod;
1087
1088         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1089                 return -EFAULT;
1090
1091         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1092                 return -EINVAL;
1093         kvm_s390_set_tod_clock(kvm, &gtod);
1094
1095         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1096                 gtod.epoch_idx, gtod.tod);
1097
1098         return 0;
1099 }
1100
1101 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103         u8 gtod_high;
1104
1105         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1106                                            sizeof(gtod_high)))
1107                 return -EFAULT;
1108
1109         if (gtod_high != 0)
1110                 return -EINVAL;
1111         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1112
1113         return 0;
1114 }
1115
1116 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1117 {
1118         struct kvm_s390_vm_tod_clock gtod = { 0 };
1119
1120         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1121                            sizeof(gtod.tod)))
1122                 return -EFAULT;
1123
1124         kvm_s390_set_tod_clock(kvm, &gtod);
1125         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1126         return 0;
1127 }
1128
1129 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131         int ret;
1132
1133         if (attr->flags)
1134                 return -EINVAL;
1135
1136         switch (attr->attr) {
1137         case KVM_S390_VM_TOD_EXT:
1138                 ret = kvm_s390_set_tod_ext(kvm, attr);
1139                 break;
1140         case KVM_S390_VM_TOD_HIGH:
1141                 ret = kvm_s390_set_tod_high(kvm, attr);
1142                 break;
1143         case KVM_S390_VM_TOD_LOW:
1144                 ret = kvm_s390_set_tod_low(kvm, attr);
1145                 break;
1146         default:
1147                 ret = -ENXIO;
1148                 break;
1149         }
1150         return ret;
1151 }
1152
1153 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1154                                    struct kvm_s390_vm_tod_clock *gtod)
1155 {
1156         struct kvm_s390_tod_clock_ext htod;
1157
1158         preempt_disable();
1159
1160         get_tod_clock_ext((char *)&htod);
1161
1162         gtod->tod = htod.tod + kvm->arch.epoch;
1163         gtod->epoch_idx = 0;
1164         if (test_kvm_facility(kvm, 139)) {
1165                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1166                 if (gtod->tod < htod.tod)
1167                         gtod->epoch_idx += 1;
1168         }
1169
1170         preempt_enable();
1171 }
1172
1173 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175         struct kvm_s390_vm_tod_clock gtod;
1176
1177         memset(&gtod, 0, sizeof(gtod));
1178         kvm_s390_get_tod_clock(kvm, &gtod);
1179         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1180                 return -EFAULT;
1181
1182         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1183                 gtod.epoch_idx, gtod.tod);
1184         return 0;
1185 }
1186
1187 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1188 {
1189         u8 gtod_high = 0;
1190
1191         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1192                                          sizeof(gtod_high)))
1193                 return -EFAULT;
1194         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1195
1196         return 0;
1197 }
1198
1199 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201         u64 gtod;
1202
1203         gtod = kvm_s390_get_tod_clock_fast(kvm);
1204         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1205                 return -EFAULT;
1206         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1207
1208         return 0;
1209 }
1210
1211 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213         int ret;
1214
1215         if (attr->flags)
1216                 return -EINVAL;
1217
1218         switch (attr->attr) {
1219         case KVM_S390_VM_TOD_EXT:
1220                 ret = kvm_s390_get_tod_ext(kvm, attr);
1221                 break;
1222         case KVM_S390_VM_TOD_HIGH:
1223                 ret = kvm_s390_get_tod_high(kvm, attr);
1224                 break;
1225         case KVM_S390_VM_TOD_LOW:
1226                 ret = kvm_s390_get_tod_low(kvm, attr);
1227                 break;
1228         default:
1229                 ret = -ENXIO;
1230                 break;
1231         }
1232         return ret;
1233 }
1234
1235 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1236 {
1237         struct kvm_s390_vm_cpu_processor *proc;
1238         u16 lowest_ibc, unblocked_ibc;
1239         int ret = 0;
1240
1241         mutex_lock(&kvm->lock);
1242         if (kvm->created_vcpus) {
1243                 ret = -EBUSY;
1244                 goto out;
1245         }
1246         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1247         if (!proc) {
1248                 ret = -ENOMEM;
1249                 goto out;
1250         }
1251         if (!copy_from_user(proc, (void __user *)attr->addr,
1252                             sizeof(*proc))) {
1253                 kvm->arch.model.cpuid = proc->cpuid;
1254                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1255                 unblocked_ibc = sclp.ibc & 0xfff;
1256                 if (lowest_ibc && proc->ibc) {
1257                         if (proc->ibc > unblocked_ibc)
1258                                 kvm->arch.model.ibc = unblocked_ibc;
1259                         else if (proc->ibc < lowest_ibc)
1260                                 kvm->arch.model.ibc = lowest_ibc;
1261                         else
1262                                 kvm->arch.model.ibc = proc->ibc;
1263                 }
1264                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1265                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1266                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1267                          kvm->arch.model.ibc,
1268                          kvm->arch.model.cpuid);
1269                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1270                          kvm->arch.model.fac_list[0],
1271                          kvm->arch.model.fac_list[1],
1272                          kvm->arch.model.fac_list[2]);
1273         } else
1274                 ret = -EFAULT;
1275         kfree(proc);
1276 out:
1277         mutex_unlock(&kvm->lock);
1278         return ret;
1279 }
1280
1281 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1282                                        struct kvm_device_attr *attr)
1283 {
1284         struct kvm_s390_vm_cpu_feat data;
1285
1286         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1287                 return -EFAULT;
1288         if (!bitmap_subset((unsigned long *) data.feat,
1289                            kvm_s390_available_cpu_feat,
1290                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1291                 return -EINVAL;
1292
1293         mutex_lock(&kvm->lock);
1294         if (kvm->created_vcpus) {
1295                 mutex_unlock(&kvm->lock);
1296                 return -EBUSY;
1297         }
1298         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1299                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1300         mutex_unlock(&kvm->lock);
1301         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1302                          data.feat[0],
1303                          data.feat[1],
1304                          data.feat[2]);
1305         return 0;
1306 }
1307
1308 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1309                                           struct kvm_device_attr *attr)
1310 {
1311         mutex_lock(&kvm->lock);
1312         if (kvm->created_vcpus) {
1313                 mutex_unlock(&kvm->lock);
1314                 return -EBUSY;
1315         }
1316
1317         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1318                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1319                 mutex_unlock(&kvm->lock);
1320                 return -EFAULT;
1321         }
1322         mutex_unlock(&kvm->lock);
1323
1324         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1325                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1327                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1328                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1329         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1330                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1331                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1332         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1333                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1334                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1335         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1338         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1339                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1341         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1344         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1345                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1347         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1348                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1350         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1353         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1356         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1359         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1362         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1365         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1368         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1371         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1376         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1381
1382         return 0;
1383 }
1384
1385 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1386 {
1387         int ret = -ENXIO;
1388
1389         switch (attr->attr) {
1390         case KVM_S390_VM_CPU_PROCESSOR:
1391                 ret = kvm_s390_set_processor(kvm, attr);
1392                 break;
1393         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1394                 ret = kvm_s390_set_processor_feat(kvm, attr);
1395                 break;
1396         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1397                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1398                 break;
1399         }
1400         return ret;
1401 }
1402
1403 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1404 {
1405         struct kvm_s390_vm_cpu_processor *proc;
1406         int ret = 0;
1407
1408         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1409         if (!proc) {
1410                 ret = -ENOMEM;
1411                 goto out;
1412         }
1413         proc->cpuid = kvm->arch.model.cpuid;
1414         proc->ibc = kvm->arch.model.ibc;
1415         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1416                S390_ARCH_FAC_LIST_SIZE_BYTE);
1417         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1418                  kvm->arch.model.ibc,
1419                  kvm->arch.model.cpuid);
1420         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1421                  kvm->arch.model.fac_list[0],
1422                  kvm->arch.model.fac_list[1],
1423                  kvm->arch.model.fac_list[2]);
1424         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1425                 ret = -EFAULT;
1426         kfree(proc);
1427 out:
1428         return ret;
1429 }
1430
1431 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1432 {
1433         struct kvm_s390_vm_cpu_machine *mach;
1434         int ret = 0;
1435
1436         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1437         if (!mach) {
1438                 ret = -ENOMEM;
1439                 goto out;
1440         }
1441         get_cpu_id((struct cpuid *) &mach->cpuid);
1442         mach->ibc = sclp.ibc;
1443         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1444                S390_ARCH_FAC_LIST_SIZE_BYTE);
1445         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1446                sizeof(S390_lowcore.stfle_fac_list));
1447         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1448                  kvm->arch.model.ibc,
1449                  kvm->arch.model.cpuid);
1450         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1451                  mach->fac_mask[0],
1452                  mach->fac_mask[1],
1453                  mach->fac_mask[2]);
1454         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1455                  mach->fac_list[0],
1456                  mach->fac_list[1],
1457                  mach->fac_list[2]);
1458         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1459                 ret = -EFAULT;
1460         kfree(mach);
1461 out:
1462         return ret;
1463 }
1464
1465 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1466                                        struct kvm_device_attr *attr)
1467 {
1468         struct kvm_s390_vm_cpu_feat data;
1469
1470         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1471                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1472         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1473                 return -EFAULT;
1474         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1475                          data.feat[0],
1476                          data.feat[1],
1477                          data.feat[2]);
1478         return 0;
1479 }
1480
1481 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1482                                      struct kvm_device_attr *attr)
1483 {
1484         struct kvm_s390_vm_cpu_feat data;
1485
1486         bitmap_copy((unsigned long *) data.feat,
1487                     kvm_s390_available_cpu_feat,
1488                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1489         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1490                 return -EFAULT;
1491         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492                          data.feat[0],
1493                          data.feat[1],
1494                          data.feat[2]);
1495         return 0;
1496 }
1497
1498 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1499                                           struct kvm_device_attr *attr)
1500 {
1501         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1502             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1503                 return -EFAULT;
1504
1505         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1506                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1510         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1511                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1513         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1514                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1516         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1519         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1522         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1525         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1528         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1531         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1534         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1537         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1540         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1543         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1546         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1549         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1552         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1557         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1562
1563         return 0;
1564 }
1565
1566 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1567                                         struct kvm_device_attr *attr)
1568 {
1569         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1570             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1571                 return -EFAULT;
1572
1573         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1575                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1576                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1577                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1578         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1579                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1580                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1581         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1582                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1583                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1584         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1585                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1587         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1588                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1589                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1590         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1591                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1593         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1594                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1595                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1596         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1597                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1598                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1599         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1600                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1602         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1604                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1605         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1606                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1607                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1608         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1609                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1610                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1611         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1612                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1614         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1617         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1620         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1625         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1630
1631         return 0;
1632 }
1633
1634 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1635 {
1636         int ret = -ENXIO;
1637
1638         switch (attr->attr) {
1639         case KVM_S390_VM_CPU_PROCESSOR:
1640                 ret = kvm_s390_get_processor(kvm, attr);
1641                 break;
1642         case KVM_S390_VM_CPU_MACHINE:
1643                 ret = kvm_s390_get_machine(kvm, attr);
1644                 break;
1645         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1646                 ret = kvm_s390_get_processor_feat(kvm, attr);
1647                 break;
1648         case KVM_S390_VM_CPU_MACHINE_FEAT:
1649                 ret = kvm_s390_get_machine_feat(kvm, attr);
1650                 break;
1651         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1652                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1653                 break;
1654         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1655                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1656                 break;
1657         }
1658         return ret;
1659 }
1660
1661 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1662 {
1663         int ret;
1664
1665         switch (attr->group) {
1666         case KVM_S390_VM_MEM_CTRL:
1667                 ret = kvm_s390_set_mem_control(kvm, attr);
1668                 break;
1669         case KVM_S390_VM_TOD:
1670                 ret = kvm_s390_set_tod(kvm, attr);
1671                 break;
1672         case KVM_S390_VM_CPU_MODEL:
1673                 ret = kvm_s390_set_cpu_model(kvm, attr);
1674                 break;
1675         case KVM_S390_VM_CRYPTO:
1676                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1677                 break;
1678         case KVM_S390_VM_MIGRATION:
1679                 ret = kvm_s390_vm_set_migration(kvm, attr);
1680                 break;
1681         default:
1682                 ret = -ENXIO;
1683                 break;
1684         }
1685
1686         return ret;
1687 }
1688
1689 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1690 {
1691         int ret;
1692
1693         switch (attr->group) {
1694         case KVM_S390_VM_MEM_CTRL:
1695                 ret = kvm_s390_get_mem_control(kvm, attr);
1696                 break;
1697         case KVM_S390_VM_TOD:
1698                 ret = kvm_s390_get_tod(kvm, attr);
1699                 break;
1700         case KVM_S390_VM_CPU_MODEL:
1701                 ret = kvm_s390_get_cpu_model(kvm, attr);
1702                 break;
1703         case KVM_S390_VM_MIGRATION:
1704                 ret = kvm_s390_vm_get_migration(kvm, attr);
1705                 break;
1706         default:
1707                 ret = -ENXIO;
1708                 break;
1709         }
1710
1711         return ret;
1712 }
1713
1714 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1715 {
1716         int ret;
1717
1718         switch (attr->group) {
1719         case KVM_S390_VM_MEM_CTRL:
1720                 switch (attr->attr) {
1721                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1722                 case KVM_S390_VM_MEM_CLR_CMMA:
1723                         ret = sclp.has_cmma ? 0 : -ENXIO;
1724                         break;
1725                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1726                         ret = 0;
1727                         break;
1728                 default:
1729                         ret = -ENXIO;
1730                         break;
1731                 }
1732                 break;
1733         case KVM_S390_VM_TOD:
1734                 switch (attr->attr) {
1735                 case KVM_S390_VM_TOD_LOW:
1736                 case KVM_S390_VM_TOD_HIGH:
1737                         ret = 0;
1738                         break;
1739                 default:
1740                         ret = -ENXIO;
1741                         break;
1742                 }
1743                 break;
1744         case KVM_S390_VM_CPU_MODEL:
1745                 switch (attr->attr) {
1746                 case KVM_S390_VM_CPU_PROCESSOR:
1747                 case KVM_S390_VM_CPU_MACHINE:
1748                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1749                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1750                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1751                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1752                         ret = 0;
1753                         break;
1754                 default:
1755                         ret = -ENXIO;
1756                         break;
1757                 }
1758                 break;
1759         case KVM_S390_VM_CRYPTO:
1760                 switch (attr->attr) {
1761                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1762                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1763                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1764                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1765                         ret = 0;
1766                         break;
1767                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1768                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1769                         ret = ap_instructions_available() ? 0 : -ENXIO;
1770                         break;
1771                 default:
1772                         ret = -ENXIO;
1773                         break;
1774                 }
1775                 break;
1776         case KVM_S390_VM_MIGRATION:
1777                 ret = 0;
1778                 break;
1779         default:
1780                 ret = -ENXIO;
1781                 break;
1782         }
1783
1784         return ret;
1785 }
1786
1787 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1788 {
1789         uint8_t *keys;
1790         uint64_t hva;
1791         int srcu_idx, i, r = 0;
1792
1793         if (args->flags != 0)
1794                 return -EINVAL;
1795
1796         /* Is this guest using storage keys? */
1797         if (!mm_uses_skeys(current->mm))
1798                 return KVM_S390_GET_SKEYS_NONE;
1799
1800         /* Enforce sane limit on memory allocation */
1801         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1802                 return -EINVAL;
1803
1804         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1805         if (!keys)
1806                 return -ENOMEM;
1807
1808         down_read(&current->mm->mmap_sem);
1809         srcu_idx = srcu_read_lock(&kvm->srcu);
1810         for (i = 0; i < args->count; i++) {
1811                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1812                 if (kvm_is_error_hva(hva)) {
1813                         r = -EFAULT;
1814                         break;
1815                 }
1816
1817                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1818                 if (r)
1819                         break;
1820         }
1821         srcu_read_unlock(&kvm->srcu, srcu_idx);
1822         up_read(&current->mm->mmap_sem);
1823
1824         if (!r) {
1825                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1826                                  sizeof(uint8_t) * args->count);
1827                 if (r)
1828                         r = -EFAULT;
1829         }
1830
1831         kvfree(keys);
1832         return r;
1833 }
1834
1835 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1836 {
1837         uint8_t *keys;
1838         uint64_t hva;
1839         int srcu_idx, i, r = 0;
1840         bool unlocked;
1841
1842         if (args->flags != 0)
1843                 return -EINVAL;
1844
1845         /* Enforce sane limit on memory allocation */
1846         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1847                 return -EINVAL;
1848
1849         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1850         if (!keys)
1851                 return -ENOMEM;
1852
1853         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1854                            sizeof(uint8_t) * args->count);
1855         if (r) {
1856                 r = -EFAULT;
1857                 goto out;
1858         }
1859
1860         /* Enable storage key handling for the guest */
1861         r = s390_enable_skey();
1862         if (r)
1863                 goto out;
1864
1865         i = 0;
1866         down_read(&current->mm->mmap_sem);
1867         srcu_idx = srcu_read_lock(&kvm->srcu);
1868         while (i < args->count) {
1869                 unlocked = false;
1870                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1871                 if (kvm_is_error_hva(hva)) {
1872                         r = -EFAULT;
1873                         break;
1874                 }
1875
1876                 /* Lowest order bit is reserved */
1877                 if (keys[i] & 0x01) {
1878                         r = -EINVAL;
1879                         break;
1880                 }
1881
1882                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1883                 if (r) {
1884                         r = fixup_user_fault(current, current->mm, hva,
1885                                              FAULT_FLAG_WRITE, &unlocked);
1886                         if (r)
1887                                 break;
1888                 }
1889                 if (!r)
1890                         i++;
1891         }
1892         srcu_read_unlock(&kvm->srcu, srcu_idx);
1893         up_read(&current->mm->mmap_sem);
1894 out:
1895         kvfree(keys);
1896         return r;
1897 }
1898
1899 /*
1900  * Base address and length must be sent at the start of each block, therefore
1901  * it's cheaper to send some clean data, as long as it's less than the size of
1902  * two longs.
1903  */
1904 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1905 /* for consistency */
1906 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1907
1908 /*
1909  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1910  * address falls in a hole. In that case the index of one of the memslots
1911  * bordering the hole is returned.
1912  */
1913 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1914 {
1915         int start = 0, end = slots->used_slots;
1916         int slot = atomic_read(&slots->lru_slot);
1917         struct kvm_memory_slot *memslots = slots->memslots;
1918
1919         if (gfn >= memslots[slot].base_gfn &&
1920             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1921                 return slot;
1922
1923         while (start < end) {
1924                 slot = start + (end - start) / 2;
1925
1926                 if (gfn >= memslots[slot].base_gfn)
1927                         end = slot;
1928                 else
1929                         start = slot + 1;
1930         }
1931
1932         if (gfn >= memslots[start].base_gfn &&
1933             gfn < memslots[start].base_gfn + memslots[start].npages) {
1934                 atomic_set(&slots->lru_slot, start);
1935         }
1936
1937         return start;
1938 }
1939
1940 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1941                               u8 *res, unsigned long bufsize)
1942 {
1943         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1944
1945         args->count = 0;
1946         while (args->count < bufsize) {
1947                 hva = gfn_to_hva(kvm, cur_gfn);
1948                 /*
1949                  * We return an error if the first value was invalid, but we
1950                  * return successfully if at least one value was copied.
1951                  */
1952                 if (kvm_is_error_hva(hva))
1953                         return args->count ? 0 : -EFAULT;
1954                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1955                         pgstev = 0;
1956                 res[args->count++] = (pgstev >> 24) & 0x43;
1957                 cur_gfn++;
1958         }
1959
1960         return 0;
1961 }
1962
1963 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1964                                               unsigned long cur_gfn)
1965 {
1966         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1967         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1968         unsigned long ofs = cur_gfn - ms->base_gfn;
1969
1970         if (ms->base_gfn + ms->npages <= cur_gfn) {
1971                 slotidx--;
1972                 /* If we are above the highest slot, wrap around */
1973                 if (slotidx < 0)
1974                         slotidx = slots->used_slots - 1;
1975
1976                 ms = slots->memslots + slotidx;
1977                 ofs = 0;
1978         }
1979         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1980         while ((slotidx > 0) && (ofs >= ms->npages)) {
1981                 slotidx--;
1982                 ms = slots->memslots + slotidx;
1983                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1984         }
1985         return ms->base_gfn + ofs;
1986 }
1987
1988 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1989                              u8 *res, unsigned long bufsize)
1990 {
1991         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1992         struct kvm_memslots *slots = kvm_memslots(kvm);
1993         struct kvm_memory_slot *ms;
1994
1995         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1996         ms = gfn_to_memslot(kvm, cur_gfn);
1997         args->count = 0;
1998         args->start_gfn = cur_gfn;
1999         if (!ms)
2000                 return 0;
2001         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2002         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2003
2004         while (args->count < bufsize) {
2005                 hva = gfn_to_hva(kvm, cur_gfn);
2006                 if (kvm_is_error_hva(hva))
2007                         return 0;
2008                 /* Decrement only if we actually flipped the bit to 0 */
2009                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2010                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2011                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2012                         pgstev = 0;
2013                 /* Save the value */
2014                 res[args->count++] = (pgstev >> 24) & 0x43;
2015                 /* If the next bit is too far away, stop. */
2016                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2017                         return 0;
2018                 /* If we reached the previous "next", find the next one */
2019                 if (cur_gfn == next_gfn)
2020                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2021                 /* Reached the end of memory or of the buffer, stop */
2022                 if ((next_gfn >= mem_end) ||
2023                     (next_gfn - args->start_gfn >= bufsize))
2024                         return 0;
2025                 cur_gfn++;
2026                 /* Reached the end of the current memslot, take the next one. */
2027                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2028                         ms = gfn_to_memslot(kvm, cur_gfn);
2029                         if (!ms)
2030                                 return 0;
2031                 }
2032         }
2033         return 0;
2034 }
2035
2036 /*
2037  * This function searches for the next page with dirty CMMA attributes, and
2038  * saves the attributes in the buffer up to either the end of the buffer or
2039  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2040  * no trailing clean bytes are saved.
2041  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2042  * output buffer will indicate 0 as length.
2043  */
2044 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2045                                   struct kvm_s390_cmma_log *args)
2046 {
2047         unsigned long bufsize;
2048         int srcu_idx, peek, ret;
2049         u8 *values;
2050
2051         if (!kvm->arch.use_cmma)
2052                 return -ENXIO;
2053         /* Invalid/unsupported flags were specified */
2054         if (args->flags & ~KVM_S390_CMMA_PEEK)
2055                 return -EINVAL;
2056         /* Migration mode query, and we are not doing a migration */
2057         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2058         if (!peek && !kvm->arch.migration_mode)
2059                 return -EINVAL;
2060         /* CMMA is disabled or was not used, or the buffer has length zero */
2061         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2062         if (!bufsize || !kvm->mm->context.uses_cmm) {
2063                 memset(args, 0, sizeof(*args));
2064                 return 0;
2065         }
2066         /* We are not peeking, and there are no dirty pages */
2067         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2068                 memset(args, 0, sizeof(*args));
2069                 return 0;
2070         }
2071
2072         values = vmalloc(bufsize);
2073         if (!values)
2074                 return -ENOMEM;
2075
2076         down_read(&kvm->mm->mmap_sem);
2077         srcu_idx = srcu_read_lock(&kvm->srcu);
2078         if (peek)
2079                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2080         else
2081                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2082         srcu_read_unlock(&kvm->srcu, srcu_idx);
2083         up_read(&kvm->mm->mmap_sem);
2084
2085         if (kvm->arch.migration_mode)
2086                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2087         else
2088                 args->remaining = 0;
2089
2090         if (copy_to_user((void __user *)args->values, values, args->count))
2091                 ret = -EFAULT;
2092
2093         vfree(values);
2094         return ret;
2095 }
2096
2097 /*
2098  * This function sets the CMMA attributes for the given pages. If the input
2099  * buffer has zero length, no action is taken, otherwise the attributes are
2100  * set and the mm->context.uses_cmm flag is set.
2101  */
2102 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2103                                   const struct kvm_s390_cmma_log *args)
2104 {
2105         unsigned long hva, mask, pgstev, i;
2106         uint8_t *bits;
2107         int srcu_idx, r = 0;
2108
2109         mask = args->mask;
2110
2111         if (!kvm->arch.use_cmma)
2112                 return -ENXIO;
2113         /* invalid/unsupported flags */
2114         if (args->flags != 0)
2115                 return -EINVAL;
2116         /* Enforce sane limit on memory allocation */
2117         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2118                 return -EINVAL;
2119         /* Nothing to do */
2120         if (args->count == 0)
2121                 return 0;
2122
2123         bits = vmalloc(array_size(sizeof(*bits), args->count));
2124         if (!bits)
2125                 return -ENOMEM;
2126
2127         r = copy_from_user(bits, (void __user *)args->values, args->count);
2128         if (r) {
2129                 r = -EFAULT;
2130                 goto out;
2131         }
2132
2133         down_read(&kvm->mm->mmap_sem);
2134         srcu_idx = srcu_read_lock(&kvm->srcu);
2135         for (i = 0; i < args->count; i++) {
2136                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2137                 if (kvm_is_error_hva(hva)) {
2138                         r = -EFAULT;
2139                         break;
2140                 }
2141
2142                 pgstev = bits[i];
2143                 pgstev = pgstev << 24;
2144                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2145                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2146         }
2147         srcu_read_unlock(&kvm->srcu, srcu_idx);
2148         up_read(&kvm->mm->mmap_sem);
2149
2150         if (!kvm->mm->context.uses_cmm) {
2151                 down_write(&kvm->mm->mmap_sem);
2152                 kvm->mm->context.uses_cmm = 1;
2153                 up_write(&kvm->mm->mmap_sem);
2154         }
2155 out:
2156         vfree(bits);
2157         return r;
2158 }
2159
2160 long kvm_arch_vm_ioctl(struct file *filp,
2161                        unsigned int ioctl, unsigned long arg)
2162 {
2163         struct kvm *kvm = filp->private_data;
2164         void __user *argp = (void __user *)arg;
2165         struct kvm_device_attr attr;
2166         int r;
2167
2168         switch (ioctl) {
2169         case KVM_S390_INTERRUPT: {
2170                 struct kvm_s390_interrupt s390int;
2171
2172                 r = -EFAULT;
2173                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2174                         break;
2175                 r = kvm_s390_inject_vm(kvm, &s390int);
2176                 break;
2177         }
2178         case KVM_CREATE_IRQCHIP: {
2179                 struct kvm_irq_routing_entry routing;
2180
2181                 r = -EINVAL;
2182                 if (kvm->arch.use_irqchip) {
2183                         /* Set up dummy routing. */
2184                         memset(&routing, 0, sizeof(routing));
2185                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2186                 }
2187                 break;
2188         }
2189         case KVM_SET_DEVICE_ATTR: {
2190                 r = -EFAULT;
2191                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2192                         break;
2193                 r = kvm_s390_vm_set_attr(kvm, &attr);
2194                 break;
2195         }
2196         case KVM_GET_DEVICE_ATTR: {
2197                 r = -EFAULT;
2198                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2199                         break;
2200                 r = kvm_s390_vm_get_attr(kvm, &attr);
2201                 break;
2202         }
2203         case KVM_HAS_DEVICE_ATTR: {
2204                 r = -EFAULT;
2205                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2206                         break;
2207                 r = kvm_s390_vm_has_attr(kvm, &attr);
2208                 break;
2209         }
2210         case KVM_S390_GET_SKEYS: {
2211                 struct kvm_s390_skeys args;
2212
2213                 r = -EFAULT;
2214                 if (copy_from_user(&args, argp,
2215                                    sizeof(struct kvm_s390_skeys)))
2216                         break;
2217                 r = kvm_s390_get_skeys(kvm, &args);
2218                 break;
2219         }
2220         case KVM_S390_SET_SKEYS: {
2221                 struct kvm_s390_skeys args;
2222
2223                 r = -EFAULT;
2224                 if (copy_from_user(&args, argp,
2225                                    sizeof(struct kvm_s390_skeys)))
2226                         break;
2227                 r = kvm_s390_set_skeys(kvm, &args);
2228                 break;
2229         }
2230         case KVM_S390_GET_CMMA_BITS: {
2231                 struct kvm_s390_cmma_log args;
2232
2233                 r = -EFAULT;
2234                 if (copy_from_user(&args, argp, sizeof(args)))
2235                         break;
2236                 mutex_lock(&kvm->slots_lock);
2237                 r = kvm_s390_get_cmma_bits(kvm, &args);
2238                 mutex_unlock(&kvm->slots_lock);
2239                 if (!r) {
2240                         r = copy_to_user(argp, &args, sizeof(args));
2241                         if (r)
2242                                 r = -EFAULT;
2243                 }
2244                 break;
2245         }
2246         case KVM_S390_SET_CMMA_BITS: {
2247                 struct kvm_s390_cmma_log args;
2248
2249                 r = -EFAULT;
2250                 if (copy_from_user(&args, argp, sizeof(args)))
2251                         break;
2252                 mutex_lock(&kvm->slots_lock);
2253                 r = kvm_s390_set_cmma_bits(kvm, &args);
2254                 mutex_unlock(&kvm->slots_lock);
2255                 break;
2256         }
2257         default:
2258                 r = -ENOTTY;
2259         }
2260
2261         return r;
2262 }
2263
2264 static int kvm_s390_apxa_installed(void)
2265 {
2266         struct ap_config_info info;
2267
2268         if (ap_instructions_available()) {
2269                 if (ap_qci(&info) == 0)
2270                         return info.apxa;
2271         }
2272
2273         return 0;
2274 }
2275
2276 /*
2277  * The format of the crypto control block (CRYCB) is specified in the 3 low
2278  * order bits of the CRYCB designation (CRYCBD) field as follows:
2279  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2280  *           AP extended addressing (APXA) facility are installed.
2281  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2282  * Format 2: Both the APXA and MSAX3 facilities are installed
2283  */
2284 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2285 {
2286         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2287
2288         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2289         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2290
2291         /* Check whether MSAX3 is installed */
2292         if (!test_kvm_facility(kvm, 76))
2293                 return;
2294
2295         if (kvm_s390_apxa_installed())
2296                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2297         else
2298                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2299 }
2300
2301 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2302                                unsigned long *aqm, unsigned long *adm)
2303 {
2304         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2305
2306         mutex_lock(&kvm->lock);
2307         kvm_s390_vcpu_block_all(kvm);
2308
2309         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2310         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2311                 memcpy(crycb->apcb1.apm, apm, 32);
2312                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2313                          apm[0], apm[1], apm[2], apm[3]);
2314                 memcpy(crycb->apcb1.aqm, aqm, 32);
2315                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2316                          aqm[0], aqm[1], aqm[2], aqm[3]);
2317                 memcpy(crycb->apcb1.adm, adm, 32);
2318                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2319                          adm[0], adm[1], adm[2], adm[3]);
2320                 break;
2321         case CRYCB_FORMAT1:
2322         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2323                 memcpy(crycb->apcb0.apm, apm, 8);
2324                 memcpy(crycb->apcb0.aqm, aqm, 2);
2325                 memcpy(crycb->apcb0.adm, adm, 2);
2326                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2327                          apm[0], *((unsigned short *)aqm),
2328                          *((unsigned short *)adm));
2329                 break;
2330         default:        /* Can not happen */
2331                 break;
2332         }
2333
2334         /* recreate the shadow crycb for each vcpu */
2335         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2336         kvm_s390_vcpu_unblock_all(kvm);
2337         mutex_unlock(&kvm->lock);
2338 }
2339 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2340
2341 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2342 {
2343         mutex_lock(&kvm->lock);
2344         kvm_s390_vcpu_block_all(kvm);
2345
2346         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2347                sizeof(kvm->arch.crypto.crycb->apcb0));
2348         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2349                sizeof(kvm->arch.crypto.crycb->apcb1));
2350
2351         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2352         /* recreate the shadow crycb for each vcpu */
2353         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2354         kvm_s390_vcpu_unblock_all(kvm);
2355         mutex_unlock(&kvm->lock);
2356 }
2357 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2358
2359 static u64 kvm_s390_get_initial_cpuid(void)
2360 {
2361         struct cpuid cpuid;
2362
2363         get_cpu_id(&cpuid);
2364         cpuid.version = 0xff;
2365         return *((u64 *) &cpuid);
2366 }
2367
2368 static void kvm_s390_crypto_init(struct kvm *kvm)
2369 {
2370         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2371         kvm_s390_set_crycb_format(kvm);
2372
2373         if (!test_kvm_facility(kvm, 76))
2374                 return;
2375
2376         /* Enable AES/DEA protected key functions by default */
2377         kvm->arch.crypto.aes_kw = 1;
2378         kvm->arch.crypto.dea_kw = 1;
2379         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2380                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2381         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2382                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2383 }
2384
2385 static void sca_dispose(struct kvm *kvm)
2386 {
2387         if (kvm->arch.use_esca)
2388                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2389         else
2390                 free_page((unsigned long)(kvm->arch.sca));
2391         kvm->arch.sca = NULL;
2392 }
2393
2394 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2395 {
2396         gfp_t alloc_flags = GFP_KERNEL;
2397         int i, rc;
2398         char debug_name[16];
2399         static unsigned long sca_offset;
2400
2401         rc = -EINVAL;
2402 #ifdef CONFIG_KVM_S390_UCONTROL
2403         if (type & ~KVM_VM_S390_UCONTROL)
2404                 goto out_err;
2405         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2406                 goto out_err;
2407 #else
2408         if (type)
2409                 goto out_err;
2410 #endif
2411
2412         rc = s390_enable_sie();
2413         if (rc)
2414                 goto out_err;
2415
2416         rc = -ENOMEM;
2417
2418         if (!sclp.has_64bscao)
2419                 alloc_flags |= GFP_DMA;
2420         rwlock_init(&kvm->arch.sca_lock);
2421         /* start with basic SCA */
2422         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2423         if (!kvm->arch.sca)
2424                 goto out_err;
2425         mutex_lock(&kvm_lock);
2426         sca_offset += 16;
2427         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2428                 sca_offset = 0;
2429         kvm->arch.sca = (struct bsca_block *)
2430                         ((char *) kvm->arch.sca + sca_offset);
2431         mutex_unlock(&kvm_lock);
2432
2433         sprintf(debug_name, "kvm-%u", current->pid);
2434
2435         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2436         if (!kvm->arch.dbf)
2437                 goto out_err;
2438
2439         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2440         kvm->arch.sie_page2 =
2441              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2442         if (!kvm->arch.sie_page2)
2443                 goto out_err;
2444
2445         kvm->arch.sie_page2->kvm = kvm;
2446         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2447
2448         for (i = 0; i < kvm_s390_fac_size(); i++) {
2449                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2450                                               (kvm_s390_fac_base[i] |
2451                                                kvm_s390_fac_ext[i]);
2452                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2453                                               kvm_s390_fac_base[i];
2454         }
2455         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2456
2457         /* we are always in czam mode - even on pre z14 machines */
2458         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2459         set_kvm_facility(kvm->arch.model.fac_list, 138);
2460         /* we emulate STHYI in kvm */
2461         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2462         set_kvm_facility(kvm->arch.model.fac_list, 74);
2463         if (MACHINE_HAS_TLB_GUEST) {
2464                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2465                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2466         }
2467
2468         if (css_general_characteristics.aiv && test_facility(65))
2469                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2470
2471         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2472         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2473
2474         kvm_s390_crypto_init(kvm);
2475
2476         mutex_init(&kvm->arch.float_int.ais_lock);
2477         spin_lock_init(&kvm->arch.float_int.lock);
2478         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2479                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2480         init_waitqueue_head(&kvm->arch.ipte_wq);
2481         mutex_init(&kvm->arch.ipte_mutex);
2482
2483         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2484         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2485
2486         if (type & KVM_VM_S390_UCONTROL) {
2487                 kvm->arch.gmap = NULL;
2488                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2489         } else {
2490                 if (sclp.hamax == U64_MAX)
2491                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2492                 else
2493                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2494                                                     sclp.hamax + 1);
2495                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2496                 if (!kvm->arch.gmap)
2497                         goto out_err;
2498                 kvm->arch.gmap->private = kvm;
2499                 kvm->arch.gmap->pfault_enabled = 0;
2500         }
2501
2502         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2503         kvm->arch.use_skf = sclp.has_skey;
2504         spin_lock_init(&kvm->arch.start_stop_lock);
2505         kvm_s390_vsie_init(kvm);
2506         kvm_s390_gisa_init(kvm);
2507         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2508
2509         return 0;
2510 out_err:
2511         free_page((unsigned long)kvm->arch.sie_page2);
2512         debug_unregister(kvm->arch.dbf);
2513         sca_dispose(kvm);
2514         KVM_EVENT(3, "creation of vm failed: %d", rc);
2515         return rc;
2516 }
2517
2518 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2519 {
2520         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2521         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2522         kvm_s390_clear_local_irqs(vcpu);
2523         kvm_clear_async_pf_completion_queue(vcpu);
2524         if (!kvm_is_ucontrol(vcpu->kvm))
2525                 sca_del_vcpu(vcpu);
2526
2527         if (kvm_is_ucontrol(vcpu->kvm))
2528                 gmap_remove(vcpu->arch.gmap);
2529
2530         if (vcpu->kvm->arch.use_cmma)
2531                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2532         free_page((unsigned long)(vcpu->arch.sie_block));
2533
2534         kvm_vcpu_uninit(vcpu);
2535         kmem_cache_free(kvm_vcpu_cache, vcpu);
2536 }
2537
2538 static void kvm_free_vcpus(struct kvm *kvm)
2539 {
2540         unsigned int i;
2541         struct kvm_vcpu *vcpu;
2542
2543         kvm_for_each_vcpu(i, vcpu, kvm)
2544                 kvm_arch_vcpu_destroy(vcpu);
2545
2546         mutex_lock(&kvm->lock);
2547         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2548                 kvm->vcpus[i] = NULL;
2549
2550         atomic_set(&kvm->online_vcpus, 0);
2551         mutex_unlock(&kvm->lock);
2552 }
2553
2554 void kvm_arch_destroy_vm(struct kvm *kvm)
2555 {
2556         kvm_free_vcpus(kvm);
2557         sca_dispose(kvm);
2558         debug_unregister(kvm->arch.dbf);
2559         kvm_s390_gisa_destroy(kvm);
2560         free_page((unsigned long)kvm->arch.sie_page2);
2561         if (!kvm_is_ucontrol(kvm))
2562                 gmap_remove(kvm->arch.gmap);
2563         kvm_s390_destroy_adapters(kvm);
2564         kvm_s390_clear_float_irqs(kvm);
2565         kvm_s390_vsie_destroy(kvm);
2566         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2567 }
2568
2569 /* Section: vcpu related */
2570 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2571 {
2572         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2573         if (!vcpu->arch.gmap)
2574                 return -ENOMEM;
2575         vcpu->arch.gmap->private = vcpu->kvm;
2576
2577         return 0;
2578 }
2579
2580 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2581 {
2582         if (!kvm_s390_use_sca_entries())
2583                 return;
2584         read_lock(&vcpu->kvm->arch.sca_lock);
2585         if (vcpu->kvm->arch.use_esca) {
2586                 struct esca_block *sca = vcpu->kvm->arch.sca;
2587
2588                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2589                 sca->cpu[vcpu->vcpu_id].sda = 0;
2590         } else {
2591                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2592
2593                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2594                 sca->cpu[vcpu->vcpu_id].sda = 0;
2595         }
2596         read_unlock(&vcpu->kvm->arch.sca_lock);
2597 }
2598
2599 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2600 {
2601         if (!kvm_s390_use_sca_entries()) {
2602                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2603
2604                 /* we still need the basic sca for the ipte control */
2605                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2606                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2607                 return;
2608         }
2609         read_lock(&vcpu->kvm->arch.sca_lock);
2610         if (vcpu->kvm->arch.use_esca) {
2611                 struct esca_block *sca = vcpu->kvm->arch.sca;
2612
2613                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2614                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2615                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2616                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2617                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2618         } else {
2619                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2620
2621                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2622                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2623                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2624                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2625         }
2626         read_unlock(&vcpu->kvm->arch.sca_lock);
2627 }
2628
2629 /* Basic SCA to Extended SCA data copy routines */
2630 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2631 {
2632         d->sda = s->sda;
2633         d->sigp_ctrl.c = s->sigp_ctrl.c;
2634         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2635 }
2636
2637 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2638 {
2639         int i;
2640
2641         d->ipte_control = s->ipte_control;
2642         d->mcn[0] = s->mcn;
2643         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2644                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2645 }
2646
2647 static int sca_switch_to_extended(struct kvm *kvm)
2648 {
2649         struct bsca_block *old_sca = kvm->arch.sca;
2650         struct esca_block *new_sca;
2651         struct kvm_vcpu *vcpu;
2652         unsigned int vcpu_idx;
2653         u32 scaol, scaoh;
2654
2655         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2656         if (!new_sca)
2657                 return -ENOMEM;
2658
2659         scaoh = (u32)((u64)(new_sca) >> 32);
2660         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2661
2662         kvm_s390_vcpu_block_all(kvm);
2663         write_lock(&kvm->arch.sca_lock);
2664
2665         sca_copy_b_to_e(new_sca, old_sca);
2666
2667         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2668                 vcpu->arch.sie_block->scaoh = scaoh;
2669                 vcpu->arch.sie_block->scaol = scaol;
2670                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2671         }
2672         kvm->arch.sca = new_sca;
2673         kvm->arch.use_esca = 1;
2674
2675         write_unlock(&kvm->arch.sca_lock);
2676         kvm_s390_vcpu_unblock_all(kvm);
2677
2678         free_page((unsigned long)old_sca);
2679
2680         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2681                  old_sca, kvm->arch.sca);
2682         return 0;
2683 }
2684
2685 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2686 {
2687         int rc;
2688
2689         if (!kvm_s390_use_sca_entries()) {
2690                 if (id < KVM_MAX_VCPUS)
2691                         return true;
2692                 return false;
2693         }
2694         if (id < KVM_S390_BSCA_CPU_SLOTS)
2695                 return true;
2696         if (!sclp.has_esca || !sclp.has_64bscao)
2697                 return false;
2698
2699         mutex_lock(&kvm->lock);
2700         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2701         mutex_unlock(&kvm->lock);
2702
2703         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2704 }
2705
2706 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2707 {
2708         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2709         kvm_clear_async_pf_completion_queue(vcpu);
2710         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2711                                     KVM_SYNC_GPRS |
2712                                     KVM_SYNC_ACRS |
2713                                     KVM_SYNC_CRS |
2714                                     KVM_SYNC_ARCH0 |
2715                                     KVM_SYNC_PFAULT;
2716         kvm_s390_set_prefix(vcpu, 0);
2717         if (test_kvm_facility(vcpu->kvm, 64))
2718                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2719         if (test_kvm_facility(vcpu->kvm, 82))
2720                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2721         if (test_kvm_facility(vcpu->kvm, 133))
2722                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2723         if (test_kvm_facility(vcpu->kvm, 156))
2724                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2725         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2726          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2727          */
2728         if (MACHINE_HAS_VX)
2729                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2730         else
2731                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2732
2733         if (kvm_is_ucontrol(vcpu->kvm))
2734                 return __kvm_ucontrol_vcpu_init(vcpu);
2735
2736         return 0;
2737 }
2738
2739 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2740 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2741 {
2742         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2743         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2744         vcpu->arch.cputm_start = get_tod_clock_fast();
2745         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2746 }
2747
2748 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2749 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2750 {
2751         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2752         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2753         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2754         vcpu->arch.cputm_start = 0;
2755         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2756 }
2757
2758 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2759 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2760 {
2761         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2762         vcpu->arch.cputm_enabled = true;
2763         __start_cpu_timer_accounting(vcpu);
2764 }
2765
2766 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2767 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2768 {
2769         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2770         __stop_cpu_timer_accounting(vcpu);
2771         vcpu->arch.cputm_enabled = false;
2772 }
2773
2774 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2775 {
2776         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2777         __enable_cpu_timer_accounting(vcpu);
2778         preempt_enable();
2779 }
2780
2781 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2782 {
2783         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2784         __disable_cpu_timer_accounting(vcpu);
2785         preempt_enable();
2786 }
2787
2788 /* set the cpu timer - may only be called from the VCPU thread itself */
2789 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2790 {
2791         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2792         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2793         if (vcpu->arch.cputm_enabled)
2794                 vcpu->arch.cputm_start = get_tod_clock_fast();
2795         vcpu->arch.sie_block->cputm = cputm;
2796         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2797         preempt_enable();
2798 }
2799
2800 /* update and get the cpu timer - can also be called from other VCPU threads */
2801 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2802 {
2803         unsigned int seq;
2804         __u64 value;
2805
2806         if (unlikely(!vcpu->arch.cputm_enabled))
2807                 return vcpu->arch.sie_block->cputm;
2808
2809         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2810         do {
2811                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2812                 /*
2813                  * If the writer would ever execute a read in the critical
2814                  * section, e.g. in irq context, we have a deadlock.
2815                  */
2816                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2817                 value = vcpu->arch.sie_block->cputm;
2818                 /* if cputm_start is 0, accounting is being started/stopped */
2819                 if (likely(vcpu->arch.cputm_start))
2820                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2821         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2822         preempt_enable();
2823         return value;
2824 }
2825
2826 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2827 {
2828
2829         gmap_enable(vcpu->arch.enabled_gmap);
2830         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2831         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2832                 __start_cpu_timer_accounting(vcpu);
2833         vcpu->cpu = cpu;
2834 }
2835
2836 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2837 {
2838         vcpu->cpu = -1;
2839         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2840                 __stop_cpu_timer_accounting(vcpu);
2841         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2842         vcpu->arch.enabled_gmap = gmap_get_enabled();
2843         gmap_disable(vcpu->arch.enabled_gmap);
2844
2845 }
2846
2847 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2848 {
2849         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2850         vcpu->arch.sie_block->gpsw.mask = 0UL;
2851         vcpu->arch.sie_block->gpsw.addr = 0UL;
2852         kvm_s390_set_prefix(vcpu, 0);
2853         kvm_s390_set_cpu_timer(vcpu, 0);
2854         vcpu->arch.sie_block->ckc       = 0UL;
2855         vcpu->arch.sie_block->todpr     = 0;
2856         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2857         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2858                                         CR0_INTERRUPT_KEY_SUBMASK |
2859                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2860         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2861                                         CR14_UNUSED_33 |
2862                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2863         vcpu->run->s.regs.fpc = 0;
2864         vcpu->arch.sie_block->gbea = 1;
2865         vcpu->arch.sie_block->pp = 0;
2866         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2867         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2868         kvm_clear_async_pf_completion_queue(vcpu);
2869         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2870                 kvm_s390_vcpu_stop(vcpu);
2871         kvm_s390_clear_local_irqs(vcpu);
2872 }
2873
2874 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2875 {
2876         mutex_lock(&vcpu->kvm->lock);
2877         preempt_disable();
2878         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2879         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2880         preempt_enable();
2881         mutex_unlock(&vcpu->kvm->lock);
2882         if (!kvm_is_ucontrol(vcpu->kvm)) {
2883                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2884                 sca_add_vcpu(vcpu);
2885         }
2886         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2887                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2888         /* make vcpu_load load the right gmap on the first trigger */
2889         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2890 }
2891
2892 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2893 {
2894         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2895             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2896                 return true;
2897         return false;
2898 }
2899
2900 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2901 {
2902         /* At least one ECC subfunction must be present */
2903         return kvm_has_pckmo_subfunc(kvm, 32) ||
2904                kvm_has_pckmo_subfunc(kvm, 33) ||
2905                kvm_has_pckmo_subfunc(kvm, 34) ||
2906                kvm_has_pckmo_subfunc(kvm, 40) ||
2907                kvm_has_pckmo_subfunc(kvm, 41);
2908
2909 }
2910
2911 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2912 {
2913         /*
2914          * If the AP instructions are not being interpreted and the MSAX3
2915          * facility is not configured for the guest, there is nothing to set up.
2916          */
2917         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2918                 return;
2919
2920         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2921         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2922         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2923         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2924
2925         if (vcpu->kvm->arch.crypto.apie)
2926                 vcpu->arch.sie_block->eca |= ECA_APIE;
2927
2928         /* Set up protected key support */
2929         if (vcpu->kvm->arch.crypto.aes_kw) {
2930                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2931                 /* ecc is also wrapped with AES key */
2932                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2933                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2934         }
2935
2936         if (vcpu->kvm->arch.crypto.dea_kw)
2937                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2938 }
2939
2940 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2941 {
2942         free_page(vcpu->arch.sie_block->cbrlo);
2943         vcpu->arch.sie_block->cbrlo = 0;
2944 }
2945
2946 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2947 {
2948         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2949         if (!vcpu->arch.sie_block->cbrlo)
2950                 return -ENOMEM;
2951         return 0;
2952 }
2953
2954 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2955 {
2956         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2957
2958         vcpu->arch.sie_block->ibc = model->ibc;
2959         if (test_kvm_facility(vcpu->kvm, 7))
2960                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2961 }
2962
2963 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2964 {
2965         int rc = 0;
2966
2967         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2968                                                     CPUSTAT_SM |
2969                                                     CPUSTAT_STOPPED);
2970
2971         if (test_kvm_facility(vcpu->kvm, 78))
2972                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2973         else if (test_kvm_facility(vcpu->kvm, 8))
2974                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2975
2976         kvm_s390_vcpu_setup_model(vcpu);
2977
2978         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2979         if (MACHINE_HAS_ESOP)
2980                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2981         if (test_kvm_facility(vcpu->kvm, 9))
2982                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2983         if (test_kvm_facility(vcpu->kvm, 73))
2984                 vcpu->arch.sie_block->ecb |= ECB_TE;
2985
2986         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2987                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2988         if (test_kvm_facility(vcpu->kvm, 130))
2989                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2990         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2991         if (sclp.has_cei)
2992                 vcpu->arch.sie_block->eca |= ECA_CEI;
2993         if (sclp.has_ib)
2994                 vcpu->arch.sie_block->eca |= ECA_IB;
2995         if (sclp.has_siif)
2996                 vcpu->arch.sie_block->eca |= ECA_SII;
2997         if (sclp.has_sigpif)
2998                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2999         if (test_kvm_facility(vcpu->kvm, 129)) {
3000                 vcpu->arch.sie_block->eca |= ECA_VX;
3001                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3002         }
3003         if (test_kvm_facility(vcpu->kvm, 139))
3004                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3005         if (test_kvm_facility(vcpu->kvm, 156))
3006                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3007         if (vcpu->arch.sie_block->gd) {
3008                 vcpu->arch.sie_block->eca |= ECA_AIV;
3009                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3010                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3011         }
3012         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3013                                         | SDNXC;
3014         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3015
3016         if (sclp.has_kss)
3017                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3018         else
3019                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3020
3021         if (vcpu->kvm->arch.use_cmma) {
3022                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3023                 if (rc)
3024                         return rc;
3025         }
3026         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3027         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3028
3029         vcpu->arch.sie_block->hpid = HPID_KVM;
3030
3031         kvm_s390_vcpu_crypto_setup(vcpu);
3032
3033         return rc;
3034 }
3035
3036 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3037                                       unsigned int id)
3038 {
3039         struct kvm_vcpu *vcpu;
3040         struct sie_page *sie_page;
3041         int rc = -EINVAL;
3042
3043         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3044                 goto out;
3045
3046         rc = -ENOMEM;
3047
3048         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3049         if (!vcpu)
3050                 goto out;
3051
3052         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3053         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3054         if (!sie_page)
3055                 goto out_free_cpu;
3056
3057         vcpu->arch.sie_block = &sie_page->sie_block;
3058         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3059
3060         /* the real guest size will always be smaller than msl */
3061         vcpu->arch.sie_block->mso = 0;
3062         vcpu->arch.sie_block->msl = sclp.hamax;
3063
3064         vcpu->arch.sie_block->icpua = id;
3065         spin_lock_init(&vcpu->arch.local_int.lock);
3066         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3067         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3068                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3069         seqcount_init(&vcpu->arch.cputm_seqcount);
3070
3071         rc = kvm_vcpu_init(vcpu, kvm, id);
3072         if (rc)
3073                 goto out_free_sie_block;
3074         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3075                  vcpu->arch.sie_block);
3076         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3077
3078         return vcpu;
3079 out_free_sie_block:
3080         free_page((unsigned long)(vcpu->arch.sie_block));
3081 out_free_cpu:
3082         kmem_cache_free(kvm_vcpu_cache, vcpu);
3083 out:
3084         return ERR_PTR(rc);
3085 }
3086
3087 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3088 {
3089         return kvm_s390_vcpu_has_irq(vcpu, 0);
3090 }
3091
3092 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3093 {
3094         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3095 }
3096
3097 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3098 {
3099         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3100         exit_sie(vcpu);
3101 }
3102
3103 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3104 {
3105         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3106 }
3107
3108 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3109 {
3110         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3111         exit_sie(vcpu);
3112 }
3113
3114 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3115 {
3116         return atomic_read(&vcpu->arch.sie_block->prog20) &
3117                (PROG_BLOCK_SIE | PROG_REQUEST);
3118 }
3119
3120 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3121 {
3122         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3123 }
3124
3125 /*
3126  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3127  * If the CPU is not running (e.g. waiting as idle) the function will
3128  * return immediately. */
3129 void exit_sie(struct kvm_vcpu *vcpu)
3130 {
3131         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3132         kvm_s390_vsie_kick(vcpu);
3133         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3134                 cpu_relax();
3135 }
3136
3137 /* Kick a guest cpu out of SIE to process a request synchronously */
3138 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3139 {
3140         kvm_make_request(req, vcpu);
3141         kvm_s390_vcpu_request(vcpu);
3142 }
3143
3144 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3145                               unsigned long end)
3146 {
3147         struct kvm *kvm = gmap->private;
3148         struct kvm_vcpu *vcpu;
3149         unsigned long prefix;
3150         int i;
3151
3152         if (gmap_is_shadow(gmap))
3153                 return;
3154         if (start >= 1UL << 31)
3155                 /* We are only interested in prefix pages */
3156                 return;
3157         kvm_for_each_vcpu(i, vcpu, kvm) {
3158                 /* match against both prefix pages */
3159                 prefix = kvm_s390_get_prefix(vcpu);
3160                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3161                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3162                                    start, end);
3163                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3164                 }
3165         }
3166 }
3167
3168 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3169 {
3170         /* do not poll with more than halt_poll_max_steal percent of steal time */
3171         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3172             halt_poll_max_steal) {
3173                 vcpu->stat.halt_no_poll_steal++;
3174                 return true;
3175         }
3176         return false;
3177 }
3178
3179 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3180 {
3181         /* kvm common code refers to this, but never calls it */
3182         BUG();
3183         return 0;
3184 }
3185
3186 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3187                                            struct kvm_one_reg *reg)
3188 {
3189         int r = -EINVAL;
3190
3191         switch (reg->id) {
3192         case KVM_REG_S390_TODPR:
3193                 r = put_user(vcpu->arch.sie_block->todpr,
3194                              (u32 __user *)reg->addr);
3195                 break;
3196         case KVM_REG_S390_EPOCHDIFF:
3197                 r = put_user(vcpu->arch.sie_block->epoch,
3198                              (u64 __user *)reg->addr);
3199                 break;
3200         case KVM_REG_S390_CPU_TIMER:
3201                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3202                              (u64 __user *)reg->addr);
3203                 break;
3204         case KVM_REG_S390_CLOCK_COMP:
3205                 r = put_user(vcpu->arch.sie_block->ckc,
3206                              (u64 __user *)reg->addr);
3207                 break;
3208         case KVM_REG_S390_PFTOKEN:
3209                 r = put_user(vcpu->arch.pfault_token,
3210                              (u64 __user *)reg->addr);
3211                 break;
3212         case KVM_REG_S390_PFCOMPARE:
3213                 r = put_user(vcpu->arch.pfault_compare,
3214                              (u64 __user *)reg->addr);
3215                 break;
3216         case KVM_REG_S390_PFSELECT:
3217                 r = put_user(vcpu->arch.pfault_select,
3218                              (u64 __user *)reg->addr);
3219                 break;
3220         case KVM_REG_S390_PP:
3221                 r = put_user(vcpu->arch.sie_block->pp,
3222                              (u64 __user *)reg->addr);
3223                 break;
3224         case KVM_REG_S390_GBEA:
3225                 r = put_user(vcpu->arch.sie_block->gbea,
3226                              (u64 __user *)reg->addr);
3227                 break;
3228         default:
3229                 break;
3230         }
3231
3232         return r;
3233 }
3234
3235 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3236                                            struct kvm_one_reg *reg)
3237 {
3238         int r = -EINVAL;
3239         __u64 val;
3240
3241         switch (reg->id) {
3242         case KVM_REG_S390_TODPR:
3243                 r = get_user(vcpu->arch.sie_block->todpr,
3244                              (u32 __user *)reg->addr);
3245                 break;
3246         case KVM_REG_S390_EPOCHDIFF:
3247                 r = get_user(vcpu->arch.sie_block->epoch,
3248                              (u64 __user *)reg->addr);
3249                 break;
3250         case KVM_REG_S390_CPU_TIMER:
3251                 r = get_user(val, (u64 __user *)reg->addr);
3252                 if (!r)
3253                         kvm_s390_set_cpu_timer(vcpu, val);
3254                 break;
3255         case KVM_REG_S390_CLOCK_COMP:
3256                 r = get_user(vcpu->arch.sie_block->ckc,
3257                              (u64 __user *)reg->addr);
3258                 break;
3259         case KVM_REG_S390_PFTOKEN:
3260                 r = get_user(vcpu->arch.pfault_token,
3261                              (u64 __user *)reg->addr);
3262                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3263                         kvm_clear_async_pf_completion_queue(vcpu);
3264                 break;
3265         case KVM_REG_S390_PFCOMPARE:
3266                 r = get_user(vcpu->arch.pfault_compare,
3267                              (u64 __user *)reg->addr);
3268                 break;
3269         case KVM_REG_S390_PFSELECT:
3270                 r = get_user(vcpu->arch.pfault_select,
3271                              (u64 __user *)reg->addr);
3272                 break;
3273         case KVM_REG_S390_PP:
3274                 r = get_user(vcpu->arch.sie_block->pp,
3275                              (u64 __user *)reg->addr);
3276                 break;
3277         case KVM_REG_S390_GBEA:
3278                 r = get_user(vcpu->arch.sie_block->gbea,
3279                              (u64 __user *)reg->addr);
3280                 break;
3281         default:
3282                 break;
3283         }
3284
3285         return r;
3286 }
3287
3288 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3289 {
3290         kvm_s390_vcpu_initial_reset(vcpu);
3291         return 0;
3292 }
3293
3294 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3295 {
3296         vcpu_load(vcpu);
3297         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3298         vcpu_put(vcpu);
3299         return 0;
3300 }
3301
3302 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3303 {
3304         vcpu_load(vcpu);
3305         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3306         vcpu_put(vcpu);
3307         return 0;
3308 }
3309
3310 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3311                                   struct kvm_sregs *sregs)
3312 {
3313         vcpu_load(vcpu);
3314
3315         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3316         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3317
3318         vcpu_put(vcpu);
3319         return 0;
3320 }
3321
3322 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3323                                   struct kvm_sregs *sregs)
3324 {
3325         vcpu_load(vcpu);
3326
3327         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3328         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3329
3330         vcpu_put(vcpu);
3331         return 0;
3332 }
3333
3334 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3335 {
3336         int ret = 0;
3337
3338         vcpu_load(vcpu);
3339
3340         if (test_fp_ctl(fpu->fpc)) {
3341                 ret = -EINVAL;
3342                 goto out;
3343         }
3344         vcpu->run->s.regs.fpc = fpu->fpc;
3345         if (MACHINE_HAS_VX)
3346                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3347                                  (freg_t *) fpu->fprs);
3348         else
3349                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3350
3351 out:
3352         vcpu_put(vcpu);
3353         return ret;
3354 }
3355
3356 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3357 {
3358         vcpu_load(vcpu);
3359
3360         /* make sure we have the latest values */
3361         save_fpu_regs();
3362         if (MACHINE_HAS_VX)
3363                 convert_vx_to_fp((freg_t *) fpu->fprs,
3364                                  (__vector128 *) vcpu->run->s.regs.vrs);
3365         else
3366                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3367         fpu->fpc = vcpu->run->s.regs.fpc;
3368
3369         vcpu_put(vcpu);
3370         return 0;
3371 }
3372
3373 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3374 {
3375         int rc = 0;
3376
3377         if (!is_vcpu_stopped(vcpu))
3378                 rc = -EBUSY;
3379         else {
3380                 vcpu->run->psw_mask = psw.mask;
3381                 vcpu->run->psw_addr = psw.addr;
3382         }
3383         return rc;
3384 }
3385
3386 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3387                                   struct kvm_translation *tr)
3388 {
3389         return -EINVAL; /* not implemented yet */
3390 }
3391
3392 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3393                               KVM_GUESTDBG_USE_HW_BP | \
3394                               KVM_GUESTDBG_ENABLE)
3395
3396 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3397                                         struct kvm_guest_debug *dbg)
3398 {
3399         int rc = 0;
3400
3401         vcpu_load(vcpu);
3402
3403         vcpu->guest_debug = 0;
3404         kvm_s390_clear_bp_data(vcpu);
3405
3406         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3407                 rc = -EINVAL;
3408                 goto out;
3409         }
3410         if (!sclp.has_gpere) {
3411                 rc = -EINVAL;
3412                 goto out;
3413         }
3414
3415         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3416                 vcpu->guest_debug = dbg->control;
3417                 /* enforce guest PER */
3418                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3419
3420                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3421                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3422         } else {
3423                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3424                 vcpu->arch.guestdbg.last_bp = 0;
3425         }
3426
3427         if (rc) {
3428                 vcpu->guest_debug = 0;
3429                 kvm_s390_clear_bp_data(vcpu);
3430                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3431         }
3432
3433 out:
3434         vcpu_put(vcpu);
3435         return rc;
3436 }
3437
3438 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3439                                     struct kvm_mp_state *mp_state)
3440 {
3441         int ret;
3442
3443         vcpu_load(vcpu);
3444
3445         /* CHECK_STOP and LOAD are not supported yet */
3446         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3447                                       KVM_MP_STATE_OPERATING;
3448
3449         vcpu_put(vcpu);
3450         return ret;
3451 }
3452
3453 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3454                                     struct kvm_mp_state *mp_state)
3455 {
3456         int rc = 0;
3457
3458         vcpu_load(vcpu);
3459
3460         /* user space knows about this interface - let it control the state */
3461         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3462
3463         switch (mp_state->mp_state) {
3464         case KVM_MP_STATE_STOPPED:
3465                 kvm_s390_vcpu_stop(vcpu);
3466                 break;
3467         case KVM_MP_STATE_OPERATING:
3468                 kvm_s390_vcpu_start(vcpu);
3469                 break;
3470         case KVM_MP_STATE_LOAD:
3471         case KVM_MP_STATE_CHECK_STOP:
3472                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3473         default:
3474                 rc = -ENXIO;
3475         }
3476
3477         vcpu_put(vcpu);
3478         return rc;
3479 }
3480
3481 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3482 {
3483         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3484 }
3485
3486 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3487 {
3488 retry:
3489         kvm_s390_vcpu_request_handled(vcpu);
3490         if (!kvm_request_pending(vcpu))
3491                 return 0;
3492         /*
3493          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3494          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3495          * This ensures that the ipte instruction for this request has
3496          * already finished. We might race against a second unmapper that
3497          * wants to set the blocking bit. Lets just retry the request loop.
3498          */
3499         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3500                 int rc;
3501                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3502                                           kvm_s390_get_prefix(vcpu),
3503                                           PAGE_SIZE * 2, PROT_WRITE);
3504                 if (rc) {
3505                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3506                         return rc;
3507                 }
3508                 goto retry;
3509         }
3510
3511         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3512                 vcpu->arch.sie_block->ihcpu = 0xffff;
3513                 goto retry;
3514         }
3515
3516         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3517                 if (!ibs_enabled(vcpu)) {
3518                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3519                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3520                 }
3521                 goto retry;
3522         }
3523
3524         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3525                 if (ibs_enabled(vcpu)) {
3526                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3527                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3528                 }
3529                 goto retry;
3530         }
3531
3532         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3533                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3534                 goto retry;
3535         }
3536
3537         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3538                 /*
3539                  * Disable CMM virtualization; we will emulate the ESSA
3540                  * instruction manually, in order to provide additional
3541                  * functionalities needed for live migration.
3542                  */
3543                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3544                 goto retry;
3545         }
3546
3547         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3548                 /*
3549                  * Re-enable CMM virtualization if CMMA is available and
3550                  * CMM has been used.
3551                  */
3552                 if ((vcpu->kvm->arch.use_cmma) &&
3553                     (vcpu->kvm->mm->context.uses_cmm))
3554                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3555                 goto retry;
3556         }
3557
3558         /* nothing to do, just clear the request */
3559         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3560         /* we left the vsie handler, nothing to do, just clear the request */
3561         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3562
3563         return 0;
3564 }
3565
3566 void kvm_s390_set_tod_clock(struct kvm *kvm,
3567                             const struct kvm_s390_vm_tod_clock *gtod)
3568 {
3569         struct kvm_vcpu *vcpu;
3570         struct kvm_s390_tod_clock_ext htod;
3571         int i;
3572
3573         mutex_lock(&kvm->lock);
3574         preempt_disable();
3575
3576         get_tod_clock_ext((char *)&htod);
3577
3578         kvm->arch.epoch = gtod->tod - htod.tod;
3579         kvm->arch.epdx = 0;
3580         if (test_kvm_facility(kvm, 139)) {
3581                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3582                 if (kvm->arch.epoch > gtod->tod)
3583                         kvm->arch.epdx -= 1;
3584         }
3585
3586         kvm_s390_vcpu_block_all(kvm);
3587         kvm_for_each_vcpu(i, vcpu, kvm) {
3588                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3589                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3590         }
3591
3592         kvm_s390_vcpu_unblock_all(kvm);
3593         preempt_enable();
3594         mutex_unlock(&kvm->lock);
3595 }
3596
3597 /**
3598  * kvm_arch_fault_in_page - fault-in guest page if necessary
3599  * @vcpu: The corresponding virtual cpu
3600  * @gpa: Guest physical address
3601  * @writable: Whether the page should be writable or not
3602  *
3603  * Make sure that a guest page has been faulted-in on the host.
3604  *
3605  * Return: Zero on success, negative error code otherwise.
3606  */
3607 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3608 {
3609         return gmap_fault(vcpu->arch.gmap, gpa,
3610                           writable ? FAULT_FLAG_WRITE : 0);
3611 }
3612
3613 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3614                                       unsigned long token)
3615 {
3616         struct kvm_s390_interrupt inti;
3617         struct kvm_s390_irq irq;
3618
3619         if (start_token) {
3620                 irq.u.ext.ext_params2 = token;
3621                 irq.type = KVM_S390_INT_PFAULT_INIT;
3622                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3623         } else {
3624                 inti.type = KVM_S390_INT_PFAULT_DONE;
3625                 inti.parm64 = token;
3626                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3627         }
3628 }
3629
3630 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3631                                      struct kvm_async_pf *work)
3632 {
3633         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3634         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3635 }
3636
3637 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3638                                  struct kvm_async_pf *work)
3639 {
3640         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3641         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3642 }
3643
3644 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3645                                struct kvm_async_pf *work)
3646 {
3647         /* s390 will always inject the page directly */
3648 }
3649
3650 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3651 {
3652         /*
3653          * s390 will always inject the page directly,
3654          * but we still want check_async_completion to cleanup
3655          */
3656         return true;
3657 }
3658
3659 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3660 {
3661         hva_t hva;
3662         struct kvm_arch_async_pf arch;
3663         int rc;
3664
3665         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3666                 return 0;
3667         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3668             vcpu->arch.pfault_compare)
3669                 return 0;
3670         if (psw_extint_disabled(vcpu))
3671                 return 0;
3672         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3673                 return 0;
3674         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3675                 return 0;
3676         if (!vcpu->arch.gmap->pfault_enabled)
3677                 return 0;
3678
3679         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3680         hva += current->thread.gmap_addr & ~PAGE_MASK;
3681         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3682                 return 0;
3683
3684         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3685         return rc;
3686 }
3687
3688 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3689 {
3690         int rc, cpuflags;
3691
3692         /*
3693          * On s390 notifications for arriving pages will be delivered directly
3694          * to the guest but the house keeping for completed pfaults is
3695          * handled outside the worker.
3696          */
3697         kvm_check_async_pf_completion(vcpu);
3698
3699         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3700         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3701
3702         if (need_resched())
3703                 schedule();
3704
3705         if (test_cpu_flag(CIF_MCCK_PENDING))
3706                 s390_handle_mcck();
3707
3708         if (!kvm_is_ucontrol(vcpu->kvm)) {
3709                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3710                 if (rc)
3711                         return rc;
3712         }
3713
3714         rc = kvm_s390_handle_requests(vcpu);
3715         if (rc)
3716                 return rc;
3717
3718         if (guestdbg_enabled(vcpu)) {
3719                 kvm_s390_backup_guest_per_regs(vcpu);
3720                 kvm_s390_patch_guest_per_regs(vcpu);
3721         }
3722
3723         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3724
3725         vcpu->arch.sie_block->icptcode = 0;
3726         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3727         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3728         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3729
3730         return 0;
3731 }
3732
3733 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3734 {
3735         struct kvm_s390_pgm_info pgm_info = {
3736                 .code = PGM_ADDRESSING,
3737         };
3738         u8 opcode, ilen;
3739         int rc;
3740
3741         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3742         trace_kvm_s390_sie_fault(vcpu);
3743
3744         /*
3745          * We want to inject an addressing exception, which is defined as a
3746          * suppressing or terminating exception. However, since we came here
3747          * by a DAT access exception, the PSW still points to the faulting
3748          * instruction since DAT exceptions are nullifying. So we've got
3749          * to look up the current opcode to get the length of the instruction
3750          * to be able to forward the PSW.
3751          */
3752         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3753         ilen = insn_length(opcode);
3754         if (rc < 0) {
3755                 return rc;
3756         } else if (rc) {
3757                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3758                  * Forward by arbitrary ilc, injection will take care of
3759                  * nullification if necessary.
3760                  */
3761                 pgm_info = vcpu->arch.pgm;
3762                 ilen = 4;
3763         }
3764         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3765         kvm_s390_forward_psw(vcpu, ilen);
3766         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3767 }
3768
3769 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3770 {
3771         struct mcck_volatile_info *mcck_info;
3772         struct sie_page *sie_page;
3773
3774         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3775                    vcpu->arch.sie_block->icptcode);
3776         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3777
3778         if (guestdbg_enabled(vcpu))
3779                 kvm_s390_restore_guest_per_regs(vcpu);
3780
3781         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3782         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3783
3784         if (exit_reason == -EINTR) {
3785                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3786                 sie_page = container_of(vcpu->arch.sie_block,
3787                                         struct sie_page, sie_block);
3788                 mcck_info = &sie_page->mcck_info;
3789                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3790                 return 0;
3791         }
3792
3793         if (vcpu->arch.sie_block->icptcode > 0) {
3794                 int rc = kvm_handle_sie_intercept(vcpu);
3795
3796                 if (rc != -EOPNOTSUPP)
3797                         return rc;
3798                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3799                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3800                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3801                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3802                 return -EREMOTE;
3803         } else if (exit_reason != -EFAULT) {
3804                 vcpu->stat.exit_null++;
3805                 return 0;
3806         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3807                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3808                 vcpu->run->s390_ucontrol.trans_exc_code =
3809                                                 current->thread.gmap_addr;
3810                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3811                 return -EREMOTE;
3812         } else if (current->thread.gmap_pfault) {
3813                 trace_kvm_s390_major_guest_pfault(vcpu);
3814                 current->thread.gmap_pfault = 0;
3815                 if (kvm_arch_setup_async_pf(vcpu))
3816                         return 0;
3817                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3818         }
3819         return vcpu_post_run_fault_in_sie(vcpu);
3820 }
3821
3822 static int __vcpu_run(struct kvm_vcpu *vcpu)
3823 {
3824         int rc, exit_reason;
3825
3826         /*
3827          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3828          * ning the guest), so that memslots (and other stuff) are protected
3829          */
3830         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3831
3832         do {
3833                 rc = vcpu_pre_run(vcpu);
3834                 if (rc)
3835                         break;
3836
3837                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3838                 /*
3839                  * As PF_VCPU will be used in fault handler, between
3840                  * guest_enter and guest_exit should be no uaccess.
3841                  */
3842                 local_irq_disable();
3843                 guest_enter_irqoff();
3844                 __disable_cpu_timer_accounting(vcpu);
3845                 local_irq_enable();
3846                 exit_reason = sie64a(vcpu->arch.sie_block,
3847                                      vcpu->run->s.regs.gprs);
3848                 local_irq_disable();
3849                 __enable_cpu_timer_accounting(vcpu);
3850                 guest_exit_irqoff();
3851                 local_irq_enable();
3852                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3853
3854                 rc = vcpu_post_run(vcpu, exit_reason);
3855         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3856
3857         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3858         return rc;
3859 }
3860
3861 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3862 {
3863         struct runtime_instr_cb *riccb;
3864         struct gs_cb *gscb;
3865
3866         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3867         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3868         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3869         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3870         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3871                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3872         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3873                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3874                 /* some control register changes require a tlb flush */
3875                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3876         }
3877         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3878                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3879                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3880                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3881                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3882                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3883         }
3884         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3885                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3886                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3887                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3888                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3889                         kvm_clear_async_pf_completion_queue(vcpu);
3890         }
3891         /*
3892          * If userspace sets the riccb (e.g. after migration) to a valid state,
3893          * we should enable RI here instead of doing the lazy enablement.
3894          */
3895         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3896             test_kvm_facility(vcpu->kvm, 64) &&
3897             riccb->v &&
3898             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3899                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3900                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3901         }
3902         /*
3903          * If userspace sets the gscb (e.g. after migration) to non-zero,
3904          * we should enable GS here instead of doing the lazy enablement.
3905          */
3906         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3907             test_kvm_facility(vcpu->kvm, 133) &&
3908             gscb->gssm &&
3909             !vcpu->arch.gs_enabled) {
3910                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3911                 vcpu->arch.sie_block->ecb |= ECB_GS;
3912                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3913                 vcpu->arch.gs_enabled = 1;
3914         }
3915         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3916             test_kvm_facility(vcpu->kvm, 82)) {
3917                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3918                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3919         }
3920         save_access_regs(vcpu->arch.host_acrs);
3921         restore_access_regs(vcpu->run->s.regs.acrs);
3922         /* save host (userspace) fprs/vrs */
3923         save_fpu_regs();
3924         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3925         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3926         if (MACHINE_HAS_VX)
3927                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3928         else
3929                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3930         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3931         if (test_fp_ctl(current->thread.fpu.fpc))
3932                 /* User space provided an invalid FPC, let's clear it */
3933                 current->thread.fpu.fpc = 0;
3934         if (MACHINE_HAS_GS) {
3935                 preempt_disable();
3936                 __ctl_set_bit(2, 4);
3937                 if (current->thread.gs_cb) {
3938                         vcpu->arch.host_gscb = current->thread.gs_cb;
3939                         save_gs_cb(vcpu->arch.host_gscb);
3940                 }
3941                 if (vcpu->arch.gs_enabled) {
3942                         current->thread.gs_cb = (struct gs_cb *)
3943                                                 &vcpu->run->s.regs.gscb;
3944                         restore_gs_cb(current->thread.gs_cb);
3945                 }
3946                 preempt_enable();
3947         }
3948         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3949
3950         kvm_run->kvm_dirty_regs = 0;
3951 }
3952
3953 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3954 {
3955         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3956         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3957         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3958         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3959         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3960         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3961         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3962         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3963         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3964         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3965         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3966         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3967         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3968         save_access_regs(vcpu->run->s.regs.acrs);
3969         restore_access_regs(vcpu->arch.host_acrs);
3970         /* Save guest register state */
3971         save_fpu_regs();
3972         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3973         /* Restore will be done lazily at return */
3974         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3975         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3976         if (MACHINE_HAS_GS) {
3977                 __ctl_set_bit(2, 4);
3978                 if (vcpu->arch.gs_enabled)
3979                         save_gs_cb(current->thread.gs_cb);
3980                 preempt_disable();
3981                 current->thread.gs_cb = vcpu->arch.host_gscb;
3982                 restore_gs_cb(vcpu->arch.host_gscb);
3983                 preempt_enable();
3984                 if (!vcpu->arch.host_gscb)
3985                         __ctl_clear_bit(2, 4);
3986                 vcpu->arch.host_gscb = NULL;
3987         }
3988         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3989 }
3990
3991 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3992 {
3993         int rc;
3994
3995         if (kvm_run->immediate_exit)
3996                 return -EINTR;
3997
3998         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
3999             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4000                 return -EINVAL;
4001
4002         vcpu_load(vcpu);
4003
4004         if (guestdbg_exit_pending(vcpu)) {
4005                 kvm_s390_prepare_debug_exit(vcpu);
4006                 rc = 0;
4007                 goto out;
4008         }
4009
4010         kvm_sigset_activate(vcpu);
4011
4012         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4013                 kvm_s390_vcpu_start(vcpu);
4014         } else if (is_vcpu_stopped(vcpu)) {
4015                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4016                                    vcpu->vcpu_id);
4017                 rc = -EINVAL;
4018                 goto out;
4019         }
4020
4021         sync_regs(vcpu, kvm_run);
4022         enable_cpu_timer_accounting(vcpu);
4023
4024         might_fault();
4025         rc = __vcpu_run(vcpu);
4026
4027         if (signal_pending(current) && !rc) {
4028                 kvm_run->exit_reason = KVM_EXIT_INTR;
4029                 rc = -EINTR;
4030         }
4031
4032         if (guestdbg_exit_pending(vcpu) && !rc)  {
4033                 kvm_s390_prepare_debug_exit(vcpu);
4034                 rc = 0;
4035         }
4036
4037         if (rc == -EREMOTE) {
4038                 /* userspace support is needed, kvm_run has been prepared */
4039                 rc = 0;
4040         }
4041
4042         disable_cpu_timer_accounting(vcpu);
4043         store_regs(vcpu, kvm_run);
4044
4045         kvm_sigset_deactivate(vcpu);
4046
4047         vcpu->stat.exit_userspace++;
4048 out:
4049         vcpu_put(vcpu);
4050         return rc;
4051 }
4052
4053 /*
4054  * store status at address
4055  * we use have two special cases:
4056  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4057  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4058  */
4059 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4060 {
4061         unsigned char archmode = 1;
4062         freg_t fprs[NUM_FPRS];
4063         unsigned int px;
4064         u64 clkcomp, cputm;
4065         int rc;
4066
4067         px = kvm_s390_get_prefix(vcpu);
4068         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4069                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4070                         return -EFAULT;
4071                 gpa = 0;
4072         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4073                 if (write_guest_real(vcpu, 163, &archmode, 1))
4074                         return -EFAULT;
4075                 gpa = px;
4076         } else
4077                 gpa -= __LC_FPREGS_SAVE_AREA;
4078
4079         /* manually convert vector registers if necessary */
4080         if (MACHINE_HAS_VX) {
4081                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4082                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4083                                      fprs, 128);
4084         } else {
4085                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4086                                      vcpu->run->s.regs.fprs, 128);
4087         }
4088         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4089                               vcpu->run->s.regs.gprs, 128);
4090         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4091                               &vcpu->arch.sie_block->gpsw, 16);
4092         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4093                               &px, 4);
4094         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4095                               &vcpu->run->s.regs.fpc, 4);
4096         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4097                               &vcpu->arch.sie_block->todpr, 4);
4098         cputm = kvm_s390_get_cpu_timer(vcpu);
4099         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4100                               &cputm, 8);
4101         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4102         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4103                               &clkcomp, 8);
4104         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4105                               &vcpu->run->s.regs.acrs, 64);
4106         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4107                               &vcpu->arch.sie_block->gcr, 128);
4108         return rc ? -EFAULT : 0;
4109 }
4110
4111 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4112 {
4113         /*
4114          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4115          * switch in the run ioctl. Let's update our copies before we save
4116          * it into the save area
4117          */
4118         save_fpu_regs();
4119         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4120         save_access_regs(vcpu->run->s.regs.acrs);
4121
4122         return kvm_s390_store_status_unloaded(vcpu, addr);
4123 }
4124
4125 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4126 {
4127         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4128         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4129 }
4130
4131 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4132 {
4133         unsigned int i;
4134         struct kvm_vcpu *vcpu;
4135
4136         kvm_for_each_vcpu(i, vcpu, kvm) {
4137                 __disable_ibs_on_vcpu(vcpu);
4138         }
4139 }
4140
4141 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4142 {
4143         if (!sclp.has_ibs)
4144                 return;
4145         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4146         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4147 }
4148
4149 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4150 {
4151         int i, online_vcpus, started_vcpus = 0;
4152
4153         if (!is_vcpu_stopped(vcpu))
4154                 return;
4155
4156         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4157         /* Only one cpu at a time may enter/leave the STOPPED state. */
4158         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4159         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4160
4161         for (i = 0; i < online_vcpus; i++) {
4162                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4163                         started_vcpus++;
4164         }
4165
4166         if (started_vcpus == 0) {
4167                 /* we're the only active VCPU -> speed it up */
4168                 __enable_ibs_on_vcpu(vcpu);
4169         } else if (started_vcpus == 1) {
4170                 /*
4171                  * As we are starting a second VCPU, we have to disable
4172                  * the IBS facility on all VCPUs to remove potentially
4173                  * oustanding ENABLE requests.
4174                  */
4175                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4176         }
4177
4178         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4179         /*
4180          * Another VCPU might have used IBS while we were offline.
4181          * Let's play safe and flush the VCPU at startup.
4182          */
4183         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4184         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4185         return;
4186 }
4187
4188 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4189 {
4190         int i, online_vcpus, started_vcpus = 0;
4191         struct kvm_vcpu *started_vcpu = NULL;
4192
4193         if (is_vcpu_stopped(vcpu))
4194                 return;
4195
4196         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4197         /* Only one cpu at a time may enter/leave the STOPPED state. */
4198         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4199         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4200
4201         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4202         kvm_s390_clear_stop_irq(vcpu);
4203
4204         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4205         __disable_ibs_on_vcpu(vcpu);
4206
4207         for (i = 0; i < online_vcpus; i++) {
4208                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4209                         started_vcpus++;
4210                         started_vcpu = vcpu->kvm->vcpus[i];
4211                 }
4212         }
4213
4214         if (started_vcpus == 1) {
4215                 /*
4216                  * As we only have one VCPU left, we want to enable the
4217                  * IBS facility for that VCPU to speed it up.
4218                  */
4219                 __enable_ibs_on_vcpu(started_vcpu);
4220         }
4221
4222         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4223         return;
4224 }
4225
4226 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4227                                      struct kvm_enable_cap *cap)
4228 {
4229         int r;
4230
4231         if (cap->flags)
4232                 return -EINVAL;
4233
4234         switch (cap->cap) {
4235         case KVM_CAP_S390_CSS_SUPPORT:
4236                 if (!vcpu->kvm->arch.css_support) {
4237                         vcpu->kvm->arch.css_support = 1;
4238                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4239                         trace_kvm_s390_enable_css(vcpu->kvm);
4240                 }
4241                 r = 0;
4242                 break;
4243         default:
4244                 r = -EINVAL;
4245                 break;
4246         }
4247         return r;
4248 }
4249
4250 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4251                                   struct kvm_s390_mem_op *mop)
4252 {
4253         void __user *uaddr = (void __user *)mop->buf;
4254         void *tmpbuf = NULL;
4255         int r, srcu_idx;
4256         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4257                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4258
4259         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4260                 return -EINVAL;
4261
4262         if (mop->size > MEM_OP_MAX_SIZE)
4263                 return -E2BIG;
4264
4265         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4266                 tmpbuf = vmalloc(mop->size);
4267                 if (!tmpbuf)
4268                         return -ENOMEM;
4269         }
4270
4271         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4272
4273         switch (mop->op) {
4274         case KVM_S390_MEMOP_LOGICAL_READ:
4275                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4276                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4277                                             mop->size, GACC_FETCH);
4278                         break;
4279                 }
4280                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4281                 if (r == 0) {
4282                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4283                                 r = -EFAULT;
4284                 }
4285                 break;
4286         case KVM_S390_MEMOP_LOGICAL_WRITE:
4287                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4288                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4289                                             mop->size, GACC_STORE);
4290                         break;
4291                 }
4292                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4293                         r = -EFAULT;
4294                         break;
4295                 }
4296                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4297                 break;
4298         default:
4299                 r = -EINVAL;
4300         }
4301
4302         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4303
4304         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4305                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4306
4307         vfree(tmpbuf);
4308         return r;
4309 }
4310
4311 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4312                                unsigned int ioctl, unsigned long arg)
4313 {
4314         struct kvm_vcpu *vcpu = filp->private_data;
4315         void __user *argp = (void __user *)arg;
4316
4317         switch (ioctl) {
4318         case KVM_S390_IRQ: {
4319                 struct kvm_s390_irq s390irq;
4320
4321                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4322                         return -EFAULT;
4323                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4324         }
4325         case KVM_S390_INTERRUPT: {
4326                 struct kvm_s390_interrupt s390int;
4327                 struct kvm_s390_irq s390irq = {};
4328
4329                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4330                         return -EFAULT;
4331                 if (s390int_to_s390irq(&s390int, &s390irq))
4332                         return -EINVAL;
4333                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4334         }
4335         }
4336         return -ENOIOCTLCMD;
4337 }
4338
4339 long kvm_arch_vcpu_ioctl(struct file *filp,
4340                          unsigned int ioctl, unsigned long arg)
4341 {
4342         struct kvm_vcpu *vcpu = filp->private_data;
4343         void __user *argp = (void __user *)arg;
4344         int idx;
4345         long r;
4346
4347         vcpu_load(vcpu);
4348
4349         switch (ioctl) {
4350         case KVM_S390_STORE_STATUS:
4351                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4352                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4353                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4354                 break;
4355         case KVM_S390_SET_INITIAL_PSW: {
4356                 psw_t psw;
4357
4358                 r = -EFAULT;
4359                 if (copy_from_user(&psw, argp, sizeof(psw)))
4360                         break;
4361                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4362                 break;
4363         }
4364         case KVM_S390_INITIAL_RESET:
4365                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4366                 break;
4367         case KVM_SET_ONE_REG:
4368         case KVM_GET_ONE_REG: {
4369                 struct kvm_one_reg reg;
4370                 r = -EFAULT;
4371                 if (copy_from_user(&reg, argp, sizeof(reg)))
4372                         break;
4373                 if (ioctl == KVM_SET_ONE_REG)
4374                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4375                 else
4376                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4377                 break;
4378         }
4379 #ifdef CONFIG_KVM_S390_UCONTROL
4380         case KVM_S390_UCAS_MAP: {
4381                 struct kvm_s390_ucas_mapping ucasmap;
4382
4383                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4384                         r = -EFAULT;
4385                         break;
4386                 }
4387
4388                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4389                         r = -EINVAL;
4390                         break;
4391                 }
4392
4393                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4394                                      ucasmap.vcpu_addr, ucasmap.length);
4395                 break;
4396         }
4397         case KVM_S390_UCAS_UNMAP: {
4398                 struct kvm_s390_ucas_mapping ucasmap;
4399
4400                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4401                         r = -EFAULT;
4402                         break;
4403                 }
4404
4405                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4406                         r = -EINVAL;
4407                         break;
4408                 }
4409
4410                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4411                         ucasmap.length);
4412                 break;
4413         }
4414 #endif
4415         case KVM_S390_VCPU_FAULT: {
4416                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4417                 break;
4418         }
4419         case KVM_ENABLE_CAP:
4420         {
4421                 struct kvm_enable_cap cap;
4422                 r = -EFAULT;
4423                 if (copy_from_user(&cap, argp, sizeof(cap)))
4424                         break;
4425                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4426                 break;
4427         }
4428         case KVM_S390_MEM_OP: {
4429                 struct kvm_s390_mem_op mem_op;
4430
4431                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4432                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4433                 else
4434                         r = -EFAULT;
4435                 break;
4436         }
4437         case KVM_S390_SET_IRQ_STATE: {
4438                 struct kvm_s390_irq_state irq_state;
4439
4440                 r = -EFAULT;
4441                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4442                         break;
4443                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4444                     irq_state.len == 0 ||
4445                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4446                         r = -EINVAL;
4447                         break;
4448                 }
4449                 /* do not use irq_state.flags, it will break old QEMUs */
4450                 r = kvm_s390_set_irq_state(vcpu,
4451                                            (void __user *) irq_state.buf,
4452                                            irq_state.len);
4453                 break;
4454         }
4455         case KVM_S390_GET_IRQ_STATE: {
4456                 struct kvm_s390_irq_state irq_state;
4457
4458                 r = -EFAULT;
4459                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4460                         break;
4461                 if (irq_state.len == 0) {
4462                         r = -EINVAL;
4463                         break;
4464                 }
4465                 /* do not use irq_state.flags, it will break old QEMUs */
4466                 r = kvm_s390_get_irq_state(vcpu,
4467                                            (__u8 __user *)  irq_state.buf,
4468                                            irq_state.len);
4469                 break;
4470         }
4471         default:
4472                 r = -ENOTTY;
4473         }
4474
4475         vcpu_put(vcpu);
4476         return r;
4477 }
4478
4479 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4480 {
4481 #ifdef CONFIG_KVM_S390_UCONTROL
4482         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4483                  && (kvm_is_ucontrol(vcpu->kvm))) {
4484                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4485                 get_page(vmf->page);
4486                 return 0;
4487         }
4488 #endif
4489         return VM_FAULT_SIGBUS;
4490 }
4491
4492 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4493                             unsigned long npages)
4494 {
4495         return 0;
4496 }
4497
4498 /* Section: memory related */
4499 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4500                                    struct kvm_memory_slot *memslot,
4501                                    const struct kvm_userspace_memory_region *mem,
4502                                    enum kvm_mr_change change)
4503 {
4504         /* A few sanity checks. We can have memory slots which have to be
4505            located/ended at a segment boundary (1MB). The memory in userland is
4506            ok to be fragmented into various different vmas. It is okay to mmap()
4507            and munmap() stuff in this slot after doing this call at any time */
4508
4509         if (mem->userspace_addr & 0xffffful)
4510                 return -EINVAL;
4511
4512         if (mem->memory_size & 0xffffful)
4513                 return -EINVAL;
4514
4515         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4516                 return -EINVAL;
4517
4518         return 0;
4519 }
4520
4521 void kvm_arch_commit_memory_region(struct kvm *kvm,
4522                                 const struct kvm_userspace_memory_region *mem,
4523                                 const struct kvm_memory_slot *old,
4524                                 const struct kvm_memory_slot *new,
4525                                 enum kvm_mr_change change)
4526 {
4527         int rc = 0;
4528
4529         switch (change) {
4530         case KVM_MR_DELETE:
4531                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4532                                         old->npages * PAGE_SIZE);
4533                 break;
4534         case KVM_MR_MOVE:
4535                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4536                                         old->npages * PAGE_SIZE);
4537                 if (rc)
4538                         break;
4539                 /* FALLTHROUGH */
4540         case KVM_MR_CREATE:
4541                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4542                                       mem->guest_phys_addr, mem->memory_size);
4543                 break;
4544         case KVM_MR_FLAGS_ONLY:
4545                 break;
4546         default:
4547                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4548         }
4549         if (rc)
4550                 pr_warn("failed to commit memory region\n");
4551         return;
4552 }
4553
4554 static inline unsigned long nonhyp_mask(int i)
4555 {
4556         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4557
4558         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4559 }
4560
4561 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4562 {
4563         vcpu->valid_wakeup = false;
4564 }
4565
4566 static int __init kvm_s390_init(void)
4567 {
4568         int i;
4569
4570         if (!sclp.has_sief2) {
4571                 pr_info("SIE is not available\n");
4572                 return -ENODEV;
4573         }
4574
4575         if (nested && hpage) {
4576                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4577                 return -EINVAL;
4578         }
4579
4580         for (i = 0; i < 16; i++)
4581                 kvm_s390_fac_base[i] |=
4582                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4583
4584         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4585 }
4586
4587 static void __exit kvm_s390_exit(void)
4588 {
4589         kvm_exit();
4590 }
4591
4592 module_init(kvm_s390_init);
4593 module_exit(kvm_s390_exit);
4594
4595 /*
4596  * Enable autoloading of the kvm module.
4597  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4598  * since x86 takes a different approach.
4599  */
4600 #include <linux/miscdevice.h>
4601 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4602 MODULE_ALIAS("devname:kvm");