1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93         { "deliver_program", VCPU_STAT(deliver_program) },
94         { "deliver_io", VCPU_STAT(deliver_io) },
95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97         { "inject_ckc", VCPU_STAT(inject_ckc) },
98         { "inject_cputm", VCPU_STAT(inject_cputm) },
99         { "inject_external_call", VCPU_STAT(inject_external_call) },
100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102         { "inject_io", VM_STAT(inject_io) },
103         { "inject_mchk", VCPU_STAT(inject_mchk) },
104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105         { "inject_program", VCPU_STAT(inject_program) },
106         { "inject_restart", VCPU_STAT(inject_restart) },
107         { "inject_service_signal", VM_STAT(inject_service_signal) },
108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111         { "inject_virtio", VM_STAT(inject_virtio) },
112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113         { "instruction_gs", VCPU_STAT(instruction_gs) },
114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120         { "instruction_sck", VCPU_STAT(instruction_sck) },
121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122         { "instruction_spx", VCPU_STAT(instruction_spx) },
123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124         { "instruction_stap", VCPU_STAT(instruction_stap) },
125         { "instruction_iske", VCPU_STAT(instruction_iske) },
126         { "instruction_ri", VCPU_STAT(instruction_ri) },
127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128         { "instruction_sske", VCPU_STAT(instruction_sske) },
129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130         { "instruction_essa", VCPU_STAT(instruction_essa) },
131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133         { "instruction_tb", VCPU_STAT(instruction_tb) },
134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138         { "instruction_sie", VCPU_STAT(instruction_sie) },
139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158         { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
159         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
160         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
161         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
162         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
163         { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167         __u8 epoch_idx;
168         __u64 tod;
169         __u8 reserved[7];
170 } __packed;
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186
187 /*
188  * For now we handle at most 16 double words as this is what the s390 base
189  * kernel handles and stores in the prefix page. If we ever need to go beyond
190  * this, it will require code changes, but the external uapi can stay.
191  */
192 #define SIZE_INTERNAL 16
193
194 /*
195  * Base feature mask that defines default mask for facilities. Consists of the
196  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
197  */
198 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
199 /*
200  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
201  * and defines the facilities that can be enabled via a cpu model.
202  */
203 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
204
205 static unsigned long kvm_s390_fac_size(void)
206 {
207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
208         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
209         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
210                 sizeof(S390_lowcore.stfle_fac_list));
211
212         return SIZE_INTERNAL;
213 }
214
215 /* available cpu features supported by kvm */
216 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
217 /* available subfunctions indicated via query / "test bit" */
218 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
219
220 static struct gmap_notifier gmap_notifier;
221 static struct gmap_notifier vsie_gmap_notifier;
222 debug_info_t *kvm_s390_dbf;
223
224 /* Section: not file related */
225 int kvm_arch_hardware_enable(void)
226 {
227         /* every s390 is virtualization enabled ;-) */
228         return 0;
229 }
230
231 int kvm_arch_check_processor_compat(void)
232 {
233         return 0;
234 }
235
236 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
237                               unsigned long end);
238
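/*
 * Apply a host TOD clock delta to a SIE control block: adjust the guest
 * epoch and, if the multiple-epoch facility is active (ECD_MEF), carry
 * the overflow into the epoch index.
 */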
239 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
240 {
241         u8 delta_idx = 0;
242
243         /*
244          * The TOD jumps by delta; we have to compensate for this by adding
245          * -delta to the epoch.
246          */
247         delta = -delta;
248
249         /* sign-extension - we're adding to signed values below */
250         if ((s64)delta < 0)
251                 delta_idx = -1;
252
253         scb->epoch += delta;
254         if (scb->ecd & ECD_MEF) {
255                 scb->epdx += delta_idx;
256                 if (scb->epoch < delta)
257                         scb->epdx += 1;
258         }
259 }
260
261 /*
262  * This callback is executed during stop_machine(). All CPUs are therefore
263  * temporarily stopped. In order not to change guest behavior, we have to
264  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
265  * so a CPU won't be stopped while calculating with the epoch.
266  */
267 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
268                           void *v)
269 {
270         struct kvm *kvm;
271         struct kvm_vcpu *vcpu;
272         int i;
273         unsigned long long *delta = v;
274
275         list_for_each_entry(kvm, &vm_list, vm_list) {
276                 kvm_for_each_vcpu(i, vcpu, kvm) {
277                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
278                         if (i == 0) {
279                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
280                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
281                         }
282                         if (vcpu->arch.cputm_enabled)
283                                 vcpu->arch.cputm_start += *delta;
284                         if (vcpu->arch.vsie_block)
285                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
286                                                    *delta);
287                 }
288         }
289         return NOTIFY_OK;
290 }
291
292 static struct notifier_block kvm_clock_notifier = {
293         .notifier_call = kvm_clock_sync,
294 };
295
296 int kvm_arch_hardware_setup(void)
297 {
298         gmap_notifier.notifier_call = kvm_gmap_notifier;
299         gmap_register_pte_notifier(&gmap_notifier);
300         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
301         gmap_register_pte_notifier(&vsie_gmap_notifier);
302         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
303                                        &kvm_clock_notifier);
304         return 0;
305 }
306
307 void kvm_arch_hardware_unsetup(void)
308 {
309         gmap_unregister_pte_notifier(&gmap_notifier);
310         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
311         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
312                                          &kvm_clock_notifier);
313 }
314
315 static void allow_cpu_feat(unsigned long nr)
316 {
317         set_bit_inv(nr, kvm_s390_available_cpu_feat);
318 }
319
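/*
 * Use the "test bit" form of PERFORM LOCKED OPERATION (bit 0x100 set in
 * the function code) to check whether PLO function @nr is available.
 * Returns 1 if the function is installed (condition code 0).
 */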
320 static inline int plo_test_bit(unsigned char nr)
321 {
322         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
323         int cc;
324
325         asm volatile(
326                 /* Parameter registers are ignored for "test bit" */
327                 "       plo     0,0,0,0(0)\n"
328                 "       ipm     %0\n"
329                 "       srl     %0,28\n"
330                 : "=d" (cc)
331                 : "d" (r0)
332                 : "cc");
333         return cc == 0;
334 }
335
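/*
 * Execute the query function (function code 0) of an instruction such as
 * SORTL or DFLTCC and store the returned availability bits at @query.
 */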
336 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
337 {
338         register unsigned long r0 asm("0") = 0; /* query function */
339         register unsigned long r1 asm("1") = (unsigned long) query;
340
341         asm volatile(
342                 /* Parameter regs are ignored */
343                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
344                 :
345                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
346                 : "cc", "memory");
347 }
348
349 #define INSN_SORTL 0xb938
350 #define INSN_DFLTCC 0xb939
351
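/*
 * Probe the host for the CPU subfunctions (PLO, PTFF, CPACF, SORTL,
 * DFLTCC) and the SIE features that KVM can offer to its guests.
 */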
352 static void kvm_s390_cpu_feat_init(void)
353 {
354         int i;
355
356         for (i = 0; i < 256; ++i) {
357                 if (plo_test_bit(i))
358                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
359         }
360
361         if (test_facility(28)) /* TOD-clock steering */
362                 ptff(kvm_s390_available_subfunc.ptff,
363                      sizeof(kvm_s390_available_subfunc.ptff),
364                      PTFF_QAF);
365
366         if (test_facility(17)) { /* MSA */
367                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
368                               kvm_s390_available_subfunc.kmac);
369                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
370                               kvm_s390_available_subfunc.kmc);
371                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
372                               kvm_s390_available_subfunc.km);
373                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
374                               kvm_s390_available_subfunc.kimd);
375                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
376                               kvm_s390_available_subfunc.klmd);
377         }
378         if (test_facility(76)) /* MSA3 */
379                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
380                               kvm_s390_available_subfunc.pckmo);
381         if (test_facility(77)) { /* MSA4 */
382                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
383                               kvm_s390_available_subfunc.kmctr);
384                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
385                               kvm_s390_available_subfunc.kmf);
386                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
387                               kvm_s390_available_subfunc.kmo);
388                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
389                               kvm_s390_available_subfunc.pcc);
390         }
391         if (test_facility(57)) /* MSA5 */
392                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
393                               kvm_s390_available_subfunc.ppno);
394
395         if (test_facility(146)) /* MSA8 */
396                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
397                               kvm_s390_available_subfunc.kma);
398
399         if (test_facility(155)) /* MSA9 */
400                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
401                               kvm_s390_available_subfunc.kdsa);
402
403         if (test_facility(150)) /* SORTL */
404                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
405
406         if (test_facility(151)) /* DFLTCC */
407                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
408
409         if (MACHINE_HAS_ESOP)
410                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
411         /*
412          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
413          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
414          */
415         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
416             !test_facility(3) || !nested)
417                 return;
418         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
419         if (sclp.has_64bscao)
420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
421         if (sclp.has_siif)
422                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
423         if (sclp.has_gpere)
424                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
425         if (sclp.has_gsls)
426                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
427         if (sclp.has_ib)
428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
429         if (sclp.has_cei)
430                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
431         if (sclp.has_ibs)
432                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
433         if (sclp.has_kss)
434                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
435         /*
436          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
437          * all skey handling functions read/set the skey from the PGSTE
438          * instead of the real storage key.
439          *
440          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
441          * pages being detected as preserved although they are resident.
442          *
443          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
444          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
445          *
446          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
447          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
448          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
449          *
450          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
451          * cannot easily shadow the SCA because of the ipte lock.
452          */
453 }
454
455 int kvm_arch_init(void *opaque)
456 {
457         int rc = -ENOMEM;
458
459         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
460         if (!kvm_s390_dbf)
461                 return -ENOMEM;
462
463         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
464                 goto out;
465
466         kvm_s390_cpu_feat_init();
467
468         /* Register floating interrupt controller interface. */
469         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
470         if (rc) {
471                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
472                 goto out;
473         }
474
475         rc = kvm_s390_gib_init(GAL_ISC);
476         if (rc)
477                 goto out;
478
479         return 0;
480
481 out:
482         kvm_arch_exit();
483         return rc;
484 }
485
486 void kvm_arch_exit(void)
487 {
488         kvm_s390_gib_destroy();
489         debug_unregister(kvm_s390_dbf);
490 }
491
492 /* Section: device related */
493 long kvm_arch_dev_ioctl(struct file *filp,
494                         unsigned int ioctl, unsigned long arg)
495 {
496         if (ioctl == KVM_S390_ENABLE_SIE)
497                 return s390_enable_sie();
498         return -EINVAL;
499 }
500
501 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
502 {
503         int r;
504
505         switch (ext) {
506         case KVM_CAP_S390_PSW:
507         case KVM_CAP_S390_GMAP:
508         case KVM_CAP_SYNC_MMU:
509 #ifdef CONFIG_KVM_S390_UCONTROL
510         case KVM_CAP_S390_UCONTROL:
511 #endif
512         case KVM_CAP_ASYNC_PF:
513         case KVM_CAP_SYNC_REGS:
514         case KVM_CAP_ONE_REG:
515         case KVM_CAP_ENABLE_CAP:
516         case KVM_CAP_S390_CSS_SUPPORT:
517         case KVM_CAP_IOEVENTFD:
518         case KVM_CAP_DEVICE_CTRL:
519         case KVM_CAP_S390_IRQCHIP:
520         case KVM_CAP_VM_ATTRIBUTES:
521         case KVM_CAP_MP_STATE:
522         case KVM_CAP_IMMEDIATE_EXIT:
523         case KVM_CAP_S390_INJECT_IRQ:
524         case KVM_CAP_S390_USER_SIGP:
525         case KVM_CAP_S390_USER_STSI:
526         case KVM_CAP_S390_SKEYS:
527         case KVM_CAP_S390_IRQ_STATE:
528         case KVM_CAP_S390_USER_INSTR0:
529         case KVM_CAP_S390_CMMA_MIGRATION:
530         case KVM_CAP_S390_AIS:
531         case KVM_CAP_S390_AIS_MIGRATION:
532                 r = 1;
533                 break;
534         case KVM_CAP_S390_HPAGE_1M:
535                 r = 0;
536                 if (hpage && !kvm_is_ucontrol(kvm))
537                         r = 1;
538                 break;
539         case KVM_CAP_S390_MEM_OP:
540                 r = MEM_OP_MAX_SIZE;
541                 break;
542         case KVM_CAP_NR_VCPUS:
543         case KVM_CAP_MAX_VCPUS:
544         case KVM_CAP_MAX_VCPU_ID:
545                 r = KVM_S390_BSCA_CPU_SLOTS;
546                 if (!kvm_s390_use_sca_entries())
547                         r = KVM_MAX_VCPUS;
548                 else if (sclp.has_esca && sclp.has_64bscao)
549                         r = KVM_S390_ESCA_CPU_SLOTS;
550                 break;
551         case KVM_CAP_S390_COW:
552                 r = MACHINE_HAS_ESOP;
553                 break;
554         case KVM_CAP_S390_VECTOR_REGISTERS:
555                 r = MACHINE_HAS_VX;
556                 break;
557         case KVM_CAP_S390_RI:
558                 r = test_facility(64);
559                 break;
560         case KVM_CAP_S390_GS:
561                 r = test_facility(133);
562                 break;
563         case KVM_CAP_S390_BPB:
564                 r = test_facility(82);
565                 break;
566         default:
567                 r = 0;
568         }
569         return r;
570 }
571
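/*
 * Walk a memory slot segment by segment and transfer the dirty bits
 * tracked in the gmap into the KVM dirty bitmap of that slot.
 */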
572 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
573                                     struct kvm_memory_slot *memslot)
574 {
575         int i;
576         gfn_t cur_gfn, last_gfn;
577         unsigned long gaddr, vmaddr;
578         struct gmap *gmap = kvm->arch.gmap;
579         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
580
581         /* Loop over all guest segments */
582         cur_gfn = memslot->base_gfn;
583         last_gfn = memslot->base_gfn + memslot->npages;
584         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
585                 gaddr = gfn_to_gpa(cur_gfn);
586                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
587                 if (kvm_is_error_hva(vmaddr))
588                         continue;
589
590                 bitmap_zero(bitmap, _PAGE_ENTRIES);
591                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
592                 for (i = 0; i < _PAGE_ENTRIES; i++) {
593                         if (test_bit(i, bitmap))
594                                 mark_page_dirty(kvm, cur_gfn + i);
595                 }
596
597                 if (fatal_signal_pending(current))
598                         return;
599                 cond_resched();
600         }
601 }
602
603 /* Section: vm related */
604 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
605
606 /*
607  * Get (and clear) the dirty memory log for a memory slot.
608  */
609 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
610                                struct kvm_dirty_log *log)
611 {
612         int r;
613         unsigned long n;
614         struct kvm_memslots *slots;
615         struct kvm_memory_slot *memslot;
616         int is_dirty = 0;
617
618         if (kvm_is_ucontrol(kvm))
619                 return -EINVAL;
620
621         mutex_lock(&kvm->slots_lock);
622
623         r = -EINVAL;
624         if (log->slot >= KVM_USER_MEM_SLOTS)
625                 goto out;
626
627         slots = kvm_memslots(kvm);
628         memslot = id_to_memslot(slots, log->slot);
629         r = -ENOENT;
630         if (!memslot->dirty_bitmap)
631                 goto out;
632
633         kvm_s390_sync_dirty_log(kvm, memslot);
634         r = kvm_get_dirty_log(kvm, log, &is_dirty);
635         if (r)
636                 goto out;
637
638         /* Clear the dirty log */
639         if (is_dirty) {
640                 n = kvm_dirty_bitmap_bytes(memslot);
641                 memset(memslot->dirty_bitmap, 0, n);
642         }
643         r = 0;
644 out:
645         mutex_unlock(&kvm->slots_lock);
646         return r;
647 }
648
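/* Request interception of operation exceptions on all vcpus. */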
649 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
650 {
651         unsigned int i;
652         struct kvm_vcpu *vcpu;
653
654         kvm_for_each_vcpu(i, vcpu, kvm) {
655                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
656         }
657 }
658
659 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
660 {
661         int r;
662
663         if (cap->flags)
664                 return -EINVAL;
665
666         switch (cap->cap) {
667         case KVM_CAP_S390_IRQCHIP:
668                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
669                 kvm->arch.use_irqchip = 1;
670                 r = 0;
671                 break;
672         case KVM_CAP_S390_USER_SIGP:
673                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
674                 kvm->arch.user_sigp = 1;
675                 r = 0;
676                 break;
677         case KVM_CAP_S390_VECTOR_REGISTERS:
678                 mutex_lock(&kvm->lock);
679                 if (kvm->created_vcpus) {
680                         r = -EBUSY;
681                 } else if (MACHINE_HAS_VX) {
682                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
683                         set_kvm_facility(kvm->arch.model.fac_list, 129);
684                         if (test_facility(134)) {
685                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
686                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
687                         }
688                         if (test_facility(135)) {
689                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
690                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
691                         }
692                         if (test_facility(148)) {
693                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
694                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
695                         }
696                         if (test_facility(152)) {
697                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
698                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
699                         }
700                         r = 0;
701                 } else
702                         r = -EINVAL;
703                 mutex_unlock(&kvm->lock);
704                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
705                          r ? "(not available)" : "(success)");
706                 break;
707         case KVM_CAP_S390_RI:
708                 r = -EINVAL;
709                 mutex_lock(&kvm->lock);
710                 if (kvm->created_vcpus) {
711                         r = -EBUSY;
712                 } else if (test_facility(64)) {
713                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
714                         set_kvm_facility(kvm->arch.model.fac_list, 64);
715                         r = 0;
716                 }
717                 mutex_unlock(&kvm->lock);
718                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
719                          r ? "(not available)" : "(success)");
720                 break;
721         case KVM_CAP_S390_AIS:
722                 mutex_lock(&kvm->lock);
723                 if (kvm->created_vcpus) {
724                         r = -EBUSY;
725                 } else {
726                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
727                         set_kvm_facility(kvm->arch.model.fac_list, 72);
728                         r = 0;
729                 }
730                 mutex_unlock(&kvm->lock);
731                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
732                          r ? "(not available)" : "(success)");
733                 break;
734         case KVM_CAP_S390_GS:
735                 r = -EINVAL;
736                 mutex_lock(&kvm->lock);
737                 if (kvm->created_vcpus) {
738                         r = -EBUSY;
739                 } else if (test_facility(133)) {
740                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
741                         set_kvm_facility(kvm->arch.model.fac_list, 133);
742                         r = 0;
743                 }
744                 mutex_unlock(&kvm->lock);
745                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
746                          r ? "(not available)" : "(success)");
747                 break;
748         case KVM_CAP_S390_HPAGE_1M:
749                 mutex_lock(&kvm->lock);
750                 if (kvm->created_vcpus)
751                         r = -EBUSY;
752                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
753                         r = -EINVAL;
754                 else {
755                         r = 0;
756                         down_write(&kvm->mm->mmap_sem);
757                         kvm->mm->context.allow_gmap_hpage_1m = 1;
758                         up_write(&kvm->mm->mmap_sem);
759                         /*
760                          * We might have to create fake 4k page
761                          * tables. To avoid that the hardware works on
762                          * stale PGSTEs, we emulate these instructions.
763                          */
764                         kvm->arch.use_skf = 0;
765                         kvm->arch.use_pfmfi = 0;
766                 }
767                 mutex_unlock(&kvm->lock);
768                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
769                          r ? "(not available)" : "(success)");
770                 break;
771         case KVM_CAP_S390_USER_STSI:
772                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
773                 kvm->arch.user_stsi = 1;
774                 r = 0;
775                 break;
776         case KVM_CAP_S390_USER_INSTR0:
777                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
778                 kvm->arch.user_instr0 = 1;
779                 icpt_operexc_on_all_vcpus(kvm);
780                 r = 0;
781                 break;
782         default:
783                 r = -EINVAL;
784                 break;
785         }
786         return r;
787 }
788
789 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
790 {
791         int ret;
792
793         switch (attr->attr) {
794         case KVM_S390_VM_MEM_LIMIT_SIZE:
795                 ret = 0;
796                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
797                          kvm->arch.mem_limit);
798                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
799                         ret = -EFAULT;
800                 break;
801         default:
802                 ret = -ENXIO;
803                 break;
804         }
805         return ret;
806 }
807
808 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
809 {
810         int ret;
811         unsigned int idx;
812         switch (attr->attr) {
813         case KVM_S390_VM_MEM_ENABLE_CMMA:
814                 ret = -ENXIO;
815                 if (!sclp.has_cmma)
816                         break;
817
818                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
819                 mutex_lock(&kvm->lock);
820                 if (kvm->created_vcpus)
821                         ret = -EBUSY;
822                 else if (kvm->mm->context.allow_gmap_hpage_1m)
823                         ret = -EINVAL;
824                 else {
825                         kvm->arch.use_cmma = 1;
826                         /* Not compatible with cmma. */
827                         kvm->arch.use_pfmfi = 0;
828                         ret = 0;
829                 }
830                 mutex_unlock(&kvm->lock);
831                 break;
832         case KVM_S390_VM_MEM_CLR_CMMA:
833                 ret = -ENXIO;
834                 if (!sclp.has_cmma)
835                         break;
836                 ret = -EINVAL;
837                 if (!kvm->arch.use_cmma)
838                         break;
839
840                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
841                 mutex_lock(&kvm->lock);
842                 idx = srcu_read_lock(&kvm->srcu);
843                 s390_reset_cmma(kvm->arch.gmap->mm);
844                 srcu_read_unlock(&kvm->srcu, idx);
845                 mutex_unlock(&kvm->lock);
846                 ret = 0;
847                 break;
848         case KVM_S390_VM_MEM_LIMIT_SIZE: {
849                 unsigned long new_limit;
850
851                 if (kvm_is_ucontrol(kvm))
852                         return -EINVAL;
853
854                 if (get_user(new_limit, (u64 __user *)attr->addr))
855                         return -EFAULT;
856
857                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
858                     new_limit > kvm->arch.mem_limit)
859                         return -E2BIG;
860
861                 if (!new_limit)
862                         return -EINVAL;
863
864                 /* gmap_create takes last usable address */
865                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
866                         new_limit -= 1;
867
868                 ret = -EBUSY;
869                 mutex_lock(&kvm->lock);
870                 if (!kvm->created_vcpus) {
871                         /* gmap_create will round the limit up */
872                         struct gmap *new = gmap_create(current->mm, new_limit);
873
874                         if (!new) {
875                                 ret = -ENOMEM;
876                         } else {
877                                 gmap_remove(kvm->arch.gmap);
878                                 new->private = kvm;
879                                 kvm->arch.gmap = new;
880                                 ret = 0;
881                         }
882                 }
883                 mutex_unlock(&kvm->lock);
884                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
885                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
886                          (void *) kvm->arch.gmap->asce);
887                 break;
888         }
889         default:
890                 ret = -ENXIO;
891                 break;
892         }
893         return ret;
894 }
895
896 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
897
898 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
899 {
900         struct kvm_vcpu *vcpu;
901         int i;
902
903         kvm_s390_vcpu_block_all(kvm);
904
905         kvm_for_each_vcpu(i, vcpu, kvm) {
906                 kvm_s390_vcpu_crypto_setup(vcpu);
907                 /* recreate the shadow crycb by leaving the VSIE handler */
908                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
909         }
910
911         kvm_s390_vcpu_unblock_all(kvm);
912 }
913
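/*
 * Handle the KVM_S390_VM_CRYPTO attributes: enable or disable AES/DEA
 * key wrapping (generating fresh wrapping key masks on enable) and APIE,
 * then reset the crypto setup of all vcpus.
 */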
914 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916         mutex_lock(&kvm->lock);
917         switch (attr->attr) {
918         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
919                 if (!test_kvm_facility(kvm, 76)) {
920                         mutex_unlock(&kvm->lock);
921                         return -EINVAL;
922                 }
923                 get_random_bytes(
924                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
925                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
926                 kvm->arch.crypto.aes_kw = 1;
927                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
928                 break;
929         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
930                 if (!test_kvm_facility(kvm, 76)) {
931                         mutex_unlock(&kvm->lock);
932                         return -EINVAL;
933                 }
934                 get_random_bytes(
935                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
936                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
937                 kvm->arch.crypto.dea_kw = 1;
938                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
939                 break;
940         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
941                 if (!test_kvm_facility(kvm, 76)) {
942                         mutex_unlock(&kvm->lock);
943                         return -EINVAL;
944                 }
945                 kvm->arch.crypto.aes_kw = 0;
946                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
947                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
948                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
949                 break;
950         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
951                 if (!test_kvm_facility(kvm, 76)) {
952                         mutex_unlock(&kvm->lock);
953                         return -EINVAL;
954                 }
955                 kvm->arch.crypto.dea_kw = 0;
956                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
957                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
958                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
959                 break;
960         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
961                 if (!ap_instructions_available()) {
962                         mutex_unlock(&kvm->lock);
963                         return -EOPNOTSUPP;
964                 }
965                 kvm->arch.crypto.apie = 1;
966                 break;
967         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
968                 if (!ap_instructions_available()) {
969                         mutex_unlock(&kvm->lock);
970                         return -EOPNOTSUPP;
971                 }
972                 kvm->arch.crypto.apie = 0;
973                 break;
974         default:
975                 mutex_unlock(&kvm->lock);
976                 return -ENXIO;
977         }
978
979         kvm_s390_vcpu_crypto_reset_all(kvm);
980         mutex_unlock(&kvm->lock);
981         return 0;
982 }
983
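/* Make a synchronous request pending on every vcpu of the VM. */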
984 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
985 {
986         int cx;
987         struct kvm_vcpu *vcpu;
988
989         kvm_for_each_vcpu(cx, vcpu, kvm)
990                 kvm_s390_sync_request(req, vcpu);
991 }
992
993 /*
994  * Must be called with kvm->srcu held to avoid races on memslots, and with
995  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
996  */
997 static int kvm_s390_vm_start_migration(struct kvm *kvm)
998 {
999         struct kvm_memory_slot *ms;
1000         struct kvm_memslots *slots;
1001         unsigned long ram_pages = 0;
1002         int slotnr;
1003
1004         /* migration mode already enabled */
1005         if (kvm->arch.migration_mode)
1006                 return 0;
1007         slots = kvm_memslots(kvm);
1008         if (!slots || !slots->used_slots)
1009                 return -EINVAL;
1010
1011         if (!kvm->arch.use_cmma) {
1012                 kvm->arch.migration_mode = 1;
1013                 return 0;
1014         }
1015         /* mark all the pages in active slots as dirty */
1016         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1017                 ms = slots->memslots + slotnr;
1018                 if (!ms->dirty_bitmap)
1019                         return -EINVAL;
1020                 /*
1021                  * The second half of the bitmap is only used on x86,
1022                  * and would be wasted otherwise, so we put it to good
1023                  * use here to keep track of the state of the storage
1024                  * attributes.
1025                  */
1026                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1027                 ram_pages += ms->npages;
1028         }
1029         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1030         kvm->arch.migration_mode = 1;
1031         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1032         return 0;
1033 }
1034
1035 /*
1036  * Must be called with kvm->slots_lock to avoid races with ourselves and
1037  * kvm_s390_vm_start_migration.
1038  */
1039 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1040 {
1041         /* migration mode already disabled */
1042         if (!kvm->arch.migration_mode)
1043                 return 0;
1044         kvm->arch.migration_mode = 0;
1045         if (kvm->arch.use_cmma)
1046                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1047         return 0;
1048 }
1049
1050 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1051                                      struct kvm_device_attr *attr)
1052 {
1053         int res = -ENXIO;
1054
1055         mutex_lock(&kvm->slots_lock);
1056         switch (attr->attr) {
1057         case KVM_S390_VM_MIGRATION_START:
1058                 res = kvm_s390_vm_start_migration(kvm);
1059                 break;
1060         case KVM_S390_VM_MIGRATION_STOP:
1061                 res = kvm_s390_vm_stop_migration(kvm);
1062                 break;
1063         default:
1064                 break;
1065         }
1066         mutex_unlock(&kvm->slots_lock);
1067
1068         return res;
1069 }
1070
1071 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1072                                      struct kvm_device_attr *attr)
1073 {
1074         u64 mig = kvm->arch.migration_mode;
1075
1076         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1077                 return -ENXIO;
1078
1079         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1080                 return -EFAULT;
1081         return 0;
1082 }
1083
1084 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1085 {
1086         struct kvm_s390_vm_tod_clock gtod;
1087
1088         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1089                 return -EFAULT;
1090
1091         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1092                 return -EINVAL;
1093         kvm_s390_set_tod_clock(kvm, &gtod);
1094
1095         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1096                 gtod.epoch_idx, gtod.tod);
1097
1098         return 0;
1099 }
1100
1101 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103         u8 gtod_high;
1104
1105         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1106                                            sizeof(gtod_high)))
1107                 return -EFAULT;
1108
1109         if (gtod_high != 0)
1110                 return -EINVAL;
1111         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1112
1113         return 0;
1114 }
1115
1116 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1117 {
1118         struct kvm_s390_vm_tod_clock gtod = { 0 };
1119
1120         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1121                            sizeof(gtod.tod)))
1122                 return -EFAULT;
1123
1124         kvm_s390_set_tod_clock(kvm, &gtod);
1125         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1126         return 0;
1127 }
1128
1129 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131         int ret;
1132
1133         if (attr->flags)
1134                 return -EINVAL;
1135
1136         switch (attr->attr) {
1137         case KVM_S390_VM_TOD_EXT:
1138                 ret = kvm_s390_set_tod_ext(kvm, attr);
1139                 break;
1140         case KVM_S390_VM_TOD_HIGH:
1141                 ret = kvm_s390_set_tod_high(kvm, attr);
1142                 break;
1143         case KVM_S390_VM_TOD_LOW:
1144                 ret = kvm_s390_set_tod_low(kvm, attr);
1145                 break;
1146         default:
1147                 ret = -ENXIO;
1148                 break;
1149         }
1150         return ret;
1151 }
1152
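/*
 * Read the host TOD clock (extended format) and convert it to the
 * guest's view by adding the VM's epoch, including the epoch index
 * when the multiple-epoch facility (139) is available.
 */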
1153 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1154                                    struct kvm_s390_vm_tod_clock *gtod)
1155 {
1156         struct kvm_s390_tod_clock_ext htod;
1157
1158         preempt_disable();
1159
1160         get_tod_clock_ext((char *)&htod);
1161
1162         gtod->tod = htod.tod + kvm->arch.epoch;
1163         gtod->epoch_idx = 0;
1164         if (test_kvm_facility(kvm, 139)) {
1165                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1166                 if (gtod->tod < htod.tod)
1167                         gtod->epoch_idx += 1;
1168         }
1169
1170         preempt_enable();
1171 }
1172
1173 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175         struct kvm_s390_vm_tod_clock gtod;
1176
1177         memset(&gtod, 0, sizeof(gtod));
1178         kvm_s390_get_tod_clock(kvm, &gtod);
1179         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1180                 return -EFAULT;
1181
1182         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1183                 gtod.epoch_idx, gtod.tod);
1184         return 0;
1185 }
1186
1187 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1188 {
1189         u8 gtod_high = 0;
1190
1191         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1192                                          sizeof(gtod_high)))
1193                 return -EFAULT;
1194         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1195
1196         return 0;
1197 }
1198
1199 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201         u64 gtod;
1202
1203         gtod = kvm_s390_get_tod_clock_fast(kvm);
1204         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1205                 return -EFAULT;
1206         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1207
1208         return 0;
1209 }
1210
1211 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213         int ret;
1214
1215         if (attr->flags)
1216                 return -EINVAL;
1217
1218         switch (attr->attr) {
1219         case KVM_S390_VM_TOD_EXT:
1220                 ret = kvm_s390_get_tod_ext(kvm, attr);
1221                 break;
1222         case KVM_S390_VM_TOD_HIGH:
1223                 ret = kvm_s390_get_tod_high(kvm, attr);
1224                 break;
1225         case KVM_S390_VM_TOD_LOW:
1226                 ret = kvm_s390_get_tod_low(kvm, attr);
1227                 break;
1228         default:
1229                 ret = -ENXIO;
1230                 break;
1231         }
1232         return ret;
1233 }
1234
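/*
 * Set the guest CPU model (cpuid, IBC and facility list) from userspace.
 * Only possible while no vcpus have been created; the requested IBC is
 * clamped to the range supported by the machine.
 */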
1235 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1236 {
1237         struct kvm_s390_vm_cpu_processor *proc;
1238         u16 lowest_ibc, unblocked_ibc;
1239         int ret = 0;
1240
1241         mutex_lock(&kvm->lock);
1242         if (kvm->created_vcpus) {
1243                 ret = -EBUSY;
1244                 goto out;
1245         }
1246         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1247         if (!proc) {
1248                 ret = -ENOMEM;
1249                 goto out;
1250         }
1251         if (!copy_from_user(proc, (void __user *)attr->addr,
1252                             sizeof(*proc))) {
1253                 kvm->arch.model.cpuid = proc->cpuid;
1254                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1255                 unblocked_ibc = sclp.ibc & 0xfff;
1256                 if (lowest_ibc && proc->ibc) {
1257                         if (proc->ibc > unblocked_ibc)
1258                                 kvm->arch.model.ibc = unblocked_ibc;
1259                         else if (proc->ibc < lowest_ibc)
1260                                 kvm->arch.model.ibc = lowest_ibc;
1261                         else
1262                                 kvm->arch.model.ibc = proc->ibc;
1263                 }
1264                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1265                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1266                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1267                          kvm->arch.model.ibc,
1268                          kvm->arch.model.cpuid);
1269                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1270                          kvm->arch.model.fac_list[0],
1271                          kvm->arch.model.fac_list[1],
1272                          kvm->arch.model.fac_list[2]);
1273         } else
1274                 ret = -EFAULT;
1275         kfree(proc);
1276 out:
1277         mutex_unlock(&kvm->lock);
1278         return ret;
1279 }
1280
1281 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1282                                        struct kvm_device_attr *attr)
1283 {
1284         struct kvm_s390_vm_cpu_feat data;
1285
1286         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1287                 return -EFAULT;
1288         if (!bitmap_subset((unsigned long *) data.feat,
1289                            kvm_s390_available_cpu_feat,
1290                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1291                 return -EINVAL;
1292
1293         mutex_lock(&kvm->lock);
1294         if (kvm->created_vcpus) {
1295                 mutex_unlock(&kvm->lock);
1296                 return -EBUSY;
1297         }
1298         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1299                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1300         mutex_unlock(&kvm->lock);
1301         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1302                          data.feat[0],
1303                          data.feat[1],
1304                          data.feat[2]);
1305         return 0;
1306 }
1307
1308 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1309                                           struct kvm_device_attr *attr)
1310 {
1311         mutex_lock(&kvm->lock);
1312         if (kvm->created_vcpus) {
1313                 mutex_unlock(&kvm->lock);
1314                 return -EBUSY;
1315         }
1316
1317         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1318                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1319                 mutex_unlock(&kvm->lock);
1320                 return -EFAULT;
1321         }
1322         mutex_unlock(&kvm->lock);
1323
1324         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1325                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1327                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1328                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1329         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1330                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1331                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1332         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1333                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1334                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1335         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1338         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1339                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1341         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1344         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1345                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1347         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1348                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1350         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1353         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1356         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1359         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1362         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1365         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1368         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1371         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1376         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1381
1382         return 0;
1383 }
1384
1385 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1386 {
1387         int ret = -ENXIO;
1388
1389         switch (attr->attr) {
1390         case KVM_S390_VM_CPU_PROCESSOR:
1391                 ret = kvm_s390_set_processor(kvm, attr);
1392                 break;
1393         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1394                 ret = kvm_s390_set_processor_feat(kvm, attr);
1395                 break;
1396         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1397                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1398                 break;
1399         }
1400         return ret;
1401 }
1402
1403 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1404 {
1405         struct kvm_s390_vm_cpu_processor *proc;
1406         int ret = 0;
1407
1408         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1409         if (!proc) {
1410                 ret = -ENOMEM;
1411                 goto out;
1412         }
1413         proc->cpuid = kvm->arch.model.cpuid;
1414         proc->ibc = kvm->arch.model.ibc;
1415         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1416                S390_ARCH_FAC_LIST_SIZE_BYTE);
1417         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1418                  kvm->arch.model.ibc,
1419                  kvm->arch.model.cpuid);
1420         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1421                  kvm->arch.model.fac_list[0],
1422                  kvm->arch.model.fac_list[1],
1423                  kvm->arch.model.fac_list[2]);
1424         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1425                 ret = -EFAULT;
1426         kfree(proc);
1427 out:
1428         return ret;
1429 }
1430
1431 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1432 {
1433         struct kvm_s390_vm_cpu_machine *mach;
1434         int ret = 0;
1435
1436         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1437         if (!mach) {
1438                 ret = -ENOMEM;
1439                 goto out;
1440         }
1441         get_cpu_id((struct cpuid *) &mach->cpuid);
1442         mach->ibc = sclp.ibc;
1443         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1444                S390_ARCH_FAC_LIST_SIZE_BYTE);
1445         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1446                sizeof(S390_lowcore.stfle_fac_list));
1447         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1448                  kvm->arch.model.ibc,
1449                  kvm->arch.model.cpuid);
1450         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1451                  mach->fac_mask[0],
1452                  mach->fac_mask[1],
1453                  mach->fac_mask[2]);
1454         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1455                  mach->fac_list[0],
1456                  mach->fac_list[1],
1457                  mach->fac_list[2]);
1458         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1459                 ret = -EFAULT;
1460         kfree(mach);
1461 out:
1462         return ret;
1463 }
1464
1465 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1466                                        struct kvm_device_attr *attr)
1467 {
1468         struct kvm_s390_vm_cpu_feat data;
1469
1470         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1471                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1472         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1473                 return -EFAULT;
1474         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1475                          data.feat[0],
1476                          data.feat[1],
1477                          data.feat[2]);
1478         return 0;
1479 }
1480
1481 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1482                                      struct kvm_device_attr *attr)
1483 {
1484         struct kvm_s390_vm_cpu_feat data;
1485
1486         bitmap_copy((unsigned long *) data.feat,
1487                     kvm_s390_available_cpu_feat,
1488                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1489         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1490                 return -EFAULT;
1491         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492                          data.feat[0],
1493                          data.feat[1],
1494                          data.feat[2]);
1495         return 0;
1496 }
1497
1498 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1499                                           struct kvm_device_attr *attr)
1500 {
1501         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1502             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1503                 return -EFAULT;
1504
1505         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1506                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1510         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1511                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1513         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1514                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1516         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1519         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1522         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1525         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1528         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1531         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1534         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1537         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1540         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1543         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1546         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1549         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1552         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1557         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1562
1563         return 0;
1564 }
1565
1566 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1567                                         struct kvm_device_attr *attr)
1568 {
1569         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1570             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1571                 return -EFAULT;
1572
1573         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1575                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1576                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1577                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1578         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1579                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1580                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1581         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1582                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1583                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1584         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1585                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1587         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1588                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1589                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1590         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1591                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1593         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1594                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1595                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1596         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1597                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1598                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1599         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1600                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1602         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1604                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1605         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1606                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1607                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1608         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1609                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1610                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1611         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1612                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1614         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1617         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1620         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1625         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1630
1631         return 0;
1632 }
1633
1634 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1635 {
1636         int ret = -ENXIO;
1637
1638         switch (attr->attr) {
1639         case KVM_S390_VM_CPU_PROCESSOR:
1640                 ret = kvm_s390_get_processor(kvm, attr);
1641                 break;
1642         case KVM_S390_VM_CPU_MACHINE:
1643                 ret = kvm_s390_get_machine(kvm, attr);
1644                 break;
1645         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1646                 ret = kvm_s390_get_processor_feat(kvm, attr);
1647                 break;
1648         case KVM_S390_VM_CPU_MACHINE_FEAT:
1649                 ret = kvm_s390_get_machine_feat(kvm, attr);
1650                 break;
1651         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1652                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1653                 break;
1654         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1655                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1656                 break;
1657         }
1658         return ret;
1659 }
1660
1661 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1662 {
1663         int ret;
1664
1665         switch (attr->group) {
1666         case KVM_S390_VM_MEM_CTRL:
1667                 ret = kvm_s390_set_mem_control(kvm, attr);
1668                 break;
1669         case KVM_S390_VM_TOD:
1670                 ret = kvm_s390_set_tod(kvm, attr);
1671                 break;
1672         case KVM_S390_VM_CPU_MODEL:
1673                 ret = kvm_s390_set_cpu_model(kvm, attr);
1674                 break;
1675         case KVM_S390_VM_CRYPTO:
1676                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1677                 break;
1678         case KVM_S390_VM_MIGRATION:
1679                 ret = kvm_s390_vm_set_migration(kvm, attr);
1680                 break;
1681         default:
1682                 ret = -ENXIO;
1683                 break;
1684         }
1685
1686         return ret;
1687 }
1688
1689 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1690 {
1691         int ret;
1692
1693         switch (attr->group) {
1694         case KVM_S390_VM_MEM_CTRL:
1695                 ret = kvm_s390_get_mem_control(kvm, attr);
1696                 break;
1697         case KVM_S390_VM_TOD:
1698                 ret = kvm_s390_get_tod(kvm, attr);
1699                 break;
1700         case KVM_S390_VM_CPU_MODEL:
1701                 ret = kvm_s390_get_cpu_model(kvm, attr);
1702                 break;
1703         case KVM_S390_VM_MIGRATION:
1704                 ret = kvm_s390_vm_get_migration(kvm, attr);
1705                 break;
1706         default:
1707                 ret = -ENXIO;
1708                 break;
1709         }
1710
1711         return ret;
1712 }
1713
1714 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1715 {
1716         int ret;
1717
1718         switch (attr->group) {
1719         case KVM_S390_VM_MEM_CTRL:
1720                 switch (attr->attr) {
1721                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1722                 case KVM_S390_VM_MEM_CLR_CMMA:
1723                         ret = sclp.has_cmma ? 0 : -ENXIO;
1724                         break;
1725                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1726                         ret = 0;
1727                         break;
1728                 default:
1729                         ret = -ENXIO;
1730                         break;
1731                 }
1732                 break;
1733         case KVM_S390_VM_TOD:
1734                 switch (attr->attr) {
1735                 case KVM_S390_VM_TOD_LOW:
1736                 case KVM_S390_VM_TOD_HIGH:
1737                         ret = 0;
1738                         break;
1739                 default:
1740                         ret = -ENXIO;
1741                         break;
1742                 }
1743                 break;
1744         case KVM_S390_VM_CPU_MODEL:
1745                 switch (attr->attr) {
1746                 case KVM_S390_VM_CPU_PROCESSOR:
1747                 case KVM_S390_VM_CPU_MACHINE:
1748                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1749                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1750                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1751                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1752                         ret = 0;
1753                         break;
1754                 default:
1755                         ret = -ENXIO;
1756                         break;
1757                 }
1758                 break;
1759         case KVM_S390_VM_CRYPTO:
1760                 switch (attr->attr) {
1761                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1762                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1763                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1764                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1765                         ret = 0;
1766                         break;
1767                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1768                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1769                         ret = ap_instructions_available() ? 0 : -ENXIO;
1770                         break;
1771                 default:
1772                         ret = -ENXIO;
1773                         break;
1774                 }
1775                 break;
1776         case KVM_S390_VM_MIGRATION:
1777                 ret = 0;
1778                 break;
1779         default:
1780                 ret = -ENXIO;
1781                 break;
1782         }
1783
1784         return ret;
1785 }
1786
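/*
 * Read the storage keys for args->count guest frames starting at
 * args->start_gfn into a temporary buffer and copy them to user space.
 * Returns KVM_S390_GET_SKEYS_NONE if the guest does not use storage keys.
 */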
1787 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1788 {
1789         uint8_t *keys;
1790         uint64_t hva;
1791         int srcu_idx, i, r = 0;
1792
1793         if (args->flags != 0)
1794                 return -EINVAL;
1795
1796         /* Is this guest using storage keys? */
1797         if (!mm_uses_skeys(current->mm))
1798                 return KVM_S390_GET_SKEYS_NONE;
1799
1800         /* Enforce sane limit on memory allocation */
1801         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1802                 return -EINVAL;
1803
1804         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1805         if (!keys)
1806                 return -ENOMEM;
1807
1808         down_read(&current->mm->mmap_sem);
1809         srcu_idx = srcu_read_lock(&kvm->srcu);
1810         for (i = 0; i < args->count; i++) {
1811                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1812                 if (kvm_is_error_hva(hva)) {
1813                         r = -EFAULT;
1814                         break;
1815                 }
1816
1817                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1818                 if (r)
1819                         break;
1820         }
1821         srcu_read_unlock(&kvm->srcu, srcu_idx);
1822         up_read(&current->mm->mmap_sem);
1823
1824         if (!r) {
1825                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1826                                  sizeof(uint8_t) * args->count);
1827                 if (r)
1828                         r = -EFAULT;
1829         }
1830
1831         kvfree(keys);
1832         return r;
1833 }
1834
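/*
 * Copy args->count storage keys from user space and apply them to the
 * guest frames starting at args->start_gfn, enabling storage key
 * handling for the guest first. Faults are resolved and the key retried.
 */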
1835 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1836 {
1837         uint8_t *keys;
1838         uint64_t hva;
1839         int srcu_idx, i, r = 0;
1840         bool unlocked;
1841
1842         if (args->flags != 0)
1843                 return -EINVAL;
1844
1845         /* Enforce sane limit on memory allocation */
1846         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1847                 return -EINVAL;
1848
1849         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1850         if (!keys)
1851                 return -ENOMEM;
1852
1853         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1854                            sizeof(uint8_t) * args->count);
1855         if (r) {
1856                 r = -EFAULT;
1857                 goto out;
1858         }
1859
1860         /* Enable storage key handling for the guest */
1861         r = s390_enable_skey();
1862         if (r)
1863                 goto out;
1864
1865         i = 0;
1866         down_read(&current->mm->mmap_sem);
1867         srcu_idx = srcu_read_lock(&kvm->srcu);
1868         while (i < args->count) {
1869                 unlocked = false;
1870                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1871                 if (kvm_is_error_hva(hva)) {
1872                         r = -EFAULT;
1873                         break;
1874                 }
1875
1876                 /* Lowest order bit is reserved */
1877                 if (keys[i] & 0x01) {
1878                         r = -EINVAL;
1879                         break;
1880                 }
1881
1882                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1883                 if (r) {
1884                         r = fixup_user_fault(current, current->mm, hva,
1885                                              FAULT_FLAG_WRITE, &unlocked);
1886                         if (r)
1887                                 break;
1888                 }
1889                 if (!r)
1890                         i++;
1891         }
1892         srcu_read_unlock(&kvm->srcu, srcu_idx);
1893         up_read(&current->mm->mmap_sem);
1894 out:
1895         kvfree(keys);
1896         return r;
1897 }
1898
1899 /*
1900  * Base address and length must be sent at the start of each block, therefore
1901  * it's cheaper to send some clean data, as long as it's less than the size of
1902  * two longs.
1903  */
1904 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1905 /* use the same limit as for storage keys, for consistency */
1906 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1907
1908 /*
1909  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1910  * address falls in a hole. In that case the index of one of the memslots
1911  * bordering the hole is returned.
1912  */
1913 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1914 {
1915         int start = 0, end = slots->used_slots;
1916         int slot = atomic_read(&slots->lru_slot);
1917         struct kvm_memory_slot *memslots = slots->memslots;
1918
1919         if (gfn >= memslots[slot].base_gfn &&
1920             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1921                 return slot;
1922
1923         while (start < end) {
1924                 slot = start + (end - start) / 2;
1925
1926                 if (gfn >= memslots[slot].base_gfn)
1927                         end = slot;
1928                 else
1929                         start = slot + 1;
1930         }
1931
1932         if (gfn >= memslots[start].base_gfn &&
1933             gfn < memslots[start].base_gfn + memslots[start].npages) {
1934                 atomic_set(&slots->lru_slot, start);
1935         }
1936
1937         return start;
1938 }
1939
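/*
 * Peek mode: read the CMMA values of up to bufsize consecutive guest
 * frames starting at args->start_gfn, regardless of the dirty bitmap.
 * Fails only if the very first guest address is invalid.
 */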
1940 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1941                               u8 *res, unsigned long bufsize)
1942 {
1943         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1944
1945         args->count = 0;
1946         while (args->count < bufsize) {
1947                 hva = gfn_to_hva(kvm, cur_gfn);
1948                 /*
1949                  * We return an error if the first value was invalid, but we
1950                  * return successfully if at least one value was copied.
1951                  */
1952                 if (kvm_is_error_hva(hva))
1953                         return args->count ? 0 : -EFAULT;
1954                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1955                         pgstev = 0;
1956                 res[args->count++] = (pgstev >> 24) & 0x43;
1957                 cur_gfn++;
1958         }
1959
1960         return 0;
1961 }
1962
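/*
 * Return the guest frame number of the next page whose bit is set in
 * the per-memslot CMMA dirty bitmap, starting the search at cur_gfn
 * and continuing into the following memslots.
 */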
1963 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1964                                               unsigned long cur_gfn)
1965 {
1966         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1967         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1968         unsigned long ofs = cur_gfn - ms->base_gfn;
1969
1970         if (ms->base_gfn + ms->npages <= cur_gfn) {
1971                 slotidx--;
1972                 /* If we are above the highest slot, wrap around */
1973                 if (slotidx < 0)
1974                         slotidx = slots->used_slots - 1;
1975
1976                 ms = slots->memslots + slotidx;
1977                 ofs = 0;
1978         }
1979         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1980         while ((slotidx > 0) && (ofs >= ms->npages)) {
1981                 slotidx--;
1982                 ms = slots->memslots + slotidx;
1983                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1984         }
1985         return ms->base_gfn + ofs;
1986 }
1987
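/*
 * Migration mode: harvest the CMMA values of dirty pages. Each dirty
 * bit that is consumed is cleared, the value is stored in the buffer,
 * and the walk stops at the end of the buffer, at the end of memory,
 * or when the gap to the next dirty page exceeds KVM_S390_MAX_BIT_DISTANCE.
 */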
1988 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1989                              u8 *res, unsigned long bufsize)
1990 {
1991         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1992         struct kvm_memslots *slots = kvm_memslots(kvm);
1993         struct kvm_memory_slot *ms;
1994
1995         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1996         ms = gfn_to_memslot(kvm, cur_gfn);
1997         args->count = 0;
1998         args->start_gfn = cur_gfn;
1999         if (!ms)
2000                 return 0;
2001         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2002         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2003
2004         while (args->count < bufsize) {
2005                 hva = gfn_to_hva(kvm, cur_gfn);
2006                 if (kvm_is_error_hva(hva))
2007                         return 0;
2008                 /* Decrement only if we actually flipped the bit to 0 */
2009                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2010                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2011                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2012                         pgstev = 0;
2013                 /* Save the value */
2014                 res[args->count++] = (pgstev >> 24) & 0x43;
2015                 /* If the next bit is too far away, stop. */
2016                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2017                         return 0;
2018                 /* If we reached the previous "next", find the next one */
2019                 if (cur_gfn == next_gfn)
2020                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2021                 /* Reached the end of memory or of the buffer, stop */
2022                 if ((next_gfn >= mem_end) ||
2023                     (next_gfn - args->start_gfn >= bufsize))
2024                         return 0;
2025                 cur_gfn++;
2026                 /* Reached the end of the current memslot, take the next one. */
2027                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2028                         ms = gfn_to_memslot(kvm, cur_gfn);
2029                         if (!ms)
2030                                 return 0;
2031                 }
2032         }
2033         return 0;
2034 }
2035
2036 /*
2037  * This function searches for the next page with dirty CMMA attributes, and
2038  * saves the attributes in the buffer up to either the end of the buffer or
2039  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2040  * no trailing clean bytes are saved.
2041  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2042  * output buffer will indicate 0 as length.
2043  */
2044 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2045                                   struct kvm_s390_cmma_log *args)
2046 {
2047         unsigned long bufsize;
2048         int srcu_idx, peek, ret;
2049         u8 *values;
2050
2051         if (!kvm->arch.use_cmma)
2052                 return -ENXIO;
2053         /* Invalid/unsupported flags were specified */
2054         if (args->flags & ~KVM_S390_CMMA_PEEK)
2055                 return -EINVAL;
2056         /* Migration mode query, and we are not doing a migration */
2057         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2058         if (!peek && !kvm->arch.migration_mode)
2059                 return -EINVAL;
2060         /* CMMA is disabled or was not used, or the buffer has length zero */
2061         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2062         if (!bufsize || !kvm->mm->context.uses_cmm) {
2063                 memset(args, 0, sizeof(*args));
2064                 return 0;
2065         }
2066         /* We are not peeking, and there are no dirty pages */
2067         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2068                 memset(args, 0, sizeof(*args));
2069                 return 0;
2070         }
2071
2072         values = vmalloc(bufsize);
2073         if (!values)
2074                 return -ENOMEM;
2075
2076         down_read(&kvm->mm->mmap_sem);
2077         srcu_idx = srcu_read_lock(&kvm->srcu);
2078         if (peek)
2079                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2080         else
2081                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2082         srcu_read_unlock(&kvm->srcu, srcu_idx);
2083         up_read(&kvm->mm->mmap_sem);
2084
2085         if (kvm->arch.migration_mode)
2086                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2087         else
2088                 args->remaining = 0;
2089
2090         if (copy_to_user((void __user *)args->values, values, args->count))
2091                 ret = -EFAULT;
2092
2093         vfree(values);
2094         return ret;
2095 }
2096
2097 /*
2098  * This function sets the CMMA attributes for the given pages. If the input
2099  * buffer has zero length, no action is taken, otherwise the attributes are
2100  * set and the mm->context.uses_cmm flag is set.
2101  */
2102 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2103                                   const struct kvm_s390_cmma_log *args)
2104 {
2105         unsigned long hva, mask, pgstev, i;
2106         uint8_t *bits;
2107         int srcu_idx, r = 0;
2108
2109         mask = args->mask;
2110
2111         if (!kvm->arch.use_cmma)
2112                 return -ENXIO;
2113         /* invalid/unsupported flags */
2114         if (args->flags != 0)
2115                 return -EINVAL;
2116         /* Enforce sane limit on memory allocation */
2117         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2118                 return -EINVAL;
2119         /* Nothing to do */
2120         if (args->count == 0)
2121                 return 0;
2122
2123         bits = vmalloc(array_size(sizeof(*bits), args->count));
2124         if (!bits)
2125                 return -ENOMEM;
2126
2127         r = copy_from_user(bits, (void __user *)args->values, args->count);
2128         if (r) {
2129                 r = -EFAULT;
2130                 goto out;
2131         }
2132
2133         down_read(&kvm->mm->mmap_sem);
2134         srcu_idx = srcu_read_lock(&kvm->srcu);
2135         for (i = 0; i < args->count; i++) {
2136                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2137                 if (kvm_is_error_hva(hva)) {
2138                         r = -EFAULT;
2139                         break;
2140                 }
2141
2142                 pgstev = bits[i];
2143                 pgstev = pgstev << 24;
2144                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2145                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2146         }
2147         srcu_read_unlock(&kvm->srcu, srcu_idx);
2148         up_read(&kvm->mm->mmap_sem);
2149
2150         if (!kvm->mm->context.uses_cmm) {
2151                 down_write(&kvm->mm->mmap_sem);
2152                 kvm->mm->context.uses_cmm = 1;
2153                 up_write(&kvm->mm->mmap_sem);
2154         }
2155 out:
2156         vfree(bits);
2157         return r;
2158 }
2159
2160 long kvm_arch_vm_ioctl(struct file *filp,
2161                        unsigned int ioctl, unsigned long arg)
2162 {
2163         struct kvm *kvm = filp->private_data;
2164         void __user *argp = (void __user *)arg;
2165         struct kvm_device_attr attr;
2166         int r;
2167
2168         switch (ioctl) {
2169         case KVM_S390_INTERRUPT: {
2170                 struct kvm_s390_interrupt s390int;
2171
2172                 r = -EFAULT;
2173                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2174                         break;
2175                 r = kvm_s390_inject_vm(kvm, &s390int);
2176                 break;
2177         }
2178         case KVM_CREATE_IRQCHIP: {
2179                 struct kvm_irq_routing_entry routing;
2180
2181                 r = -EINVAL;
2182                 if (kvm->arch.use_irqchip) {
2183                         /* Set up dummy routing. */
2184                         memset(&routing, 0, sizeof(routing));
2185                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2186                 }
2187                 break;
2188         }
2189         case KVM_SET_DEVICE_ATTR: {
2190                 r = -EFAULT;
2191                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2192                         break;
2193                 r = kvm_s390_vm_set_attr(kvm, &attr);
2194                 break;
2195         }
2196         case KVM_GET_DEVICE_ATTR: {
2197                 r = -EFAULT;
2198                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2199                         break;
2200                 r = kvm_s390_vm_get_attr(kvm, &attr);
2201                 break;
2202         }
2203         case KVM_HAS_DEVICE_ATTR: {
2204                 r = -EFAULT;
2205                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2206                         break;
2207                 r = kvm_s390_vm_has_attr(kvm, &attr);
2208                 break;
2209         }
2210         case KVM_S390_GET_SKEYS: {
2211                 struct kvm_s390_skeys args;
2212
2213                 r = -EFAULT;
2214                 if (copy_from_user(&args, argp,
2215                                    sizeof(struct kvm_s390_skeys)))
2216                         break;
2217                 r = kvm_s390_get_skeys(kvm, &args);
2218                 break;
2219         }
2220         case KVM_S390_SET_SKEYS: {
2221                 struct kvm_s390_skeys args;
2222
2223                 r = -EFAULT;
2224                 if (copy_from_user(&args, argp,
2225                                    sizeof(struct kvm_s390_skeys)))
2226                         break;
2227                 r = kvm_s390_set_skeys(kvm, &args);
2228                 break;
2229         }
2230         case KVM_S390_GET_CMMA_BITS: {
2231                 struct kvm_s390_cmma_log args;
2232
2233                 r = -EFAULT;
2234                 if (copy_from_user(&args, argp, sizeof(args)))
2235                         break;
2236                 mutex_lock(&kvm->slots_lock);
2237                 r = kvm_s390_get_cmma_bits(kvm, &args);
2238                 mutex_unlock(&kvm->slots_lock);
2239                 if (!r) {
2240                         r = copy_to_user(argp, &args, sizeof(args));
2241                         if (r)
2242                                 r = -EFAULT;
2243                 }
2244                 break;
2245         }
2246         case KVM_S390_SET_CMMA_BITS: {
2247                 struct kvm_s390_cmma_log args;
2248
2249                 r = -EFAULT;
2250                 if (copy_from_user(&args, argp, sizeof(args)))
2251                         break;
2252                 mutex_lock(&kvm->slots_lock);
2253                 r = kvm_s390_set_cmma_bits(kvm, &args);
2254                 mutex_unlock(&kvm->slots_lock);
2255                 break;
2256         }
2257         default:
2258                 r = -ENOTTY;
2259         }
2260
2261         return r;
2262 }
2263
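/* Query the AP configuration to find out whether APXA is installed. */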
2264 static int kvm_s390_apxa_installed(void)
2265 {
2266         struct ap_config_info info;
2267
2268         if (ap_instructions_available()) {
2269                 if (ap_qci(&info) == 0)
2270                         return info.apxa;
2271         }
2272
2273         return 0;
2274 }
2275
2276 /*
2277  * The format of the crypto control block (CRYCB) is specified in the 3 low
2278  * order bits of the CRYCB designation (CRYCBD) field as follows:
2279  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2280  *           AP extended addressing (APXA) facility is installed.
2281  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2282  * Format 2: Both the APXA and MSAX3 facilities are installed.
2283  */
2284 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2285 {
2286         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2287
2288         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2289         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2290
2291         /* Check whether MSAX3 is installed */
2292         if (!test_kvm_facility(kvm, 76))
2293                 return;
2294
2295         if (kvm_s390_apxa_installed())
2296                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2297         else
2298                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2299 }
2300
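/*
 * Install the given AP matrix masks (apm, aqm, adm) in the CRYCB while
 * all VCPUs are blocked, then request a shadow CRYCB rebuild on each of them.
 */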
2301 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2302                                unsigned long *aqm, unsigned long *adm)
2303 {
2304         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2305
2306         mutex_lock(&kvm->lock);
2307         kvm_s390_vcpu_block_all(kvm);
2308
2309         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2310         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2311                 memcpy(crycb->apcb1.apm, apm, 32);
2312                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2313                          apm[0], apm[1], apm[2], apm[3]);
2314                 memcpy(crycb->apcb1.aqm, aqm, 32);
2315                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2316                          aqm[0], aqm[1], aqm[2], aqm[3]);
2317                 memcpy(crycb->apcb1.adm, adm, 32);
2318                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2319                          adm[0], adm[1], adm[2], adm[3]);
2320                 break;
2321         case CRYCB_FORMAT1:
2322         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2323                 memcpy(crycb->apcb0.apm, apm, 8);
2324                 memcpy(crycb->apcb0.aqm, aqm, 2);
2325                 memcpy(crycb->apcb0.adm, adm, 2);
2326                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2327                          apm[0], *((unsigned short *)aqm),
2328                          *((unsigned short *)adm));
2329                 break;
2330         default:        /* Cannot happen */
2331                 break;
2332         }
2333
2334         /* recreate the shadow crycb for each vcpu */
2335         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2336         kvm_s390_vcpu_unblock_all(kvm);
2337         mutex_unlock(&kvm->lock);
2338 }
2339 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2340
2341 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2342 {
2343         mutex_lock(&kvm->lock);
2344         kvm_s390_vcpu_block_all(kvm);
2345
2346         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2347                sizeof(kvm->arch.crypto.crycb->apcb0));
2348         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2349                sizeof(kvm->arch.crypto.crycb->apcb1));
2350
2351         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2352         /* recreate the shadow crycb for each vcpu */
2353         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2354         kvm_s390_vcpu_unblock_all(kvm);
2355         mutex_unlock(&kvm->lock);
2356 }
2357 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2358
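/* Derive the guest CPUID from the host CPUID, with the version forced to 0xff. */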
2359 static u64 kvm_s390_get_initial_cpuid(void)
2360 {
2361         struct cpuid cpuid;
2362
2363         get_cpu_id(&cpuid);
2364         cpuid.version = 0xff;
2365         return *((u64 *) &cpuid);
2366 }
2367
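/*
 * Set up the CRYCB for a new VM: select the CRYCB format and, if MSAX3
 * is available, enable AES and DEA protected key handling with freshly
 * generated wrapping key masks.
 */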
2368 static void kvm_s390_crypto_init(struct kvm *kvm)
2369 {
2370         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2371         kvm_s390_set_crycb_format(kvm);
2372
2373         if (!test_kvm_facility(kvm, 76))
2374                 return;
2375
2376         /* Enable AES/DEA protected key functions by default */
2377         kvm->arch.crypto.aes_kw = 1;
2378         kvm->arch.crypto.dea_kw = 1;
2379         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2380                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2381         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2382                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2383 }
2384
2385 static void sca_dispose(struct kvm *kvm)
2386 {
2387         if (kvm->arch.use_esca)
2388                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2389         else
2390                 free_page((unsigned long)(kvm->arch.sca));
2391         kvm->arch.sca = NULL;
2392 }
2393
2394 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2395 {
2396         gfp_t alloc_flags = GFP_KERNEL;
2397         int i, rc;
2398         char debug_name[16];
2399         static unsigned long sca_offset;
2400
2401         rc = -EINVAL;
2402 #ifdef CONFIG_KVM_S390_UCONTROL
2403         if (type & ~KVM_VM_S390_UCONTROL)
2404                 goto out_err;
2405         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2406                 goto out_err;
2407 #else
2408         if (type)
2409                 goto out_err;
2410 #endif
2411
2412         rc = s390_enable_sie();
2413         if (rc)
2414                 goto out_err;
2415
2416         rc = -ENOMEM;
2417
2418         if (!sclp.has_64bscao)
2419                 alloc_flags |= GFP_DMA;
2420         rwlock_init(&kvm->arch.sca_lock);
2421         /* start with basic SCA */
2422         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2423         if (!kvm->arch.sca)
2424                 goto out_err;
2425         mutex_lock(&kvm_lock);
2426         sca_offset += 16;
2427         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2428                 sca_offset = 0;
2429         kvm->arch.sca = (struct bsca_block *)
2430                         ((char *) kvm->arch.sca + sca_offset);
2431         mutex_unlock(&kvm_lock);
2432
2433         sprintf(debug_name, "kvm-%u", current->pid);
2434
2435         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2436         if (!kvm->arch.dbf)
2437                 goto out_err;
2438
2439         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2440         kvm->arch.sie_page2 =
2441              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2442         if (!kvm->arch.sie_page2)
2443                 goto out_err;
2444
2445         kvm->arch.sie_page2->kvm = kvm;
2446         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2447
2448         for (i = 0; i < kvm_s390_fac_size(); i++) {
2449                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2450                                               (kvm_s390_fac_base[i] |
2451                                                kvm_s390_fac_ext[i]);
2452                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2453                                               kvm_s390_fac_base[i];
2454         }
2455         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2456
2457         /* we are always in czam mode - even on pre z14 machines */
2458         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2459         set_kvm_facility(kvm->arch.model.fac_list, 138);
2460         /* we emulate STHYI in kvm */
2461         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2462         set_kvm_facility(kvm->arch.model.fac_list, 74);
2463         if (MACHINE_HAS_TLB_GUEST) {
2464                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2465                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2466         }
2467
2468         if (css_general_characteristics.aiv && test_facility(65))
2469                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2470
2471         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2472         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2473
2474         kvm_s390_crypto_init(kvm);
2475
2476         mutex_init(&kvm->arch.float_int.ais_lock);
2477         spin_lock_init(&kvm->arch.float_int.lock);
2478         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2479                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2480         init_waitqueue_head(&kvm->arch.ipte_wq);
2481         mutex_init(&kvm->arch.ipte_mutex);
2482
2483         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2484         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2485
2486         if (type & KVM_VM_S390_UCONTROL) {
2487                 kvm->arch.gmap = NULL;
2488                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2489         } else {
2490                 if (sclp.hamax == U64_MAX)
2491                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2492                 else
2493                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2494                                                     sclp.hamax + 1);
2495                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2496                 if (!kvm->arch.gmap)
2497                         goto out_err;
2498                 kvm->arch.gmap->private = kvm;
2499                 kvm->arch.gmap->pfault_enabled = 0;
2500         }
2501
2502         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2503         kvm->arch.use_skf = sclp.has_skey;
2504         spin_lock_init(&kvm->arch.start_stop_lock);
2505         kvm_s390_vsie_init(kvm);
2506         kvm_s390_gisa_init(kvm);
2507         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2508
2509         return 0;
2510 out_err:
2511         free_page((unsigned long)kvm->arch.sie_page2);
2512         debug_unregister(kvm->arch.dbf);
2513         sca_dispose(kvm);
2514         KVM_EVENT(3, "creation of vm failed: %d", rc);
2515         return rc;
2516 }
2517
2518 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2519 {
2520         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2521         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2522         kvm_s390_clear_local_irqs(vcpu);
2523         kvm_clear_async_pf_completion_queue(vcpu);
2524         if (!kvm_is_ucontrol(vcpu->kvm))
2525                 sca_del_vcpu(vcpu);
2526
2527         if (kvm_is_ucontrol(vcpu->kvm))
2528                 gmap_remove(vcpu->arch.gmap);
2529
2530         if (vcpu->kvm->arch.use_cmma)
2531                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2532         free_page((unsigned long)(vcpu->arch.sie_block));
2533
2534         kvm_vcpu_uninit(vcpu);
2535         kmem_cache_free(kvm_vcpu_cache, vcpu);
2536 }
2537
2538 static void kvm_free_vcpus(struct kvm *kvm)
2539 {
2540         unsigned int i;
2541         struct kvm_vcpu *vcpu;
2542
2543         kvm_for_each_vcpu(i, vcpu, kvm)
2544                 kvm_arch_vcpu_destroy(vcpu);
2545
2546         mutex_lock(&kvm->lock);
2547         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2548                 kvm->vcpus[i] = NULL;
2549
2550         atomic_set(&kvm->online_vcpus, 0);
2551         mutex_unlock(&kvm->lock);
2552 }
2553
2554 void kvm_arch_destroy_vm(struct kvm *kvm)
2555 {
2556         kvm_free_vcpus(kvm);
2557         sca_dispose(kvm);
2558         debug_unregister(kvm->arch.dbf);
2559         kvm_s390_gisa_destroy(kvm);
2560         free_page((unsigned long)kvm->arch.sie_page2);
2561         if (!kvm_is_ucontrol(kvm))
2562                 gmap_remove(kvm->arch.gmap);
2563         kvm_s390_destroy_adapters(kvm);
2564         kvm_s390_clear_float_irqs(kvm);
2565         kvm_s390_vsie_destroy(kvm);
2566         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2567 }
2568
2569 /* Section: vcpu related */
2570 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2571 {
2572         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2573         if (!vcpu->arch.gmap)
2574                 return -ENOMEM;
2575         vcpu->arch.gmap->private = vcpu->kvm;
2576
2577         return 0;
2578 }
2579
2580 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2581 {
2582         if (!kvm_s390_use_sca_entries())
2583                 return;
2584         read_lock(&vcpu->kvm->arch.sca_lock);
2585         if (vcpu->kvm->arch.use_esca) {
2586                 struct esca_block *sca = vcpu->kvm->arch.sca;
2587
2588                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2589                 sca->cpu[vcpu->vcpu_id].sda = 0;
2590         } else {
2591                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2592
2593                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2594                 sca->cpu[vcpu->vcpu_id].sda = 0;
2595         }
2596         read_unlock(&vcpu->kvm->arch.sca_lock);
2597 }
2598
2599 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2600 {
2601         if (!kvm_s390_use_sca_entries()) {
2602                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2603
2604                 /* we still need the basic sca for the ipte control */
2605                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2606                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2607                 return;
2608         }
2609         read_lock(&vcpu->kvm->arch.sca_lock);
2610         if (vcpu->kvm->arch.use_esca) {
2611                 struct esca_block *sca = vcpu->kvm->arch.sca;
2612
2613                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2614                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2615                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2616                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2617                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2618         } else {
2619                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2620
2621                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2622                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2623                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2624                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2625         }
2626         read_unlock(&vcpu->kvm->arch.sca_lock);
2627 }
2628
2629 /* Basic SCA to Extended SCA data copy routines */
2630 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2631 {
2632         d->sda = s->sda;
2633         d->sigp_ctrl.c = s->sigp_ctrl.c;
2634         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2635 }
2636
2637 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2638 {
2639         int i;
2640
2641         d->ipte_control = s->ipte_control;
2642         d->mcn[0] = s->mcn;
2643         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2644                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2645 }
2646
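/*
 * Replace the basic SCA with an extended SCA: copy all entries into the
 * newly allocated block while every VCPU is blocked and repoint each
 * SIE control block at the new SCA origin.
 */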
2647 static int sca_switch_to_extended(struct kvm *kvm)
2648 {
2649         struct bsca_block *old_sca = kvm->arch.sca;
2650         struct esca_block *new_sca;
2651         struct kvm_vcpu *vcpu;
2652         unsigned int vcpu_idx;
2653         u32 scaol, scaoh;
2654
2655         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2656         if (!new_sca)
2657                 return -ENOMEM;
2658
2659         scaoh = (u32)((u64)(new_sca) >> 32);
2660         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2661
2662         kvm_s390_vcpu_block_all(kvm);
2663         write_lock(&kvm->arch.sca_lock);
2664
2665         sca_copy_b_to_e(new_sca, old_sca);
2666
2667         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2668                 vcpu->arch.sie_block->scaoh = scaoh;
2669                 vcpu->arch.sie_block->scaol = scaol;
2670                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2671         }
2672         kvm->arch.sca = new_sca;
2673         kvm->arch.use_esca = 1;
2674
2675         write_unlock(&kvm->arch.sca_lock);
2676         kvm_s390_vcpu_unblock_all(kvm);
2677
2678         free_page((unsigned long)old_sca);
2679
2680         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2681                  old_sca, kvm->arch.sca);
2682         return 0;
2683 }
2684
2685 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2686 {
2687         int rc;
2688
2689         if (!kvm_s390_use_sca_entries()) {
2690                 if (id < KVM_MAX_VCPUS)
2691                         return true;
2692                 return false;
2693         }
2694         if (id < KVM_S390_BSCA_CPU_SLOTS)
2695                 return true;
2696         if (!sclp.has_esca || !sclp.has_64bscao)
2697                 return false;
2698
2699         mutex_lock(&kvm->lock);
2700         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2701         mutex_unlock(&kvm->lock);
2702
2703         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2704 }
2705
2706 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2707 {
2708         return 0;
2709 }
2710
2711 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2712 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2713 {
2714         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2715         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2716         vcpu->arch.cputm_start = get_tod_clock_fast();
2717         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2718 }
2719
2720 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2721 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2722 {
2723         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2724         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2725         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2726         vcpu->arch.cputm_start = 0;
2727         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2728 }
2729
2730 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2731 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2732 {
2733         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2734         vcpu->arch.cputm_enabled = true;
2735         __start_cpu_timer_accounting(vcpu);
2736 }
2737
2738 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2739 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2740 {
2741         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2742         __stop_cpu_timer_accounting(vcpu);
2743         vcpu->arch.cputm_enabled = false;
2744 }
2745
2746 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2747 {
2748         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2749         __enable_cpu_timer_accounting(vcpu);
2750         preempt_enable();
2751 }
2752
2753 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2754 {
2755         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2756         __disable_cpu_timer_accounting(vcpu);
2757         preempt_enable();
2758 }
2759
2760 /* set the cpu timer - may only be called from the VCPU thread itself */
2761 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2762 {
2763         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2764         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2765         if (vcpu->arch.cputm_enabled)
2766                 vcpu->arch.cputm_start = get_tod_clock_fast();
2767         vcpu->arch.sie_block->cputm = cputm;
2768         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2769         preempt_enable();
2770 }
2771
2772 /* update and get the cpu timer - can also be called from other VCPU threads */
2773 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2774 {
2775         unsigned int seq;
2776         __u64 value;
2777
2778         if (unlikely(!vcpu->arch.cputm_enabled))
2779                 return vcpu->arch.sie_block->cputm;
2780
2781         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2782         do {
2783                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2784                 /*
2785                  * If the writer would ever execute a read in the critical
2786                  * section, e.g. in irq context, we have a deadlock.
2787                  */
2788                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2789                 value = vcpu->arch.sie_block->cputm;
2790                 /* if cputm_start is 0, accounting is being started/stopped */
2791                 if (likely(vcpu->arch.cputm_start))
2792                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2793         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2794         preempt_enable();
2795         return value;
2796 }
2797
2798 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2799 {
2800
2801         gmap_enable(vcpu->arch.enabled_gmap);
2802         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2803         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2804                 __start_cpu_timer_accounting(vcpu);
2805         vcpu->cpu = cpu;
2806 }
2807
2808 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2809 {
2810         vcpu->cpu = -1;
2811         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2812                 __stop_cpu_timer_accounting(vcpu);
2813         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2814         vcpu->arch.enabled_gmap = gmap_get_enabled();
2815         gmap_disable(vcpu->arch.enabled_gmap);
2816
2817 }
2818
2819 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2820 {
2821         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2822         vcpu->arch.sie_block->gpsw.mask = 0UL;
2823         vcpu->arch.sie_block->gpsw.addr = 0UL;
2824         kvm_s390_set_prefix(vcpu, 0);
2825         kvm_s390_set_cpu_timer(vcpu, 0);
2826         vcpu->arch.sie_block->ckc       = 0UL;
2827         vcpu->arch.sie_block->todpr     = 0;
2828         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2829         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2830                                         CR0_INTERRUPT_KEY_SUBMASK |
2831                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2832         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2833                                         CR14_UNUSED_33 |
2834                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2835         /* make sure the new fpc will be lazily loaded */
2836         save_fpu_regs();
2837         current->thread.fpu.fpc = 0;
2838         vcpu->arch.sie_block->gbea = 1;
2839         vcpu->arch.sie_block->pp = 0;
2840         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2841         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2842         kvm_clear_async_pf_completion_queue(vcpu);
2843         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2844                 kvm_s390_vcpu_stop(vcpu);
2845         kvm_s390_clear_local_irqs(vcpu);
2846 }
2847
2848 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2849 {
2850         mutex_lock(&vcpu->kvm->lock);
2851         preempt_disable();
2852         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2853         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2854         preempt_enable();
2855         mutex_unlock(&vcpu->kvm->lock);
2856         if (!kvm_is_ucontrol(vcpu->kvm)) {
2857                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2858                 sca_add_vcpu(vcpu);
2859         }
2860         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2861                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2862         /* make vcpu_load load the right gmap on the first trigger */
2863         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2864 }
2865
2866 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2867 {
2868         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2869             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2870                 return true;
2871         return false;
2872 }
2873
2874 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2875 {
2876         /* At least one ECC subfunction must be present */
2877         return kvm_has_pckmo_subfunc(kvm, 32) ||
2878                kvm_has_pckmo_subfunc(kvm, 33) ||
2879                kvm_has_pckmo_subfunc(kvm, 34) ||
2880                kvm_has_pckmo_subfunc(kvm, 40) ||
2881                kvm_has_pckmo_subfunc(kvm, 41);
2882
2883 }
2884
2885 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2886 {
2887         /*
2888          * If the AP instructions are not being interpreted and the MSAX3
2889          * facility is not configured for the guest, there is nothing to set up.
2890          */
2891         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2892                 return;
2893
2894         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2895         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2896         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2897         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2898
2899         if (vcpu->kvm->arch.crypto.apie)
2900                 vcpu->arch.sie_block->eca |= ECA_APIE;
2901
2902         /* Set up protected key support */
2903         if (vcpu->kvm->arch.crypto.aes_kw) {
2904                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2905                 /* ecc is also wrapped with AES key */
2906                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2907                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2908         }
2909
2910         if (vcpu->kvm->arch.crypto.dea_kw)
2911                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2912 }
2913
2914 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2915 {
2916         free_page(vcpu->arch.sie_block->cbrlo);
2917         vcpu->arch.sie_block->cbrlo = 0;
2918 }
2919
2920 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2921 {
2922         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2923         if (!vcpu->arch.sie_block->cbrlo)
2924                 return -ENOMEM;
2925         return 0;
2926 }
2927
2928 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2929 {
2930         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2931
2932         vcpu->arch.sie_block->ibc = model->ibc;
2933         if (test_kvm_facility(vcpu->kvm, 7))
2934                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2935 }
2936
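     /*
      * Derive the initial SIE configuration for this VCPU from the
      * available facilities, the guest CPU model and VM-level settings
      * such as CMMA and crypto.
      */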
2937 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2938 {
2939         int rc = 0;
2940
2941         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2942                                                     CPUSTAT_SM |
2943                                                     CPUSTAT_STOPPED);
2944
2945         if (test_kvm_facility(vcpu->kvm, 78))
2946                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2947         else if (test_kvm_facility(vcpu->kvm, 8))
2948                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2949
2950         kvm_s390_vcpu_setup_model(vcpu);
2951
2952         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2953         if (MACHINE_HAS_ESOP)
2954                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2955         if (test_kvm_facility(vcpu->kvm, 9))
2956                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2957         if (test_kvm_facility(vcpu->kvm, 73))
2958                 vcpu->arch.sie_block->ecb |= ECB_TE;
2959
2960         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2961                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2962         if (test_kvm_facility(vcpu->kvm, 130))
2963                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2964         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2965         if (sclp.has_cei)
2966                 vcpu->arch.sie_block->eca |= ECA_CEI;
2967         if (sclp.has_ib)
2968                 vcpu->arch.sie_block->eca |= ECA_IB;
2969         if (sclp.has_siif)
2970                 vcpu->arch.sie_block->eca |= ECA_SII;
2971         if (sclp.has_sigpif)
2972                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2973         if (test_kvm_facility(vcpu->kvm, 129)) {
2974                 vcpu->arch.sie_block->eca |= ECA_VX;
2975                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2976         }
2977         if (test_kvm_facility(vcpu->kvm, 139))
2978                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2979         if (test_kvm_facility(vcpu->kvm, 156))
2980                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2981         if (vcpu->arch.sie_block->gd) {
2982                 vcpu->arch.sie_block->eca |= ECA_AIV;
2983                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2984                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2985         }
2986         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2987                                         | SDNXC;
2988         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2989
2990         if (sclp.has_kss)
2991                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2992         else
2993                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2994
2995         if (vcpu->kvm->arch.use_cmma) {
2996                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2997                 if (rc)
2998                         return rc;
2999         }
3000         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3001         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3002
3003         vcpu->arch.sie_block->hpid = HPID_KVM;
3004
3005         kvm_s390_vcpu_crypto_setup(vcpu);
3006
3007         return rc;
3008 }
3009
3010 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3011 {
3012         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3013                 return -EINVAL;
3014         return 0;
3015 }
3016
3017 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3018                                       unsigned int id)
3019 {
3020         struct kvm_vcpu *vcpu;
3021         struct sie_page *sie_page;
3022         int rc;
3023
3024         rc = -ENOMEM;
3025
3026         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3027         if (!vcpu)
3028                 goto out;
3029
3030         rc = kvm_vcpu_init(vcpu, kvm, id);
3031         if (rc)
3032                 goto out_free_cpu;
3033
3034         rc = -ENOMEM;
3035
3036         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3037         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3038         if (!sie_page)
3039                 goto out_uninit_vcpu;
3040
3041         vcpu->arch.sie_block = &sie_page->sie_block;
3042         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3043
3044         /* the real guest size will always be smaller than msl */
3045         vcpu->arch.sie_block->mso = 0;
3046         vcpu->arch.sie_block->msl = sclp.hamax;
3047
3048         vcpu->arch.sie_block->icpua = id;
3049         spin_lock_init(&vcpu->arch.local_int.lock);
3050         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3051         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3052                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3053         seqcount_init(&vcpu->arch.cputm_seqcount);
3054
3055         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3056         kvm_clear_async_pf_completion_queue(vcpu);
3057         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3058                                     KVM_SYNC_GPRS |
3059                                     KVM_SYNC_ACRS |
3060                                     KVM_SYNC_CRS |
3061                                     KVM_SYNC_ARCH0 |
3062                                     KVM_SYNC_PFAULT;
3063         kvm_s390_set_prefix(vcpu, 0);
3064         if (test_kvm_facility(vcpu->kvm, 64))
3065                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3066         if (test_kvm_facility(vcpu->kvm, 82))
3067                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3068         if (test_kvm_facility(vcpu->kvm, 133))
3069                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3070         if (test_kvm_facility(vcpu->kvm, 156))
3071                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3072         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3073          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3074          */
3075         if (MACHINE_HAS_VX)
3076                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3077         else
3078                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3079
3080         if (kvm_is_ucontrol(vcpu->kvm)) {
3081                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3082                 if (rc)
3083                         goto out_free_sie_block;
3084         }
3085
3086         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3087                  vcpu->arch.sie_block);
3088         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3089
3090         return vcpu;
3091 out_free_sie_block:
3092         free_page((unsigned long)(vcpu->arch.sie_block));
3093 out_uninit_vcpu:
3094         kvm_vcpu_uninit(vcpu);
3095 out_free_cpu:
3096         kmem_cache_free(kvm_vcpu_cache, vcpu);
3097 out:
3098         return ERR_PTR(rc);
3099 }
3100
3101 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3102 {
3103         return kvm_s390_vcpu_has_irq(vcpu, 0);
3104 }
3105
3106 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3107 {
3108         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3109 }
3110
3111 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3112 {
3113         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3114         exit_sie(vcpu);
3115 }
3116
3117 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3118 {
3119         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3120 }
3121
3122 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3123 {
3124         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3125         exit_sie(vcpu);
3126 }
3127
3128 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3129 {
3130         return atomic_read(&vcpu->arch.sie_block->prog20) &
3131                (PROG_BLOCK_SIE | PROG_REQUEST);
3132 }
3133
3134 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3135 {
3136         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3137 }
3138
3139 /*
3140  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3141  * If the CPU is not running (e.g. waiting as idle) the function will
3142  * return immediately. */
3143 void exit_sie(struct kvm_vcpu *vcpu)
3144 {
3145         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3146         kvm_s390_vsie_kick(vcpu);
3147         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3148                 cpu_relax();
3149 }
3150
3151 /* Kick a guest cpu out of SIE to process a request synchronously */
3152 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3153 {
3154         kvm_make_request(req, vcpu);
3155         kvm_s390_vcpu_request(vcpu);
3156 }
3157
3158 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3159                               unsigned long end)
3160 {
3161         struct kvm *kvm = gmap->private;
3162         struct kvm_vcpu *vcpu;
3163         unsigned long prefix;
3164         int i;
3165
3166         if (gmap_is_shadow(gmap))
3167                 return;
3168         if (start >= 1UL << 31)
3169                 /* We are only interested in prefix pages */
3170                 return;
3171         kvm_for_each_vcpu(i, vcpu, kvm) {
3172                 /* match against both prefix pages */
3173                 prefix = kvm_s390_get_prefix(vcpu);
3174                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3175                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3176                                    start, end);
3177                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3178                 }
3179         }
3180 }
3181
3182 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3183 {
3184         /* do not poll with more than halt_poll_max_steal percent of steal time */
3185         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3186             halt_poll_max_steal) {
3187                 vcpu->stat.halt_no_poll_steal++;
3188                 return true;
3189         }
3190         return false;
3191 }
3192
3193 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3194 {
3195         /* kvm common code refers to this, but never calls it */
3196         BUG();
3197         return 0;
3198 }
3199
3200 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3201                                            struct kvm_one_reg *reg)
3202 {
3203         int r = -EINVAL;
3204
3205         switch (reg->id) {
3206         case KVM_REG_S390_TODPR:
3207                 r = put_user(vcpu->arch.sie_block->todpr,
3208                              (u32 __user *)reg->addr);
3209                 break;
3210         case KVM_REG_S390_EPOCHDIFF:
3211                 r = put_user(vcpu->arch.sie_block->epoch,
3212                              (u64 __user *)reg->addr);
3213                 break;
3214         case KVM_REG_S390_CPU_TIMER:
3215                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3216                              (u64 __user *)reg->addr);
3217                 break;
3218         case KVM_REG_S390_CLOCK_COMP:
3219                 r = put_user(vcpu->arch.sie_block->ckc,
3220                              (u64 __user *)reg->addr);
3221                 break;
3222         case KVM_REG_S390_PFTOKEN:
3223                 r = put_user(vcpu->arch.pfault_token,
3224                              (u64 __user *)reg->addr);
3225                 break;
3226         case KVM_REG_S390_PFCOMPARE:
3227                 r = put_user(vcpu->arch.pfault_compare,
3228                              (u64 __user *)reg->addr);
3229                 break;
3230         case KVM_REG_S390_PFSELECT:
3231                 r = put_user(vcpu->arch.pfault_select,
3232                              (u64 __user *)reg->addr);
3233                 break;
3234         case KVM_REG_S390_PP:
3235                 r = put_user(vcpu->arch.sie_block->pp,
3236                              (u64 __user *)reg->addr);
3237                 break;
3238         case KVM_REG_S390_GBEA:
3239                 r = put_user(vcpu->arch.sie_block->gbea,
3240                              (u64 __user *)reg->addr);
3241                 break;
3242         default:
3243                 break;
3244         }
3245
3246         return r;
3247 }
3248
3249 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3250                                            struct kvm_one_reg *reg)
3251 {
3252         int r = -EINVAL;
3253         __u64 val;
3254
3255         switch (reg->id) {
3256         case KVM_REG_S390_TODPR:
3257                 r = get_user(vcpu->arch.sie_block->todpr,
3258                              (u32 __user *)reg->addr);
3259                 break;
3260         case KVM_REG_S390_EPOCHDIFF:
3261                 r = get_user(vcpu->arch.sie_block->epoch,
3262                              (u64 __user *)reg->addr);
3263                 break;
3264         case KVM_REG_S390_CPU_TIMER:
3265                 r = get_user(val, (u64 __user *)reg->addr);
3266                 if (!r)
3267                         kvm_s390_set_cpu_timer(vcpu, val);
3268                 break;
3269         case KVM_REG_S390_CLOCK_COMP:
3270                 r = get_user(vcpu->arch.sie_block->ckc,
3271                              (u64 __user *)reg->addr);
3272                 break;
3273         case KVM_REG_S390_PFTOKEN:
3274                 r = get_user(vcpu->arch.pfault_token,
3275                              (u64 __user *)reg->addr);
3276                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3277                         kvm_clear_async_pf_completion_queue(vcpu);
3278                 break;
3279         case KVM_REG_S390_PFCOMPARE:
3280                 r = get_user(vcpu->arch.pfault_compare,
3281                              (u64 __user *)reg->addr);
3282                 break;
3283         case KVM_REG_S390_PFSELECT:
3284                 r = get_user(vcpu->arch.pfault_select,
3285                              (u64 __user *)reg->addr);
3286                 break;
3287         case KVM_REG_S390_PP:
3288                 r = get_user(vcpu->arch.sie_block->pp,
3289                              (u64 __user *)reg->addr);
3290                 break;
3291         case KVM_REG_S390_GBEA:
3292                 r = get_user(vcpu->arch.sie_block->gbea,
3293                              (u64 __user *)reg->addr);
3294                 break;
3295         default:
3296                 break;
3297         }
3298
3299         return r;
3300 }
3301
3302 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3303 {
3304         kvm_s390_vcpu_initial_reset(vcpu);
3305         return 0;
3306 }
3307
3308 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3309 {
3310         vcpu_load(vcpu);
3311         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3312         vcpu_put(vcpu);
3313         return 0;
3314 }
3315
3316 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3317 {
3318         vcpu_load(vcpu);
3319         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3320         vcpu_put(vcpu);
3321         return 0;
3322 }
3323
3324 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3325                                   struct kvm_sregs *sregs)
3326 {
3327         vcpu_load(vcpu);
3328
3329         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3330         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3331
3332         vcpu_put(vcpu);
3333         return 0;
3334 }
3335
3336 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3337                                   struct kvm_sregs *sregs)
3338 {
3339         vcpu_load(vcpu);
3340
3341         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3342         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3343
3344         vcpu_put(vcpu);
3345         return 0;
3346 }
3347
3348 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3349 {
3350         int ret = 0;
3351
3352         vcpu_load(vcpu);
3353
3354         if (test_fp_ctl(fpu->fpc)) {
3355                 ret = -EINVAL;
3356                 goto out;
3357         }
3358         vcpu->run->s.regs.fpc = fpu->fpc;
3359         if (MACHINE_HAS_VX)
3360                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3361                                  (freg_t *) fpu->fprs);
3362         else
3363                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3364
3365 out:
3366         vcpu_put(vcpu);
3367         return ret;
3368 }
3369
3370 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3371 {
3372         vcpu_load(vcpu);
3373
3374         /* make sure we have the latest values */
3375         save_fpu_regs();
3376         if (MACHINE_HAS_VX)
3377                 convert_vx_to_fp((freg_t *) fpu->fprs,
3378                                  (__vector128 *) vcpu->run->s.regs.vrs);
3379         else
3380                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3381         fpu->fpc = vcpu->run->s.regs.fpc;
3382
3383         vcpu_put(vcpu);
3384         return 0;
3385 }
3386
3387 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3388 {
3389         int rc = 0;
3390
3391         if (!is_vcpu_stopped(vcpu))
3392                 rc = -EBUSY;
3393         else {
3394                 vcpu->run->psw_mask = psw.mask;
3395                 vcpu->run->psw_addr = psw.addr;
3396         }
3397         return rc;
3398 }
3399
3400 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3401                                   struct kvm_translation *tr)
3402 {
3403         return -EINVAL; /* not implemented yet */
3404 }
3405
3406 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3407                               KVM_GUESTDBG_USE_HW_BP | \
3408                               KVM_GUESTDBG_ENABLE)
3409
3410 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3411                                         struct kvm_guest_debug *dbg)
3412 {
3413         int rc = 0;
3414
3415         vcpu_load(vcpu);
3416
3417         vcpu->guest_debug = 0;
3418         kvm_s390_clear_bp_data(vcpu);
3419
3420         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3421                 rc = -EINVAL;
3422                 goto out;
3423         }
3424         if (!sclp.has_gpere) {
3425                 rc = -EINVAL;
3426                 goto out;
3427         }
3428
3429         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3430                 vcpu->guest_debug = dbg->control;
3431                 /* enforce guest PER */
3432                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3433
3434                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3435                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3436         } else {
3437                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3438                 vcpu->arch.guestdbg.last_bp = 0;
3439         }
3440
3441         if (rc) {
3442                 vcpu->guest_debug = 0;
3443                 kvm_s390_clear_bp_data(vcpu);
3444                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3445         }
3446
3447 out:
3448         vcpu_put(vcpu);
3449         return rc;
3450 }
3451
3452 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3453                                     struct kvm_mp_state *mp_state)
3454 {
3455         int ret;
3456
3457         vcpu_load(vcpu);
3458
3459         /* CHECK_STOP and LOAD are not supported yet */
3460         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3461                                       KVM_MP_STATE_OPERATING;
3462
3463         vcpu_put(vcpu);
3464         return ret;
3465 }
3466
3467 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3468                                     struct kvm_mp_state *mp_state)
3469 {
3470         int rc = 0;
3471
3472         vcpu_load(vcpu);
3473
3474         /* user space knows about this interface - let it control the state */
3475         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3476
3477         switch (mp_state->mp_state) {
3478         case KVM_MP_STATE_STOPPED:
3479                 kvm_s390_vcpu_stop(vcpu);
3480                 break;
3481         case KVM_MP_STATE_OPERATING:
3482                 kvm_s390_vcpu_start(vcpu);
3483                 break;
3484         case KVM_MP_STATE_LOAD:
3485         case KVM_MP_STATE_CHECK_STOP:
3486                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3487         default:
3488                 rc = -ENXIO;
3489         }
3490
3491         vcpu_put(vcpu);
3492         return rc;
3493 }
3494
3495 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3496 {
3497         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3498 }
3499
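     /* Process all pending VCPU requests before (re)entering SIE. */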
3500 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3501 {
3502 retry:
3503         kvm_s390_vcpu_request_handled(vcpu);
3504         if (!kvm_request_pending(vcpu))
3505                 return 0;
3506         /*
3507          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3508          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3509          * This ensures that the ipte instruction for this request has
3510          * already finished. We might race against a second unmapper that
3511          * wants to set the blocking bit. Let's just retry the request loop.
3512          */
3513         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3514                 int rc;
3515                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3516                                           kvm_s390_get_prefix(vcpu),
3517                                           PAGE_SIZE * 2, PROT_WRITE);
3518                 if (rc) {
3519                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3520                         return rc;
3521                 }
3522                 goto retry;
3523         }
3524
3525         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3526                 vcpu->arch.sie_block->ihcpu = 0xffff;
3527                 goto retry;
3528         }
3529
3530         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3531                 if (!ibs_enabled(vcpu)) {
3532                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3533                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3534                 }
3535                 goto retry;
3536         }
3537
3538         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3539                 if (ibs_enabled(vcpu)) {
3540                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3541                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3542                 }
3543                 goto retry;
3544         }
3545
3546         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3547                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3548                 goto retry;
3549         }
3550
3551         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3552                 /*
3553                  * Disable CMM virtualization; we will emulate the ESSA
3554                  * instruction manually, in order to provide additional
3555                  * functionalities needed for live migration.
3556                  */
3557                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3558                 goto retry;
3559         }
3560
3561         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3562                 /*
3563                  * Re-enable CMM virtualization if CMMA is available and
3564                  * CMM has been used.
3565                  */
3566                 if ((vcpu->kvm->arch.use_cmma) &&
3567                     (vcpu->kvm->mm->context.uses_cmm))
3568                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3569                 goto retry;
3570         }
3571
3572         /* nothing to do, just clear the request */
3573         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3574         /* we left the vsie handler, nothing to do, just clear the request */
3575         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3576
3577         return 0;
3578 }
3579
3580 void kvm_s390_set_tod_clock(struct kvm *kvm,
3581                             const struct kvm_s390_vm_tod_clock *gtod)
3582 {
3583         struct kvm_vcpu *vcpu;
3584         struct kvm_s390_tod_clock_ext htod;
3585         int i;
3586
3587         mutex_lock(&kvm->lock);
3588         preempt_disable();
3589
3590         get_tod_clock_ext((char *)&htod);
3591
3592         kvm->arch.epoch = gtod->tod - htod.tod;
3593         kvm->arch.epdx = 0;
3594         if (test_kvm_facility(kvm, 139)) {
3595                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3596                 if (kvm->arch.epoch > gtod->tod)
3597                         kvm->arch.epdx -= 1;
3598         }
3599
3600         kvm_s390_vcpu_block_all(kvm);
3601         kvm_for_each_vcpu(i, vcpu, kvm) {
3602                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3603                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3604         }
3605
3606         kvm_s390_vcpu_unblock_all(kvm);
3607         preempt_enable();
3608         mutex_unlock(&kvm->lock);
3609 }
3610
3611 /**
3612  * kvm_arch_fault_in_page - fault-in guest page if necessary
3613  * @vcpu: The corresponding virtual cpu
3614  * @gpa: Guest physical address
3615  * @writable: Whether the page should be writable or not
3616  *
3617  * Make sure that a guest page has been faulted-in on the host.
3618  *
3619  * Return: Zero on success, negative error code otherwise.
3620  */
3621 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3622 {
3623         return gmap_fault(vcpu->arch.gmap, gpa,
3624                           writable ? FAULT_FLAG_WRITE : 0);
3625 }
3626
3627 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3628                                       unsigned long token)
3629 {
3630         struct kvm_s390_interrupt inti;
3631         struct kvm_s390_irq irq;
3632
3633         if (start_token) {
3634                 irq.u.ext.ext_params2 = token;
3635                 irq.type = KVM_S390_INT_PFAULT_INIT;
3636                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3637         } else {
3638                 inti.type = KVM_S390_INT_PFAULT_DONE;
3639                 inti.parm64 = token;
3640                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3641         }
3642 }
3643
3644 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3645                                      struct kvm_async_pf *work)
3646 {
3647         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3648         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3649 }
3650
3651 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3652                                  struct kvm_async_pf *work)
3653 {
3654         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3655         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3656 }
3657
3658 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3659                                struct kvm_async_pf *work)
3660 {
3661         /* s390 will always inject the page directly */
3662 }
3663
3664 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3665 {
3666         /*
3667          * s390 will always inject the page directly,
3668          * but we still want check_async_completion to clean up
3669          */
3670         return true;
3671 }
3672
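     /*
      * Set up an asynchronous page fault ("pfault init") for the current
      * host fault, but only if the guest has pfault enabled and can take
      * the corresponding external interrupt right now.
      */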
3673 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3674 {
3675         hva_t hva;
3676         struct kvm_arch_async_pf arch;
3677         int rc;
3678
3679         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3680                 return 0;
3681         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3682             vcpu->arch.pfault_compare)
3683                 return 0;
3684         if (psw_extint_disabled(vcpu))
3685                 return 0;
3686         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3687                 return 0;
3688         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3689                 return 0;
3690         if (!vcpu->arch.gmap->pfault_enabled)
3691                 return 0;
3692
3693         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3694         hva += current->thread.gmap_addr & ~PAGE_MASK;
3695         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3696                 return 0;
3697
3698         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3699         return rc;
3700 }
3701
3702 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3703 {
3704         int rc, cpuflags;
3705
3706         /*
3707          * On s390 notifications for arriving pages will be delivered directly
3708          * to the guest but the housekeeping for completed pfaults is
3709          * handled outside the worker.
3710          */
3711         kvm_check_async_pf_completion(vcpu);
3712
3713         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3714         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3715
3716         if (need_resched())
3717                 schedule();
3718
3719         if (test_cpu_flag(CIF_MCCK_PENDING))
3720                 s390_handle_mcck();
3721
3722         if (!kvm_is_ucontrol(vcpu->kvm)) {
3723                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3724                 if (rc)
3725                         return rc;
3726         }
3727
3728         rc = kvm_s390_handle_requests(vcpu);
3729         if (rc)
3730                 return rc;
3731
3732         if (guestdbg_enabled(vcpu)) {
3733                 kvm_s390_backup_guest_per_regs(vcpu);
3734                 kvm_s390_patch_guest_per_regs(vcpu);
3735         }
3736
3737         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3738
3739         vcpu->arch.sie_block->icptcode = 0;
3740         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3741         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3742         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3743
3744         return 0;
3745 }
3746
3747 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3748 {
3749         struct kvm_s390_pgm_info pgm_info = {
3750                 .code = PGM_ADDRESSING,
3751         };
3752         u8 opcode, ilen;
3753         int rc;
3754
3755         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3756         trace_kvm_s390_sie_fault(vcpu);
3757
3758         /*
3759          * We want to inject an addressing exception, which is defined as a
3760          * suppressing or terminating exception. However, since we came here
3761          * by a DAT access exception, the PSW still points to the faulting
3762          * instruction since DAT exceptions are nullifying. So we've got
3763          * to look up the current opcode to get the length of the instruction
3764          * to be able to forward the PSW.
3765          */
3766         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3767         ilen = insn_length(opcode);
3768         if (rc < 0) {
3769                 return rc;
3770         } else if (rc) {
3771                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3772                  * Forward by arbitrary ilc, injection will take care of
3773                  * nullification if necessary.
3774                  */
3775                 pgm_info = vcpu->arch.pgm;
3776                 ilen = 4;
3777         }
3778         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3779         kvm_s390_forward_psw(vcpu, ilen);
3780         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3781 }
3782
3783 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3784 {
3785         struct mcck_volatile_info *mcck_info;
3786         struct sie_page *sie_page;
3787
3788         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3789                    vcpu->arch.sie_block->icptcode);
3790         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3791
3792         if (guestdbg_enabled(vcpu))
3793                 kvm_s390_restore_guest_per_regs(vcpu);
3794
3795         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3796         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3797
3798         if (exit_reason == -EINTR) {
3799                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3800                 sie_page = container_of(vcpu->arch.sie_block,
3801                                         struct sie_page, sie_block);
3802                 mcck_info = &sie_page->mcck_info;
3803                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3804                 return 0;
3805         }
3806
3807         if (vcpu->arch.sie_block->icptcode > 0) {
3808                 int rc = kvm_handle_sie_intercept(vcpu);
3809
3810                 if (rc != -EOPNOTSUPP)
3811                         return rc;
3812                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3813                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3814                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3815                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3816                 return -EREMOTE;
3817         } else if (exit_reason != -EFAULT) {
3818                 vcpu->stat.exit_null++;
3819                 return 0;
3820         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3821                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3822                 vcpu->run->s390_ucontrol.trans_exc_code =
3823                                                 current->thread.gmap_addr;
3824                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3825                 return -EREMOTE;
3826         } else if (current->thread.gmap_pfault) {
3827                 trace_kvm_s390_major_guest_pfault(vcpu);
3828                 current->thread.gmap_pfault = 0;
3829                 if (kvm_arch_setup_async_pf(vcpu))
3830                         return 0;
3831                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3832         }
3833         return vcpu_post_run_fault_in_sie(vcpu);
3834 }
3835
3836 static int __vcpu_run(struct kvm_vcpu *vcpu)
3837 {
3838         int rc, exit_reason;
3839
3840         /*
3841          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3842          * ning the guest), so that memslots (and other stuff) are protected
3843          */
3844         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3845
3846         do {
3847                 rc = vcpu_pre_run(vcpu);
3848                 if (rc)
3849                         break;
3850
3851                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3852                 /*
3853                  * As PF_VCPU will be used in the fault handler, there
3854                  * must be no uaccess between guest_enter and guest_exit.
3855                  */
3856                 local_irq_disable();
3857                 guest_enter_irqoff();
3858                 __disable_cpu_timer_accounting(vcpu);
3859                 local_irq_enable();
3860                 exit_reason = sie64a(vcpu->arch.sie_block,
3861                                      vcpu->run->s.regs.gprs);
3862                 local_irq_disable();
3863                 __enable_cpu_timer_accounting(vcpu);
3864                 guest_exit_irqoff();
3865                 local_irq_enable();
3866                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3867
3868                 rc = vcpu_post_run(vcpu, exit_reason);
3869         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3870
3871         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3872         return rc;
3873 }
3874
3875 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3876 {
3877         struct runtime_instr_cb *riccb;
3878         struct gs_cb *gscb;
3879
3880         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3881         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3882         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3883         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3884         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3885                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3886         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3887                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3888                 /* some control register changes require a tlb flush */
3889                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3890         }
3891         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3892                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3893                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3894                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3895                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3896                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3897         }
3898         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3899                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3900                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3901                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3902                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3903                         kvm_clear_async_pf_completion_queue(vcpu);
3904         }
3905         /*
3906          * If userspace sets the riccb (e.g. after migration) to a valid state,
3907          * we should enable RI here instead of doing the lazy enablement.
3908          */
3909         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3910             test_kvm_facility(vcpu->kvm, 64) &&
3911             riccb->v &&
3912             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3913                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3914                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3915         }
3916         /*
3917          * If userspace sets the gscb (e.g. after migration) to non-zero,
3918          * we should enable GS here instead of doing the lazy enablement.
3919          */
3920         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3921             test_kvm_facility(vcpu->kvm, 133) &&
3922             gscb->gssm &&
3923             !vcpu->arch.gs_enabled) {
3924                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3925                 vcpu->arch.sie_block->ecb |= ECB_GS;
3926                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3927                 vcpu->arch.gs_enabled = 1;
3928         }
3929         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3930             test_kvm_facility(vcpu->kvm, 82)) {
3931                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3932                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3933         }
3934         save_access_regs(vcpu->arch.host_acrs);
3935         restore_access_regs(vcpu->run->s.regs.acrs);
3936         /* save host (userspace) fprs/vrs */
3937         save_fpu_regs();
3938         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3939         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3940         if (MACHINE_HAS_VX)
3941                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3942         else
3943                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3944         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3945         if (test_fp_ctl(current->thread.fpu.fpc))
3946                 /* User space provided an invalid FPC, let's clear it */
3947                 current->thread.fpu.fpc = 0;
3948         if (MACHINE_HAS_GS) {
3949                 preempt_disable();
3950                 __ctl_set_bit(2, 4);
3951                 if (current->thread.gs_cb) {
3952                         vcpu->arch.host_gscb = current->thread.gs_cb;
3953                         save_gs_cb(vcpu->arch.host_gscb);
3954                 }
3955                 if (vcpu->arch.gs_enabled) {
3956                         current->thread.gs_cb = (struct gs_cb *)
3957                                                 &vcpu->run->s.regs.gscb;
3958                         restore_gs_cb(current->thread.gs_cb);
3959                 }
3960                 preempt_enable();
3961         }
3962         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3963
3964         kvm_run->kvm_dirty_regs = 0;
3965 }
3966
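     /*
      * Copy the guest state from the SIE block and the lazily switched
      * registers back into kvm_run, and restore the host register state.
      */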
3967 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3968 {
3969         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3970         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3971         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3972         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3973         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3974         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3975         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3976         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3977         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3978         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3979         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3980         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3981         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3982         save_access_regs(vcpu->run->s.regs.acrs);
3983         restore_access_regs(vcpu->arch.host_acrs);
3984         /* Save guest register state */
3985         save_fpu_regs();
3986         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3987         /* Restore will be done lazily at return */
3988         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3989         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3990         if (MACHINE_HAS_GS) {
3991                 __ctl_set_bit(2, 4);
3992                 if (vcpu->arch.gs_enabled)
3993                         save_gs_cb(current->thread.gs_cb);
3994                 preempt_disable();
3995                 current->thread.gs_cb = vcpu->arch.host_gscb;
3996                 restore_gs_cb(vcpu->arch.host_gscb);
3997                 preempt_enable();
3998                 if (!vcpu->arch.host_gscb)
3999                         __ctl_clear_bit(2, 4);
4000                 vcpu->arch.host_gscb = NULL;
4001         }
4002         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4003 }
4004
4005 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4006 {
4007         int rc;
4008
4009         if (kvm_run->immediate_exit)
4010                 return -EINTR;
4011
4012         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4013             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4014                 return -EINVAL;
4015
4016         vcpu_load(vcpu);
4017
4018         if (guestdbg_exit_pending(vcpu)) {
4019                 kvm_s390_prepare_debug_exit(vcpu);
4020                 rc = 0;
4021                 goto out;
4022         }
4023
4024         kvm_sigset_activate(vcpu);
4025
4026         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4027                 kvm_s390_vcpu_start(vcpu);
4028         } else if (is_vcpu_stopped(vcpu)) {
4029                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4030                                    vcpu->vcpu_id);
4031                 rc = -EINVAL;
4032                 goto out;
4033         }
4034
4035         sync_regs(vcpu, kvm_run);
4036         enable_cpu_timer_accounting(vcpu);
4037
4038         might_fault();
4039         rc = __vcpu_run(vcpu);
4040
4041         if (signal_pending(current) && !rc) {
4042                 kvm_run->exit_reason = KVM_EXIT_INTR;
4043                 rc = -EINTR;
4044         }
4045
4046         if (guestdbg_exit_pending(vcpu) && !rc)  {
4047                 kvm_s390_prepare_debug_exit(vcpu);
4048                 rc = 0;
4049         }
4050
4051         if (rc == -EREMOTE) {
4052                 /* userspace support is needed, kvm_run has been prepared */
4053                 rc = 0;
4054         }
4055
4056         disable_cpu_timer_accounting(vcpu);
4057         store_regs(vcpu, kvm_run);
4058
4059         kvm_sigset_deactivate(vcpu);
4060
4061         vcpu->stat.exit_userspace++;
4062 out:
4063         vcpu_put(vcpu);
4064         return rc;
4065 }
4066
4067 /*
4068  * store status at address
4069  * we have two special cases:
4070  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4071  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4072  */
4073 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4074 {
4075         unsigned char archmode = 1;
4076         freg_t fprs[NUM_FPRS];
4077         unsigned int px;
4078         u64 clkcomp, cputm;
4079         int rc;
4080
4081         px = kvm_s390_get_prefix(vcpu);
4082         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4083                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4084                         return -EFAULT;
4085                 gpa = 0;
4086         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4087                 if (write_guest_real(vcpu, 163, &archmode, 1))
4088                         return -EFAULT;
4089                 gpa = px;
4090         } else
4091                 gpa -= __LC_FPREGS_SAVE_AREA;
4092
4093         /* manually convert vector registers if necessary */
4094         if (MACHINE_HAS_VX) {
4095                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4096                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4097                                      fprs, 128);
4098         } else {
4099                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4100                                      vcpu->run->s.regs.fprs, 128);
4101         }
4102         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4103                               vcpu->run->s.regs.gprs, 128);
4104         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4105                               &vcpu->arch.sie_block->gpsw, 16);
4106         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4107                               &px, 4);
4108         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4109                               &vcpu->run->s.regs.fpc, 4);
4110         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4111                               &vcpu->arch.sie_block->todpr, 4);
4112         cputm = kvm_s390_get_cpu_timer(vcpu);
4113         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4114                               &cputm, 8);
4115         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4116         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4117                               &clkcomp, 8);
4118         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4119                               &vcpu->run->s.regs.acrs, 64);
4120         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4121                               &vcpu->arch.sie_block->gcr, 128);
4122         return rc ? -EFAULT : 0;
4123 }
4124
4125 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4126 {
4127         /*
4128          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4129          * switch in the run ioctl. Let's update our copies before we save
4130          * them into the save area.
4131          */
4132         save_fpu_regs();
4133         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4134         save_access_regs(vcpu->run->s.regs.acrs);
4135
4136         return kvm_s390_store_status_unloaded(vcpu, addr);
4137 }
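/*
 * Editor's note (illustrative only): userspace reaches the store-status
 * code above through the KVM_S390_STORE_STATUS vcpu ioctl, passing either
 * an explicit guest address or one of the two special values handled in
 * kvm_s390_store_status_unloaded(), e.g.:
 *
 *   ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 *   ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */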
4138
4139 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4140 {
4141         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4142         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4143 }
4144
4145 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4146 {
4147         unsigned int i;
4148         struct kvm_vcpu *vcpu;
4149
4150         kvm_for_each_vcpu(i, vcpu, kvm) {
4151                 __disable_ibs_on_vcpu(vcpu);
4152         }
4153 }
4154
4155 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4156 {
4157         if (!sclp.has_ibs)
4158                 return;
4159         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4160         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4161 }
4162
4163 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4164 {
4165         int i, online_vcpus, started_vcpus = 0;
4166
4167         if (!is_vcpu_stopped(vcpu))
4168                 return;
4169
4170         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4171         /* Only one cpu at a time may enter/leave the STOPPED state. */
4172         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4173         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4174
4175         for (i = 0; i < online_vcpus; i++) {
4176                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4177                         started_vcpus++;
4178         }
4179
4180         if (started_vcpus == 0) {
4181                 /* we're the only active VCPU -> speed it up */
4182                 __enable_ibs_on_vcpu(vcpu);
4183         } else if (started_vcpus == 1) {
4184                 /*
4185                  * As we are starting a second VCPU, we have to disable
4186                  * the IBS facility on all VCPUs to remove potentially
4187                  * outstanding ENABLE requests.
4188                  */
4189                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4190         }
4191
4192         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4193         /*
4194          * Another VCPU might have used IBS while we were offline.
4195          * Let's play safe and flush the VCPU at startup.
4196          */
4197         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4198         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4199         return;
4200 }
4201
4202 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4203 {
4204         int i, online_vcpus, started_vcpus = 0;
4205         struct kvm_vcpu *started_vcpu = NULL;
4206
4207         if (is_vcpu_stopped(vcpu))
4208                 return;
4209
4210         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4211         /* Only one cpu at a time may enter/leave the STOPPED state. */
4212         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4213         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4214
4215         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4216         kvm_s390_clear_stop_irq(vcpu);
4217
4218         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4219         __disable_ibs_on_vcpu(vcpu);
4220
4221         for (i = 0; i < online_vcpus; i++) {
4222                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4223                         started_vcpus++;
4224                         started_vcpu = vcpu->kvm->vcpus[i];
4225                 }
4226         }
4227
4228         if (started_vcpus == 1) {
4229                 /*
4230                  * As we only have one VCPU left, we want to enable the
4231                  * IBS facility for that VCPU to speed it up.
4232                  */
4233                 __enable_ibs_on_vcpu(started_vcpu);
4234         }
4235
4236         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4237         return;
4238 }
4239
4240 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4241                                      struct kvm_enable_cap *cap)
4242 {
4243         int r;
4244
4245         if (cap->flags)
4246                 return -EINVAL;
4247
4248         switch (cap->cap) {
4249         case KVM_CAP_S390_CSS_SUPPORT:
4250                 if (!vcpu->kvm->arch.css_support) {
4251                         vcpu->kvm->arch.css_support = 1;
4252                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4253                         trace_kvm_s390_enable_css(vcpu->kvm);
4254                 }
4255                 r = 0;
4256                 break;
4257         default:
4258                 r = -EINVAL;
4259                 break;
4260         }
4261         return r;
4262 }
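/*
 * Editor's note (illustrative only): a sketch of how userspace would
 * enable the CSS-support capability handled above; flags must be zero:
 *
 *   struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *   ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */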
4263
4264 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4265                                   struct kvm_s390_mem_op *mop)
4266 {
4267         void __user *uaddr = (void __user *)mop->buf;
4268         void *tmpbuf = NULL;
4269         int r, srcu_idx;
4270         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4271                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4272
4273         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4274                 return -EINVAL;
4275
4276         if (mop->size > MEM_OP_MAX_SIZE)
4277                 return -E2BIG;
4278
4279         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4280                 tmpbuf = vmalloc(mop->size);
4281                 if (!tmpbuf)
4282                         return -ENOMEM;
4283         }
4284
4285         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4286
4287         switch (mop->op) {
4288         case KVM_S390_MEMOP_LOGICAL_READ:
4289                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4290                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4291                                             mop->size, GACC_FETCH);
4292                         break;
4293                 }
4294                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4295                 if (r == 0) {
4296                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4297                                 r = -EFAULT;
4298                 }
4299                 break;
4300         case KVM_S390_MEMOP_LOGICAL_WRITE:
4301                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4302                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4303                                             mop->size, GACC_STORE);
4304                         break;
4305                 }
4306                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4307                         r = -EFAULT;
4308                         break;
4309                 }
4310                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4311                 break;
4312         default:
4313                 r = -EINVAL;
4314         }
4315
4316         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4317
4318         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4319                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4320
4321         vfree(tmpbuf);
4322         return r;
4323 }
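/*
 * Editor's note (illustrative only): a hedged sketch of a KVM_S390_MEM_OP
 * read issued from userspace; "local_buf" is a hypothetical buffer of at
 * least .size bytes, and the guest address is an arbitrary example:
 *
 *   struct kvm_s390_mem_op op = {
 *           .gaddr = 0x1000,
 *           .size  = 256,
 *           .op    = KVM_S390_MEMOP_LOGICAL_READ,
 *           .buf   = (__u64)(unsigned long)local_buf,
 *           .ar    = 0,
 *   };
 *
 *   ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */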
4324
4325 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4326                                unsigned int ioctl, unsigned long arg)
4327 {
4328         struct kvm_vcpu *vcpu = filp->private_data;
4329         void __user *argp = (void __user *)arg;
4330
4331         switch (ioctl) {
4332         case KVM_S390_IRQ: {
4333                 struct kvm_s390_irq s390irq;
4334
4335                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4336                         return -EFAULT;
4337                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4338         }
4339         case KVM_S390_INTERRUPT: {
4340                 struct kvm_s390_interrupt s390int;
4341                 struct kvm_s390_irq s390irq = {};
4342
4343                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4344                         return -EFAULT;
4345                 if (s390int_to_s390irq(&s390int, &s390irq))
4346                         return -EINVAL;
4347                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4348         }
4349         }
4350         return -ENOIOCTLCMD;
4351 }
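/*
 * Editor's note (illustrative only): a hedged sketch of injecting an
 * external emergency signal through the async KVM_S390_IRQ path above;
 * the source CPU address (1) is an arbitrary example value:
 *
 *   struct kvm_s390_irq irq = {
 *           .type = KVM_S390_INT_EMERGENCY,
 *           .u.emerg.code = 1,
 *   };
 *
 *   ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */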
4352
4353 long kvm_arch_vcpu_ioctl(struct file *filp,
4354                          unsigned int ioctl, unsigned long arg)
4355 {
4356         struct kvm_vcpu *vcpu = filp->private_data;
4357         void __user *argp = (void __user *)arg;
4358         int idx;
4359         long r;
4360
4361         vcpu_load(vcpu);
4362
4363         switch (ioctl) {
4364         case KVM_S390_STORE_STATUS:
4365                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4366                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4367                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4368                 break;
4369         case KVM_S390_SET_INITIAL_PSW: {
4370                 psw_t psw;
4371
4372                 r = -EFAULT;
4373                 if (copy_from_user(&psw, argp, sizeof(psw)))
4374                         break;
4375                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4376                 break;
4377         }
4378         case KVM_S390_INITIAL_RESET:
4379                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4380                 break;
4381         case KVM_SET_ONE_REG:
4382         case KVM_GET_ONE_REG: {
4383                 struct kvm_one_reg reg;
4384                 r = -EFAULT;
4385                 if (copy_from_user(&reg, argp, sizeof(reg)))
4386                         break;
4387                 if (ioctl == KVM_SET_ONE_REG)
4388                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4389                 else
4390                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4391                 break;
4392         }
4393 #ifdef CONFIG_KVM_S390_UCONTROL
4394         case KVM_S390_UCAS_MAP: {
4395                 struct kvm_s390_ucas_mapping ucasmap;
4396
4397                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4398                         r = -EFAULT;
4399                         break;
4400                 }
4401
4402                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4403                         r = -EINVAL;
4404                         break;
4405                 }
4406
4407                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4408                                      ucasmap.vcpu_addr, ucasmap.length);
4409                 break;
4410         }
4411         case KVM_S390_UCAS_UNMAP: {
4412                 struct kvm_s390_ucas_mapping ucasmap;
4413
4414                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4415                         r = -EFAULT;
4416                         break;
4417                 }
4418
4419                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4420                         r = -EINVAL;
4421                         break;
4422                 }
4423
4424                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4425                         ucasmap.length);
4426                 break;
4427         }
4428 #endif
4429         case KVM_S390_VCPU_FAULT: {
4430                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4431                 break;
4432         }
4433         case KVM_ENABLE_CAP:
4434         {
4435                 struct kvm_enable_cap cap;
4436                 r = -EFAULT;
4437                 if (copy_from_user(&cap, argp, sizeof(cap)))
4438                         break;
4439                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4440                 break;
4441         }
4442         case KVM_S390_MEM_OP: {
4443                 struct kvm_s390_mem_op mem_op;
4444
4445                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4446                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4447                 else
4448                         r = -EFAULT;
4449                 break;
4450         }
4451         case KVM_S390_SET_IRQ_STATE: {
4452                 struct kvm_s390_irq_state irq_state;
4453
4454                 r = -EFAULT;
4455                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4456                         break;
4457                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4458                     irq_state.len == 0 ||
4459                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4460                         r = -EINVAL;
4461                         break;
4462                 }
4463                 /* do not use irq_state.flags, it will break old QEMUs */
4464                 r = kvm_s390_set_irq_state(vcpu,
4465                                            (void __user *) irq_state.buf,
4466                                            irq_state.len);
4467                 break;
4468         }
4469         case KVM_S390_GET_IRQ_STATE: {
4470                 struct kvm_s390_irq_state irq_state;
4471
4472                 r = -EFAULT;
4473                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4474                         break;
4475                 if (irq_state.len == 0) {
4476                         r = -EINVAL;
4477                         break;
4478                 }
4479                 /* do not use irq_state.flags, it will break old QEMUs */
4480                 r = kvm_s390_get_irq_state(vcpu,
4481                                            (__u8 __user *)  irq_state.buf,
4482                                            irq_state.len);
4483                 break;
4484         }
4485         default:
4486                 r = -ENOTTY;
4487         }
4488
4489         vcpu_put(vcpu);
4490         return r;
4491 }
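/*
 * Editor's note (illustrative only): a sketch of reading the pending local
 * interrupts with KVM_S390_GET_IRQ_STATE; the buffer size is a caller
 * choice, but for KVM_S390_SET_IRQ_STATE the length must be a non-zero
 * multiple of sizeof(struct kvm_s390_irq) and at most VCPU_IRQS_MAX_BUF,
 * as checked above:
 *
 *   struct kvm_s390_irq irqs[32];
 *   struct kvm_s390_irq_state state = {
 *           .buf = (__u64)(unsigned long)irqs,
 *           .len = sizeof(irqs),
 *   };
 *
 *   ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &state);
 */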
4492
4493 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4494 {
4495 #ifdef CONFIG_KVM_S390_UCONTROL
4496         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4497                  && (kvm_is_ucontrol(vcpu->kvm))) {
4498                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4499                 get_page(vmf->page);
4500                 return 0;
4501         }
4502 #endif
4503         return VM_FAULT_SIGBUS;
4504 }
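/*
 * Editor's note (illustrative only): for user-controlled (ucontrol) VMs
 * the fault handler above exposes the SIE control block, so userspace can
 * reach it by mmap()ing the vcpu fd at that page offset, e.g. (assuming a
 * 4 KiB page size):
 *
 *   void *sie = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                    vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * 4096);
 */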
4505
4506 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4507                             unsigned long npages)
4508 {
4509         return 0;
4510 }
4511
4512 /* Section: memory related */
4513 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4514                                    struct kvm_memory_slot *memslot,
4515                                    const struct kvm_userspace_memory_region *mem,
4516                                    enum kvm_mr_change change)
4517 {
4518         /* A few sanity checks. Memory slots have to start and end on a
4519            segment boundary (1MB). The memory in userland may be fragmented
4520            into various different vmas. It is okay to mmap() and munmap()
4521            memory in this slot at any time after this call. */
4522
4523         if (mem->userspace_addr & 0xffffful)
4524                 return -EINVAL;
4525
4526         if (mem->memory_size & 0xffffful)
4527                 return -EINVAL;
4528
4529         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4530                 return -EINVAL;
4531
4532         return 0;
4533 }
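/*
 * Editor's note (illustrative only): a memory region that passes the
 * checks above; "backing" is a hypothetical 1 MB aligned userspace
 * mapping and the ioctl is issued on the VM fd:
 *
 *   struct kvm_userspace_memory_region region = {
 *           .slot            = 0,
 *           .guest_phys_addr = 0,
 *           .memory_size     = 256UL << 20,
 *           .userspace_addr  = (__u64)(unsigned long)backing,
 *   };
 *
 *   ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */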
4534
4535 void kvm_arch_commit_memory_region(struct kvm *kvm,
4536                                 const struct kvm_userspace_memory_region *mem,
4537                                 const struct kvm_memory_slot *old,
4538                                 const struct kvm_memory_slot *new,
4539                                 enum kvm_mr_change change)
4540 {
4541         int rc = 0;
4542
4543         switch (change) {
4544         case KVM_MR_DELETE:
4545                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4546                                         old->npages * PAGE_SIZE);
4547                 break;
4548         case KVM_MR_MOVE:
4549                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4550                                         old->npages * PAGE_SIZE);
4551                 if (rc)
4552                         break;
4553                 /* FALLTHROUGH */
4554         case KVM_MR_CREATE:
4555                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4556                                       mem->guest_phys_addr, mem->memory_size);
4557                 break;
4558         case KVM_MR_FLAGS_ONLY:
4559                 break;
4560         default:
4561                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4562         }
4563         if (rc)
4564                 pr_warn("failed to commit memory region\n");
4565         return;
4566 }
4567
4568 static inline unsigned long nonhyp_mask(int i)
4569 {
4570         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4571
4572         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4573 }
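/*
 * Editor's note (a worked example of the arithmetic above, assuming
 * sclp.hmfai is a 32-bit value of 0x40000000, i.e. "01" in its two most
 * significant bits): for i = 0, (hmfai << 0) >> 30 = 1, so the function
 * returns 0x0000ffffffffffffUL >> 16 = 0x00000000ffffffffUL; for
 * i = 1..3 the selected two-bit fields are 0 and the full 48-bit mask
 * 0x0000ffffffffffffUL is returned unchanged.
 */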
4574
4575 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4576 {
4577         vcpu->valid_wakeup = false;
4578 }
4579
4580 static int __init kvm_s390_init(void)
4581 {
4582         int i;
4583
4584         if (!sclp.has_sief2) {
4585                 pr_info("SIE is not available\n");
4586                 return -ENODEV;
4587         }
4588
4589         if (nested && hpage) {
4590                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4591                 return -EINVAL;
4592         }
4593
4594         for (i = 0; i < 16; i++)
4595                 kvm_s390_fac_base[i] |=
4596                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4597
4598         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4599 }
4600
4601 static void __exit kvm_s390_exit(void)
4602 {
4603         kvm_exit();
4604 }
4605
4606 module_init(kvm_s390_init);
4607 module_exit(kvm_s390_exit);
4608
4609 /*
4610  * Enable autoloading of the kvm module.
4611  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4612  * since x86 takes a different approach.
4613  */
4614 #include <linux/miscdevice.h>
4615 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4616 MODULE_ALIAS("devname:kvm");