arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2018
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34
  35 #include <asm/asm-offsets.h>
  36 #include <asm/lowcore.h>
  37 #include <asm/stp.h>
  38 #include <asm/pgtable.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include "kvm-s390.h"
  48 #include "gaccess.h"
  49
  50 #define CREATE_TRACE_POINTS
  51 #include "trace.h"
  52 #include "trace-s390.h"
  53
  54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  55 #define LOCAL_IRQS 32
  56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
  59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
  61
  62 struct kvm_stats_debugfs_item debugfs_entries[] = {
  63         { "userspace_handled", VCPU_STAT(exit_userspace) },
  64         { "exit_null", VCPU_STAT(exit_null) },
  65         { "exit_validity", VCPU_STAT(exit_validity) },
  66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  67         { "exit_external_request", VCPU_STAT(exit_external_request) },
  68         { "exit_io_request", VCPU_STAT(exit_io_request) },
  69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  70         { "exit_instruction", VCPU_STAT(exit_instruction) },
  71         { "exit_pei", VCPU_STAT(exit_pei) },
  72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
  79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
  85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
  86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
  90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  93         { "deliver_program", VCPU_STAT(deliver_program) },
  94         { "deliver_io", VCPU_STAT(deliver_io) },
  95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
  96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  97         { "inject_ckc", VCPU_STAT(inject_ckc) },
  98         { "inject_cputm", VCPU_STAT(inject_cputm) },
  99         { "inject_external_call", VCPU_STAT(inject_external_call) },
 100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
 101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
 102         { "inject_io", VM_STAT(inject_io) },
 103         { "inject_mchk", VCPU_STAT(inject_mchk) },
 104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
 105         { "inject_program", VCPU_STAT(inject_program) },
 106         { "inject_restart", VCPU_STAT(inject_restart) },
 107         { "inject_service_signal", VM_STAT(inject_service_signal) },
 108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
 109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
 110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
 111         { "inject_virtio", VM_STAT(inject_virtio) },
 112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
 113         { "instruction_gs", VCPU_STAT(instruction_gs) },
 114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
 115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
 116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
 117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
 119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
 120         { "instruction_sck", VCPU_STAT(instruction_sck) },
 121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 122         { "instruction_spx", VCPU_STAT(instruction_spx) },
 123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
 124         { "instruction_stap", VCPU_STAT(instruction_stap) },
 125         { "instruction_iske", VCPU_STAT(instruction_iske) },
 126         { "instruction_ri", VCPU_STAT(instruction_ri) },
 127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 128         { "instruction_sske", VCPU_STAT(instruction_sske) },
 129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 130         { "instruction_essa", VCPU_STAT(instruction_essa) },
 131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 133         { "instruction_tb", VCPU_STAT(instruction_tb) },
 134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
 135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
 137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 138         { "instruction_sie", VCPU_STAT(instruction_sie) },
 139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
 156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
 157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 158         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
 159         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
 160         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
 161         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
 162         { NULL }
 163 };
 164
 165 struct kvm_s390_tod_clock_ext {
 166         __u8 epoch_idx;
 167         __u64 tod;
 168         __u8 reserved[7];
 169 } __packed;
 170
 171 /* allow nested virtualization in KVM (if enabled by user space) */
 172 static int nested;
 173 module_param(nested, int, S_IRUGO);
 174 MODULE_PARM_DESC(nested, "Nested virtualization support");
 175
 176 /* allow 1m huge page guest backing, if !nested */
 177 static int hpage;
 178 module_param(hpage, int, 0444);
 179 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 180
 181 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
 182 static u8 halt_poll_max_steal = 10;
 183 module_param(halt_poll_max_steal, byte, 0644);
 184 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 185
 186 /*
 187  * For now we handle at most 16 double words as this is what the s390 base
 188  * kernel handles and stores in the prefix page. If we ever need to go beyond
 189  * this, this requires changes to code, but the external uapi can stay.
 190  */
 191 #define SIZE_INTERNAL 16
 192
 193 /*
 194  * Base feature mask that defines default mask for facilities. Consists of the
 195  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 196  */
 197 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 198 /*
 199  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 200  * and defines the facilities that can be enabled via a cpu model.
 201  */
 202 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 203
 204 static unsigned long kvm_s390_fac_size(void)
 205 {
 206         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 208         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 209                 sizeof(S390_lowcore.stfle_fac_list));
 210
 211         return SIZE_INTERNAL;
 212 }
 213
 214 /* available cpu features supported by kvm */
 215 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 216 /* available subfunctions indicated via query / "test bit" */
 217 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 218
 219 static struct gmap_notifier gmap_notifier;
 220 static struct gmap_notifier vsie_gmap_notifier;
 221 debug_info_t *kvm_s390_dbf;
 222
 223 /* Section: not file related */
 224 int kvm_arch_hardware_enable(void)
 225 {
 226         /* every s390 is virtualization enabled ;-) */
 227         return 0;
 228 }
 229
 230 int kvm_arch_check_processor_compat(void)
 231 {
 232         return 0;
 233 }
 234
 235 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 236                               unsigned long end);
 237
 238 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 239 {
 240         u8 delta_idx = 0;
 241
 242         /*
 243          * The TOD jumps by delta, we have to compensate this by adding
 244          * -delta to the epoch.
 245          */
 246         delta = -delta;
 247
 248         /* sign-extension - we're adding to signed values below */
 249         if ((s64)delta < 0)
 250                 delta_idx = -1;
 251
 252         scb->epoch += delta;
 253         if (scb->ecd & ECD_MEF) {
 254                 scb->epdx += delta_idx;
 255                 if (scb->epoch < delta)
 256                         scb->epdx += 1;
 257         }
 258 }
 259
 260 /*
 261  * This callback is executed during stop_machine(). All CPUs are therefore
 262  * temporarily stopped. In order not to change guest behavior, we have to
 263  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 264  * so a CPU won't be stopped while calculating with the epoch.
 265  */
 266 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 267                           void *v)
 268 {
 269         struct kvm *kvm;
 270         struct kvm_vcpu *vcpu;
 271         int i;
 272         unsigned long long *delta = v;
 273
 274         list_for_each_entry(kvm, &vm_list, vm_list) {
 275                 kvm_for_each_vcpu(i, vcpu, kvm) {
 276                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 277                         if (i == 0) {
 278                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 279                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 280                         }
 281                         if (vcpu->arch.cputm_enabled)
 282                                 vcpu->arch.cputm_start += *delta;
 283                         if (vcpu->arch.vsie_block)
 284                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 285                                                    *delta);
 286                 }
 287         }
 288         return NOTIFY_OK;
 289 }
 290
 291 static struct notifier_block kvm_clock_notifier = {
 292         .notifier_call = kvm_clock_sync,
 293 };
 294
 295 int kvm_arch_hardware_setup(void)
 296 {
 297         gmap_notifier.notifier_call = kvm_gmap_notifier;
 298         gmap_register_pte_notifier(&gmap_notifier);
 299         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 300         gmap_register_pte_notifier(&vsie_gmap_notifier);
 301         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 302                                        &kvm_clock_notifier);
 303         return 0;
 304 }
 305
 306 void kvm_arch_hardware_unsetup(void)
 307 {
 308         gmap_unregister_pte_notifier(&gmap_notifier);
 309         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 310         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 311                                          &kvm_clock_notifier);
 312 }
 313
 314 static void allow_cpu_feat(unsigned long nr)
 315 {
 316         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 317 }
 318
 319 static inline int plo_test_bit(unsigned char nr)
 320 {
 321         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 322         int cc;
 323
 324         asm volatile(
 325                 /* Parameter registers are ignored for "test bit" */
 326                 "       plo     0,0,0,0(0)\n"
 327                 "       ipm     %0\n"
 328                 "       srl     %0,28\n"
 329                 : "=d" (cc)
 330                 : "d" (r0)
 331                 : "cc");
 332         return cc == 0;
 333 }
 334
 335 static inline void __insn32_query(unsigned int opcode, u8 query[32])
 336 {
 337         register unsigned long r0 asm("0") = 0; /* query function */
 338         register unsigned long r1 asm("1") = (unsigned long) query;
 339
 340         asm volatile(
 341                 /* Parameter regs are ignored */
 342                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 343                 : "=m" (*query)
 344                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
 345                 : "cc");
 346 }
 347
 348 #define INSN_SORTL 0xb938
 349 #define INSN_DFLTCC 0xb939
 350
 351 static void kvm_s390_cpu_feat_init(void)
 352 {
 353         int i;
 354
 355         for (i = 0; i < 256; ++i) {
 356                 if (plo_test_bit(i))
 357                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 358         }
 359
 360         if (test_facility(28)) /* TOD-clock steering */
 361                 ptff(kvm_s390_available_subfunc.ptff,
 362                      sizeof(kvm_s390_available_subfunc.ptff),
 363                      PTFF_QAF);
 364
 365         if (test_facility(17)) { /* MSA */
 366                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 367                               kvm_s390_available_subfunc.kmac);
 368                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 369                               kvm_s390_available_subfunc.kmc);
 370                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 371                               kvm_s390_available_subfunc.km);
 372                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 373                               kvm_s390_available_subfunc.kimd);
 374                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 375                               kvm_s390_available_subfunc.klmd);
 376         }
 377         if (test_facility(76)) /* MSA3 */
 378                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 379                               kvm_s390_available_subfunc.pckmo);
 380         if (test_facility(77)) { /* MSA4 */
 381                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 382                               kvm_s390_available_subfunc.kmctr);
 383                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 384                               kvm_s390_available_subfunc.kmf);
 385                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 386                               kvm_s390_available_subfunc.kmo);
 387                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 388                               kvm_s390_available_subfunc.pcc);
 389         }
 390         if (test_facility(57)) /* MSA5 */
 391                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 392                               kvm_s390_available_subfunc.ppno);
 393
 394         if (test_facility(146)) /* MSA8 */
 395                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 396                               kvm_s390_available_subfunc.kma);
 397
 398         if (test_facility(155)) /* MSA9 */
 399                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 400                               kvm_s390_available_subfunc.kdsa);
 401
 402         if (test_facility(150)) /* SORTL */
 403                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 404
 405         if (test_facility(151)) /* DFLTCC */
 406                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 407
 408         if (MACHINE_HAS_ESOP)
 409                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 410         /*
 411          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 412          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 413          */
 414         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 415             !test_facility(3) || !nested)
 416                 return;
 417         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 418         if (sclp.has_64bscao)
 419                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 420         if (sclp.has_siif)
 421                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 422         if (sclp.has_gpere)
 423                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 424         if (sclp.has_gsls)
 425                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 426         if (sclp.has_ib)
 427                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 428         if (sclp.has_cei)
 429                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 430         if (sclp.has_ibs)
 431                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 432         if (sclp.has_kss)
 433                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 434         /*
 435          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 436          * all skey handling functions read/set the skey from the PGSTE
 437          * instead of the real storage key.
 438          *
 439          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 440          * pages being detected as preserved although they are resident.
 441          *
 442          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 443          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 444          *
 445          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 446          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 447          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 448          *
 449          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 450          * cannot easily shadow the SCA because of the ipte lock.
 451          */
 452 }
 453
 454 int kvm_arch_init(void *opaque)
 455 {
 456         int rc;
 457
 458         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 459         if (!kvm_s390_dbf)
 460                 return -ENOMEM;
 461
 462         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 463                 rc = -ENOMEM;
 464                 goto out_debug_unreg;
 465         }
 466
 467         kvm_s390_cpu_feat_init();
 468
 469         /* Register floating interrupt controller interface. */
 470         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 471         if (rc) {
 472                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 473                 goto out_debug_unreg;
 474         }
 475
 476         rc = kvm_s390_gib_init(GAL_ISC);
 477         if (rc)
 478                 goto out_gib_destroy;
 479
 480         return 0;
 481
 482 out_gib_destroy:
 483         kvm_s390_gib_destroy();
 484 out_debug_unreg:
 485         debug_unregister(kvm_s390_dbf);
 486         return rc;
 487 }
 488
 489 void kvm_arch_exit(void)
 490 {
 491         kvm_s390_gib_destroy();
 492         debug_unregister(kvm_s390_dbf);
 493 }
 494
 495 /* Section: device related */
 496 long kvm_arch_dev_ioctl(struct file *filp,
 497                         unsigned int ioctl, unsigned long arg)
 498 {
 499         if (ioctl == KVM_S390_ENABLE_SIE)
 500                 return s390_enable_sie();
 501         return -EINVAL;
 502 }
 503
 504 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 505 {
 506         int r;
 507
 508         switch (ext) {
 509         case KVM_CAP_S390_PSW:
 510         case KVM_CAP_S390_GMAP:
 511         case KVM_CAP_SYNC_MMU:
 512 #ifdef CONFIG_KVM_S390_UCONTROL
 513         case KVM_CAP_S390_UCONTROL:
 514 #endif
 515         case KVM_CAP_ASYNC_PF:
 516         case KVM_CAP_SYNC_REGS:
 517         case KVM_CAP_ONE_REG:
 518         case KVM_CAP_ENABLE_CAP:
 519         case KVM_CAP_S390_CSS_SUPPORT:
 520         case KVM_CAP_IOEVENTFD:
 521         case KVM_CAP_DEVICE_CTRL:
 522         case KVM_CAP_S390_IRQCHIP:
 523         case KVM_CAP_VM_ATTRIBUTES:
 524         case KVM_CAP_MP_STATE:
 525         case KVM_CAP_IMMEDIATE_EXIT:
 526         case KVM_CAP_S390_INJECT_IRQ:
 527         case KVM_CAP_S390_USER_SIGP:
 528         case KVM_CAP_S390_USER_STSI:
 529         case KVM_CAP_S390_SKEYS:
 530         case KVM_CAP_S390_IRQ_STATE:
 531         case KVM_CAP_S390_USER_INSTR0:
 532         case KVM_CAP_S390_CMMA_MIGRATION:
 533         case KVM_CAP_S390_AIS:
 534         case KVM_CAP_S390_AIS_MIGRATION:
 535                 r = 1;
 536                 break;
 537         case KVM_CAP_S390_HPAGE_1M:
 538                 r = 0;
 539                 if (hpage && !kvm_is_ucontrol(kvm))
 540                         r = 1;
 541                 break;
 542         case KVM_CAP_S390_MEM_OP:
 543                 r = MEM_OP_MAX_SIZE;
 544                 break;
 545         case KVM_CAP_NR_VCPUS:
 546         case KVM_CAP_MAX_VCPUS:
 547         case KVM_CAP_MAX_VCPU_ID:
 548                 r = KVM_S390_BSCA_CPU_SLOTS;
 549                 if (!kvm_s390_use_sca_entries())
 550                         r = KVM_MAX_VCPUS;
 551                 else if (sclp.has_esca && sclp.has_64bscao)
 552                         r = KVM_S390_ESCA_CPU_SLOTS;
 553                 break;
 554         case KVM_CAP_S390_COW:
 555                 r = MACHINE_HAS_ESOP;
 556                 break;
 557         case KVM_CAP_S390_VECTOR_REGISTERS:
 558                 r = MACHINE_HAS_VX;
 559                 break;
 560         case KVM_CAP_S390_RI:
 561                 r = test_facility(64);
 562                 break;
 563         case KVM_CAP_S390_GS:
 564                 r = test_facility(133);
 565                 break;
 566         case KVM_CAP_S390_BPB:
 567                 r = test_facility(82);
 568                 break;
 569         default:
 570                 r = 0;
 571         }
 572         return r;
 573 }
 574
 575 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 576                                     struct kvm_memory_slot *memslot)
 577 {
 578         int i;
 579         gfn_t cur_gfn, last_gfn;
 580         unsigned long gaddr, vmaddr;
 581         struct gmap *gmap = kvm->arch.gmap;
 582         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 583
 584         /* Loop over all guest segments */
 585         cur_gfn = memslot->base_gfn;
 586         last_gfn = memslot->base_gfn + memslot->npages;
 587         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 588                 gaddr = gfn_to_gpa(cur_gfn);
 589                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 590                 if (kvm_is_error_hva(vmaddr))
 591                         continue;
 592
 593                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 594                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 595                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 596                         if (test_bit(i, bitmap))
 597                                 mark_page_dirty(kvm, cur_gfn + i);
 598                 }
 599
 600                 if (fatal_signal_pending(current))
 601                         return;
 602                 cond_resched();
 603         }
 604 }
 605
 606 /* Section: vm related */
 607 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 608
 609 /*
 610  * Get (and clear) the dirty memory log for a memory slot.
 611  */
 612 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 613                                struct kvm_dirty_log *log)
 614 {
 615         int r;
 616         unsigned long n;
 617         struct kvm_memslots *slots;
 618         struct kvm_memory_slot *memslot;
 619         int is_dirty = 0;
 620
 621         if (kvm_is_ucontrol(kvm))
 622                 return -EINVAL;
 623
 624         mutex_lock(&kvm->slots_lock);
 625
 626         r = -EINVAL;
 627         if (log->slot >= KVM_USER_MEM_SLOTS)
 628                 goto out;
 629
 630         slots = kvm_memslots(kvm);
 631         memslot = id_to_memslot(slots, log->slot);
 632         r = -ENOENT;
 633         if (!memslot->dirty_bitmap)
 634                 goto out;
 635
 636         kvm_s390_sync_dirty_log(kvm, memslot);
 637         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 638         if (r)
 639                 goto out;
 640
 641         /* Clear the dirty log */
 642         if (is_dirty) {
 643                 n = kvm_dirty_bitmap_bytes(memslot);
 644                 memset(memslot->dirty_bitmap, 0, n);
 645         }
 646         r = 0;
 647 out:
 648         mutex_unlock(&kvm->slots_lock);
 649         return r;
 650 }
 651
 652 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 653 {
 654         unsigned int i;
 655         struct kvm_vcpu *vcpu;
 656
 657         kvm_for_each_vcpu(i, vcpu, kvm) {
 658                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 659         }
 660 }
 661
 662 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 663 {
 664         int r;
 665
 666         if (cap->flags)
 667                 return -EINVAL;
 668
 669         switch (cap->cap) {
 670         case KVM_CAP_S390_IRQCHIP:
 671                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 672                 kvm->arch.use_irqchip = 1;
 673                 r = 0;
 674                 break;
 675         case KVM_CAP_S390_USER_SIGP:
 676                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 677                 kvm->arch.user_sigp = 1;
 678                 r = 0;
 679                 break;
 680         case KVM_CAP_S390_VECTOR_REGISTERS:
 681                 mutex_lock(&kvm->lock);
 682                 if (kvm->created_vcpus) {
 683                         r = -EBUSY;
 684                 } else if (MACHINE_HAS_VX) {
 685                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 686                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 687                         if (test_facility(134)) {
 688                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 689                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 690                         }
 691                         if (test_facility(135)) {
 692                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 693                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 694                         }
 695                         if (test_facility(148)) {
 696                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 697                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 698                         }
 699                         if (test_facility(152)) {
 700                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 701                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 702                         }
 703                         r = 0;
 704                 } else
 705                         r = -EINVAL;
 706                 mutex_unlock(&kvm->lock);
 707                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 708                          r ? "(not available)" : "(success)");
 709                 break;
 710         case KVM_CAP_S390_RI:
 711                 r = -EINVAL;
 712                 mutex_lock(&kvm->lock);
 713                 if (kvm->created_vcpus) {
 714                         r = -EBUSY;
 715                 } else if (test_facility(64)) {
 716                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 717                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 718                         r = 0;
 719                 }
 720                 mutex_unlock(&kvm->lock);
 721                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 722                          r ? "(not available)" : "(success)");
 723                 break;
 724         case KVM_CAP_S390_AIS:
 725                 mutex_lock(&kvm->lock);
 726                 if (kvm->created_vcpus) {
 727                         r = -EBUSY;
 728                 } else {
 729                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 730                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 731                         r = 0;
 732                 }
 733                 mutex_unlock(&kvm->lock);
 734                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 735                          r ? "(not available)" : "(success)");
 736                 break;
 737         case KVM_CAP_S390_GS:
 738                 r = -EINVAL;
 739                 mutex_lock(&kvm->lock);
 740                 if (kvm->created_vcpus) {
 741                         r = -EBUSY;
 742                 } else if (test_facility(133)) {
 743                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 744                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 745                         r = 0;
 746                 }
 747                 mutex_unlock(&kvm->lock);
 748                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 749                          r ? "(not available)" : "(success)");
 750                 break;
 751         case KVM_CAP_S390_HPAGE_1M:
 752                 mutex_lock(&kvm->lock);
 753                 if (kvm->created_vcpus)
 754                         r = -EBUSY;
 755                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 756                         r = -EINVAL;
 757                 else {
 758                         r = 0;
 759                         down_write(&kvm->mm->mmap_sem);
 760                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 761                         up_write(&kvm->mm->mmap_sem);
 762                         /*
 763                          * We might have to create fake 4k page
 764                          * tables. To avoid that the hardware works on
 765                          * stale PGSTEs, we emulate these instructions.
 766                          */
 767                         kvm->arch.use_skf = 0;
 768                         kvm->arch.use_pfmfi = 0;
 769                 }
 770                 mutex_unlock(&kvm->lock);
 771                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 772                          r ? "(not available)" : "(success)");
 773                 break;
 774         case KVM_CAP_S390_USER_STSI:
 775                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 776                 kvm->arch.user_stsi = 1;
 777                 r = 0;
 778                 break;
 779         case KVM_CAP_S390_USER_INSTR0:
 780                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 781                 kvm->arch.user_instr0 = 1;
 782                 icpt_operexc_on_all_vcpus(kvm);
 783                 r = 0;
 784                 break;
 785         default:
 786                 r = -EINVAL;
 787                 break;
 788         }
 789         return r;
 790 }
 791
 792 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 793 {
 794         int ret;
 795
 796         switch (attr->attr) {
 797         case KVM_S390_VM_MEM_LIMIT_SIZE:
 798                 ret = 0;
 799                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 800                          kvm->arch.mem_limit);
 801                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 802                         ret = -EFAULT;
 803                 break;
 804         default:
 805                 ret = -ENXIO;
 806                 break;
 807         }
 808         return ret;
 809 }
 810
 811 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 812 {
 813         int ret;
 814         unsigned int idx;
 815         switch (attr->attr) {
 816         case KVM_S390_VM_MEM_ENABLE_CMMA:
 817                 ret = -ENXIO;
 818                 if (!sclp.has_cmma)
 819                         break;
 820
 821                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 822                 mutex_lock(&kvm->lock);
 823                 if (kvm->created_vcpus)
 824                         ret = -EBUSY;
 825                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 826                         ret = -EINVAL;
 827                 else {
 828                         kvm->arch.use_cmma = 1;
 829                         /* Not compatible with cmma. */
 830                         kvm->arch.use_pfmfi = 0;
 831                         ret = 0;
 832                 }
 833                 mutex_unlock(&kvm->lock);
 834                 break;
 835         case KVM_S390_VM_MEM_CLR_CMMA:
 836                 ret = -ENXIO;
 837                 if (!sclp.has_cmma)
 838                         break;
 839                 ret = -EINVAL;
 840                 if (!kvm->arch.use_cmma)
 841                         break;
 842
 843                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 844                 mutex_lock(&kvm->lock);
 845                 idx = srcu_read_lock(&kvm->srcu);
 846                 s390_reset_cmma(kvm->arch.gmap->mm);
 847                 srcu_read_unlock(&kvm->srcu, idx);
 848                 mutex_unlock(&kvm->lock);
 849                 ret = 0;
 850                 break;
 851         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 852                 unsigned long new_limit;
 853
 854                 if (kvm_is_ucontrol(kvm))
 855                         return -EINVAL;
 856
 857                 if (get_user(new_limit, (u64 __user *)attr->addr))
 858                         return -EFAULT;
 859
 860                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 861                     new_limit > kvm->arch.mem_limit)
 862                         return -E2BIG;
 863
 864                 if (!new_limit)
 865                         return -EINVAL;
 866
 867                 /* gmap_create takes last usable address */
 868                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 869                         new_limit -= 1;
 870
 871                 ret = -EBUSY;
 872                 mutex_lock(&kvm->lock);
 873                 if (!kvm->created_vcpus) {
 874                         /* gmap_create will round the limit up */
 875                         struct gmap *new = gmap_create(current->mm, new_limit);
 876
 877                         if (!new) {
 878                                 ret = -ENOMEM;
 879                         } else {
 880                                 gmap_remove(kvm->arch.gmap);
 881                                 new->private = kvm;
 882                                 kvm->arch.gmap = new;
 883                                 ret = 0;
 884                         }
 885                 }
 886                 mutex_unlock(&kvm->lock);
 887                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 888                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 889                          (void *) kvm->arch.gmap->asce);
 890                 break;
 891         }
 892         default:
 893                 ret = -ENXIO;
 894                 break;
 895         }
 896         return ret;
 897 }
 898
 899 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 900
 901 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 902 {
 903         struct kvm_vcpu *vcpu;
 904         int i;
 905
 906         kvm_s390_vcpu_block_all(kvm);
 907
 908         kvm_for_each_vcpu(i, vcpu, kvm) {
 909                 kvm_s390_vcpu_crypto_setup(vcpu);
 910                 /* recreate the shadow crycb by leaving the VSIE handler */
 911                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 912         }
 913
 914         kvm_s390_vcpu_unblock_all(kvm);
 915 }
 916
 917 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 918 {
 919         mutex_lock(&kvm->lock);
 920         switch (attr->attr) {
 921         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 922                 if (!test_kvm_facility(kvm, 76)) {
 923                         mutex_unlock(&kvm->lock);
 924                         return -EINVAL;
 925                 }
 926                 get_random_bytes(
 927                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 928                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 929                 kvm->arch.crypto.aes_kw = 1;
 930                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 931                 break;
 932         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 933                 if (!test_kvm_facility(kvm, 76)) {
 934                         mutex_unlock(&kvm->lock);
 935                         return -EINVAL;
 936                 }
 937                 get_random_bytes(
 938                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 939                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 940                 kvm->arch.crypto.dea_kw = 1;
 941                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 942                 break;
 943         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 944                 if (!test_kvm_facility(kvm, 76)) {
 945                         mutex_unlock(&kvm->lock);
 946                         return -EINVAL;
 947                 }
 948                 kvm->arch.crypto.aes_kw = 0;
 949                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 950                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 951                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 952                 break;
 953         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 954                 if (!test_kvm_facility(kvm, 76)) {
 955                         mutex_unlock(&kvm->lock);
 956                         return -EINVAL;
 957                 }
 958                 kvm->arch.crypto.dea_kw = 0;
 959                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 960                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 961                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 962                 break;
 963         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 964                 if (!ap_instructions_available()) {
 965                         mutex_unlock(&kvm->lock);
 966                         return -EOPNOTSUPP;
 967                 }
 968                 kvm->arch.crypto.apie = 1;
 969                 break;
 970         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
 971                 if (!ap_instructions_available()) {
 972                         mutex_unlock(&kvm->lock);
 973                         return -EOPNOTSUPP;
 974                 }
 975                 kvm->arch.crypto.apie = 0;
 976                 break;
 977         default:
 978                 mutex_unlock(&kvm->lock);
 979                 return -ENXIO;
 980         }
 981
 982         kvm_s390_vcpu_crypto_reset_all(kvm);
 983         mutex_unlock(&kvm->lock);
 984         return 0;
 985 }
 986
 987 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 988 {
 989         int cx;
 990         struct kvm_vcpu *vcpu;
 991
 992         kvm_for_each_vcpu(cx, vcpu, kvm)
 993                 kvm_s390_sync_request(req, vcpu);
 994 }
 995
 996 /*
 997  * Must be called with kvm->srcu held to avoid races on memslots, and with
 998  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 999  */
1000 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1001 {
1002         struct kvm_memory_slot *ms;
1003         struct kvm_memslots *slots;
1004         unsigned long ram_pages = 0;
1005         int slotnr;
1006
1007         /* migration mode already enabled */
1008         if (kvm->arch.migration_mode)
1009                 return 0;
1010         slots = kvm_memslots(kvm);
1011         if (!slots || !slots->used_slots)
1012                 return -EINVAL;
1013
1014         if (!kvm->arch.use_cmma) {
1015                 kvm->arch.migration_mode = 1;
1016                 return 0;
1017         }
1018         /* mark all the pages in active slots as dirty */
1019         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1020                 ms = slots->memslots + slotnr;
1021                 /*
1022                  * The second half of the bitmap is only used on x86,
1023                  * and would be wasted otherwise, so we put it to good
1024                  * use here to keep track of the state of the storage
1025                  * attributes.
1026                  */
1027                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1028                 ram_pages += ms->npages;
1029         }
1030         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1031         kvm->arch.migration_mode = 1;
1032         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1033         return 0;
1034 }
1035
1036 /*
1037  * Must be called with kvm->slots_lock to avoid races with ourselves and
1038  * kvm_s390_vm_start_migration.
1039  */
1040 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1041 {
1042         /* migration mode already disabled */
1043         if (!kvm->arch.migration_mode)
1044                 return 0;
1045         kvm->arch.migration_mode = 0;
1046         if (kvm->arch.use_cmma)
1047                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1048         return 0;
1049 }
1050
1051 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1052                                      struct kvm_device_attr *attr)
1053 {
1054         int res = -ENXIO;
1055
1056         mutex_lock(&kvm->slots_lock);
1057         switch (attr->attr) {
1058         case KVM_S390_VM_MIGRATION_START:
1059                 res = kvm_s390_vm_start_migration(kvm);
1060                 break;
1061         case KVM_S390_VM_MIGRATION_STOP:
1062                 res = kvm_s390_vm_stop_migration(kvm);
1063                 break;
1064         default:
1065                 break;
1066         }
1067         mutex_unlock(&kvm->slots_lock);
1068
1069         return res;
1070 }
1071
1072 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1073                                      struct kvm_device_attr *attr)
1074 {
1075         u64 mig = kvm->arch.migration_mode;
1076
1077         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1078                 return -ENXIO;
1079
1080         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1081                 return -EFAULT;
1082         return 0;
1083 }
1084
1085 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087         struct kvm_s390_vm_tod_clock gtod;
1088
1089         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1090                 return -EFAULT;
1091
1092         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1093                 return -EINVAL;
1094         kvm_s390_set_tod_clock(kvm, &gtod);
1095
1096         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1097                 gtod.epoch_idx, gtod.tod);
1098
1099         return 0;
1100 }
1101
1102 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1103 {
1104         u8 gtod_high;
1105
1106         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1107                                            sizeof(gtod_high)))
1108                 return -EFAULT;
1109
1110         if (gtod_high != 0)
1111                 return -EINVAL;
1112         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1113
1114         return 0;
1115 }
1116
1117 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1118 {
1119         struct kvm_s390_vm_tod_clock gtod = { 0 };
1120
1121         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1122                            sizeof(gtod.tod)))
1123                 return -EFAULT;
1124
1125         kvm_s390_set_tod_clock(kvm, &gtod);
1126         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1127         return 0;
1128 }
1129
1130 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1131 {
1132         int ret;
1133
1134         if (attr->flags)
1135                 return -EINVAL;
1136
1137         switch (attr->attr) {
1138         case KVM_S390_VM_TOD_EXT:
1139                 ret = kvm_s390_set_tod_ext(kvm, attr);
1140                 break;
1141         case KVM_S390_VM_TOD_HIGH:
1142                 ret = kvm_s390_set_tod_high(kvm, attr);
1143                 break;
1144         case KVM_S390_VM_TOD_LOW:
1145                 ret = kvm_s390_set_tod_low(kvm, attr);
1146                 break;
1147         default:
1148                 ret = -ENXIO;
1149                 break;
1150         }
1151         return ret;
1152 }
1153
1154 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1155                                    struct kvm_s390_vm_tod_clock *gtod)
1156 {
1157         struct kvm_s390_tod_clock_ext htod;
1158
1159         preempt_disable();
1160
1161         get_tod_clock_ext((char *)&htod);
1162
1163         gtod->tod = htod.tod + kvm->arch.epoch;
1164         gtod->epoch_idx = 0;
1165         if (test_kvm_facility(kvm, 139)) {
1166                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1167                 if (gtod->tod < htod.tod)
1168                         gtod->epoch_idx += 1;
1169         }
1170
1171         preempt_enable();
1172 }
1173
1174 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1175 {
1176         struct kvm_s390_vm_tod_clock gtod;
1177
1178         memset(&gtod, 0, sizeof(gtod));
1179         kvm_s390_get_tod_clock(kvm, &gtod);
1180         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1181                 return -EFAULT;
1182
1183         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1184                 gtod.epoch_idx, gtod.tod);
1185         return 0;
1186 }
1187
1188 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1189 {
1190         u8 gtod_high = 0;
1191
1192         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1193                                          sizeof(gtod_high)))
1194                 return -EFAULT;
1195         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1196
1197         return 0;
1198 }
1199
1200 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202         u64 gtod;
1203
1204         gtod = kvm_s390_get_tod_clock_fast(kvm);
1205         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206                 return -EFAULT;
1207         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1208
1209         return 0;
1210 }
1211
1212 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1213 {
1214         int ret;
1215
1216         if (attr->flags)
1217                 return -EINVAL;
1218
1219         switch (attr->attr) {
1220         case KVM_S390_VM_TOD_EXT:
1221                 ret = kvm_s390_get_tod_ext(kvm, attr);
1222                 break;
1223         case KVM_S390_VM_TOD_HIGH:
1224                 ret = kvm_s390_get_tod_high(kvm, attr);
1225                 break;
1226         case KVM_S390_VM_TOD_LOW:
1227                 ret = kvm_s390_get_tod_low(kvm, attr);
1228                 break;
1229         default:
1230                 ret = -ENXIO;
1231                 break;
1232         }
1233         return ret;
1234 }
1235
1236 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238         struct kvm_s390_vm_cpu_processor *proc;
1239         u16 lowest_ibc, unblocked_ibc;
1240         int ret = 0;
1241
1242         mutex_lock(&kvm->lock);
1243         if (kvm->created_vcpus) {
1244                 ret = -EBUSY;
1245                 goto out;
1246         }
1247         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1248         if (!proc) {
1249                 ret = -ENOMEM;
1250                 goto out;
1251         }
1252         if (!copy_from_user(proc, (void __user *)attr->addr,
1253                             sizeof(*proc))) {
1254                 kvm->arch.model.cpuid = proc->cpuid;
1255                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1256                 unblocked_ibc = sclp.ibc & 0xfff;
1257                 if (lowest_ibc && proc->ibc) {
1258                         if (proc->ibc > unblocked_ibc)
1259                                 kvm->arch.model.ibc = unblocked_ibc;
1260                         else if (proc->ibc < lowest_ibc)
1261                                 kvm->arch.model.ibc = lowest_ibc;
1262                         else
1263                                 kvm->arch.model.ibc = proc->ibc;
1264                 }
1265                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1266                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1267                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1268                          kvm->arch.model.ibc,
1269                          kvm->arch.model.cpuid);
1270                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1271                          kvm->arch.model.fac_list[0],
1272                          kvm->arch.model.fac_list[1],
1273                          kvm->arch.model.fac_list[2]);
1274         } else
1275                 ret = -EFAULT;
1276         kfree(proc);
1277 out:
1278         mutex_unlock(&kvm->lock);
1279         return ret;
1280 }
1281
1282 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1283                                        struct kvm_device_attr *attr)
1284 {
1285         struct kvm_s390_vm_cpu_feat data;
1286
1287         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1288                 return -EFAULT;
1289         if (!bitmap_subset((unsigned long *) data.feat,
1290                            kvm_s390_available_cpu_feat,
1291                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1292                 return -EINVAL;
1293
1294         mutex_lock(&kvm->lock);
1295         if (kvm->created_vcpus) {
1296                 mutex_unlock(&kvm->lock);
1297                 return -EBUSY;
1298         }
1299         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1300                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1301         mutex_unlock(&kvm->lock);
1302         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1303                          data.feat[0],
1304                          data.feat[1],
1305                          data.feat[2]);
1306         return 0;
1307 }
1308
1309 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1310                                           struct kvm_device_attr *attr)
1311 {
1312         mutex_lock(&kvm->lock);
1313         if (kvm->created_vcpus) {
1314                 mutex_unlock(&kvm->lock);
1315                 return -EBUSY;
1316         }
1317
1318         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1319                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1320                 mutex_unlock(&kvm->lock);
1321                 return -EFAULT;
1322         }
1323         mutex_unlock(&kvm->lock);
1324
1325         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1327                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1328                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1329                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1330         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1331                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1332                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1333         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1334                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1335                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1336         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1338                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1339         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1342         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1345         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1348         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1351         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1354         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1357         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1360         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1363         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1366         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1369         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1377         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1381                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1382
1383         return 0;
1384 }
1385
1386 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1387 {
1388         int ret = -ENXIO;
1389
1390         switch (attr->attr) {
1391         case KVM_S390_VM_CPU_PROCESSOR:
1392                 ret = kvm_s390_set_processor(kvm, attr);
1393                 break;
1394         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1395                 ret = kvm_s390_set_processor_feat(kvm, attr);
1396                 break;
1397         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1398                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1399                 break;
1400         }
1401         return ret;
1402 }
1403
1404 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1405 {
1406         struct kvm_s390_vm_cpu_processor *proc;
1407         int ret = 0;
1408
1409         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1410         if (!proc) {
1411                 ret = -ENOMEM;
1412                 goto out;
1413         }
1414         proc->cpuid = kvm->arch.model.cpuid;
1415         proc->ibc = kvm->arch.model.ibc;
1416         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1417                S390_ARCH_FAC_LIST_SIZE_BYTE);
1418         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419                  kvm->arch.model.ibc,
1420                  kvm->arch.model.cpuid);
1421         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422                  kvm->arch.model.fac_list[0],
1423                  kvm->arch.model.fac_list[1],
1424                  kvm->arch.model.fac_list[2]);
1425         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1426                 ret = -EFAULT;
1427         kfree(proc);
1428 out:
1429         return ret;
1430 }
1431
1432 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1433 {
1434         struct kvm_s390_vm_cpu_machine *mach;
1435         int ret = 0;
1436
1437         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1438         if (!mach) {
1439                 ret = -ENOMEM;
1440                 goto out;
1441         }
1442         get_cpu_id((struct cpuid *) &mach->cpuid);
1443         mach->ibc = sclp.ibc;
1444         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1445                S390_ARCH_FAC_LIST_SIZE_BYTE);
1446         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1447                sizeof(S390_lowcore.stfle_fac_list));
1448         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1449                  kvm->arch.model.ibc,
1450                  kvm->arch.model.cpuid);
1451         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1452                  mach->fac_mask[0],
1453                  mach->fac_mask[1],
1454                  mach->fac_mask[2]);
1455         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1456                  mach->fac_list[0],
1457                  mach->fac_list[1],
1458                  mach->fac_list[2]);
1459         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1460                 ret = -EFAULT;
1461         kfree(mach);
1462 out:
1463         return ret;
1464 }
1465
1466 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1467                                        struct kvm_device_attr *attr)
1468 {
1469         struct kvm_s390_vm_cpu_feat data;
1470
1471         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1472                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1473         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1474                 return -EFAULT;
1475         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1476                          data.feat[0],
1477                          data.feat[1],
1478                          data.feat[2]);
1479         return 0;
1480 }
1481
1482 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1483                                      struct kvm_device_attr *attr)
1484 {
1485         struct kvm_s390_vm_cpu_feat data;
1486
1487         bitmap_copy((unsigned long *) data.feat,
1488                     kvm_s390_available_cpu_feat,
1489                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1490         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1491                 return -EFAULT;
1492         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1493                          data.feat[0],
1494                          data.feat[1],
1495                          data.feat[2]);
1496         return 0;
1497 }
1498
1499 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1500                                           struct kvm_device_attr *attr)
1501 {
1502         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1503             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1504                 return -EFAULT;
1505
1506         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1510                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1511         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1513                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1514         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1516                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1517         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1520         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1523         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1526         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1529         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1532         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1535         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1538         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1541         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1544         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1547         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1550         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1558         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1563
1564         return 0;
1565 }
1566
1567 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1568                                         struct kvm_device_attr *attr)
1569 {
1570         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1571             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1572                 return -EFAULT;
1573
1574         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1575                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1576                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1577                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1578                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1579         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1580                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1581                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1582         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1583                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1584                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1585         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1586                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1587                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1588         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1589                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1590                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1591         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1593                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1594         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1596                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1597         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1598                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1599                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1600         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1603         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1604                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1605                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1606         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1607                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1608                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1609         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1610                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1611                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1612         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1613                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1615         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1618         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1625                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1626         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1627                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1630                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1631
1632         return 0;
1633 }
1634
1635 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1636 {
1637         int ret = -ENXIO;
1638
1639         switch (attr->attr) {
1640         case KVM_S390_VM_CPU_PROCESSOR:
1641                 ret = kvm_s390_get_processor(kvm, attr);
1642                 break;
1643         case KVM_S390_VM_CPU_MACHINE:
1644                 ret = kvm_s390_get_machine(kvm, attr);
1645                 break;
1646         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1647                 ret = kvm_s390_get_processor_feat(kvm, attr);
1648                 break;
1649         case KVM_S390_VM_CPU_MACHINE_FEAT:
1650                 ret = kvm_s390_get_machine_feat(kvm, attr);
1651                 break;
1652         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1653                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1654                 break;
1655         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1656                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1657                 break;
1658         }
1659         return ret;
1660 }
1661
1662 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1663 {
1664         int ret;
1665
1666         switch (attr->group) {
1667         case KVM_S390_VM_MEM_CTRL:
1668                 ret = kvm_s390_set_mem_control(kvm, attr);
1669                 break;
1670         case KVM_S390_VM_TOD:
1671                 ret = kvm_s390_set_tod(kvm, attr);
1672                 break;
1673         case KVM_S390_VM_CPU_MODEL:
1674                 ret = kvm_s390_set_cpu_model(kvm, attr);
1675                 break;
1676         case KVM_S390_VM_CRYPTO:
1677                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1678                 break;
1679         case KVM_S390_VM_MIGRATION:
1680                 ret = kvm_s390_vm_set_migration(kvm, attr);
1681                 break;
1682         default:
1683                 ret = -ENXIO;
1684                 break;
1685         }
1686
1687         return ret;
1688 }
1689
1690 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1691 {
1692         int ret;
1693
1694         switch (attr->group) {
1695         case KVM_S390_VM_MEM_CTRL:
1696                 ret = kvm_s390_get_mem_control(kvm, attr);
1697                 break;
1698         case KVM_S390_VM_TOD:
1699                 ret = kvm_s390_get_tod(kvm, attr);
1700                 break;
1701         case KVM_S390_VM_CPU_MODEL:
1702                 ret = kvm_s390_get_cpu_model(kvm, attr);
1703                 break;
1704         case KVM_S390_VM_MIGRATION:
1705                 ret = kvm_s390_vm_get_migration(kvm, attr);
1706                 break;
1707         default:
1708                 ret = -ENXIO;
1709                 break;
1710         }
1711
1712         return ret;
1713 }
1714
1715 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717         int ret;
1718
1719         switch (attr->group) {
1720         case KVM_S390_VM_MEM_CTRL:
1721                 switch (attr->attr) {
1722                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1723                 case KVM_S390_VM_MEM_CLR_CMMA:
1724                         ret = sclp.has_cmma ? 0 : -ENXIO;
1725                         break;
1726                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1727                         ret = 0;
1728                         break;
1729                 default:
1730                         ret = -ENXIO;
1731                         break;
1732                 }
1733                 break;
1734         case KVM_S390_VM_TOD:
1735                 switch (attr->attr) {
1736                 case KVM_S390_VM_TOD_LOW:
1737                 case KVM_S390_VM_TOD_HIGH:
1738                         ret = 0;
1739                         break;
1740                 default:
1741                         ret = -ENXIO;
1742                         break;
1743                 }
1744                 break;
1745         case KVM_S390_VM_CPU_MODEL:
1746                 switch (attr->attr) {
1747                 case KVM_S390_VM_CPU_PROCESSOR:
1748                 case KVM_S390_VM_CPU_MACHINE:
1749                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1750                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1751                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1752                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1753                         ret = 0;
1754                         break;
1755                 default:
1756                         ret = -ENXIO;
1757                         break;
1758                 }
1759                 break;
1760         case KVM_S390_VM_CRYPTO:
1761                 switch (attr->attr) {
1762                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1763                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1764                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1765                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1766                         ret = 0;
1767                         break;
1768                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1769                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1770                         ret = ap_instructions_available() ? 0 : -ENXIO;
1771                         break;
1772                 default:
1773                         ret = -ENXIO;
1774                         break;
1775                 }
1776                 break;
1777         case KVM_S390_VM_MIGRATION:
1778                 ret = 0;
1779                 break;
1780         default:
1781                 ret = -ENXIO;
1782                 break;
1783         }
1784
1785         return ret;
1786 }
1787
1788 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1789 {
1790         uint8_t *keys;
1791         uint64_t hva;
1792         int srcu_idx, i, r = 0;
1793
1794         if (args->flags != 0)
1795                 return -EINVAL;
1796
1797         /* Is this guest using storage keys? */
1798         if (!mm_uses_skeys(current->mm))
1799                 return KVM_S390_GET_SKEYS_NONE;
1800
1801         /* Enforce sane limit on memory allocation */
1802         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1803                 return -EINVAL;
1804
1805         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1806         if (!keys)
1807                 return -ENOMEM;
1808
1809         down_read(&current->mm->mmap_sem);
1810         srcu_idx = srcu_read_lock(&kvm->srcu);
1811         for (i = 0; i < args->count; i++) {
1812                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1813                 if (kvm_is_error_hva(hva)) {
1814                         r = -EFAULT;
1815                         break;
1816                 }
1817
1818                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1819                 if (r)
1820                         break;
1821         }
1822         srcu_read_unlock(&kvm->srcu, srcu_idx);
1823         up_read(&current->mm->mmap_sem);
1824
1825         if (!r) {
1826                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1827                                  sizeof(uint8_t) * args->count);
1828                 if (r)
1829                         r = -EFAULT;
1830         }
1831
1832         kvfree(keys);
1833         return r;
1834 }
1835
1836 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1837 {
1838         uint8_t *keys;
1839         uint64_t hva;
1840         int srcu_idx, i, r = 0;
1841         bool unlocked;
1842
1843         if (args->flags != 0)
1844                 return -EINVAL;
1845
1846         /* Enforce sane limit on memory allocation */
1847         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1848                 return -EINVAL;
1849
1850         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1851         if (!keys)
1852                 return -ENOMEM;
1853
1854         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1855                            sizeof(uint8_t) * args->count);
1856         if (r) {
1857                 r = -EFAULT;
1858                 goto out;
1859         }
1860
1861         /* Enable storage key handling for the guest */
1862         r = s390_enable_skey();
1863         if (r)
1864                 goto out;
1865
1866         i = 0;
1867         down_read(&current->mm->mmap_sem);
1868         srcu_idx = srcu_read_lock(&kvm->srcu);
1869         while (i < args->count) {
1870                 unlocked = false;
1871                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1872                 if (kvm_is_error_hva(hva)) {
1873                         r = -EFAULT;
1874                         break;
1875                 }
1876
1877                 /* Lowest order bit is reserved */
1878                 if (keys[i] & 0x01) {
1879                         r = -EINVAL;
1880                         break;
1881                 }
1882
1883                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1884                 if (r) {
1885                         r = fixup_user_fault(current, current->mm, hva,
1886                                              FAULT_FLAG_WRITE, &unlocked);
1887                         if (r)
1888                                 break;
1889                 }
1890                 if (!r)
1891                         i++;
1892         }
1893         srcu_read_unlock(&kvm->srcu, srcu_idx);
1894         up_read(&current->mm->mmap_sem);
1895 out:
1896         kvfree(keys);
1897         return r;
1898 }
1899
1900 /*
1901  * Base address and length must be sent at the start of each block, therefore
1902  * it's cheaper to send some clean data, as long as it's less than the size of
1903  * two longs.
1904  */
1905 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1906 /* for consistency */
1907 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1908
1909 /*
1910  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1911  * address falls in a hole. In that case the index of one of the memslots
1912  * bordering the hole is returned.
1913  */
1914 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1915 {
1916         int start = 0, end = slots->used_slots;
1917         int slot = atomic_read(&slots->lru_slot);
1918         struct kvm_memory_slot *memslots = slots->memslots;
1919
1920         if (gfn >= memslots[slot].base_gfn &&
1921             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1922                 return slot;
1923
1924         while (start < end) {
1925                 slot = start + (end - start) / 2;
1926
1927                 if (gfn >= memslots[slot].base_gfn)
1928                         end = slot;
1929                 else
1930                         start = slot + 1;
1931         }
1932
1933         if (gfn >= memslots[start].base_gfn &&
1934             gfn < memslots[start].base_gfn + memslots[start].npages) {
1935                 atomic_set(&slots->lru_slot, start);
1936         }
1937
1938         return start;
1939 }
1940
1941 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1942                               u8 *res, unsigned long bufsize)
1943 {
1944         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1945
1946         args->count = 0;
1947         while (args->count < bufsize) {
1948                 hva = gfn_to_hva(kvm, cur_gfn);
1949                 /*
1950                  * We return an error if the first value was invalid, but we
1951                  * return successfully if at least one value was copied.
1952                  */
1953                 if (kvm_is_error_hva(hva))
1954                         return args->count ? 0 : -EFAULT;
1955                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1956                         pgstev = 0;
1957                 res[args->count++] = (pgstev >> 24) & 0x43;
1958                 cur_gfn++;
1959         }
1960
1961         return 0;
1962 }
1963
1964 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1965                                               unsigned long cur_gfn)
1966 {
1967         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1968         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1969         unsigned long ofs = cur_gfn - ms->base_gfn;
1970
1971         if (ms->base_gfn + ms->npages <= cur_gfn) {
1972                 slotidx--;
1973                 /* If we are above the highest slot, wrap around */
1974                 if (slotidx < 0)
1975                         slotidx = slots->used_slots - 1;
1976
1977                 ms = slots->memslots + slotidx;
1978                 ofs = 0;
1979         }
1980         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1981         while ((slotidx > 0) && (ofs >= ms->npages)) {
1982                 slotidx--;
1983                 ms = slots->memslots + slotidx;
1984                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1985         }
1986         return ms->base_gfn + ofs;
1987 }
1988
1989 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1990                              u8 *res, unsigned long bufsize)
1991 {
1992         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1993         struct kvm_memslots *slots = kvm_memslots(kvm);
1994         struct kvm_memory_slot *ms;
1995
1996         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1997         ms = gfn_to_memslot(kvm, cur_gfn);
1998         args->count = 0;
1999         args->start_gfn = cur_gfn;
2000         if (!ms)
2001                 return 0;
2002         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2003         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2004
2005         while (args->count < bufsize) {
2006                 hva = gfn_to_hva(kvm, cur_gfn);
2007                 if (kvm_is_error_hva(hva))
2008                         return 0;
2009                 /* Decrement only if we actually flipped the bit to 0 */
2010                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2011                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2012                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2013                         pgstev = 0;
2014                 /* Save the value */
2015                 res[args->count++] = (pgstev >> 24) & 0x43;
2016                 /* If the next bit is too far away, stop. */
2017                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2018                         return 0;
2019                 /* If we reached the previous "next", find the next one */
2020                 if (cur_gfn == next_gfn)
2021                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2022                 /* Reached the end of memory or of the buffer, stop */
2023                 if ((next_gfn >= mem_end) ||
2024                     (next_gfn - args->start_gfn >= bufsize))
2025                         return 0;
2026                 cur_gfn++;
2027                 /* Reached the end of the current memslot, take the next one. */
2028                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2029                         ms = gfn_to_memslot(kvm, cur_gfn);
2030                         if (!ms)
2031                                 return 0;
2032                 }
2033         }
2034         return 0;
2035 }
2036
2037 /*
2038  * This function searches for the next page with dirty CMMA attributes, and
2039  * saves the attributes in the buffer up to either the end of the buffer or
2040  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2041  * no trailing clean bytes are saved.
2042  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2043  * output buffer will indicate 0 as length.
2044  */
2045 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2046                                   struct kvm_s390_cmma_log *args)
2047 {
2048         unsigned long bufsize;
2049         int srcu_idx, peek, ret;
2050         u8 *values;
2051
2052         if (!kvm->arch.use_cmma)
2053                 return -ENXIO;
2054         /* Invalid/unsupported flags were specified */
2055         if (args->flags & ~KVM_S390_CMMA_PEEK)
2056                 return -EINVAL;
2057         /* Migration mode query, and we are not doing a migration */
2058         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2059         if (!peek && !kvm->arch.migration_mode)
2060                 return -EINVAL;
2061         /* CMMA is disabled or was not used, or the buffer has length zero */
2062         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2063         if (!bufsize || !kvm->mm->context.uses_cmm) {
2064                 memset(args, 0, sizeof(*args));
2065                 return 0;
2066         }
2067         /* We are not peeking, and there are no dirty pages */
2068         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2069                 memset(args, 0, sizeof(*args));
2070                 return 0;
2071         }
2072
2073         values = vmalloc(bufsize);
2074         if (!values)
2075                 return -ENOMEM;
2076
2077         down_read(&kvm->mm->mmap_sem);
2078         srcu_idx = srcu_read_lock(&kvm->srcu);
2079         if (peek)
2080                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2081         else
2082                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2083         srcu_read_unlock(&kvm->srcu, srcu_idx);
2084         up_read(&kvm->mm->mmap_sem);
2085
2086         if (kvm->arch.migration_mode)
2087                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2088         else
2089                 args->remaining = 0;
2090
2091         if (copy_to_user((void __user *)args->values, values, args->count))
2092                 ret = -EFAULT;
2093
2094         vfree(values);
2095         return ret;
2096 }
2097
2098 /*
2099  * This function sets the CMMA attributes for the given pages. If the input
2100  * buffer has zero length, no action is taken, otherwise the attributes are
2101  * set and the mm->context.uses_cmm flag is set.
2102  */
2103 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2104                                   const struct kvm_s390_cmma_log *args)
2105 {
2106         unsigned long hva, mask, pgstev, i;
2107         uint8_t *bits;
2108         int srcu_idx, r = 0;
2109
2110         mask = args->mask;
2111
2112         if (!kvm->arch.use_cmma)
2113                 return -ENXIO;
2114         /* invalid/unsupported flags */
2115         if (args->flags != 0)
2116                 return -EINVAL;
2117         /* Enforce sane limit on memory allocation */
2118         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2119                 return -EINVAL;
2120         /* Nothing to do */
2121         if (args->count == 0)
2122                 return 0;
2123
2124         bits = vmalloc(array_size(sizeof(*bits), args->count));
2125         if (!bits)
2126                 return -ENOMEM;
2127
2128         r = copy_from_user(bits, (void __user *)args->values, args->count);
2129         if (r) {
2130                 r = -EFAULT;
2131                 goto out;
2132         }
2133
2134         down_read(&kvm->mm->mmap_sem);
2135         srcu_idx = srcu_read_lock(&kvm->srcu);
2136         for (i = 0; i < args->count; i++) {
2137                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2138                 if (kvm_is_error_hva(hva)) {
2139                         r = -EFAULT;
2140                         break;
2141                 }
2142
2143                 pgstev = bits[i];
2144                 pgstev = pgstev << 24;
2145                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2146                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2147         }
2148         srcu_read_unlock(&kvm->srcu, srcu_idx);
2149         up_read(&kvm->mm->mmap_sem);
2150
2151         if (!kvm->mm->context.uses_cmm) {
2152                 down_write(&kvm->mm->mmap_sem);
2153                 kvm->mm->context.uses_cmm = 1;
2154                 up_write(&kvm->mm->mmap_sem);
2155         }
2156 out:
2157         vfree(bits);
2158         return r;
2159 }
2160
2161 long kvm_arch_vm_ioctl(struct file *filp,
2162                        unsigned int ioctl, unsigned long arg)
2163 {
2164         struct kvm *kvm = filp->private_data;
2165         void __user *argp = (void __user *)arg;
2166         struct kvm_device_attr attr;
2167         int r;
2168
2169         switch (ioctl) {
2170         case KVM_S390_INTERRUPT: {
2171                 struct kvm_s390_interrupt s390int;
2172
2173                 r = -EFAULT;
2174                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2175                         break;
2176                 r = kvm_s390_inject_vm(kvm, &s390int);
2177                 break;
2178         }
2179         case KVM_CREATE_IRQCHIP: {
2180                 struct kvm_irq_routing_entry routing;
2181
2182                 r = -EINVAL;
2183                 if (kvm->arch.use_irqchip) {
2184                         /* Set up dummy routing. */
2185                         memset(&routing, 0, sizeof(routing));
2186                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2187                 }
2188                 break;
2189         }
2190         case KVM_SET_DEVICE_ATTR: {
2191                 r = -EFAULT;
2192                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2193                         break;
2194                 r = kvm_s390_vm_set_attr(kvm, &attr);
2195                 break;
2196         }
2197         case KVM_GET_DEVICE_ATTR: {
2198                 r = -EFAULT;
2199                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2200                         break;
2201                 r = kvm_s390_vm_get_attr(kvm, &attr);
2202                 break;
2203         }
2204         case KVM_HAS_DEVICE_ATTR: {
2205                 r = -EFAULT;
2206                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2207                         break;
2208                 r = kvm_s390_vm_has_attr(kvm, &attr);
2209                 break;
2210         }
2211         case KVM_S390_GET_SKEYS: {
2212                 struct kvm_s390_skeys args;
2213
2214                 r = -EFAULT;
2215                 if (copy_from_user(&args, argp,
2216                                    sizeof(struct kvm_s390_skeys)))
2217                         break;
2218                 r = kvm_s390_get_skeys(kvm, &args);
2219                 break;
2220         }
2221         case KVM_S390_SET_SKEYS: {
2222                 struct kvm_s390_skeys args;
2223
2224                 r = -EFAULT;
2225                 if (copy_from_user(&args, argp,
2226                                    sizeof(struct kvm_s390_skeys)))
2227                         break;
2228                 r = kvm_s390_set_skeys(kvm, &args);
2229                 break;
2230         }
2231         case KVM_S390_GET_CMMA_BITS: {
2232                 struct kvm_s390_cmma_log args;
2233
2234                 r = -EFAULT;
2235                 if (copy_from_user(&args, argp, sizeof(args)))
2236                         break;
2237                 mutex_lock(&kvm->slots_lock);
2238                 r = kvm_s390_get_cmma_bits(kvm, &args);
2239                 mutex_unlock(&kvm->slots_lock);
2240                 if (!r) {
2241                         r = copy_to_user(argp, &args, sizeof(args));
2242                         if (r)
2243                                 r = -EFAULT;
2244                 }
2245                 break;
2246         }
2247         case KVM_S390_SET_CMMA_BITS: {
2248                 struct kvm_s390_cmma_log args;
2249
2250                 r = -EFAULT;
2251                 if (copy_from_user(&args, argp, sizeof(args)))
2252                         break;
2253                 mutex_lock(&kvm->slots_lock);
2254                 r = kvm_s390_set_cmma_bits(kvm, &args);
2255                 mutex_unlock(&kvm->slots_lock);
2256                 break;
2257         }
2258         default:
2259                 r = -ENOTTY;
2260         }
2261
2262         return r;
2263 }
2264
2265 static int kvm_s390_apxa_installed(void)
2266 {
2267         struct ap_config_info info;
2268
2269         if (ap_instructions_available()) {
2270                 if (ap_qci(&info) == 0)
2271                         return info.apxa;
2272         }
2273
2274         return 0;
2275 }
2276
2277 /*
2278  * The format of the crypto control block (CRYCB) is specified in the 3 low
2279  * order bits of the CRYCB designation (CRYCBD) field as follows:
2280  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2281  *           AP extended addressing (APXA) facility are installed.
2282  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2283  * Format 2: Both the APXA and MSAX3 facilities are installed
2284  */
2285 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2286 {
2287         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2288
2289         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2290         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2291
2292         /* Check whether MSAX3 is installed */
2293         if (!test_kvm_facility(kvm, 76))
2294                 return;
2295
2296         if (kvm_s390_apxa_installed())
2297                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2298         else
2299                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2300 }
2301
2302 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2303                                unsigned long *aqm, unsigned long *adm)
2304 {
2305         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2306
2307         mutex_lock(&kvm->lock);
2308         kvm_s390_vcpu_block_all(kvm);
2309
2310         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2311         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2312                 memcpy(crycb->apcb1.apm, apm, 32);
2313                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2314                          apm[0], apm[1], apm[2], apm[3]);
2315                 memcpy(crycb->apcb1.aqm, aqm, 32);
2316                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2317                          aqm[0], aqm[1], aqm[2], aqm[3]);
2318                 memcpy(crycb->apcb1.adm, adm, 32);
2319                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2320                          adm[0], adm[1], adm[2], adm[3]);
2321                 break;
2322         case CRYCB_FORMAT1:
2323         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2324                 memcpy(crycb->apcb0.apm, apm, 8);
2325                 memcpy(crycb->apcb0.aqm, aqm, 2);
2326                 memcpy(crycb->apcb0.adm, adm, 2);
2327                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2328                          apm[0], *((unsigned short *)aqm),
2329                          *((unsigned short *)adm));
2330                 break;
2331         default:        /* Can not happen */
2332                 break;
2333         }
2334
2335         /* recreate the shadow crycb for each vcpu */
2336         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2337         kvm_s390_vcpu_unblock_all(kvm);
2338         mutex_unlock(&kvm->lock);
2339 }
2340 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2341
2342 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2343 {
2344         mutex_lock(&kvm->lock);
2345         kvm_s390_vcpu_block_all(kvm);
2346
2347         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2348                sizeof(kvm->arch.crypto.crycb->apcb0));
2349         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2350                sizeof(kvm->arch.crypto.crycb->apcb1));
2351
2352         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2353         /* recreate the shadow crycb for each vcpu */
2354         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2355         kvm_s390_vcpu_unblock_all(kvm);
2356         mutex_unlock(&kvm->lock);
2357 }
2358 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2359
2360 static u64 kvm_s390_get_initial_cpuid(void)
2361 {
2362         struct cpuid cpuid;
2363
2364         get_cpu_id(&cpuid);
2365         cpuid.version = 0xff;
2366         return *((u64 *) &cpuid);
2367 }
2368
2369 static void kvm_s390_crypto_init(struct kvm *kvm)
2370 {
2371         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2372         kvm_s390_set_crycb_format(kvm);
2373
2374         if (!test_kvm_facility(kvm, 76))
2375                 return;
2376
2377         /* Enable AES/DEA protected key functions by default */
2378         kvm->arch.crypto.aes_kw = 1;
2379         kvm->arch.crypto.dea_kw = 1;
2380         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2381                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2382         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2383                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2384 }
2385
2386 static void sca_dispose(struct kvm *kvm)
2387 {
2388         if (kvm->arch.use_esca)
2389                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2390         else
2391                 free_page((unsigned long)(kvm->arch.sca));
2392         kvm->arch.sca = NULL;
2393 }
2394
2395 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2396 {
2397         gfp_t alloc_flags = GFP_KERNEL;
2398         int i, rc;
2399         char debug_name[16];
2400         static unsigned long sca_offset;
2401
2402         rc = -EINVAL;
2403 #ifdef CONFIG_KVM_S390_UCONTROL
2404         if (type & ~KVM_VM_S390_UCONTROL)
2405                 goto out_err;
2406         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2407                 goto out_err;
2408 #else
2409         if (type)
2410                 goto out_err;
2411 #endif
2412
2413         rc = s390_enable_sie();
2414         if (rc)
2415                 goto out_err;
2416
2417         rc = -ENOMEM;
2418
2419         if (!sclp.has_64bscao)
2420                 alloc_flags |= GFP_DMA;
2421         rwlock_init(&kvm->arch.sca_lock);
2422         /* start with basic SCA */
2423         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2424         if (!kvm->arch.sca)
2425                 goto out_err;
2426         spin_lock(&kvm_lock);
2427         sca_offset += 16;
2428         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2429                 sca_offset = 0;
2430         kvm->arch.sca = (struct bsca_block *)
2431                         ((char *) kvm->arch.sca + sca_offset);
2432         spin_unlock(&kvm_lock);
2433
2434         sprintf(debug_name, "kvm-%u", current->pid);
2435
2436         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2437         if (!kvm->arch.dbf)
2438                 goto out_err;
2439
2440         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2441         kvm->arch.sie_page2 =
2442              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2443         if (!kvm->arch.sie_page2)
2444                 goto out_err;
2445
2446         kvm->arch.sie_page2->kvm = kvm;
2447         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2448
2449         for (i = 0; i < kvm_s390_fac_size(); i++) {
2450                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2451                                               (kvm_s390_fac_base[i] |
2452                                                kvm_s390_fac_ext[i]);
2453                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2454                                               kvm_s390_fac_base[i];
2455         }
2456         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2457
2458         /* we are always in czam mode - even on pre z14 machines */
2459         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2460         set_kvm_facility(kvm->arch.model.fac_list, 138);
2461         /* we emulate STHYI in kvm */
2462         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2463         set_kvm_facility(kvm->arch.model.fac_list, 74);
2464         if (MACHINE_HAS_TLB_GUEST) {
2465                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2466                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2467         }
2468
2469         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2470         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2471
2472         kvm_s390_crypto_init(kvm);
2473
2474         mutex_init(&kvm->arch.float_int.ais_lock);
2475         spin_lock_init(&kvm->arch.float_int.lock);
2476         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2477                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2478         init_waitqueue_head(&kvm->arch.ipte_wq);
2479         mutex_init(&kvm->arch.ipte_mutex);
2480
2481         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2482         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2483
2484         if (type & KVM_VM_S390_UCONTROL) {
2485                 kvm->arch.gmap = NULL;
2486                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2487         } else {
2488                 if (sclp.hamax == U64_MAX)
2489                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2490                 else
2491                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2492                                                     sclp.hamax + 1);
2493                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2494                 if (!kvm->arch.gmap)
2495                         goto out_err;
2496                 kvm->arch.gmap->private = kvm;
2497                 kvm->arch.gmap->pfault_enabled = 0;
2498         }
2499
2500         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2501         kvm->arch.use_skf = sclp.has_skey;
2502         spin_lock_init(&kvm->arch.start_stop_lock);
2503         kvm_s390_vsie_init(kvm);
2504         kvm_s390_gisa_init(kvm);
2505         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2506
2507         return 0;
2508 out_err:
2509         free_page((unsigned long)kvm->arch.sie_page2);
2510         debug_unregister(kvm->arch.dbf);
2511         sca_dispose(kvm);
2512         KVM_EVENT(3, "creation of vm failed: %d", rc);
2513         return rc;
2514 }
2515
2516 bool kvm_arch_has_vcpu_debugfs(void)
2517 {
2518         return false;
2519 }
2520
2521 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2522 {
2523         return 0;
2524 }
2525
2526 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2527 {
2528         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2529         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2530         kvm_s390_clear_local_irqs(vcpu);
2531         kvm_clear_async_pf_completion_queue(vcpu);
2532         if (!kvm_is_ucontrol(vcpu->kvm))
2533                 sca_del_vcpu(vcpu);
2534
2535         if (kvm_is_ucontrol(vcpu->kvm))
2536                 gmap_remove(vcpu->arch.gmap);
2537
2538         if (vcpu->kvm->arch.use_cmma)
2539                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2540         free_page((unsigned long)(vcpu->arch.sie_block));
2541
2542         kvm_vcpu_uninit(vcpu);
2543         kmem_cache_free(kvm_vcpu_cache, vcpu);
2544 }
2545
2546 static void kvm_free_vcpus(struct kvm *kvm)
2547 {
2548         unsigned int i;
2549         struct kvm_vcpu *vcpu;
2550
2551         kvm_for_each_vcpu(i, vcpu, kvm)
2552                 kvm_arch_vcpu_destroy(vcpu);
2553
2554         mutex_lock(&kvm->lock);
2555         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2556                 kvm->vcpus[i] = NULL;
2557
2558         atomic_set(&kvm->online_vcpus, 0);
2559         mutex_unlock(&kvm->lock);
2560 }
2561
2562 void kvm_arch_destroy_vm(struct kvm *kvm)
2563 {
2564         kvm_free_vcpus(kvm);
2565         sca_dispose(kvm);
2566         debug_unregister(kvm->arch.dbf);
2567         kvm_s390_gisa_destroy(kvm);
2568         free_page((unsigned long)kvm->arch.sie_page2);
2569         if (!kvm_is_ucontrol(kvm))
2570                 gmap_remove(kvm->arch.gmap);
2571         kvm_s390_destroy_adapters(kvm);
2572         kvm_s390_clear_float_irqs(kvm);
2573         kvm_s390_vsie_destroy(kvm);
2574         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2575 }
2576
2577 /* Section: vcpu related */
2578 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2579 {
2580         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2581         if (!vcpu->arch.gmap)
2582                 return -ENOMEM;
2583         vcpu->arch.gmap->private = vcpu->kvm;
2584
2585         return 0;
2586 }
2587
2588 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2589 {
2590         if (!kvm_s390_use_sca_entries())
2591                 return;
2592         read_lock(&vcpu->kvm->arch.sca_lock);
2593         if (vcpu->kvm->arch.use_esca) {
2594                 struct esca_block *sca = vcpu->kvm->arch.sca;
2595
2596                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2597                 sca->cpu[vcpu->vcpu_id].sda = 0;
2598         } else {
2599                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2600
2601                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2602                 sca->cpu[vcpu->vcpu_id].sda = 0;
2603         }
2604         read_unlock(&vcpu->kvm->arch.sca_lock);
2605 }
2606
2607 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2608 {
2609         if (!kvm_s390_use_sca_entries()) {
2610                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2611
2612                 /* we still need the basic sca for the ipte control */
2613                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2614                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2615                 return;
2616         }
2617         read_lock(&vcpu->kvm->arch.sca_lock);
2618         if (vcpu->kvm->arch.use_esca) {
2619                 struct esca_block *sca = vcpu->kvm->arch.sca;
2620
2621                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2622                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2623                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2624                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2625                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2626         } else {
2627                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2628
2629                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2630                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2631                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2632                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2633         }
2634         read_unlock(&vcpu->kvm->arch.sca_lock);
2635 }
2636
2637 /* Basic SCA to Extended SCA data copy routines */
2638 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2639 {
2640         d->sda = s->sda;
2641         d->sigp_ctrl.c = s->sigp_ctrl.c;
2642         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2643 }
2644
2645 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2646 {
2647         int i;
2648
2649         d->ipte_control = s->ipte_control;
2650         d->mcn[0] = s->mcn;
2651         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2652                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2653 }
2654
2655 static int sca_switch_to_extended(struct kvm *kvm)
2656 {
2657         struct bsca_block *old_sca = kvm->arch.sca;
2658         struct esca_block *new_sca;
2659         struct kvm_vcpu *vcpu;
2660         unsigned int vcpu_idx;
2661         u32 scaol, scaoh;
2662
2663         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2664         if (!new_sca)
2665                 return -ENOMEM;
2666
2667         scaoh = (u32)((u64)(new_sca) >> 32);
2668         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2669
2670         kvm_s390_vcpu_block_all(kvm);
2671         write_lock(&kvm->arch.sca_lock);
2672
2673         sca_copy_b_to_e(new_sca, old_sca);
2674
2675         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2676                 vcpu->arch.sie_block->scaoh = scaoh;
2677                 vcpu->arch.sie_block->scaol = scaol;
2678                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2679         }
2680         kvm->arch.sca = new_sca;
2681         kvm->arch.use_esca = 1;
2682
2683         write_unlock(&kvm->arch.sca_lock);
2684         kvm_s390_vcpu_unblock_all(kvm);
2685
2686         free_page((unsigned long)old_sca);
2687
2688         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2689                  old_sca, kvm->arch.sca);
2690         return 0;
2691 }
2692
2693 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2694 {
2695         int rc;
2696
2697         if (!kvm_s390_use_sca_entries()) {
2698                 if (id < KVM_MAX_VCPUS)
2699                         return true;
2700                 return false;
2701         }
2702         if (id < KVM_S390_BSCA_CPU_SLOTS)
2703                 return true;
2704         if (!sclp.has_esca || !sclp.has_64bscao)
2705                 return false;
2706
2707         mutex_lock(&kvm->lock);
2708         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2709         mutex_unlock(&kvm->lock);
2710
2711         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2712 }
2713
2714 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2715 {
2716         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2717         kvm_clear_async_pf_completion_queue(vcpu);
2718         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2719                                     KVM_SYNC_GPRS |
2720                                     KVM_SYNC_ACRS |
2721                                     KVM_SYNC_CRS |
2722                                     KVM_SYNC_ARCH0 |
2723                                     KVM_SYNC_PFAULT;
2724         kvm_s390_set_prefix(vcpu, 0);
2725         if (test_kvm_facility(vcpu->kvm, 64))
2726                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2727         if (test_kvm_facility(vcpu->kvm, 82))
2728                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2729         if (test_kvm_facility(vcpu->kvm, 133))
2730                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2731         if (test_kvm_facility(vcpu->kvm, 156))
2732                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2733         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2734          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2735          */
2736         if (MACHINE_HAS_VX)
2737                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2738         else
2739                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2740
2741         if (kvm_is_ucontrol(vcpu->kvm))
2742                 return __kvm_ucontrol_vcpu_init(vcpu);
2743
2744         return 0;
2745 }
2746
2747 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2748 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2749 {
2750         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2751         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2752         vcpu->arch.cputm_start = get_tod_clock_fast();
2753         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2754 }
2755
2756 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2757 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2758 {
2759         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2760         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2761         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2762         vcpu->arch.cputm_start = 0;
2763         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2764 }
2765
2766 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2767 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2768 {
2769         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2770         vcpu->arch.cputm_enabled = true;
2771         __start_cpu_timer_accounting(vcpu);
2772 }
2773
2774 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2775 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2776 {
2777         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2778         __stop_cpu_timer_accounting(vcpu);
2779         vcpu->arch.cputm_enabled = false;
2780 }
2781
2782 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2783 {
2784         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2785         __enable_cpu_timer_accounting(vcpu);
2786         preempt_enable();
2787 }
2788
2789 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2790 {
2791         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2792         __disable_cpu_timer_accounting(vcpu);
2793         preempt_enable();
2794 }
2795
2796 /* set the cpu timer - may only be called from the VCPU thread itself */
2797 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2798 {
2799         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2800         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2801         if (vcpu->arch.cputm_enabled)
2802                 vcpu->arch.cputm_start = get_tod_clock_fast();
2803         vcpu->arch.sie_block->cputm = cputm;
2804         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2805         preempt_enable();
2806 }
2807
2808 /* update and get the cpu timer - can also be called from other VCPU threads */
2809 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2810 {
2811         unsigned int seq;
2812         __u64 value;
2813
2814         if (unlikely(!vcpu->arch.cputm_enabled))
2815                 return vcpu->arch.sie_block->cputm;
2816
2817         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2818         do {
2819                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2820                 /*
2821                  * If the writer would ever execute a read in the critical
2822                  * section, e.g. in irq context, we have a deadlock.
2823                  */
2824                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2825                 value = vcpu->arch.sie_block->cputm;
2826                 /* if cputm_start is 0, accounting is being started/stopped */
2827                 if (likely(vcpu->arch.cputm_start))
2828                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2829         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2830         preempt_enable();
2831         return value;
2832 }
2833
2834 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2835 {
2836
2837         gmap_enable(vcpu->arch.enabled_gmap);
2838         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2839         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2840                 __start_cpu_timer_accounting(vcpu);
2841         vcpu->cpu = cpu;
2842 }
2843
2844 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2845 {
2846         vcpu->cpu = -1;
2847         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2848                 __stop_cpu_timer_accounting(vcpu);
2849         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2850         vcpu->arch.enabled_gmap = gmap_get_enabled();
2851         gmap_disable(vcpu->arch.enabled_gmap);
2852
2853 }
2854
2855 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2856 {
2857         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2858         vcpu->arch.sie_block->gpsw.mask = 0UL;
2859         vcpu->arch.sie_block->gpsw.addr = 0UL;
2860         kvm_s390_set_prefix(vcpu, 0);
2861         kvm_s390_set_cpu_timer(vcpu, 0);
2862         vcpu->arch.sie_block->ckc       = 0UL;
2863         vcpu->arch.sie_block->todpr     = 0;
2864         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2865         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2866                                         CR0_INTERRUPT_KEY_SUBMASK |
2867                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2868         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2869                                         CR14_UNUSED_33 |
2870                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2871         /* make sure the new fpc will be lazily loaded */
2872         save_fpu_regs();
2873         current->thread.fpu.fpc = 0;
2874         vcpu->arch.sie_block->gbea = 1;
2875         vcpu->arch.sie_block->pp = 0;
2876         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2877         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2878         kvm_clear_async_pf_completion_queue(vcpu);
2879         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2880                 kvm_s390_vcpu_stop(vcpu);
2881         kvm_s390_clear_local_irqs(vcpu);
2882 }
2883
2884 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2885 {
2886         mutex_lock(&vcpu->kvm->lock);
2887         preempt_disable();
2888         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2889         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2890         preempt_enable();
2891         mutex_unlock(&vcpu->kvm->lock);
2892         if (!kvm_is_ucontrol(vcpu->kvm)) {
2893                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2894                 sca_add_vcpu(vcpu);
2895         }
2896         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2897                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2898         /* make vcpu_load load the right gmap on the first trigger */
2899         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2900 }
2901
2902 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2903 {
2904         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2905             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2906                 return true;
2907         return false;
2908 }
2909
2910 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2911 {
2912         /* At least one ECC subfunction must be present */
2913         return kvm_has_pckmo_subfunc(kvm, 32) ||
2914                kvm_has_pckmo_subfunc(kvm, 33) ||
2915                kvm_has_pckmo_subfunc(kvm, 34) ||
2916                kvm_has_pckmo_subfunc(kvm, 40) ||
2917                kvm_has_pckmo_subfunc(kvm, 41);
2918
2919 }
2920
2921 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2922 {
2923         /*
2924          * If the AP instructions are not being interpreted and the MSAX3
2925          * facility is not configured for the guest, there is nothing to set up.
2926          */
2927         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2928                 return;
2929
2930         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2931         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2932         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2933         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2934
2935         if (vcpu->kvm->arch.crypto.apie)
2936                 vcpu->arch.sie_block->eca |= ECA_APIE;
2937
2938         /* Set up protected key support */
2939         if (vcpu->kvm->arch.crypto.aes_kw) {
2940                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2941                 /* ecc is also wrapped with AES key */
2942                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2943                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2944         }
2945
2946         if (vcpu->kvm->arch.crypto.dea_kw)
2947                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2948 }
2949
2950 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2951 {
2952         free_page(vcpu->arch.sie_block->cbrlo);
2953         vcpu->arch.sie_block->cbrlo = 0;
2954 }
2955
2956 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2957 {
2958         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2959         if (!vcpu->arch.sie_block->cbrlo)
2960                 return -ENOMEM;
2961         return 0;
2962 }
2963
2964 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2965 {
2966         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2967
2968         vcpu->arch.sie_block->ibc = model->ibc;
2969         if (test_kvm_facility(vcpu->kvm, 7))
2970                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2971 }
2972
2973 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2974 {
2975         int rc = 0;
2976
2977         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2978                                                     CPUSTAT_SM |
2979                                                     CPUSTAT_STOPPED);
2980
2981         if (test_kvm_facility(vcpu->kvm, 78))
2982                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2983         else if (test_kvm_facility(vcpu->kvm, 8))
2984                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2985
2986         kvm_s390_vcpu_setup_model(vcpu);
2987
2988         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2989         if (MACHINE_HAS_ESOP)
2990                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2991         if (test_kvm_facility(vcpu->kvm, 9))
2992                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2993         if (test_kvm_facility(vcpu->kvm, 73))
2994                 vcpu->arch.sie_block->ecb |= ECB_TE;
2995
2996         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2997                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2998         if (test_kvm_facility(vcpu->kvm, 130))
2999                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3000         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3001         if (sclp.has_cei)
3002                 vcpu->arch.sie_block->eca |= ECA_CEI;
3003         if (sclp.has_ib)
3004                 vcpu->arch.sie_block->eca |= ECA_IB;
3005         if (sclp.has_siif)
3006                 vcpu->arch.sie_block->eca |= ECA_SII;
3007         if (sclp.has_sigpif)
3008                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3009         if (test_kvm_facility(vcpu->kvm, 129)) {
3010                 vcpu->arch.sie_block->eca |= ECA_VX;
3011                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3012         }
3013         if (test_kvm_facility(vcpu->kvm, 139))
3014                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3015         if (test_kvm_facility(vcpu->kvm, 156))
3016                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3017         if (vcpu->arch.sie_block->gd) {
3018                 vcpu->arch.sie_block->eca |= ECA_AIV;
3019                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3020                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3021         }
3022         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3023                                         | SDNXC;
3024         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3025
3026         if (sclp.has_kss)
3027                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3028         else
3029                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3030
3031         if (vcpu->kvm->arch.use_cmma) {
3032                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3033                 if (rc)
3034                         return rc;
3035         }
3036         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3037         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3038
3039         vcpu->arch.sie_block->hpid = HPID_KVM;
3040
3041         kvm_s390_vcpu_crypto_setup(vcpu);
3042
3043         return rc;
3044 }
3045
3046 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3047                                       unsigned int id)
3048 {
3049         struct kvm_vcpu *vcpu;
3050         struct sie_page *sie_page;
3051         int rc = -EINVAL;
3052
3053         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3054                 goto out;
3055
3056         rc = -ENOMEM;
3057
3058         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3059         if (!vcpu)
3060                 goto out;
3061
3062         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3063         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3064         if (!sie_page)
3065                 goto out_free_cpu;
3066
3067         vcpu->arch.sie_block = &sie_page->sie_block;
3068         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3069
3070         /* the real guest size will always be smaller than msl */
3071         vcpu->arch.sie_block->mso = 0;
3072         vcpu->arch.sie_block->msl = sclp.hamax;
3073
3074         vcpu->arch.sie_block->icpua = id;
3075         spin_lock_init(&vcpu->arch.local_int.lock);
3076         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3077         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3078                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3079         seqcount_init(&vcpu->arch.cputm_seqcount);
3080
3081         rc = kvm_vcpu_init(vcpu, kvm, id);
3082         if (rc)
3083                 goto out_free_sie_block;
3084         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3085                  vcpu->arch.sie_block);
3086         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3087
3088         return vcpu;
3089 out_free_sie_block:
3090         free_page((unsigned long)(vcpu->arch.sie_block));
3091 out_free_cpu:
3092         kmem_cache_free(kvm_vcpu_cache, vcpu);
3093 out:
3094         return ERR_PTR(rc);
3095 }
3096
3097 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3098 {
3099         return kvm_s390_vcpu_has_irq(vcpu, 0);
3100 }
3101
3102 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3103 {
3104         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3105 }
3106
3107 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3108 {
3109         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3110         exit_sie(vcpu);
3111 }
3112
3113 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3114 {
3115         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3116 }
3117
3118 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3119 {
3120         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3121         exit_sie(vcpu);
3122 }
3123
3124 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3125 {
3126         return atomic_read(&vcpu->arch.sie_block->prog20) &
3127                (PROG_BLOCK_SIE | PROG_REQUEST);
3128 }
3129
3130 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3131 {
3132         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3133 }
3134
3135 /*
3136  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3137  * If the CPU is not running (e.g. waiting as idle) the function will
3138  * return immediately. */
3139 void exit_sie(struct kvm_vcpu *vcpu)
3140 {
3141         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3142         kvm_s390_vsie_kick(vcpu);
3143         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3144                 cpu_relax();
3145 }
3146
3147 /* Kick a guest cpu out of SIE to process a request synchronously */
3148 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3149 {
3150         kvm_make_request(req, vcpu);
3151         kvm_s390_vcpu_request(vcpu);
3152 }
3153
3154 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3155                               unsigned long end)
3156 {
3157         struct kvm *kvm = gmap->private;
3158         struct kvm_vcpu *vcpu;
3159         unsigned long prefix;
3160         int i;
3161
3162         if (gmap_is_shadow(gmap))
3163                 return;
3164         if (start >= 1UL << 31)
3165                 /* We are only interested in prefix pages */
3166                 return;
3167         kvm_for_each_vcpu(i, vcpu, kvm) {
3168                 /* match against both prefix pages */
3169                 prefix = kvm_s390_get_prefix(vcpu);
3170                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3171                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3172                                    start, end);
3173                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3174                 }
3175         }
3176 }
3177
3178 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3179 {
3180         /* do not poll with more than halt_poll_max_steal percent of steal time */
3181         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3182             halt_poll_max_steal) {
3183                 vcpu->stat.halt_no_poll_steal++;
3184                 return true;
3185         }
3186         return false;
3187 }
3188
3189 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3190 {
3191         /* kvm common code refers to this, but never calls it */
3192         BUG();
3193         return 0;
3194 }
3195
3196 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3197                                            struct kvm_one_reg *reg)
3198 {
3199         int r = -EINVAL;
3200
3201         switch (reg->id) {
3202         case KVM_REG_S390_TODPR:
3203                 r = put_user(vcpu->arch.sie_block->todpr,
3204                              (u32 __user *)reg->addr);
3205                 break;
3206         case KVM_REG_S390_EPOCHDIFF:
3207                 r = put_user(vcpu->arch.sie_block->epoch,
3208                              (u64 __user *)reg->addr);
3209                 break;
3210         case KVM_REG_S390_CPU_TIMER:
3211                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3212                              (u64 __user *)reg->addr);
3213                 break;
3214         case KVM_REG_S390_CLOCK_COMP:
3215                 r = put_user(vcpu->arch.sie_block->ckc,
3216                              (u64 __user *)reg->addr);
3217                 break;
3218         case KVM_REG_S390_PFTOKEN:
3219                 r = put_user(vcpu->arch.pfault_token,
3220                              (u64 __user *)reg->addr);
3221                 break;
3222         case KVM_REG_S390_PFCOMPARE:
3223                 r = put_user(vcpu->arch.pfault_compare,
3224                              (u64 __user *)reg->addr);
3225                 break;
3226         case KVM_REG_S390_PFSELECT:
3227                 r = put_user(vcpu->arch.pfault_select,
3228                              (u64 __user *)reg->addr);
3229                 break;
3230         case KVM_REG_S390_PP:
3231                 r = put_user(vcpu->arch.sie_block->pp,
3232                              (u64 __user *)reg->addr);
3233                 break;
3234         case KVM_REG_S390_GBEA:
3235                 r = put_user(vcpu->arch.sie_block->gbea,
3236                              (u64 __user *)reg->addr);
3237                 break;
3238         default:
3239                 break;
3240         }
3241
3242         return r;
3243 }
3244
3245 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3246                                            struct kvm_one_reg *reg)
3247 {
3248         int r = -EINVAL;
3249         __u64 val;
3250
3251         switch (reg->id) {
3252         case KVM_REG_S390_TODPR:
3253                 r = get_user(vcpu->arch.sie_block->todpr,
3254                              (u32 __user *)reg->addr);
3255                 break;
3256         case KVM_REG_S390_EPOCHDIFF:
3257                 r = get_user(vcpu->arch.sie_block->epoch,
3258                              (u64 __user *)reg->addr);
3259                 break;
3260         case KVM_REG_S390_CPU_TIMER:
3261                 r = get_user(val, (u64 __user *)reg->addr);
3262                 if (!r)
3263                         kvm_s390_set_cpu_timer(vcpu, val);
3264                 break;
3265         case KVM_REG_S390_CLOCK_COMP:
3266                 r = get_user(vcpu->arch.sie_block->ckc,
3267                              (u64 __user *)reg->addr);
3268                 break;
3269         case KVM_REG_S390_PFTOKEN:
3270                 r = get_user(vcpu->arch.pfault_token,
3271                              (u64 __user *)reg->addr);
3272                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3273                         kvm_clear_async_pf_completion_queue(vcpu);
3274                 break;
3275         case KVM_REG_S390_PFCOMPARE:
3276                 r = get_user(vcpu->arch.pfault_compare,
3277                              (u64 __user *)reg->addr);
3278                 break;
3279         case KVM_REG_S390_PFSELECT:
3280                 r = get_user(vcpu->arch.pfault_select,
3281                              (u64 __user *)reg->addr);
3282                 break;
3283         case KVM_REG_S390_PP:
3284                 r = get_user(vcpu->arch.sie_block->pp,
3285                              (u64 __user *)reg->addr);
3286                 break;
3287         case KVM_REG_S390_GBEA:
3288                 r = get_user(vcpu->arch.sie_block->gbea,
3289                              (u64 __user *)reg->addr);
3290                 break;
3291         default:
3292                 break;
3293         }
3294
3295         return r;
3296 }
3297
3298 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3299 {
3300         kvm_s390_vcpu_initial_reset(vcpu);
3301         return 0;
3302 }
3303
3304 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3305 {
3306         vcpu_load(vcpu);
3307         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3308         vcpu_put(vcpu);
3309         return 0;
3310 }
3311
3312 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3313 {
3314         vcpu_load(vcpu);
3315         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3316         vcpu_put(vcpu);
3317         return 0;
3318 }
3319
3320 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3321                                   struct kvm_sregs *sregs)
3322 {
3323         vcpu_load(vcpu);
3324
3325         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3326         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3327
3328         vcpu_put(vcpu);
3329         return 0;
3330 }
3331
3332 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3333                                   struct kvm_sregs *sregs)
3334 {
3335         vcpu_load(vcpu);
3336
3337         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3338         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3339
3340         vcpu_put(vcpu);
3341         return 0;
3342 }
3343
3344 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3345 {
3346         int ret = 0;
3347
3348         vcpu_load(vcpu);
3349
3350         if (test_fp_ctl(fpu->fpc)) {
3351                 ret = -EINVAL;
3352                 goto out;
3353         }
3354         vcpu->run->s.regs.fpc = fpu->fpc;
3355         if (MACHINE_HAS_VX)
3356                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3357                                  (freg_t *) fpu->fprs);
3358         else
3359                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3360
3361 out:
3362         vcpu_put(vcpu);
3363         return ret;
3364 }
3365
3366 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3367 {
3368         vcpu_load(vcpu);
3369
3370         /* make sure we have the latest values */
3371         save_fpu_regs();
3372         if (MACHINE_HAS_VX)
3373                 convert_vx_to_fp((freg_t *) fpu->fprs,
3374                                  (__vector128 *) vcpu->run->s.regs.vrs);
3375         else
3376                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3377         fpu->fpc = vcpu->run->s.regs.fpc;
3378
3379         vcpu_put(vcpu);
3380         return 0;
3381 }
3382
3383 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3384 {
3385         int rc = 0;
3386
3387         if (!is_vcpu_stopped(vcpu))
3388                 rc = -EBUSY;
3389         else {
3390                 vcpu->run->psw_mask = psw.mask;
3391                 vcpu->run->psw_addr = psw.addr;
3392         }
3393         return rc;
3394 }
3395
3396 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3397                                   struct kvm_translation *tr)
3398 {
3399         return -EINVAL; /* not implemented yet */
3400 }
3401
3402 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3403                               KVM_GUESTDBG_USE_HW_BP | \
3404                               KVM_GUESTDBG_ENABLE)
3405
3406 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3407                                         struct kvm_guest_debug *dbg)
3408 {
3409         int rc = 0;
3410
3411         vcpu_load(vcpu);
3412
3413         vcpu->guest_debug = 0;
3414         kvm_s390_clear_bp_data(vcpu);
3415
3416         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3417                 rc = -EINVAL;
3418                 goto out;
3419         }
3420         if (!sclp.has_gpere) {
3421                 rc = -EINVAL;
3422                 goto out;
3423         }
3424
3425         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3426                 vcpu->guest_debug = dbg->control;
3427                 /* enforce guest PER */
3428                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3429
3430                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3431                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3432         } else {
3433                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3434                 vcpu->arch.guestdbg.last_bp = 0;
3435         }
3436
3437         if (rc) {
3438                 vcpu->guest_debug = 0;
3439                 kvm_s390_clear_bp_data(vcpu);
3440                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3441         }
3442
3443 out:
3444         vcpu_put(vcpu);
3445         return rc;
3446 }
3447
3448 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3449                                     struct kvm_mp_state *mp_state)
3450 {
3451         int ret;
3452
3453         vcpu_load(vcpu);
3454
3455         /* CHECK_STOP and LOAD are not supported yet */
3456         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3457                                       KVM_MP_STATE_OPERATING;
3458
3459         vcpu_put(vcpu);
3460         return ret;
3461 }
3462
3463 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3464                                     struct kvm_mp_state *mp_state)
3465 {
3466         int rc = 0;
3467
3468         vcpu_load(vcpu);
3469
3470         /* user space knows about this interface - let it control the state */
3471         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3472
3473         switch (mp_state->mp_state) {
3474         case KVM_MP_STATE_STOPPED:
3475                 kvm_s390_vcpu_stop(vcpu);
3476                 break;
3477         case KVM_MP_STATE_OPERATING:
3478                 kvm_s390_vcpu_start(vcpu);
3479                 break;
3480         case KVM_MP_STATE_LOAD:
3481         case KVM_MP_STATE_CHECK_STOP:
3482                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3483         default:
3484                 rc = -ENXIO;
3485         }
3486
3487         vcpu_put(vcpu);
3488         return rc;
3489 }
3490
3491 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3492 {
3493         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3494 }
3495
3496 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3497 {
3498 retry:
3499         kvm_s390_vcpu_request_handled(vcpu);
3500         if (!kvm_request_pending(vcpu))
3501                 return 0;
3502         /*
3503          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3504          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3505          * This ensures that the ipte instruction for this request has
3506          * already finished. We might race against a second unmapper that
3507          * wants to set the blocking bit. Lets just retry the request loop.
3508          */
3509         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3510                 int rc;
3511                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3512                                           kvm_s390_get_prefix(vcpu),
3513                                           PAGE_SIZE * 2, PROT_WRITE);
3514                 if (rc) {
3515                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3516                         return rc;
3517                 }
3518                 goto retry;
3519         }
3520
3521         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3522                 vcpu->arch.sie_block->ihcpu = 0xffff;
3523                 goto retry;
3524         }
3525
3526         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3527                 if (!ibs_enabled(vcpu)) {
3528                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3529                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3530                 }
3531                 goto retry;
3532         }
3533
3534         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3535                 if (ibs_enabled(vcpu)) {
3536                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3537                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3538                 }
3539                 goto retry;
3540         }
3541
3542         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3543                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3544                 goto retry;
3545         }
3546
3547         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3548                 /*
3549                  * Disable CMM virtualization; we will emulate the ESSA
3550                  * instruction manually, in order to provide additional
3551                  * functionalities needed for live migration.
3552                  */
3553                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3554                 goto retry;
3555         }
3556
3557         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3558                 /*
3559                  * Re-enable CMM virtualization if CMMA is available and
3560                  * CMM has been used.
3561                  */
3562                 if ((vcpu->kvm->arch.use_cmma) &&
3563                     (vcpu->kvm->mm->context.uses_cmm))
3564                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3565                 goto retry;
3566         }
3567
3568         /* nothing to do, just clear the request */
3569         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3570         /* we left the vsie handler, nothing to do, just clear the request */
3571         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3572
3573         return 0;
3574 }
3575
3576 void kvm_s390_set_tod_clock(struct kvm *kvm,
3577                             const struct kvm_s390_vm_tod_clock *gtod)
3578 {
3579         struct kvm_vcpu *vcpu;
3580         struct kvm_s390_tod_clock_ext htod;
3581         int i;
3582
3583         mutex_lock(&kvm->lock);
3584         preempt_disable();
3585
3586         get_tod_clock_ext((char *)&htod);
3587
3588         kvm->arch.epoch = gtod->tod - htod.tod;
3589         kvm->arch.epdx = 0;
3590         if (test_kvm_facility(kvm, 139)) {
3591                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3592                 if (kvm->arch.epoch > gtod->tod)
3593                         kvm->arch.epdx -= 1;
3594         }
3595
3596         kvm_s390_vcpu_block_all(kvm);
3597         kvm_for_each_vcpu(i, vcpu, kvm) {
3598                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3599                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3600         }
3601
3602         kvm_s390_vcpu_unblock_all(kvm);
3603         preempt_enable();
3604         mutex_unlock(&kvm->lock);
3605 }
3606
3607 /**
3608  * kvm_arch_fault_in_page - fault-in guest page if necessary
3609  * @vcpu: The corresponding virtual cpu
3610  * @gpa: Guest physical address
3611  * @writable: Whether the page should be writable or not
3612  *
3613  * Make sure that a guest page has been faulted-in on the host.
3614  *
3615  * Return: Zero on success, negative error code otherwise.
3616  */
3617 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3618 {
3619         return gmap_fault(vcpu->arch.gmap, gpa,
3620                           writable ? FAULT_FLAG_WRITE : 0);
3621 }
3622
3623 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3624                                       unsigned long token)
3625 {
3626         struct kvm_s390_interrupt inti;
3627         struct kvm_s390_irq irq;
3628
3629         if (start_token) {
3630                 irq.u.ext.ext_params2 = token;
3631                 irq.type = KVM_S390_INT_PFAULT_INIT;
3632                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3633         } else {
3634                 inti.type = KVM_S390_INT_PFAULT_DONE;
3635                 inti.parm64 = token;
3636                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3637         }
3638 }
3639
3640 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3641                                      struct kvm_async_pf *work)
3642 {
3643         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3644         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3645 }
3646
3647 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3648                                  struct kvm_async_pf *work)
3649 {
3650         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3651         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3652 }
3653
3654 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3655                                struct kvm_async_pf *work)
3656 {
3657         /* s390 will always inject the page directly */
3658 }
3659
3660 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3661 {
3662         /*
3663          * s390 will always inject the page directly,
3664          * but we still want check_async_completion to cleanup
3665          */
3666         return true;
3667 }
3668
3669 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3670 {
3671         hva_t hva;
3672         struct kvm_arch_async_pf arch;
3673         int rc;
3674
3675         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3676                 return 0;
3677         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3678             vcpu->arch.pfault_compare)
3679                 return 0;
3680         if (psw_extint_disabled(vcpu))
3681                 return 0;
3682         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3683                 return 0;
3684         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3685                 return 0;
3686         if (!vcpu->arch.gmap->pfault_enabled)
3687                 return 0;
3688
3689         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3690         hva += current->thread.gmap_addr & ~PAGE_MASK;
3691         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3692                 return 0;
3693
3694         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3695         return rc;
3696 }
3697
3698 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3699 {
3700         int rc, cpuflags;
3701
3702         /*
3703          * On s390 notifications for arriving pages will be delivered directly
3704          * to the guest but the house keeping for completed pfaults is
3705          * handled outside the worker.
3706          */
3707         kvm_check_async_pf_completion(vcpu);
3708
3709         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3710         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3711
3712         if (need_resched())
3713                 schedule();
3714
3715         if (test_cpu_flag(CIF_MCCK_PENDING))
3716                 s390_handle_mcck();
3717
3718         if (!kvm_is_ucontrol(vcpu->kvm)) {
3719                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3720                 if (rc)
3721                         return rc;
3722         }
3723
3724         rc = kvm_s390_handle_requests(vcpu);
3725         if (rc)
3726                 return rc;
3727
3728         if (guestdbg_enabled(vcpu)) {
3729                 kvm_s390_backup_guest_per_regs(vcpu);
3730                 kvm_s390_patch_guest_per_regs(vcpu);
3731         }
3732
3733         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3734
3735         vcpu->arch.sie_block->icptcode = 0;
3736         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3737         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3738         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3739
3740         return 0;
3741 }
3742
3743 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3744 {
3745         struct kvm_s390_pgm_info pgm_info = {
3746                 .code = PGM_ADDRESSING,
3747         };
3748         u8 opcode, ilen;
3749         int rc;
3750
3751         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3752         trace_kvm_s390_sie_fault(vcpu);
3753
3754         /*
3755          * We want to inject an addressing exception, which is defined as a
3756          * suppressing or terminating exception. However, since we came here
3757          * by a DAT access exception, the PSW still points to the faulting
3758          * instruction since DAT exceptions are nullifying. So we've got
3759          * to look up the current opcode to get the length of the instruction
3760          * to be able to forward the PSW.
3761          */
3762         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3763         ilen = insn_length(opcode);
3764         if (rc < 0) {
3765                 return rc;
3766         } else if (rc) {
3767                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3768                  * Forward by arbitrary ilc, injection will take care of
3769                  * nullification if necessary.
3770                  */
3771                 pgm_info = vcpu->arch.pgm;
3772                 ilen = 4;
3773         }
3774         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3775         kvm_s390_forward_psw(vcpu, ilen);
3776         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3777 }
3778
3779 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3780 {
3781         struct mcck_volatile_info *mcck_info;
3782         struct sie_page *sie_page;
3783
3784         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3785                    vcpu->arch.sie_block->icptcode);
3786         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3787
3788         if (guestdbg_enabled(vcpu))
3789                 kvm_s390_restore_guest_per_regs(vcpu);
3790
3791         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3792         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3793
3794         if (exit_reason == -EINTR) {
3795                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3796                 sie_page = container_of(vcpu->arch.sie_block,
3797                                         struct sie_page, sie_block);
3798                 mcck_info = &sie_page->mcck_info;
3799                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3800                 return 0;
3801         }
3802
3803         if (vcpu->arch.sie_block->icptcode > 0) {
3804                 int rc = kvm_handle_sie_intercept(vcpu);
3805
3806                 if (rc != -EOPNOTSUPP)
3807                         return rc;
3808                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3809                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3810                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3811                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3812                 return -EREMOTE;
3813         } else if (exit_reason != -EFAULT) {
3814                 vcpu->stat.exit_null++;
3815                 return 0;
3816         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3817                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3818                 vcpu->run->s390_ucontrol.trans_exc_code =
3819                                                 current->thread.gmap_addr;
3820                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3821                 return -EREMOTE;
3822         } else if (current->thread.gmap_pfault) {
3823                 trace_kvm_s390_major_guest_pfault(vcpu);
3824                 current->thread.gmap_pfault = 0;
3825                 if (kvm_arch_setup_async_pf(vcpu))
3826                         return 0;
3827                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3828         }
3829         return vcpu_post_run_fault_in_sie(vcpu);
3830 }
3831
3832 static int __vcpu_run(struct kvm_vcpu *vcpu)
3833 {
3834         int rc, exit_reason;
3835
3836         /*
3837          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3838          * ning the guest), so that memslots (and other stuff) are protected
3839          */
3840         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3841
3842         do {
3843                 rc = vcpu_pre_run(vcpu);
3844                 if (rc)
3845                         break;
3846
3847                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3848                 /*
3849                  * As PF_VCPU will be used in fault handler, between
3850                  * guest_enter and guest_exit should be no uaccess.
3851                  */
3852                 local_irq_disable();
3853                 guest_enter_irqoff();
3854                 __disable_cpu_timer_accounting(vcpu);
3855                 local_irq_enable();
3856                 exit_reason = sie64a(vcpu->arch.sie_block,
3857                                      vcpu->run->s.regs.gprs);
3858                 local_irq_disable();
3859                 __enable_cpu_timer_accounting(vcpu);
3860                 guest_exit_irqoff();
3861                 local_irq_enable();
3862                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3863
3864                 rc = vcpu_post_run(vcpu, exit_reason);
3865         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3866
3867         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3868         return rc;
3869 }
3870
3871 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3872 {
3873         struct runtime_instr_cb *riccb;
3874         struct gs_cb *gscb;
3875
3876         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3877         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3878         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3879         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3880         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3881                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3882         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3883                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3884                 /* some control register changes require a tlb flush */
3885                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3886         }
3887         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3888                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3889                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3890                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3891                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3892                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3893         }
3894         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3895                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3896                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3897                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3898                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3899                         kvm_clear_async_pf_completion_queue(vcpu);
3900         }
3901         /*
3902          * If userspace sets the riccb (e.g. after migration) to a valid state,
3903          * we should enable RI here instead of doing the lazy enablement.
3904          */
3905         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3906             test_kvm_facility(vcpu->kvm, 64) &&
3907             riccb->v &&
3908             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3909                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3910                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3911         }
3912         /*
3913          * If userspace sets the gscb (e.g. after migration) to non-zero,
3914          * we should enable GS here instead of doing the lazy enablement.
3915          */
3916         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3917             test_kvm_facility(vcpu->kvm, 133) &&
3918             gscb->gssm &&
3919             !vcpu->arch.gs_enabled) {
3920                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3921                 vcpu->arch.sie_block->ecb |= ECB_GS;
3922                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3923                 vcpu->arch.gs_enabled = 1;
3924         }
3925         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3926             test_kvm_facility(vcpu->kvm, 82)) {
3927                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3928                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3929         }
3930         save_access_regs(vcpu->arch.host_acrs);
3931         restore_access_regs(vcpu->run->s.regs.acrs);
3932         /* save host (userspace) fprs/vrs */
3933         save_fpu_regs();
3934         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3935         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3936         if (MACHINE_HAS_VX)
3937                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3938         else
3939                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3940         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3941         if (test_fp_ctl(current->thread.fpu.fpc))
3942                 /* User space provided an invalid FPC, let's clear it */
3943                 current->thread.fpu.fpc = 0;
3944         if (MACHINE_HAS_GS) {
3945                 preempt_disable();
3946                 __ctl_set_bit(2, 4);
3947                 if (current->thread.gs_cb) {
3948                         vcpu->arch.host_gscb = current->thread.gs_cb;
3949                         save_gs_cb(vcpu->arch.host_gscb);
3950                 }
3951                 if (vcpu->arch.gs_enabled) {
3952                         current->thread.gs_cb = (struct gs_cb *)
3953                                                 &vcpu->run->s.regs.gscb;
3954                         restore_gs_cb(current->thread.gs_cb);
3955                 }
3956                 preempt_enable();
3957         }
3958         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3959
3960         kvm_run->kvm_dirty_regs = 0;
3961 }
3962
3963 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3964 {
3965         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3966         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3967         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3968         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3969         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3970         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3971         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3972         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3973         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3974         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3975         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3976         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3977         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3978         save_access_regs(vcpu->run->s.regs.acrs);
3979         restore_access_regs(vcpu->arch.host_acrs);
3980         /* Save guest register state */
3981         save_fpu_regs();
3982         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3983         /* Restore will be done lazily at return */
3984         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3985         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3986         if (MACHINE_HAS_GS) {
3987                 __ctl_set_bit(2, 4);
3988                 if (vcpu->arch.gs_enabled)
3989                         save_gs_cb(current->thread.gs_cb);
3990                 preempt_disable();
3991                 current->thread.gs_cb = vcpu->arch.host_gscb;
3992                 restore_gs_cb(vcpu->arch.host_gscb);
3993                 preempt_enable();
3994                 if (!vcpu->arch.host_gscb)
3995                         __ctl_clear_bit(2, 4);
3996                 vcpu->arch.host_gscb = NULL;
3997         }
3998         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3999 }
4000
4001 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4002 {
4003         int rc;
4004
4005         if (kvm_run->immediate_exit)
4006                 return -EINTR;
4007
4008         vcpu_load(vcpu);
4009
4010         if (guestdbg_exit_pending(vcpu)) {
4011                 kvm_s390_prepare_debug_exit(vcpu);
4012                 rc = 0;
4013                 goto out;
4014         }
4015
4016         kvm_sigset_activate(vcpu);
4017
4018         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4019                 kvm_s390_vcpu_start(vcpu);
4020         } else if (is_vcpu_stopped(vcpu)) {
4021                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4022                                    vcpu->vcpu_id);
4023                 rc = -EINVAL;
4024                 goto out;
4025         }
4026
4027         sync_regs(vcpu, kvm_run);
4028         enable_cpu_timer_accounting(vcpu);
4029
4030         might_fault();
4031         rc = __vcpu_run(vcpu);
4032
4033         if (signal_pending(current) && !rc) {
4034                 kvm_run->exit_reason = KVM_EXIT_INTR;
4035                 rc = -EINTR;
4036         }
4037
4038         if (guestdbg_exit_pending(vcpu) && !rc)  {
4039                 kvm_s390_prepare_debug_exit(vcpu);
4040                 rc = 0;
4041         }
4042
4043         if (rc == -EREMOTE) {
4044                 /* userspace support is needed, kvm_run has been prepared */
4045                 rc = 0;
4046         }
4047
4048         disable_cpu_timer_accounting(vcpu);
4049         store_regs(vcpu, kvm_run);
4050
4051         kvm_sigset_deactivate(vcpu);
4052
4053         vcpu->stat.exit_userspace++;
4054 out:
4055         vcpu_put(vcpu);
4056         return rc;
4057 }
4058
4059 /*
4060  * store status at address
4061  * we use have two special cases:
4062  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4063  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4064  */
4065 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4066 {
4067         unsigned char archmode = 1;
4068         freg_t fprs[NUM_FPRS];
4069         unsigned int px;
4070         u64 clkcomp, cputm;
4071         int rc;
4072
4073         px = kvm_s390_get_prefix(vcpu);
4074         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4075                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4076                         return -EFAULT;
4077                 gpa = 0;
4078         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4079                 if (write_guest_real(vcpu, 163, &archmode, 1))
4080                         return -EFAULT;
4081                 gpa = px;
4082         } else
4083                 gpa -= __LC_FPREGS_SAVE_AREA;
4084
4085         /* manually convert vector registers if necessary */
4086         if (MACHINE_HAS_VX) {
4087                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4088                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4089                                      fprs, 128);
4090         } else {
4091                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4092                                      vcpu->run->s.regs.fprs, 128);
4093         }
4094         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4095                               vcpu->run->s.regs.gprs, 128);
4096         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4097                               &vcpu->arch.sie_block->gpsw, 16);
4098         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4099                               &px, 4);
4100         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4101                               &vcpu->run->s.regs.fpc, 4);
4102         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4103                               &vcpu->arch.sie_block->todpr, 4);
4104         cputm = kvm_s390_get_cpu_timer(vcpu);
4105         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4106                               &cputm, 8);
4107         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4108         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4109                               &clkcomp, 8);
4110         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4111                               &vcpu->run->s.regs.acrs, 64);
4112         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4113                               &vcpu->arch.sie_block->gcr, 128);
4114         return rc ? -EFAULT : 0;
4115 }
4116
4117 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4118 {
4119         /*
4120          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4121          * switch in the run ioctl. Let's update our copies before we save
4122          * it into the save area
4123          */
4124         save_fpu_regs();
4125         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4126         save_access_regs(vcpu->run->s.regs.acrs);
4127
4128         return kvm_s390_store_status_unloaded(vcpu, addr);
4129 }
4130
4131 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4132 {
4133         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4134         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4135 }
4136
4137 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4138 {
4139         unsigned int i;
4140         struct kvm_vcpu *vcpu;
4141
4142         kvm_for_each_vcpu(i, vcpu, kvm) {
4143                 __disable_ibs_on_vcpu(vcpu);
4144         }
4145 }
4146
4147 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4148 {
4149         if (!sclp.has_ibs)
4150                 return;
4151         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4152         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4153 }
4154
4155 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4156 {
4157         int i, online_vcpus, started_vcpus = 0;
4158
4159         if (!is_vcpu_stopped(vcpu))
4160                 return;
4161
4162         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4163         /* Only one cpu at a time may enter/leave the STOPPED state. */
4164         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4165         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4166
4167         for (i = 0; i < online_vcpus; i++) {
4168                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4169                         started_vcpus++;
4170         }
4171
4172         if (started_vcpus == 0) {
4173                 /* we're the only active VCPU -> speed it up */
4174                 __enable_ibs_on_vcpu(vcpu);
4175         } else if (started_vcpus == 1) {
4176                 /*
4177                  * As we are starting a second VCPU, we have to disable
4178                  * the IBS facility on all VCPUs to remove potentially
4179                  * oustanding ENABLE requests.
4180                  */
4181                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4182         }
4183
4184         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4185         /*
4186          * Another VCPU might have used IBS while we were offline.
4187          * Let's play safe and flush the VCPU at startup.
4188          */
4189         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4190         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4191         return;
4192 }
4193
4194 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4195 {
4196         int i, online_vcpus, started_vcpus = 0;
4197         struct kvm_vcpu *started_vcpu = NULL;
4198
4199         if (is_vcpu_stopped(vcpu))
4200                 return;
4201
4202         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4203         /* Only one cpu at a time may enter/leave the STOPPED state. */
4204         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4205         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4206
4207         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4208         kvm_s390_clear_stop_irq(vcpu);
4209
4210         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4211         __disable_ibs_on_vcpu(vcpu);
4212
4213         for (i = 0; i < online_vcpus; i++) {
4214                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4215                         started_vcpus++;
4216                         started_vcpu = vcpu->kvm->vcpus[i];
4217                 }
4218         }
4219
4220         if (started_vcpus == 1) {
4221                 /*
4222                  * As we only have one VCPU left, we want to enable the
4223                  * IBS facility for that VCPU to speed it up.
4224                  */
4225                 __enable_ibs_on_vcpu(started_vcpu);
4226         }
4227
4228         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4229         return;
4230 }
4231
4232 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4233                                      struct kvm_enable_cap *cap)
4234 {
4235         int r;
4236
4237         if (cap->flags)
4238                 return -EINVAL;
4239
4240         switch (cap->cap) {
4241         case KVM_CAP_S390_CSS_SUPPORT:
4242                 if (!vcpu->kvm->arch.css_support) {
4243                         vcpu->kvm->arch.css_support = 1;
4244                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4245                         trace_kvm_s390_enable_css(vcpu->kvm);
4246                 }
4247                 r = 0;
4248                 break;
4249         default:
4250                 r = -EINVAL;
4251                 break;
4252         }
4253         return r;
4254 }
4255
4256 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4257                                   struct kvm_s390_mem_op *mop)
4258 {
4259         void __user *uaddr = (void __user *)mop->buf;
4260         void *tmpbuf = NULL;
4261         int r, srcu_idx;
4262         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4263                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4264
4265         if (mop->flags & ~supported_flags)
4266                 return -EINVAL;
4267
4268         if (mop->size > MEM_OP_MAX_SIZE)
4269                 return -E2BIG;
4270
4271         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4272                 tmpbuf = vmalloc(mop->size);
4273                 if (!tmpbuf)
4274                         return -ENOMEM;
4275         }
4276
4277         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4278
4279         switch (mop->op) {
4280         case KVM_S390_MEMOP_LOGICAL_READ:
4281                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4282                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4283                                             mop->size, GACC_FETCH);
4284                         break;
4285                 }
4286                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4287                 if (r == 0) {
4288                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4289                                 r = -EFAULT;
4290                 }
4291                 break;
4292         case KVM_S390_MEMOP_LOGICAL_WRITE:
4293                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4294                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4295                                             mop->size, GACC_STORE);
4296                         break;
4297                 }
4298                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4299                         r = -EFAULT;
4300                         break;
4301                 }
4302                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4303                 break;
4304         default:
4305                 r = -EINVAL;
4306         }
4307
4308         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4309
4310         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4311                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4312
4313         vfree(tmpbuf);
4314         return r;
4315 }
4316
4317 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4318                                unsigned int ioctl, unsigned long arg)
4319 {
4320         struct kvm_vcpu *vcpu = filp->private_data;
4321         void __user *argp = (void __user *)arg;
4322
4323         switch (ioctl) {
4324         case KVM_S390_IRQ: {
4325                 struct kvm_s390_irq s390irq;
4326
4327                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4328                         return -EFAULT;
4329                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4330         }
4331         case KVM_S390_INTERRUPT: {
4332                 struct kvm_s390_interrupt s390int;
4333                 struct kvm_s390_irq s390irq;
4334
4335                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4336                         return -EFAULT;
4337                 if (s390int_to_s390irq(&s390int, &s390irq))
4338                         return -EINVAL;
4339                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4340         }
4341         }
4342         return -ENOIOCTLCMD;
4343 }
4344
4345 long kvm_arch_vcpu_ioctl(struct file *filp,
4346                          unsigned int ioctl, unsigned long arg)
4347 {
4348         struct kvm_vcpu *vcpu = filp->private_data;
4349         void __user *argp = (void __user *)arg;
4350         int idx;
4351         long r;
4352
4353         vcpu_load(vcpu);
4354
4355         switch (ioctl) {
4356         case KVM_S390_STORE_STATUS:
4357                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4358                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4359                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4360                 break;
4361         case KVM_S390_SET_INITIAL_PSW: {
4362                 psw_t psw;
4363
4364                 r = -EFAULT;
4365                 if (copy_from_user(&psw, argp, sizeof(psw)))
4366                         break;
4367                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4368                 break;
4369         }
4370         case KVM_S390_INITIAL_RESET:
4371                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4372                 break;
4373         case KVM_SET_ONE_REG:
4374         case KVM_GET_ONE_REG: {
4375                 struct kvm_one_reg reg;
4376                 r = -EFAULT;
4377                 if (copy_from_user(&reg, argp, sizeof(reg)))
4378                         break;
4379                 if (ioctl == KVM_SET_ONE_REG)
4380                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4381                 else
4382                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4383                 break;
4384         }
4385 #ifdef CONFIG_KVM_S390_UCONTROL
4386         case KVM_S390_UCAS_MAP: {
4387                 struct kvm_s390_ucas_mapping ucasmap;
4388
4389                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4390                         r = -EFAULT;
4391                         break;
4392                 }
4393
4394                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4395                         r = -EINVAL;
4396                         break;
4397                 }
4398
4399                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4400                                      ucasmap.vcpu_addr, ucasmap.length);
4401                 break;
4402         }
4403         case KVM_S390_UCAS_UNMAP: {
4404                 struct kvm_s390_ucas_mapping ucasmap;
4405
4406                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4407                         r = -EFAULT;
4408                         break;
4409                 }
4410
4411                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4412                         r = -EINVAL;
4413                         break;
4414                 }
4415
4416                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4417                         ucasmap.length);
4418                 break;
4419         }
4420 #endif
4421         case KVM_S390_VCPU_FAULT: {
4422                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4423                 break;
4424         }
4425         case KVM_ENABLE_CAP:
4426         {
4427                 struct kvm_enable_cap cap;
4428                 r = -EFAULT;
4429                 if (copy_from_user(&cap, argp, sizeof(cap)))
4430                         break;
4431                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4432                 break;
4433         }
4434         case KVM_S390_MEM_OP: {
4435                 struct kvm_s390_mem_op mem_op;
4436
4437                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4438                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4439                 else
4440                         r = -EFAULT;
4441                 break;
4442         }
4443         case KVM_S390_SET_IRQ_STATE: {
4444                 struct kvm_s390_irq_state irq_state;
4445
4446                 r = -EFAULT;
4447                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4448                         break;
4449                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4450                     irq_state.len == 0 ||
4451                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4452                         r = -EINVAL;
4453                         break;
4454                 }
4455                 /* do not use irq_state.flags, it will break old QEMUs */
4456                 r = kvm_s390_set_irq_state(vcpu,
4457                                            (void __user *) irq_state.buf,
4458                                            irq_state.len);
4459                 break;
4460         }
4461         case KVM_S390_GET_IRQ_STATE: {
4462                 struct kvm_s390_irq_state irq_state;
4463
4464                 r = -EFAULT;
4465                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4466                         break;
4467                 if (irq_state.len == 0) {
4468                         r = -EINVAL;
4469                         break;
4470                 }
4471                 /* do not use irq_state.flags, it will break old QEMUs */
4472                 r = kvm_s390_get_irq_state(vcpu,
4473                                            (__u8 __user *)  irq_state.buf,
4474                                            irq_state.len);
4475                 break;
4476         }
4477         default:
4478                 r = -ENOTTY;
4479         }
4480
4481         vcpu_put(vcpu);
4482         return r;
4483 }
4484
4485 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4486 {
4487 #ifdef CONFIG_KVM_S390_UCONTROL
4488         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4489                  && (kvm_is_ucontrol(vcpu->kvm))) {
4490                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4491                 get_page(vmf->page);
4492                 return 0;
4493         }
4494 #endif
4495         return VM_FAULT_SIGBUS;
4496 }
4497
4498 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4499                             unsigned long npages)
4500 {
4501         return 0;
4502 }
4503
4504 /* Section: memory related */
4505 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4506                                    struct kvm_memory_slot *memslot,
4507                                    const struct kvm_userspace_memory_region *mem,
4508                                    enum kvm_mr_change change)
4509 {
4510         /* A few sanity checks. We can have memory slots which have to be
4511            located/ended at a segment boundary (1MB). The memory in userland is
4512            ok to be fragmented into various different vmas. It is okay to mmap()
4513            and munmap() stuff in this slot after doing this call at any time */
4514
4515         if (mem->userspace_addr & 0xffffful)
4516                 return -EINVAL;
4517
4518         if (mem->memory_size & 0xffffful)
4519                 return -EINVAL;
4520
4521         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4522                 return -EINVAL;
4523
4524         return 0;
4525 }
4526
4527 void kvm_arch_commit_memory_region(struct kvm *kvm,
4528                                 const struct kvm_userspace_memory_region *mem,
4529                                 const struct kvm_memory_slot *old,
4530                                 const struct kvm_memory_slot *new,
4531                                 enum kvm_mr_change change)
4532 {
4533         int rc = 0;
4534
4535         switch (change) {
4536         case KVM_MR_DELETE:
4537                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4538                                         old->npages * PAGE_SIZE);
4539                 break;
4540         case KVM_MR_MOVE:
4541                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4542                                         old->npages * PAGE_SIZE);
4543                 if (rc)
4544                         break;
4545                 /* FALLTHROUGH */
4546         case KVM_MR_CREATE:
4547                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4548                                       mem->guest_phys_addr, mem->memory_size);
4549                 break;
4550         case KVM_MR_FLAGS_ONLY:
4551                 break;
4552         default:
4553                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4554         }
4555         if (rc)
4556                 pr_warn("failed to commit memory region\n");
4557         return;
4558 }
4559
4560 static inline unsigned long nonhyp_mask(int i)
4561 {
4562         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4563
4564         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4565 }
4566
4567 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4568 {
4569         vcpu->valid_wakeup = false;
4570 }
4571
4572 static int __init kvm_s390_init(void)
4573 {
4574         int i;
4575
4576         if (!sclp.has_sief2) {
4577                 pr_info("SIE is not available\n");
4578                 return -ENODEV;
4579         }
4580
4581         if (nested && hpage) {
4582                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4583                 return -EINVAL;
4584         }
4585
4586         for (i = 0; i < 16; i++)
4587                 kvm_s390_fac_base[i] |=
4588                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4589
4590         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4591 }
4592
4593 static void __exit kvm_s390_exit(void)
4594 {
4595         kvm_exit();
4596 }
4597
4598 module_init(kvm_s390_init);
4599 module_exit(kvm_s390_exit);
4600
4601 /*
4602  * Enable autoloading of the kvm module.
4603  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4604  * since x86 takes a different approach.
4605  */
4606 #include <linux/miscdevice.h>
4607 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4608 MODULE_ALIAS("devname:kvm");