arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34
  35 #include <asm/asm-offsets.h>
  36 #include <asm/lowcore.h>
  37 #include <asm/stp.h>
  38 #include <asm/pgtable.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include "kvm-s390.h"
  47 #include "gaccess.h"
  48
  49 #define KMSG_COMPONENT "kvm-s390"
  50 #undef pr_fmt
  51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  52
  53 #define CREATE_TRACE_POINTS
  54 #include "trace.h"
  55 #include "trace-s390.h"
  56
  57 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  58 #define LOCAL_IRQS 32
  59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  60                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  61
  62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  63
  64 struct kvm_stats_debugfs_item debugfs_entries[] = {
  65         { "userspace_handled", VCPU_STAT(exit_userspace) },
  66         { "exit_null", VCPU_STAT(exit_null) },
  67         { "exit_validity", VCPU_STAT(exit_validity) },
  68         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  69         { "exit_external_request", VCPU_STAT(exit_external_request) },
  70         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  71         { "exit_instruction", VCPU_STAT(exit_instruction) },
  72         { "exit_pei", VCPU_STAT(exit_pei) },
  73         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  74         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  75         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  76         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  77         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  78         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  84         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  85         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  86         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  87         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  88         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  89         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  90         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  91         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  92         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  93         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  94         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  95         { "instruction_spx", VCPU_STAT(instruction_spx) },
  96         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  97         { "instruction_stap", VCPU_STAT(instruction_stap) },
  98         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  99         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 100         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
 101         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
 102         { "instruction_essa", VCPU_STAT(instruction_essa) },
 103         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 104         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 105         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 106         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 107         { "instruction_sie", VCPU_STAT(instruction_sie) },
 108         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 109         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 110         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 111         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 112         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 113         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 114         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 115         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 116         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 117         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 118         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 119         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 120         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 121         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 122         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 123         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 124         { "diagnose_10", VCPU_STAT(diagnose_10) },
 125         { "diagnose_44", VCPU_STAT(diagnose_44) },
 126         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 127         { "diagnose_258", VCPU_STAT(diagnose_258) },
 128         { "diagnose_308", VCPU_STAT(diagnose_308) },
 129         { "diagnose_500", VCPU_STAT(diagnose_500) },
 130         { NULL }
 131 };
 132
 133 /* allow nested virtualization in KVM (if enabled by user space) */
 134 static int nested;
 135 module_param(nested, int, S_IRUGO);
 136 MODULE_PARM_DESC(nested, "Nested virtualization support");
 137
 138 /* upper facilities limit for kvm */
 139 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 140
 141 unsigned long kvm_s390_fac_list_mask_size(void)
 142 {
 143         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 144         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 145 }
 146
 147 /* available cpu features supported by kvm */
 148 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 149 /* available subfunctions indicated via query / "test bit" */
 150 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 151
 152 static struct gmap_notifier gmap_notifier;
 153 static struct gmap_notifier vsie_gmap_notifier;
 154 debug_info_t *kvm_s390_dbf;
 155
 156 /* Section: not file related */
 157 int kvm_arch_hardware_enable(void)
 158 {
 159         /* every s390 is virtualization enabled ;-) */
 160         return 0;
 161 }
 162
 163 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 164                               unsigned long end);
 165
 166 /*
 167  * This callback is executed during stop_machine(). All CPUs are therefore
 168  * temporarily stopped. In order not to change guest behavior, we have to
 169  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 170  * so a CPU won't be stopped while calculating with the epoch.
 171  */
 172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 173                           void *v)
 174 {
 175         struct kvm *kvm;
 176         struct kvm_vcpu *vcpu;
 177         int i;
 178         unsigned long long *delta = v;
 179
 180         list_for_each_entry(kvm, &vm_list, vm_list) {
 181                 kvm->arch.epoch -= *delta;
 182                 kvm_for_each_vcpu(i, vcpu, kvm) {
 183                         vcpu->arch.sie_block->epoch -= *delta;
 184                         if (vcpu->arch.cputm_enabled)
 185                                 vcpu->arch.cputm_start += *delta;
 186                         if (vcpu->arch.vsie_block)
 187                                 vcpu->arch.vsie_block->epoch -= *delta;
 188                 }
 189         }
 190         return NOTIFY_OK;
 191 }
 192
 193 static struct notifier_block kvm_clock_notifier = {
 194         .notifier_call = kvm_clock_sync,
 195 };
 196
 197 int kvm_arch_hardware_setup(void)
 198 {
 199         gmap_notifier.notifier_call = kvm_gmap_notifier;
 200         gmap_register_pte_notifier(&gmap_notifier);
 201         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 202         gmap_register_pte_notifier(&vsie_gmap_notifier);
 203         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 204                                        &kvm_clock_notifier);
 205         return 0;
 206 }
 207
 208 void kvm_arch_hardware_unsetup(void)
 209 {
 210         gmap_unregister_pte_notifier(&gmap_notifier);
 211         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 212         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 213                                          &kvm_clock_notifier);
 214 }
 215
 216 static void allow_cpu_feat(unsigned long nr)
 217 {
 218         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 219 }
 220
 221 static inline int plo_test_bit(unsigned char nr)
 222 {
 223         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 224         int cc;
 225
 226         asm volatile(
 227                 /* Parameter registers are ignored for "test bit" */
 228                 "       plo     0,0,0,0(0)\n"
 229                 "       ipm     %0\n"
 230                 "       srl     %0,28\n"
 231                 : "=d" (cc)
 232                 : "d" (r0)
 233                 : "cc");
 234         return cc == 0;
 235 }
 236
 237 static void kvm_s390_cpu_feat_init(void)
 238 {
 239         int i;
 240
 241         for (i = 0; i < 256; ++i) {
 242                 if (plo_test_bit(i))
 243                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 244         }
 245
 246         if (test_facility(28)) /* TOD-clock steering */
 247                 ptff(kvm_s390_available_subfunc.ptff,
 248                      sizeof(kvm_s390_available_subfunc.ptff),
 249                      PTFF_QAF);
 250
 251         if (test_facility(17)) { /* MSA */
 252                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 253                               kvm_s390_available_subfunc.kmac);
 254                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 255                               kvm_s390_available_subfunc.kmc);
 256                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 257                               kvm_s390_available_subfunc.km);
 258                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 259                               kvm_s390_available_subfunc.kimd);
 260                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 261                               kvm_s390_available_subfunc.klmd);
 262         }
 263         if (test_facility(76)) /* MSA3 */
 264                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 265                               kvm_s390_available_subfunc.pckmo);
 266         if (test_facility(77)) { /* MSA4 */
 267                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 268                               kvm_s390_available_subfunc.kmctr);
 269                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 270                               kvm_s390_available_subfunc.kmf);
 271                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 272                               kvm_s390_available_subfunc.kmo);
 273                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 274                               kvm_s390_available_subfunc.pcc);
 275         }
 276         if (test_facility(57)) /* MSA5 */
 277                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 278                               kvm_s390_available_subfunc.ppno);
 279
 280         if (test_facility(146)) /* MSA8 */
 281                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 282                               kvm_s390_available_subfunc.kma);
 283
 284         if (MACHINE_HAS_ESOP)
 285                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 286         /*
 287          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 288          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 289          */
 290         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 291             !test_facility(3) || !nested)
 292                 return;
 293         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 294         if (sclp.has_64bscao)
 295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 296         if (sclp.has_siif)
 297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 298         if (sclp.has_gpere)
 299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 300         if (sclp.has_gsls)
 301                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 302         if (sclp.has_ib)
 303                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 304         if (sclp.has_cei)
 305                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 306         if (sclp.has_ibs)
 307                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 308         if (sclp.has_kss)
 309                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 310         /*
 311          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 312          * all skey handling functions read/set the skey from the PGSTE
 313          * instead of the real storage key.
 314          *
 315          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 316          * pages being detected as preserved although they are resident.
 317          *
 318          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 319          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 320          *
 321          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 322          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 323          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 324          *
 325          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 326          * cannot easily shadow the SCA because of the ipte lock.
 327          */
 328 }
 329
 330 int kvm_arch_init(void *opaque)
 331 {
 332         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 333         if (!kvm_s390_dbf)
 334                 return -ENOMEM;
 335
 336         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 337                 debug_unregister(kvm_s390_dbf);
 338                 return -ENOMEM;
 339         }
 340
 341         kvm_s390_cpu_feat_init();
 342
 343         /* Register floating interrupt controller interface. */
 344         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 345 }
 346
 347 void kvm_arch_exit(void)
 348 {
 349         debug_unregister(kvm_s390_dbf);
 350 }
 351
 352 /* Section: device related */
 353 long kvm_arch_dev_ioctl(struct file *filp,
 354                         unsigned int ioctl, unsigned long arg)
 355 {
 356         if (ioctl == KVM_S390_ENABLE_SIE)
 357                 return s390_enable_sie();
 358         return -EINVAL;
 359 }
 360
 361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 362 {
 363         int r;
 364
 365         switch (ext) {
 366         case KVM_CAP_S390_PSW:
 367         case KVM_CAP_S390_GMAP:
 368         case KVM_CAP_SYNC_MMU:
 369 #ifdef CONFIG_KVM_S390_UCONTROL
 370         case KVM_CAP_S390_UCONTROL:
 371 #endif
 372         case KVM_CAP_ASYNC_PF:
 373         case KVM_CAP_SYNC_REGS:
 374         case KVM_CAP_ONE_REG:
 375         case KVM_CAP_ENABLE_CAP:
 376         case KVM_CAP_S390_CSS_SUPPORT:
 377         case KVM_CAP_IOEVENTFD:
 378         case KVM_CAP_DEVICE_CTRL:
 379         case KVM_CAP_ENABLE_CAP_VM:
 380         case KVM_CAP_S390_IRQCHIP:
 381         case KVM_CAP_VM_ATTRIBUTES:
 382         case KVM_CAP_MP_STATE:
 383         case KVM_CAP_IMMEDIATE_EXIT:
 384         case KVM_CAP_S390_INJECT_IRQ:
 385         case KVM_CAP_S390_USER_SIGP:
 386         case KVM_CAP_S390_USER_STSI:
 387         case KVM_CAP_S390_SKEYS:
 388         case KVM_CAP_S390_IRQ_STATE:
 389         case KVM_CAP_S390_USER_INSTR0:
 390         case KVM_CAP_S390_CMMA_MIGRATION:
 391         case KVM_CAP_S390_AIS:
 392                 r = 1;
 393                 break;
 394         case KVM_CAP_S390_MEM_OP:
 395                 r = MEM_OP_MAX_SIZE;
 396                 break;
 397         case KVM_CAP_NR_VCPUS:
 398         case KVM_CAP_MAX_VCPUS:
 399                 r = KVM_S390_BSCA_CPU_SLOTS;
 400                 if (!kvm_s390_use_sca_entries())
 401                         r = KVM_MAX_VCPUS;
 402                 else if (sclp.has_esca && sclp.has_64bscao)
 403                         r = KVM_S390_ESCA_CPU_SLOTS;
 404                 break;
 405         case KVM_CAP_NR_MEMSLOTS:
 406                 r = KVM_USER_MEM_SLOTS;
 407                 break;
 408         case KVM_CAP_S390_COW:
 409                 r = MACHINE_HAS_ESOP;
 410                 break;
 411         case KVM_CAP_S390_VECTOR_REGISTERS:
 412                 r = MACHINE_HAS_VX;
 413                 break;
 414         case KVM_CAP_S390_RI:
 415                 r = test_facility(64);
 416                 break;
 417         case KVM_CAP_S390_GS:
 418                 r = test_facility(133);
 419                 break;
 420         default:
 421                 r = 0;
 422         }
 423         return r;
 424 }
 425
 426 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 427                                         struct kvm_memory_slot *memslot)
 428 {
 429         gfn_t cur_gfn, last_gfn;
 430         unsigned long address;
 431         struct gmap *gmap = kvm->arch.gmap;
 432
 433         /* Loop over all guest pages */
 434         last_gfn = memslot->base_gfn + memslot->npages;
 435         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 436                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 437
 438                 if (test_and_clear_guest_dirty(gmap->mm, address))
 439                         mark_page_dirty(kvm, cur_gfn);
 440                 if (fatal_signal_pending(current))
 441                         return;
 442                 cond_resched();
 443         }
 444 }
 445
 446 /* Section: vm related */
 447 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 448
 449 /*
 450  * Get (and clear) the dirty memory log for a memory slot.
 451  */
 452 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 453                                struct kvm_dirty_log *log)
 454 {
 455         int r;
 456         unsigned long n;
 457         struct kvm_memslots *slots;
 458         struct kvm_memory_slot *memslot;
 459         int is_dirty = 0;
 460
 461         if (kvm_is_ucontrol(kvm))
 462                 return -EINVAL;
 463
 464         mutex_lock(&kvm->slots_lock);
 465
 466         r = -EINVAL;
 467         if (log->slot >= KVM_USER_MEM_SLOTS)
 468                 goto out;
 469
 470         slots = kvm_memslots(kvm);
 471         memslot = id_to_memslot(slots, log->slot);
 472         r = -ENOENT;
 473         if (!memslot->dirty_bitmap)
 474                 goto out;
 475
 476         kvm_s390_sync_dirty_log(kvm, memslot);
 477         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 478         if (r)
 479                 goto out;
 480
 481         /* Clear the dirty log */
 482         if (is_dirty) {
 483                 n = kvm_dirty_bitmap_bytes(memslot);
 484                 memset(memslot->dirty_bitmap, 0, n);
 485         }
 486         r = 0;
 487 out:
 488         mutex_unlock(&kvm->slots_lock);
 489         return r;
 490 }
 491
 492 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 493 {
 494         unsigned int i;
 495         struct kvm_vcpu *vcpu;
 496
 497         kvm_for_each_vcpu(i, vcpu, kvm) {
 498                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 499         }
 500 }
 501
 502 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 503 {
 504         int r;
 505
 506         if (cap->flags)
 507                 return -EINVAL;
 508
 509         switch (cap->cap) {
 510         case KVM_CAP_S390_IRQCHIP:
 511                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 512                 kvm->arch.use_irqchip = 1;
 513                 r = 0;
 514                 break;
 515         case KVM_CAP_S390_USER_SIGP:
 516                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 517                 kvm->arch.user_sigp = 1;
 518                 r = 0;
 519                 break;
 520         case KVM_CAP_S390_VECTOR_REGISTERS:
 521                 mutex_lock(&kvm->lock);
 522                 if (kvm->created_vcpus) {
 523                         r = -EBUSY;
 524                 } else if (MACHINE_HAS_VX) {
 525                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 526                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 527                         if (test_facility(134)) {
 528                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 529                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 530                         }
 531                         if (test_facility(135)) {
 532                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 533                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 534                         }
 535                         r = 0;
 536                 } else
 537                         r = -EINVAL;
 538                 mutex_unlock(&kvm->lock);
 539                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 540                          r ? "(not available)" : "(success)");
 541                 break;
 542         case KVM_CAP_S390_RI:
 543                 r = -EINVAL;
 544                 mutex_lock(&kvm->lock);
 545                 if (kvm->created_vcpus) {
 546                         r = -EBUSY;
 547                 } else if (test_facility(64)) {
 548                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 549                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 550                         r = 0;
 551                 }
 552                 mutex_unlock(&kvm->lock);
 553                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 554                          r ? "(not available)" : "(success)");
 555                 break;
 556         case KVM_CAP_S390_AIS:
 557                 mutex_lock(&kvm->lock);
 558                 if (kvm->created_vcpus) {
 559                         r = -EBUSY;
 560                 } else {
 561                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 562                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 563                         r = 0;
 564                 }
 565                 mutex_unlock(&kvm->lock);
 566                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 567                          r ? "(not available)" : "(success)");
 568                 break;
 569         case KVM_CAP_S390_GS:
 570                 r = -EINVAL;
 571                 mutex_lock(&kvm->lock);
 572                 if (atomic_read(&kvm->online_vcpus)) {
 573                         r = -EBUSY;
 574                 } else if (test_facility(133)) {
 575                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 576                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 577                         r = 0;
 578                 }
 579                 mutex_unlock(&kvm->lock);
 580                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 581                          r ? "(not available)" : "(success)");
 582                 break;
 583         case KVM_CAP_S390_USER_STSI:
 584                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 585                 kvm->arch.user_stsi = 1;
 586                 r = 0;
 587                 break;
 588         case KVM_CAP_S390_USER_INSTR0:
 589                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 590                 kvm->arch.user_instr0 = 1;
 591                 icpt_operexc_on_all_vcpus(kvm);
 592                 r = 0;
 593                 break;
 594         default:
 595                 r = -EINVAL;
 596                 break;
 597         }
 598         return r;
 599 }
 600
 601 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 602 {
 603         int ret;
 604
 605         switch (attr->attr) {
 606         case KVM_S390_VM_MEM_LIMIT_SIZE:
 607                 ret = 0;
 608                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 609                          kvm->arch.mem_limit);
 610                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 611                         ret = -EFAULT;
 612                 break;
 613         default:
 614                 ret = -ENXIO;
 615                 break;
 616         }
 617         return ret;
 618 }
 619
 620 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 621 {
 622         int ret;
 623         unsigned int idx;
 624         switch (attr->attr) {
 625         case KVM_S390_VM_MEM_ENABLE_CMMA:
 626                 ret = -ENXIO;
 627                 if (!sclp.has_cmma)
 628                         break;
 629
 630                 ret = -EBUSY;
 631                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 632                 mutex_lock(&kvm->lock);
 633                 if (!kvm->created_vcpus) {
 634                         kvm->arch.use_cmma = 1;
 635                         ret = 0;
 636                 }
 637                 mutex_unlock(&kvm->lock);
 638                 break;
 639         case KVM_S390_VM_MEM_CLR_CMMA:
 640                 ret = -ENXIO;
 641                 if (!sclp.has_cmma)
 642                         break;
 643                 ret = -EINVAL;
 644                 if (!kvm->arch.use_cmma)
 645                         break;
 646
 647                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 648                 mutex_lock(&kvm->lock);
 649                 idx = srcu_read_lock(&kvm->srcu);
 650                 s390_reset_cmma(kvm->arch.gmap->mm);
 651                 srcu_read_unlock(&kvm->srcu, idx);
 652                 mutex_unlock(&kvm->lock);
 653                 ret = 0;
 654                 break;
 655         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 656                 unsigned long new_limit;
 657
 658                 if (kvm_is_ucontrol(kvm))
 659                         return -EINVAL;
 660
 661                 if (get_user(new_limit, (u64 __user *)attr->addr))
 662                         return -EFAULT;
 663
 664                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 665                     new_limit > kvm->arch.mem_limit)
 666                         return -E2BIG;
 667
 668                 if (!new_limit)
 669                         return -EINVAL;
 670
 671                 /* gmap_create takes last usable address */
 672                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 673                         new_limit -= 1;
 674
 675                 ret = -EBUSY;
 676                 mutex_lock(&kvm->lock);
 677                 if (!kvm->created_vcpus) {
 678                         /* gmap_create will round the limit up */
 679                         struct gmap *new = gmap_create(current->mm, new_limit);
 680
 681                         if (!new) {
 682                                 ret = -ENOMEM;
 683                         } else {
 684                                 gmap_remove(kvm->arch.gmap);
 685                                 new->private = kvm;
 686                                 kvm->arch.gmap = new;
 687                                 ret = 0;
 688                         }
 689                 }
 690                 mutex_unlock(&kvm->lock);
 691                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 692                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 693                          (void *) kvm->arch.gmap->asce);
 694                 break;
 695         }
 696         default:
 697                 ret = -ENXIO;
 698                 break;
 699         }
 700         return ret;
 701 }
 702
 703 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 704
 705 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 706 {
 707         struct kvm_vcpu *vcpu;
 708         int i;
 709
 710         if (!test_kvm_facility(kvm, 76))
 711                 return -EINVAL;
 712
 713         mutex_lock(&kvm->lock);
 714         switch (attr->attr) {
 715         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 716                 get_random_bytes(
 717                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 718                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 719                 kvm->arch.crypto.aes_kw = 1;
 720                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 721                 break;
 722         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 723                 get_random_bytes(
 724                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 725                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 726                 kvm->arch.crypto.dea_kw = 1;
 727                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 728                 break;
 729         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 730                 kvm->arch.crypto.aes_kw = 0;
 731                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 732                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 733                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 734                 break;
 735         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 736                 kvm->arch.crypto.dea_kw = 0;
 737                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 738                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 739                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 740                 break;
 741         default:
 742                 mutex_unlock(&kvm->lock);
 743                 return -ENXIO;
 744         }
 745
 746         kvm_for_each_vcpu(i, vcpu, kvm) {
 747                 kvm_s390_vcpu_crypto_setup(vcpu);
 748                 exit_sie(vcpu);
 749         }
 750         mutex_unlock(&kvm->lock);
 751         return 0;
 752 }
 753
 754 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 755 {
 756         int cx;
 757         struct kvm_vcpu *vcpu;
 758
 759         kvm_for_each_vcpu(cx, vcpu, kvm)
 760                 kvm_s390_sync_request(req, vcpu);
 761 }
 762
 763 /*
 764  * Must be called with kvm->srcu held to avoid races on memslots, and with
 765  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 766  */
 767 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 768 {
 769         struct kvm_s390_migration_state *mgs;
 770         struct kvm_memory_slot *ms;
 771         /* should be the only one */
 772         struct kvm_memslots *slots;
 773         unsigned long ram_pages;
 774         int slotnr;
 775
 776         /* migration mode already enabled */
 777         if (kvm->arch.migration_state)
 778                 return 0;
 779
 780         slots = kvm_memslots(kvm);
 781         if (!slots || !slots->used_slots)
 782                 return -EINVAL;
 783
 784         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
 785         if (!mgs)
 786                 return -ENOMEM;
 787         kvm->arch.migration_state = mgs;
 788
 789         if (kvm->arch.use_cmma) {
 790                 /*
 791                  * Get the last slot. They should be sorted by base_gfn, so the
 792                  * last slot is also the one at the end of the address space.
 793                  * We have verified above that at least one slot is present.
 794                  */
 795                 ms = slots->memslots + slots->used_slots - 1;
 796                 /* round up so we only use full longs */
 797                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 798                 /* allocate enough bytes to store all the bits */
 799                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
 800                 if (!mgs->pgste_bitmap) {
 801                         kfree(mgs);
 802                         kvm->arch.migration_state = NULL;
 803                         return -ENOMEM;
 804                 }
 805
 806                 mgs->bitmap_size = ram_pages;
 807                 atomic64_set(&mgs->dirty_pages, ram_pages);
 808                 /* mark all the pages in active slots as dirty */
 809                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
 810                         ms = slots->memslots + slotnr;
 811                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
 812                 }
 813
 814                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 815         }
 816         return 0;
 817 }
 818
 819 /*
 820  * Must be called with kvm->lock to avoid races with ourselves and
 821  * kvm_s390_vm_start_migration.
 822  */
 823 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 824 {
 825         struct kvm_s390_migration_state *mgs;
 826
 827         /* migration mode already disabled */
 828         if (!kvm->arch.migration_state)
 829                 return 0;
 830         mgs = kvm->arch.migration_state;
 831         kvm->arch.migration_state = NULL;
 832
 833         if (kvm->arch.use_cmma) {
 834                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
 835                 vfree(mgs->pgste_bitmap);
 836         }
 837         kfree(mgs);
 838         return 0;
 839 }
 840
 841 static int kvm_s390_vm_set_migration(struct kvm *kvm,
 842                                      struct kvm_device_attr *attr)
 843 {
 844         int idx, res = -ENXIO;
 845
 846         mutex_lock(&kvm->lock);
 847         switch (attr->attr) {
 848         case KVM_S390_VM_MIGRATION_START:
 849                 idx = srcu_read_lock(&kvm->srcu);
 850                 res = kvm_s390_vm_start_migration(kvm);
 851                 srcu_read_unlock(&kvm->srcu, idx);
 852                 break;
 853         case KVM_S390_VM_MIGRATION_STOP:
 854                 res = kvm_s390_vm_stop_migration(kvm);
 855                 break;
 856         default:
 857                 break;
 858         }
 859         mutex_unlock(&kvm->lock);
 860
 861         return res;
 862 }
 863
 864 static int kvm_s390_vm_get_migration(struct kvm *kvm,
 865                                      struct kvm_device_attr *attr)
 866 {
 867         u64 mig = (kvm->arch.migration_state != NULL);
 868
 869         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 870                 return -ENXIO;
 871
 872         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
 873                 return -EFAULT;
 874         return 0;
 875 }
 876
 877 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 878 {
 879         u8 gtod_high;
 880
 881         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 882                                            sizeof(gtod_high)))
 883                 return -EFAULT;
 884
 885         if (gtod_high != 0)
 886                 return -EINVAL;
 887         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 888
 889         return 0;
 890 }
 891
 892 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 893 {
 894         u64 gtod;
 895
 896         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 897                 return -EFAULT;
 898
 899         kvm_s390_set_tod_clock(kvm, gtod);
 900         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 901         return 0;
 902 }
 903
 904 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 905 {
 906         int ret;
 907
 908         if (attr->flags)
 909                 return -EINVAL;
 910
 911         switch (attr->attr) {
 912         case KVM_S390_VM_TOD_HIGH:
 913                 ret = kvm_s390_set_tod_high(kvm, attr);
 914                 break;
 915         case KVM_S390_VM_TOD_LOW:
 916                 ret = kvm_s390_set_tod_low(kvm, attr);
 917                 break;
 918         default:
 919                 ret = -ENXIO;
 920                 break;
 921         }
 922         return ret;
 923 }
 924
 925 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 926 {
 927         u8 gtod_high = 0;
 928
 929         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 930                                          sizeof(gtod_high)))
 931                 return -EFAULT;
 932         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 933
 934         return 0;
 935 }
 936
 937 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 938 {
 939         u64 gtod;
 940
 941         gtod = kvm_s390_get_tod_clock_fast(kvm);
 942         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 943                 return -EFAULT;
 944         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 945
 946         return 0;
 947 }
 948
 949 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 950 {
 951         int ret;
 952
 953         if (attr->flags)
 954                 return -EINVAL;
 955
 956         switch (attr->attr) {
 957         case KVM_S390_VM_TOD_HIGH:
 958                 ret = kvm_s390_get_tod_high(kvm, attr);
 959                 break;
 960         case KVM_S390_VM_TOD_LOW:
 961                 ret = kvm_s390_get_tod_low(kvm, attr);
 962                 break;
 963         default:
 964                 ret = -ENXIO;
 965                 break;
 966         }
 967         return ret;
 968 }
 969
 970 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 971 {
 972         struct kvm_s390_vm_cpu_processor *proc;
 973         u16 lowest_ibc, unblocked_ibc;
 974         int ret = 0;
 975
 976         mutex_lock(&kvm->lock);
 977         if (kvm->created_vcpus) {
 978                 ret = -EBUSY;
 979                 goto out;
 980         }
 981         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 982         if (!proc) {
 983                 ret = -ENOMEM;
 984                 goto out;
 985         }
 986         if (!copy_from_user(proc, (void __user *)attr->addr,
 987                             sizeof(*proc))) {
 988                 kvm->arch.model.cpuid = proc->cpuid;
 989                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 990                 unblocked_ibc = sclp.ibc & 0xfff;
 991                 if (lowest_ibc && proc->ibc) {
 992                         if (proc->ibc > unblocked_ibc)
 993                                 kvm->arch.model.ibc = unblocked_ibc;
 994                         else if (proc->ibc < lowest_ibc)
 995                                 kvm->arch.model.ibc = lowest_ibc;
 996                         else
 997                                 kvm->arch.model.ibc = proc->ibc;
 998                 }
 999                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1000                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1001                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1002                          kvm->arch.model.ibc,
1003                          kvm->arch.model.cpuid);
1004                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1005                          kvm->arch.model.fac_list[0],
1006                          kvm->arch.model.fac_list[1],
1007                          kvm->arch.model.fac_list[2]);
1008         } else
1009                 ret = -EFAULT;
1010         kfree(proc);
1011 out:
1012         mutex_unlock(&kvm->lock);
1013         return ret;
1014 }
1015
1016 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1017                                        struct kvm_device_attr *attr)
1018 {
1019         struct kvm_s390_vm_cpu_feat data;
1020         int ret = -EBUSY;
1021
1022         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1023                 return -EFAULT;
1024         if (!bitmap_subset((unsigned long *) data.feat,
1025                            kvm_s390_available_cpu_feat,
1026                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1027                 return -EINVAL;
1028
1029         mutex_lock(&kvm->lock);
1030         if (!atomic_read(&kvm->online_vcpus)) {
1031                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1032                             KVM_S390_VM_CPU_FEAT_NR_BITS);
1033                 ret = 0;
1034         }
1035         mutex_unlock(&kvm->lock);
1036         return ret;
1037 }
1038
1039 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1040                                           struct kvm_device_attr *attr)
1041 {
1042         /*
1043          * Once supported by kernel + hw, we have to store the subfunctions
1044          * in kvm->arch and remember that user space configured them.
1045          */
1046         return -ENXIO;
1047 }
1048
1049 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1050 {
1051         int ret = -ENXIO;
1052
1053         switch (attr->attr) {
1054         case KVM_S390_VM_CPU_PROCESSOR:
1055                 ret = kvm_s390_set_processor(kvm, attr);
1056                 break;
1057         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1058                 ret = kvm_s390_set_processor_feat(kvm, attr);
1059                 break;
1060         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1061                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1062                 break;
1063         }
1064         return ret;
1065 }
1066
1067 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1068 {
1069         struct kvm_s390_vm_cpu_processor *proc;
1070         int ret = 0;
1071
1072         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1073         if (!proc) {
1074                 ret = -ENOMEM;
1075                 goto out;
1076         }
1077         proc->cpuid = kvm->arch.model.cpuid;
1078         proc->ibc = kvm->arch.model.ibc;
1079         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1080                S390_ARCH_FAC_LIST_SIZE_BYTE);
1081         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082                  kvm->arch.model.ibc,
1083                  kvm->arch.model.cpuid);
1084         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085                  kvm->arch.model.fac_list[0],
1086                  kvm->arch.model.fac_list[1],
1087                  kvm->arch.model.fac_list[2]);
1088         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1089                 ret = -EFAULT;
1090         kfree(proc);
1091 out:
1092         return ret;
1093 }
1094
1095 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097         struct kvm_s390_vm_cpu_machine *mach;
1098         int ret = 0;
1099
1100         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1101         if (!mach) {
1102                 ret = -ENOMEM;
1103                 goto out;
1104         }
1105         get_cpu_id((struct cpuid *) &mach->cpuid);
1106         mach->ibc = sclp.ibc;
1107         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1108                S390_ARCH_FAC_LIST_SIZE_BYTE);
1109         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1110                sizeof(S390_lowcore.stfle_fac_list));
1111         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1112                  kvm->arch.model.ibc,
1113                  kvm->arch.model.cpuid);
1114         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1115                  mach->fac_mask[0],
1116                  mach->fac_mask[1],
1117                  mach->fac_mask[2]);
1118         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1119                  mach->fac_list[0],
1120                  mach->fac_list[1],
1121                  mach->fac_list[2]);
1122         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1123                 ret = -EFAULT;
1124         kfree(mach);
1125 out:
1126         return ret;
1127 }
1128
1129 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1130                                        struct kvm_device_attr *attr)
1131 {
1132         struct kvm_s390_vm_cpu_feat data;
1133
1134         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1135                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1136         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1137                 return -EFAULT;
1138         return 0;
1139 }
1140
1141 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1142                                      struct kvm_device_attr *attr)
1143 {
1144         struct kvm_s390_vm_cpu_feat data;
1145
1146         bitmap_copy((unsigned long *) data.feat,
1147                     kvm_s390_available_cpu_feat,
1148                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1149         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1150                 return -EFAULT;
1151         return 0;
1152 }
1153
1154 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1155                                           struct kvm_device_attr *attr)
1156 {
1157         /*
1158          * Once we can actually configure subfunctions (kernel + hw support),
1159          * we have to check if they were already set by user space, if so copy
1160          * them from kvm->arch.
1161          */
1162         return -ENXIO;
1163 }
1164
1165 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1166                                         struct kvm_device_attr *attr)
1167 {
1168         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1169             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1170                 return -EFAULT;
1171         return 0;
1172 }
1173 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175         int ret = -ENXIO;
1176
1177         switch (attr->attr) {
1178         case KVM_S390_VM_CPU_PROCESSOR:
1179                 ret = kvm_s390_get_processor(kvm, attr);
1180                 break;
1181         case KVM_S390_VM_CPU_MACHINE:
1182                 ret = kvm_s390_get_machine(kvm, attr);
1183                 break;
1184         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1185                 ret = kvm_s390_get_processor_feat(kvm, attr);
1186                 break;
1187         case KVM_S390_VM_CPU_MACHINE_FEAT:
1188                 ret = kvm_s390_get_machine_feat(kvm, attr);
1189                 break;
1190         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1191                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1192                 break;
1193         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1194                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1195                 break;
1196         }
1197         return ret;
1198 }
1199
1200 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202         int ret;
1203
1204         switch (attr->group) {
1205         case KVM_S390_VM_MEM_CTRL:
1206                 ret = kvm_s390_set_mem_control(kvm, attr);
1207                 break;
1208         case KVM_S390_VM_TOD:
1209                 ret = kvm_s390_set_tod(kvm, attr);
1210                 break;
1211         case KVM_S390_VM_CPU_MODEL:
1212                 ret = kvm_s390_set_cpu_model(kvm, attr);
1213                 break;
1214         case KVM_S390_VM_CRYPTO:
1215                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1216                 break;
1217         case KVM_S390_VM_MIGRATION:
1218                 ret = kvm_s390_vm_set_migration(kvm, attr);
1219                 break;
1220         default:
1221                 ret = -ENXIO;
1222                 break;
1223         }
1224
1225         return ret;
1226 }
1227
1228 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230         int ret;
1231
1232         switch (attr->group) {
1233         case KVM_S390_VM_MEM_CTRL:
1234                 ret = kvm_s390_get_mem_control(kvm, attr);
1235                 break;
1236         case KVM_S390_VM_TOD:
1237                 ret = kvm_s390_get_tod(kvm, attr);
1238                 break;
1239         case KVM_S390_VM_CPU_MODEL:
1240                 ret = kvm_s390_get_cpu_model(kvm, attr);
1241                 break;
1242         case KVM_S390_VM_MIGRATION:
1243                 ret = kvm_s390_vm_get_migration(kvm, attr);
1244                 break;
1245         default:
1246                 ret = -ENXIO;
1247                 break;
1248         }
1249
1250         return ret;
1251 }
1252
1253 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1254 {
1255         int ret;
1256
1257         switch (attr->group) {
1258         case KVM_S390_VM_MEM_CTRL:
1259                 switch (attr->attr) {
1260                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1261                 case KVM_S390_VM_MEM_CLR_CMMA:
1262                         ret = sclp.has_cmma ? 0 : -ENXIO;
1263                         break;
1264                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1265                         ret = 0;
1266                         break;
1267                 default:
1268                         ret = -ENXIO;
1269                         break;
1270                 }
1271                 break;
1272         case KVM_S390_VM_TOD:
1273                 switch (attr->attr) {
1274                 case KVM_S390_VM_TOD_LOW:
1275                 case KVM_S390_VM_TOD_HIGH:
1276                         ret = 0;
1277                         break;
1278                 default:
1279                         ret = -ENXIO;
1280                         break;
1281                 }
1282                 break;
1283         case KVM_S390_VM_CPU_MODEL:
1284                 switch (attr->attr) {
1285                 case KVM_S390_VM_CPU_PROCESSOR:
1286                 case KVM_S390_VM_CPU_MACHINE:
1287                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1288                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1289                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1290                         ret = 0;
1291                         break;
1292                 /* configuring subfunctions is not supported yet */
1293                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1294                 default:
1295                         ret = -ENXIO;
1296                         break;
1297                 }
1298                 break;
1299         case KVM_S390_VM_CRYPTO:
1300                 switch (attr->attr) {
1301                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1302                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1303                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1304                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1305                         ret = 0;
1306                         break;
1307                 default:
1308                         ret = -ENXIO;
1309                         break;
1310                 }
1311                 break;
1312         case KVM_S390_VM_MIGRATION:
1313                 ret = 0;
1314                 break;
1315         default:
1316                 ret = -ENXIO;
1317                 break;
1318         }
1319
1320         return ret;
1321 }
1322
1323 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1324 {
1325         uint8_t *keys;
1326         uint64_t hva;
1327         int i, r = 0;
1328
1329         if (args->flags != 0)
1330                 return -EINVAL;
1331
1332         /* Is this guest using storage keys? */
1333         if (!mm_use_skey(current->mm))
1334                 return KVM_S390_GET_SKEYS_NONE;
1335
1336         /* Enforce sane limit on memory allocation */
1337         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1338                 return -EINVAL;
1339
1340         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1341         if (!keys)
1342                 return -ENOMEM;
1343
1344         down_read(&current->mm->mmap_sem);
1345         for (i = 0; i < args->count; i++) {
1346                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1347                 if (kvm_is_error_hva(hva)) {
1348                         r = -EFAULT;
1349                         break;
1350                 }
1351
1352                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1353                 if (r)
1354                         break;
1355         }
1356         up_read(&current->mm->mmap_sem);
1357
1358         if (!r) {
1359                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1360                                  sizeof(uint8_t) * args->count);
1361                 if (r)
1362                         r = -EFAULT;
1363         }
1364
1365         kvfree(keys);
1366         return r;
1367 }
1368
1369 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1370 {
1371         uint8_t *keys;
1372         uint64_t hva;
1373         int i, r = 0;
1374
1375         if (args->flags != 0)
1376                 return -EINVAL;
1377
1378         /* Enforce sane limit on memory allocation */
1379         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1380                 return -EINVAL;
1381
1382         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1383         if (!keys)
1384                 return -ENOMEM;
1385
1386         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1387                            sizeof(uint8_t) * args->count);
1388         if (r) {
1389                 r = -EFAULT;
1390                 goto out;
1391         }
1392
1393         /* Enable storage key handling for the guest */
1394         r = s390_enable_skey();
1395         if (r)
1396                 goto out;
1397
1398         down_read(&current->mm->mmap_sem);
1399         for (i = 0; i < args->count; i++) {
1400                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1401                 if (kvm_is_error_hva(hva)) {
1402                         r = -EFAULT;
1403                         break;
1404                 }
1405
1406                 /* Lowest order bit is reserved */
1407                 if (keys[i] & 0x01) {
1408                         r = -EINVAL;
1409                         break;
1410                 }
1411
1412                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1413                 if (r)
1414                         break;
1415         }
1416         up_read(&current->mm->mmap_sem);
1417 out:
1418         kvfree(keys);
1419         return r;
1420 }
1421
1422 /*
1423  * Base address and length must be sent at the start of each block, therefore
1424  * it's cheaper to send some clean data, as long as it's less than the size of
1425  * two longs.
1426  */
1427 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1428 /* for consistency */
1429 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1430
1431 /*
1432  * This function searches for the next page with dirty CMMA attributes, and
1433  * saves the attributes in the buffer up to either the end of the buffer or
1434  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1435  * no trailing clean bytes are saved.
1436  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1437  * output buffer will indicate 0 as length.
1438  */
1439 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1440                                   struct kvm_s390_cmma_log *args)
1441 {
1442         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1443         unsigned long bufsize, hva, pgstev, i, next, cur;
1444         int srcu_idx, peek, r = 0, rr;
1445         u8 *res;
1446
1447         cur = args->start_gfn;
1448         i = next = pgstev = 0;
1449
1450         if (unlikely(!kvm->arch.use_cmma))
1451                 return -ENXIO;
1452         /* Invalid/unsupported flags were specified */
1453         if (args->flags & ~KVM_S390_CMMA_PEEK)
1454                 return -EINVAL;
1455         /* Migration mode query, and we are not doing a migration */
1456         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1457         if (!peek && !s)
1458                 return -EINVAL;
1459         /* CMMA is disabled or was not used, or the buffer has length zero */
1460         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1461         if (!bufsize || !kvm->mm->context.use_cmma) {
1462                 memset(args, 0, sizeof(*args));
1463                 return 0;
1464         }
1465
1466         if (!peek) {
1467                 /* We are not peeking, and there are no dirty pages */
1468                 if (!atomic64_read(&s->dirty_pages)) {
1469                         memset(args, 0, sizeof(*args));
1470                         return 0;
1471                 }
1472                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1473                                     args->start_gfn);
1474                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1475                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1476                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1477                         memset(args, 0, sizeof(*args));
1478                         return 0;
1479                 }
1480                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1481         }
1482
1483         res = vmalloc(bufsize);
1484         if (!res)
1485                 return -ENOMEM;
1486
1487         args->start_gfn = cur;
1488
1489         down_read(&kvm->mm->mmap_sem);
1490         srcu_idx = srcu_read_lock(&kvm->srcu);
1491         while (i < bufsize) {
1492                 hva = gfn_to_hva(kvm, cur);
1493                 if (kvm_is_error_hva(hva)) {
1494                         r = -EFAULT;
1495                         break;
1496                 }
1497                 /* decrement only if we actually flipped the bit to 0 */
1498                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1499                         atomic64_dec(&s->dirty_pages);
1500                 r = get_pgste(kvm->mm, hva, &pgstev);
1501                 if (r < 0)
1502                         pgstev = 0;
1503                 /* save the value */
1504                 res[i++] = (pgstev >> 24) & 0x3;
1505                 /*
1506                  * if the next bit is too far away, stop.
1507                  * if we reached the previous "next", find the next one
1508                  */
1509                 if (!peek) {
1510                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1511                                 break;
1512                         if (cur == next)
1513                                 next = find_next_bit(s->pgste_bitmap,
1514                                                      s->bitmap_size, cur + 1);
1515                 /* reached the end of the bitmap or of the buffer, stop */
1516                         if ((next >= s->bitmap_size) ||
1517                             (next >= args->start_gfn + bufsize))
1518                                 break;
1519                 }
1520                 cur++;
1521         }
1522         srcu_read_unlock(&kvm->srcu, srcu_idx);
1523         up_read(&kvm->mm->mmap_sem);
1524         args->count = i;
1525         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1526
1527         rr = copy_to_user((void __user *)args->values, res, args->count);
1528         if (rr)
1529                 r = -EFAULT;
1530
1531         vfree(res);
1532         return r;
1533 }
1534
1535 /*
1536  * This function sets the CMMA attributes for the given pages. If the input
1537  * buffer has zero length, no action is taken, otherwise the attributes are
1538  * set and the mm->context.use_cmma flag is set.
1539  */
1540 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1541                                   const struct kvm_s390_cmma_log *args)
1542 {
1543         unsigned long hva, mask, pgstev, i;
1544         uint8_t *bits;
1545         int srcu_idx, r = 0;
1546
1547         mask = args->mask;
1548
1549         if (!kvm->arch.use_cmma)
1550                 return -ENXIO;
1551         /* invalid/unsupported flags */
1552         if (args->flags != 0)
1553                 return -EINVAL;
1554         /* Enforce sane limit on memory allocation */
1555         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1556                 return -EINVAL;
1557         /* Nothing to do */
1558         if (args->count == 0)
1559                 return 0;
1560
1561         bits = vmalloc(sizeof(*bits) * args->count);
1562         if (!bits)
1563                 return -ENOMEM;
1564
1565         r = copy_from_user(bits, (void __user *)args->values, args->count);
1566         if (r) {
1567                 r = -EFAULT;
1568                 goto out;
1569         }
1570
1571         down_read(&kvm->mm->mmap_sem);
1572         srcu_idx = srcu_read_lock(&kvm->srcu);
1573         for (i = 0; i < args->count; i++) {
1574                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1575                 if (kvm_is_error_hva(hva)) {
1576                         r = -EFAULT;
1577                         break;
1578                 }
1579
1580                 pgstev = bits[i];
1581                 pgstev = pgstev << 24;
1582                 mask &= _PGSTE_GPS_USAGE_MASK;
1583                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1584         }
1585         srcu_read_unlock(&kvm->srcu, srcu_idx);
1586         up_read(&kvm->mm->mmap_sem);
1587
1588         if (!kvm->mm->context.use_cmma) {
1589                 down_write(&kvm->mm->mmap_sem);
1590                 kvm->mm->context.use_cmma = 1;
1591                 up_write(&kvm->mm->mmap_sem);
1592         }
1593 out:
1594         vfree(bits);
1595         return r;
1596 }
1597
1598 long kvm_arch_vm_ioctl(struct file *filp,
1599                        unsigned int ioctl, unsigned long arg)
1600 {
1601         struct kvm *kvm = filp->private_data;
1602         void __user *argp = (void __user *)arg;
1603         struct kvm_device_attr attr;
1604         int r;
1605
1606         switch (ioctl) {
1607         case KVM_S390_INTERRUPT: {
1608                 struct kvm_s390_interrupt s390int;
1609
1610                 r = -EFAULT;
1611                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1612                         break;
1613                 r = kvm_s390_inject_vm(kvm, &s390int);
1614                 break;
1615         }
1616         case KVM_ENABLE_CAP: {
1617                 struct kvm_enable_cap cap;
1618                 r = -EFAULT;
1619                 if (copy_from_user(&cap, argp, sizeof(cap)))
1620                         break;
1621                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1622                 break;
1623         }
1624         case KVM_CREATE_IRQCHIP: {
1625                 struct kvm_irq_routing_entry routing;
1626
1627                 r = -EINVAL;
1628                 if (kvm->arch.use_irqchip) {
1629                         /* Set up dummy routing. */
1630                         memset(&routing, 0, sizeof(routing));
1631                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1632                 }
1633                 break;
1634         }
1635         case KVM_SET_DEVICE_ATTR: {
1636                 r = -EFAULT;
1637                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1638                         break;
1639                 r = kvm_s390_vm_set_attr(kvm, &attr);
1640                 break;
1641         }
1642         case KVM_GET_DEVICE_ATTR: {
1643                 r = -EFAULT;
1644                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1645                         break;
1646                 r = kvm_s390_vm_get_attr(kvm, &attr);
1647                 break;
1648         }
1649         case KVM_HAS_DEVICE_ATTR: {
1650                 r = -EFAULT;
1651                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1652                         break;
1653                 r = kvm_s390_vm_has_attr(kvm, &attr);
1654                 break;
1655         }
1656         case KVM_S390_GET_SKEYS: {
1657                 struct kvm_s390_skeys args;
1658
1659                 r = -EFAULT;
1660                 if (copy_from_user(&args, argp,
1661                                    sizeof(struct kvm_s390_skeys)))
1662                         break;
1663                 r = kvm_s390_get_skeys(kvm, &args);
1664                 break;
1665         }
1666         case KVM_S390_SET_SKEYS: {
1667                 struct kvm_s390_skeys args;
1668
1669                 r = -EFAULT;
1670                 if (copy_from_user(&args, argp,
1671                                    sizeof(struct kvm_s390_skeys)))
1672                         break;
1673                 r = kvm_s390_set_skeys(kvm, &args);
1674                 break;
1675         }
1676         case KVM_S390_GET_CMMA_BITS: {
1677                 struct kvm_s390_cmma_log args;
1678
1679                 r = -EFAULT;
1680                 if (copy_from_user(&args, argp, sizeof(args)))
1681                         break;
1682                 r = kvm_s390_get_cmma_bits(kvm, &args);
1683                 if (!r) {
1684                         r = copy_to_user(argp, &args, sizeof(args));
1685                         if (r)
1686                                 r = -EFAULT;
1687                 }
1688                 break;
1689         }
1690         case KVM_S390_SET_CMMA_BITS: {
1691                 struct kvm_s390_cmma_log args;
1692
1693                 r = -EFAULT;
1694                 if (copy_from_user(&args, argp, sizeof(args)))
1695                         break;
1696                 r = kvm_s390_set_cmma_bits(kvm, &args);
1697                 break;
1698         }
1699         default:
1700                 r = -ENOTTY;
1701         }
1702
1703         return r;
1704 }
1705
1706 static int kvm_s390_query_ap_config(u8 *config)
1707 {
1708         u32 fcn_code = 0x04000000UL;
1709         u32 cc = 0;
1710
1711         memset(config, 0, 128);
1712         asm volatile(
1713                 "lgr 0,%1\n"
1714                 "lgr 2,%2\n"
1715                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1716                 "0: ipm %0\n"
1717                 "srl %0,28\n"
1718                 "1:\n"
1719                 EX_TABLE(0b, 1b)
1720                 : "+r" (cc)
1721                 : "r" (fcn_code), "r" (config)
1722                 : "cc", "0", "2", "memory"
1723         );
1724
1725         return cc;
1726 }
1727
1728 static int kvm_s390_apxa_installed(void)
1729 {
1730         u8 config[128];
1731         int cc;
1732
1733         if (test_facility(12)) {
1734                 cc = kvm_s390_query_ap_config(config);
1735
1736                 if (cc)
1737                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1738                 else
1739                         return config[0] & 0x40;
1740         }
1741
1742         return 0;
1743 }
1744
1745 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1746 {
1747         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1748
1749         if (kvm_s390_apxa_installed())
1750                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1751         else
1752                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1753 }
1754
1755 static u64 kvm_s390_get_initial_cpuid(void)
1756 {
1757         struct cpuid cpuid;
1758
1759         get_cpu_id(&cpuid);
1760         cpuid.version = 0xff;
1761         return *((u64 *) &cpuid);
1762 }
1763
1764 static void kvm_s390_crypto_init(struct kvm *kvm)
1765 {
1766         if (!test_kvm_facility(kvm, 76))
1767                 return;
1768
1769         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1770         kvm_s390_set_crycb_format(kvm);
1771
1772         /* Enable AES/DEA protected key functions by default */
1773         kvm->arch.crypto.aes_kw = 1;
1774         kvm->arch.crypto.dea_kw = 1;
1775         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1776                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1777         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1778                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1779 }
1780
1781 static void sca_dispose(struct kvm *kvm)
1782 {
1783         if (kvm->arch.use_esca)
1784                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1785         else
1786                 free_page((unsigned long)(kvm->arch.sca));
1787         kvm->arch.sca = NULL;
1788 }
1789
1790 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1791 {
1792         gfp_t alloc_flags = GFP_KERNEL;
1793         int i, rc;
1794         char debug_name[16];
1795         static unsigned long sca_offset;
1796
1797         rc = -EINVAL;
1798 #ifdef CONFIG_KVM_S390_UCONTROL
1799         if (type & ~KVM_VM_S390_UCONTROL)
1800                 goto out_err;
1801         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1802                 goto out_err;
1803 #else
1804         if (type)
1805                 goto out_err;
1806 #endif
1807
1808         rc = s390_enable_sie();
1809         if (rc)
1810                 goto out_err;
1811
1812         rc = -ENOMEM;
1813
1814         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1815
1816         kvm->arch.use_esca = 0; /* start with basic SCA */
1817         if (!sclp.has_64bscao)
1818                 alloc_flags |= GFP_DMA;
1819         rwlock_init(&kvm->arch.sca_lock);
1820         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1821         if (!kvm->arch.sca)
1822                 goto out_err;
1823         spin_lock(&kvm_lock);
1824         sca_offset += 16;
1825         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1826                 sca_offset = 0;
1827         kvm->arch.sca = (struct bsca_block *)
1828                         ((char *) kvm->arch.sca + sca_offset);
1829         spin_unlock(&kvm_lock);
1830
1831         sprintf(debug_name, "kvm-%u", current->pid);
1832
1833         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1834         if (!kvm->arch.dbf)
1835                 goto out_err;
1836
1837         kvm->arch.sie_page2 =
1838              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1839         if (!kvm->arch.sie_page2)
1840                 goto out_err;
1841
1842         /* Populate the facility mask initially. */
1843         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1844                sizeof(S390_lowcore.stfle_fac_list));
1845         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1846                 if (i < kvm_s390_fac_list_mask_size())
1847                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1848                 else
1849                         kvm->arch.model.fac_mask[i] = 0UL;
1850         }
1851
1852         /* Populate the facility list initially. */
1853         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1854         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1855                S390_ARCH_FAC_LIST_SIZE_BYTE);
1856
1857         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1858         set_kvm_facility(kvm->arch.model.fac_list, 74);
1859
1860         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1861         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1862
1863         kvm_s390_crypto_init(kvm);
1864
1865         mutex_init(&kvm->arch.float_int.ais_lock);
1866         kvm->arch.float_int.simm = 0;
1867         kvm->arch.float_int.nimm = 0;
1868         spin_lock_init(&kvm->arch.float_int.lock);
1869         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1870                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1871         init_waitqueue_head(&kvm->arch.ipte_wq);
1872         mutex_init(&kvm->arch.ipte_mutex);
1873
1874         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1875         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1876
1877         if (type & KVM_VM_S390_UCONTROL) {
1878                 kvm->arch.gmap = NULL;
1879                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1880         } else {
1881                 if (sclp.hamax == U64_MAX)
1882                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1883                 else
1884                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1885                                                     sclp.hamax + 1);
1886                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1887                 if (!kvm->arch.gmap)
1888                         goto out_err;
1889                 kvm->arch.gmap->private = kvm;
1890                 kvm->arch.gmap->pfault_enabled = 0;
1891         }
1892
1893         kvm->arch.css_support = 0;
1894         kvm->arch.use_irqchip = 0;
1895         kvm->arch.epoch = 0;
1896
1897         spin_lock_init(&kvm->arch.start_stop_lock);
1898         kvm_s390_vsie_init(kvm);
1899         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1900
1901         return 0;
1902 out_err:
1903         free_page((unsigned long)kvm->arch.sie_page2);
1904         debug_unregister(kvm->arch.dbf);
1905         sca_dispose(kvm);
1906         KVM_EVENT(3, "creation of vm failed: %d", rc);
1907         return rc;
1908 }
1909
1910 bool kvm_arch_has_vcpu_debugfs(void)
1911 {
1912         return false;
1913 }
1914
1915 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1916 {
1917         return 0;
1918 }
1919
1920 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1921 {
1922         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1923         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1924         kvm_s390_clear_local_irqs(vcpu);
1925         kvm_clear_async_pf_completion_queue(vcpu);
1926         if (!kvm_is_ucontrol(vcpu->kvm))
1927                 sca_del_vcpu(vcpu);
1928
1929         if (kvm_is_ucontrol(vcpu->kvm))
1930                 gmap_remove(vcpu->arch.gmap);
1931
1932         if (vcpu->kvm->arch.use_cmma)
1933                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1934         free_page((unsigned long)(vcpu->arch.sie_block));
1935
1936         kvm_vcpu_uninit(vcpu);
1937         kmem_cache_free(kvm_vcpu_cache, vcpu);
1938 }
1939
1940 static void kvm_free_vcpus(struct kvm *kvm)
1941 {
1942         unsigned int i;
1943         struct kvm_vcpu *vcpu;
1944
1945         kvm_for_each_vcpu(i, vcpu, kvm)
1946                 kvm_arch_vcpu_destroy(vcpu);
1947
1948         mutex_lock(&kvm->lock);
1949         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1950                 kvm->vcpus[i] = NULL;
1951
1952         atomic_set(&kvm->online_vcpus, 0);
1953         mutex_unlock(&kvm->lock);
1954 }
1955
1956 void kvm_arch_destroy_vm(struct kvm *kvm)
1957 {
1958         kvm_free_vcpus(kvm);
1959         sca_dispose(kvm);
1960         debug_unregister(kvm->arch.dbf);
1961         free_page((unsigned long)kvm->arch.sie_page2);
1962         if (!kvm_is_ucontrol(kvm))
1963                 gmap_remove(kvm->arch.gmap);
1964         kvm_s390_destroy_adapters(kvm);
1965         kvm_s390_clear_float_irqs(kvm);
1966         kvm_s390_vsie_destroy(kvm);
1967         if (kvm->arch.migration_state) {
1968                 vfree(kvm->arch.migration_state->pgste_bitmap);
1969                 kfree(kvm->arch.migration_state);
1970         }
1971         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1972 }
1973
1974 /* Section: vcpu related */
1975 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1976 {
1977         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1978         if (!vcpu->arch.gmap)
1979                 return -ENOMEM;
1980         vcpu->arch.gmap->private = vcpu->kvm;
1981
1982         return 0;
1983 }
1984
1985 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1986 {
1987         if (!kvm_s390_use_sca_entries())
1988                 return;
1989         read_lock(&vcpu->kvm->arch.sca_lock);
1990         if (vcpu->kvm->arch.use_esca) {
1991                 struct esca_block *sca = vcpu->kvm->arch.sca;
1992
1993                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1994                 sca->cpu[vcpu->vcpu_id].sda = 0;
1995         } else {
1996                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1997
1998                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1999                 sca->cpu[vcpu->vcpu_id].sda = 0;
2000         }
2001         read_unlock(&vcpu->kvm->arch.sca_lock);
2002 }
2003
2004 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2005 {
2006         if (!kvm_s390_use_sca_entries()) {
2007                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2008
2009                 /* we still need the basic sca for the ipte control */
2010                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2011                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2012         }
2013         read_lock(&vcpu->kvm->arch.sca_lock);
2014         if (vcpu->kvm->arch.use_esca) {
2015                 struct esca_block *sca = vcpu->kvm->arch.sca;
2016
2017                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2018                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2019                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2020                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2021                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2022         } else {
2023                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2024
2025                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2026                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2027                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2028                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2029         }
2030         read_unlock(&vcpu->kvm->arch.sca_lock);
2031 }
2032
2033 /* Basic SCA to Extended SCA data copy routines */
2034 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2035 {
2036         d->sda = s->sda;
2037         d->sigp_ctrl.c = s->sigp_ctrl.c;
2038         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2039 }
2040
2041 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2042 {
2043         int i;
2044
2045         d->ipte_control = s->ipte_control;
2046         d->mcn[0] = s->mcn;
2047         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2048                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2049 }
2050
2051 static int sca_switch_to_extended(struct kvm *kvm)
2052 {
2053         struct bsca_block *old_sca = kvm->arch.sca;
2054         struct esca_block *new_sca;
2055         struct kvm_vcpu *vcpu;
2056         unsigned int vcpu_idx;
2057         u32 scaol, scaoh;
2058
2059         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2060         if (!new_sca)
2061                 return -ENOMEM;
2062
2063         scaoh = (u32)((u64)(new_sca) >> 32);
2064         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2065
2066         kvm_s390_vcpu_block_all(kvm);
2067         write_lock(&kvm->arch.sca_lock);
2068
2069         sca_copy_b_to_e(new_sca, old_sca);
2070
2071         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2072                 vcpu->arch.sie_block->scaoh = scaoh;
2073                 vcpu->arch.sie_block->scaol = scaol;
2074                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2075         }
2076         kvm->arch.sca = new_sca;
2077         kvm->arch.use_esca = 1;
2078
2079         write_unlock(&kvm->arch.sca_lock);
2080         kvm_s390_vcpu_unblock_all(kvm);
2081
2082         free_page((unsigned long)old_sca);
2083
2084         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2085                  old_sca, kvm->arch.sca);
2086         return 0;
2087 }
2088
2089 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2090 {
2091         int rc;
2092
2093         if (!kvm_s390_use_sca_entries()) {
2094                 if (id < KVM_MAX_VCPUS)
2095                         return true;
2096                 return false;
2097         }
2098         if (id < KVM_S390_BSCA_CPU_SLOTS)
2099                 return true;
2100         if (!sclp.has_esca || !sclp.has_64bscao)
2101                 return false;
2102
2103         mutex_lock(&kvm->lock);
2104         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2105         mutex_unlock(&kvm->lock);
2106
2107         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2108 }
2109
2110 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2111 {
2112         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2113         kvm_clear_async_pf_completion_queue(vcpu);
2114         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2115                                     KVM_SYNC_GPRS |
2116                                     KVM_SYNC_ACRS |
2117                                     KVM_SYNC_CRS |
2118                                     KVM_SYNC_ARCH0 |
2119                                     KVM_SYNC_PFAULT;
2120         kvm_s390_set_prefix(vcpu, 0);
2121         if (test_kvm_facility(vcpu->kvm, 64))
2122                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2123         if (test_kvm_facility(vcpu->kvm, 133))
2124                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2125         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2126          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2127          */
2128         if (MACHINE_HAS_VX)
2129                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2130         else
2131                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2132
2133         if (kvm_is_ucontrol(vcpu->kvm))
2134                 return __kvm_ucontrol_vcpu_init(vcpu);
2135
2136         return 0;
2137 }
2138
2139 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2140 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2141 {
2142         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2143         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2144         vcpu->arch.cputm_start = get_tod_clock_fast();
2145         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2146 }
2147
2148 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2149 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2150 {
2151         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2152         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2153         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2154         vcpu->arch.cputm_start = 0;
2155         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2156 }
2157
2158 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2159 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2160 {
2161         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2162         vcpu->arch.cputm_enabled = true;
2163         __start_cpu_timer_accounting(vcpu);
2164 }
2165
2166 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2167 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2168 {
2169         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2170         __stop_cpu_timer_accounting(vcpu);
2171         vcpu->arch.cputm_enabled = false;
2172 }
2173
2174 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2175 {
2176         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2177         __enable_cpu_timer_accounting(vcpu);
2178         preempt_enable();
2179 }
2180
2181 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2182 {
2183         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2184         __disable_cpu_timer_accounting(vcpu);
2185         preempt_enable();
2186 }
2187
2188 /* set the cpu timer - may only be called from the VCPU thread itself */
2189 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2190 {
2191         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2192         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2193         if (vcpu->arch.cputm_enabled)
2194                 vcpu->arch.cputm_start = get_tod_clock_fast();
2195         vcpu->arch.sie_block->cputm = cputm;
2196         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2197         preempt_enable();
2198 }
2199
2200 /* update and get the cpu timer - can also be called from other VCPU threads */
2201 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2202 {
2203         unsigned int seq;
2204         __u64 value;
2205
2206         if (unlikely(!vcpu->arch.cputm_enabled))
2207                 return vcpu->arch.sie_block->cputm;
2208
2209         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2210         do {
2211                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2212                 /*
2213                  * If the writer would ever execute a read in the critical
2214                  * section, e.g. in irq context, we have a deadlock.
2215                  */
2216                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2217                 value = vcpu->arch.sie_block->cputm;
2218                 /* if cputm_start is 0, accounting is being started/stopped */
2219                 if (likely(vcpu->arch.cputm_start))
2220                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2221         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2222         preempt_enable();
2223         return value;
2224 }
2225
2226 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2227 {
2228
2229         gmap_enable(vcpu->arch.enabled_gmap);
2230         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2231         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2232                 __start_cpu_timer_accounting(vcpu);
2233         vcpu->cpu = cpu;
2234 }
2235
2236 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2237 {
2238         vcpu->cpu = -1;
2239         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2240                 __stop_cpu_timer_accounting(vcpu);
2241         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2242         vcpu->arch.enabled_gmap = gmap_get_enabled();
2243         gmap_disable(vcpu->arch.enabled_gmap);
2244
2245 }
2246
2247 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2248 {
2249         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2250         vcpu->arch.sie_block->gpsw.mask = 0UL;
2251         vcpu->arch.sie_block->gpsw.addr = 0UL;
2252         kvm_s390_set_prefix(vcpu, 0);
2253         kvm_s390_set_cpu_timer(vcpu, 0);
2254         vcpu->arch.sie_block->ckc       = 0UL;
2255         vcpu->arch.sie_block->todpr     = 0;
2256         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2257         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2258         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2259         /* make sure the new fpc will be lazily loaded */
2260         save_fpu_regs();
2261         current->thread.fpu.fpc = 0;
2262         vcpu->arch.sie_block->gbea = 1;
2263         vcpu->arch.sie_block->pp = 0;
2264         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2265         kvm_clear_async_pf_completion_queue(vcpu);
2266         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2267                 kvm_s390_vcpu_stop(vcpu);
2268         kvm_s390_clear_local_irqs(vcpu);
2269 }
2270
2271 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2272 {
2273         mutex_lock(&vcpu->kvm->lock);
2274         preempt_disable();
2275         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2276         preempt_enable();
2277         mutex_unlock(&vcpu->kvm->lock);
2278         if (!kvm_is_ucontrol(vcpu->kvm)) {
2279                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2280                 sca_add_vcpu(vcpu);
2281         }
2282         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2283                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2284         /* make vcpu_load load the right gmap on the first trigger */
2285         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2286 }
2287
2288 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2289 {
2290         if (!test_kvm_facility(vcpu->kvm, 76))
2291                 return;
2292
2293         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2294
2295         if (vcpu->kvm->arch.crypto.aes_kw)
2296                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2297         if (vcpu->kvm->arch.crypto.dea_kw)
2298                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2299
2300         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2301 }
2302
2303 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2304 {
2305         free_page(vcpu->arch.sie_block->cbrlo);
2306         vcpu->arch.sie_block->cbrlo = 0;
2307 }
2308
2309 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2310 {
2311         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2312         if (!vcpu->arch.sie_block->cbrlo)
2313                 return -ENOMEM;
2314
2315         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2316         return 0;
2317 }
2318
2319 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2320 {
2321         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2322
2323         vcpu->arch.sie_block->ibc = model->ibc;
2324         if (test_kvm_facility(vcpu->kvm, 7))
2325                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2326 }
2327
2328 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2329 {
2330         int rc = 0;
2331
2332         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2333                                                     CPUSTAT_SM |
2334                                                     CPUSTAT_STOPPED);
2335
2336         if (test_kvm_facility(vcpu->kvm, 78))
2337                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2338         else if (test_kvm_facility(vcpu->kvm, 8))
2339                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2340
2341         kvm_s390_vcpu_setup_model(vcpu);
2342
2343         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2344         if (MACHINE_HAS_ESOP)
2345                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2346         if (test_kvm_facility(vcpu->kvm, 9))
2347                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2348         if (test_kvm_facility(vcpu->kvm, 73))
2349                 vcpu->arch.sie_block->ecb |= ECB_TE;
2350
2351         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2352                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2353         if (test_kvm_facility(vcpu->kvm, 130))
2354                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2355         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2356         if (sclp.has_cei)
2357                 vcpu->arch.sie_block->eca |= ECA_CEI;
2358         if (sclp.has_ib)
2359                 vcpu->arch.sie_block->eca |= ECA_IB;
2360         if (sclp.has_siif)
2361                 vcpu->arch.sie_block->eca |= ECA_SII;
2362         if (sclp.has_sigpif)
2363                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2364         if (test_kvm_facility(vcpu->kvm, 129)) {
2365                 vcpu->arch.sie_block->eca |= ECA_VX;
2366                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2367         }
2368         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2369                                         | SDNXC;
2370         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2371
2372         if (sclp.has_kss)
2373                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2374         else
2375                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2376
2377         if (vcpu->kvm->arch.use_cmma) {
2378                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2379                 if (rc)
2380                         return rc;
2381         }
2382         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2383         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2384
2385         kvm_s390_vcpu_crypto_setup(vcpu);
2386
2387         return rc;
2388 }
2389
2390 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2391                                       unsigned int id)
2392 {
2393         struct kvm_vcpu *vcpu;
2394         struct sie_page *sie_page;
2395         int rc = -EINVAL;
2396
2397         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2398                 goto out;
2399
2400         rc = -ENOMEM;
2401
2402         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2403         if (!vcpu)
2404                 goto out;
2405
2406         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2407         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2408         if (!sie_page)
2409                 goto out_free_cpu;
2410
2411         vcpu->arch.sie_block = &sie_page->sie_block;
2412         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2413
2414         /* the real guest size will always be smaller than msl */
2415         vcpu->arch.sie_block->mso = 0;
2416         vcpu->arch.sie_block->msl = sclp.hamax;
2417
2418         vcpu->arch.sie_block->icpua = id;
2419         spin_lock_init(&vcpu->arch.local_int.lock);
2420         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2421         vcpu->arch.local_int.wq = &vcpu->wq;
2422         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2423         seqcount_init(&vcpu->arch.cputm_seqcount);
2424
2425         rc = kvm_vcpu_init(vcpu, kvm, id);
2426         if (rc)
2427                 goto out_free_sie_block;
2428         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2429                  vcpu->arch.sie_block);
2430         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2431
2432         return vcpu;
2433 out_free_sie_block:
2434         free_page((unsigned long)(vcpu->arch.sie_block));
2435 out_free_cpu:
2436         kmem_cache_free(kvm_vcpu_cache, vcpu);
2437 out:
2438         return ERR_PTR(rc);
2439 }
2440
2441 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2442 {
2443         return kvm_s390_vcpu_has_irq(vcpu, 0);
2444 }
2445
2446 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2447 {
2448         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2449         exit_sie(vcpu);
2450 }
2451
2452 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2453 {
2454         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2455 }
2456
2457 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2458 {
2459         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2460         exit_sie(vcpu);
2461 }
2462
2463 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2464 {
2465         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2466 }
2467
2468 /*
2469  * Kick a guest cpu out of SIE and wait until SIE is not running.
2470  * If the CPU is not running (e.g. waiting as idle) the function will
2471  * return immediately. */
2472 void exit_sie(struct kvm_vcpu *vcpu)
2473 {
2474         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2475         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2476                 cpu_relax();
2477 }
2478
2479 /* Kick a guest cpu out of SIE to process a request synchronously */
2480 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2481 {
2482         kvm_make_request(req, vcpu);
2483         kvm_s390_vcpu_request(vcpu);
2484 }
2485
2486 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2487                               unsigned long end)
2488 {
2489         struct kvm *kvm = gmap->private;
2490         struct kvm_vcpu *vcpu;
2491         unsigned long prefix;
2492         int i;
2493
2494         if (gmap_is_shadow(gmap))
2495                 return;
2496         if (start >= 1UL << 31)
2497                 /* We are only interested in prefix pages */
2498                 return;
2499         kvm_for_each_vcpu(i, vcpu, kvm) {
2500                 /* match against both prefix pages */
2501                 prefix = kvm_s390_get_prefix(vcpu);
2502                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2503                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2504                                    start, end);
2505                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2506                 }
2507         }
2508 }
2509
2510 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2511 {
2512         /* kvm common code refers to this, but never calls it */
2513         BUG();
2514         return 0;
2515 }
2516
2517 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2518                                            struct kvm_one_reg *reg)
2519 {
2520         int r = -EINVAL;
2521
2522         switch (reg->id) {
2523         case KVM_REG_S390_TODPR:
2524                 r = put_user(vcpu->arch.sie_block->todpr,
2525                              (u32 __user *)reg->addr);
2526                 break;
2527         case KVM_REG_S390_EPOCHDIFF:
2528                 r = put_user(vcpu->arch.sie_block->epoch,
2529                              (u64 __user *)reg->addr);
2530                 break;
2531         case KVM_REG_S390_CPU_TIMER:
2532                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2533                              (u64 __user *)reg->addr);
2534                 break;
2535         case KVM_REG_S390_CLOCK_COMP:
2536                 r = put_user(vcpu->arch.sie_block->ckc,
2537                              (u64 __user *)reg->addr);
2538                 break;
2539         case KVM_REG_S390_PFTOKEN:
2540                 r = put_user(vcpu->arch.pfault_token,
2541                              (u64 __user *)reg->addr);
2542                 break;
2543         case KVM_REG_S390_PFCOMPARE:
2544                 r = put_user(vcpu->arch.pfault_compare,
2545                              (u64 __user *)reg->addr);
2546                 break;
2547         case KVM_REG_S390_PFSELECT:
2548                 r = put_user(vcpu->arch.pfault_select,
2549                              (u64 __user *)reg->addr);
2550                 break;
2551         case KVM_REG_S390_PP:
2552                 r = put_user(vcpu->arch.sie_block->pp,
2553                              (u64 __user *)reg->addr);
2554                 break;
2555         case KVM_REG_S390_GBEA:
2556                 r = put_user(vcpu->arch.sie_block->gbea,
2557                              (u64 __user *)reg->addr);
2558                 break;
2559         default:
2560                 break;
2561         }
2562
2563         return r;
2564 }
2565
2566 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2567                                            struct kvm_one_reg *reg)
2568 {
2569         int r = -EINVAL;
2570         __u64 val;
2571
2572         switch (reg->id) {
2573         case KVM_REG_S390_TODPR:
2574                 r = get_user(vcpu->arch.sie_block->todpr,
2575                              (u32 __user *)reg->addr);
2576                 break;
2577         case KVM_REG_S390_EPOCHDIFF:
2578                 r = get_user(vcpu->arch.sie_block->epoch,
2579                              (u64 __user *)reg->addr);
2580                 break;
2581         case KVM_REG_S390_CPU_TIMER:
2582                 r = get_user(val, (u64 __user *)reg->addr);
2583                 if (!r)
2584                         kvm_s390_set_cpu_timer(vcpu, val);
2585                 break;
2586         case KVM_REG_S390_CLOCK_COMP:
2587                 r = get_user(vcpu->arch.sie_block->ckc,
2588                              (u64 __user *)reg->addr);
2589                 break;
2590         case KVM_REG_S390_PFTOKEN:
2591                 r = get_user(vcpu->arch.pfault_token,
2592                              (u64 __user *)reg->addr);
2593                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2594                         kvm_clear_async_pf_completion_queue(vcpu);
2595                 break;
2596         case KVM_REG_S390_PFCOMPARE:
2597                 r = get_user(vcpu->arch.pfault_compare,
2598                              (u64 __user *)reg->addr);
2599                 break;
2600         case KVM_REG_S390_PFSELECT:
2601                 r = get_user(vcpu->arch.pfault_select,
2602                              (u64 __user *)reg->addr);
2603                 break;
2604         case KVM_REG_S390_PP:
2605                 r = get_user(vcpu->arch.sie_block->pp,
2606                              (u64 __user *)reg->addr);
2607                 break;
2608         case KVM_REG_S390_GBEA:
2609                 r = get_user(vcpu->arch.sie_block->gbea,
2610                              (u64 __user *)reg->addr);
2611                 break;
2612         default:
2613                 break;
2614         }
2615
2616         return r;
2617 }
2618
2619 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2620 {
2621         kvm_s390_vcpu_initial_reset(vcpu);
2622         return 0;
2623 }
2624
2625 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2626 {
2627         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2628         return 0;
2629 }
2630
2631 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2632 {
2633         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2634         return 0;
2635 }
2636
2637 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2638                                   struct kvm_sregs *sregs)
2639 {
2640         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2641         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2642         return 0;
2643 }
2644
2645 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2646                                   struct kvm_sregs *sregs)
2647 {
2648         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2649         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2650         return 0;
2651 }
2652
2653 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2654 {
2655         if (test_fp_ctl(fpu->fpc))
2656                 return -EINVAL;
2657         vcpu->run->s.regs.fpc = fpu->fpc;
2658         if (MACHINE_HAS_VX)
2659                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2660                                  (freg_t *) fpu->fprs);
2661         else
2662                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2663         return 0;
2664 }
2665
2666 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2667 {
2668         /* make sure we have the latest values */
2669         save_fpu_regs();
2670         if (MACHINE_HAS_VX)
2671                 convert_vx_to_fp((freg_t *) fpu->fprs,
2672                                  (__vector128 *) vcpu->run->s.regs.vrs);
2673         else
2674                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2675         fpu->fpc = vcpu->run->s.regs.fpc;
2676         return 0;
2677 }
2678
2679 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2680 {
2681         int rc = 0;
2682
2683         if (!is_vcpu_stopped(vcpu))
2684                 rc = -EBUSY;
2685         else {
2686                 vcpu->run->psw_mask = psw.mask;
2687                 vcpu->run->psw_addr = psw.addr;
2688         }
2689         return rc;
2690 }
2691
2692 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2693                                   struct kvm_translation *tr)
2694 {
2695         return -EINVAL; /* not implemented yet */
2696 }
2697
2698 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2699                               KVM_GUESTDBG_USE_HW_BP | \
2700                               KVM_GUESTDBG_ENABLE)
2701
2702 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2703                                         struct kvm_guest_debug *dbg)
2704 {
2705         int rc = 0;
2706
2707         vcpu->guest_debug = 0;
2708         kvm_s390_clear_bp_data(vcpu);
2709
2710         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2711                 return -EINVAL;
2712         if (!sclp.has_gpere)
2713                 return -EINVAL;
2714
2715         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2716                 vcpu->guest_debug = dbg->control;
2717                 /* enforce guest PER */
2718                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2719
2720                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2721                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2722         } else {
2723                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2724                 vcpu->arch.guestdbg.last_bp = 0;
2725         }
2726
2727         if (rc) {
2728                 vcpu->guest_debug = 0;
2729                 kvm_s390_clear_bp_data(vcpu);
2730                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2731         }
2732
2733         return rc;
2734 }
2735
2736 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2737                                     struct kvm_mp_state *mp_state)
2738 {
2739         /* CHECK_STOP and LOAD are not supported yet */
2740         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2741                                        KVM_MP_STATE_OPERATING;
2742 }
2743
2744 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2745                                     struct kvm_mp_state *mp_state)
2746 {
2747         int rc = 0;
2748
2749         /* user space knows about this interface - let it control the state */
2750         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2751
2752         switch (mp_state->mp_state) {
2753         case KVM_MP_STATE_STOPPED:
2754                 kvm_s390_vcpu_stop(vcpu);
2755                 break;
2756         case KVM_MP_STATE_OPERATING:
2757                 kvm_s390_vcpu_start(vcpu);
2758                 break;
2759         case KVM_MP_STATE_LOAD:
2760         case KVM_MP_STATE_CHECK_STOP:
2761                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2762         default:
2763                 rc = -ENXIO;
2764         }
2765
2766         return rc;
2767 }
2768
2769 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2770 {
2771         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2772 }
2773
2774 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2775 {
2776 retry:
2777         kvm_s390_vcpu_request_handled(vcpu);
2778         if (!kvm_request_pending(vcpu))
2779                 return 0;
2780         /*
2781          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2782          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2783          * This ensures that the ipte instruction for this request has
2784          * already finished. We might race against a second unmapper that
2785          * wants to set the blocking bit. Lets just retry the request loop.
2786          */
2787         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2788                 int rc;
2789                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2790                                           kvm_s390_get_prefix(vcpu),
2791                                           PAGE_SIZE * 2, PROT_WRITE);
2792                 if (rc) {
2793                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2794                         return rc;
2795                 }
2796                 goto retry;
2797         }
2798
2799         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2800                 vcpu->arch.sie_block->ihcpu = 0xffff;
2801                 goto retry;
2802         }
2803
2804         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2805                 if (!ibs_enabled(vcpu)) {
2806                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2807                         atomic_or(CPUSTAT_IBS,
2808                                         &vcpu->arch.sie_block->cpuflags);
2809                 }
2810                 goto retry;
2811         }
2812
2813         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2814                 if (ibs_enabled(vcpu)) {
2815                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2816                         atomic_andnot(CPUSTAT_IBS,
2817                                           &vcpu->arch.sie_block->cpuflags);
2818                 }
2819                 goto retry;
2820         }
2821
2822         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2823                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2824                 goto retry;
2825         }
2826
2827         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2828                 /*
2829                  * Disable CMMA virtualization; we will emulate the ESSA
2830                  * instruction manually, in order to provide additional
2831                  * functionalities needed for live migration.
2832                  */
2833                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2834                 goto retry;
2835         }
2836
2837         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2838                 /*
2839                  * Re-enable CMMA virtualization if CMMA is available and
2840                  * was used.
2841                  */
2842                 if ((vcpu->kvm->arch.use_cmma) &&
2843                     (vcpu->kvm->mm->context.use_cmma))
2844                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2845                 goto retry;
2846         }
2847
2848         /* nothing to do, just clear the request */
2849         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2850
2851         return 0;
2852 }
2853
2854 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2855 {
2856         struct kvm_vcpu *vcpu;
2857         int i;
2858
2859         mutex_lock(&kvm->lock);
2860         preempt_disable();
2861         kvm->arch.epoch = tod - get_tod_clock();
2862         kvm_s390_vcpu_block_all(kvm);
2863         kvm_for_each_vcpu(i, vcpu, kvm)
2864                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2865         kvm_s390_vcpu_unblock_all(kvm);
2866         preempt_enable();
2867         mutex_unlock(&kvm->lock);
2868 }
2869
2870 /**
2871  * kvm_arch_fault_in_page - fault-in guest page if necessary
2872  * @vcpu: The corresponding virtual cpu
2873  * @gpa: Guest physical address
2874  * @writable: Whether the page should be writable or not
2875  *
2876  * Make sure that a guest page has been faulted-in on the host.
2877  *
2878  * Return: Zero on success, negative error code otherwise.
2879  */
2880 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2881 {
2882         return gmap_fault(vcpu->arch.gmap, gpa,
2883                           writable ? FAULT_FLAG_WRITE : 0);
2884 }
2885
2886 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2887                                       unsigned long token)
2888 {
2889         struct kvm_s390_interrupt inti;
2890         struct kvm_s390_irq irq;
2891
2892         if (start_token) {
2893                 irq.u.ext.ext_params2 = token;
2894                 irq.type = KVM_S390_INT_PFAULT_INIT;
2895                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2896         } else {
2897                 inti.type = KVM_S390_INT_PFAULT_DONE;
2898                 inti.parm64 = token;
2899                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2900         }
2901 }
2902
2903 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2904                                      struct kvm_async_pf *work)
2905 {
2906         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2907         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2908 }
2909
2910 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2911                                  struct kvm_async_pf *work)
2912 {
2913         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2914         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2915 }
2916
2917 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2918                                struct kvm_async_pf *work)
2919 {
2920         /* s390 will always inject the page directly */
2921 }
2922
2923 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2924 {
2925         /*
2926          * s390 will always inject the page directly,
2927          * but we still want check_async_completion to cleanup
2928          */
2929         return true;
2930 }
2931
2932 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2933 {
2934         hva_t hva;
2935         struct kvm_arch_async_pf arch;
2936         int rc;
2937
2938         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2939                 return 0;
2940         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2941             vcpu->arch.pfault_compare)
2942                 return 0;
2943         if (psw_extint_disabled(vcpu))
2944                 return 0;
2945         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2946                 return 0;
2947         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2948                 return 0;
2949         if (!vcpu->arch.gmap->pfault_enabled)
2950                 return 0;
2951
2952         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2953         hva += current->thread.gmap_addr & ~PAGE_MASK;
2954         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2955                 return 0;
2956
2957         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2958         return rc;
2959 }
2960
2961 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2962 {
2963         int rc, cpuflags;
2964
2965         /*
2966          * On s390 notifications for arriving pages will be delivered directly
2967          * to the guest but the house keeping for completed pfaults is
2968          * handled outside the worker.
2969          */
2970         kvm_check_async_pf_completion(vcpu);
2971
2972         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2973         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2974
2975         if (need_resched())
2976                 schedule();
2977
2978         if (test_cpu_flag(CIF_MCCK_PENDING))
2979                 s390_handle_mcck();
2980
2981         if (!kvm_is_ucontrol(vcpu->kvm)) {
2982                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2983                 if (rc)
2984                         return rc;
2985         }
2986
2987         rc = kvm_s390_handle_requests(vcpu);
2988         if (rc)
2989                 return rc;
2990
2991         if (guestdbg_enabled(vcpu)) {
2992                 kvm_s390_backup_guest_per_regs(vcpu);
2993                 kvm_s390_patch_guest_per_regs(vcpu);
2994         }
2995
2996         vcpu->arch.sie_block->icptcode = 0;
2997         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2998         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2999         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3000
3001         return 0;
3002 }
3003
3004 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3005 {
3006         struct kvm_s390_pgm_info pgm_info = {
3007                 .code = PGM_ADDRESSING,
3008         };
3009         u8 opcode, ilen;
3010         int rc;
3011
3012         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3013         trace_kvm_s390_sie_fault(vcpu);
3014
3015         /*
3016          * We want to inject an addressing exception, which is defined as a
3017          * suppressing or terminating exception. However, since we came here
3018          * by a DAT access exception, the PSW still points to the faulting
3019          * instruction since DAT exceptions are nullifying. So we've got
3020          * to look up the current opcode to get the length of the instruction
3021          * to be able to forward the PSW.
3022          */
3023         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3024         ilen = insn_length(opcode);
3025         if (rc < 0) {
3026                 return rc;
3027         } else if (rc) {
3028                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3029                  * Forward by arbitrary ilc, injection will take care of
3030                  * nullification if necessary.
3031                  */
3032                 pgm_info = vcpu->arch.pgm;
3033                 ilen = 4;
3034         }
3035         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3036         kvm_s390_forward_psw(vcpu, ilen);
3037         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3038 }
3039
3040 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3041 {
3042         struct mcck_volatile_info *mcck_info;
3043         struct sie_page *sie_page;
3044
3045         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3046                    vcpu->arch.sie_block->icptcode);
3047         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3048
3049         if (guestdbg_enabled(vcpu))
3050                 kvm_s390_restore_guest_per_regs(vcpu);
3051
3052         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3053         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3054
3055         if (exit_reason == -EINTR) {
3056                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3057                 sie_page = container_of(vcpu->arch.sie_block,
3058                                         struct sie_page, sie_block);
3059                 mcck_info = &sie_page->mcck_info;
3060                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3061                 return 0;
3062         }
3063
3064         if (vcpu->arch.sie_block->icptcode > 0) {
3065                 int rc = kvm_handle_sie_intercept(vcpu);
3066
3067                 if (rc != -EOPNOTSUPP)
3068                         return rc;
3069                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3070                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3071                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3072                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3073                 return -EREMOTE;
3074         } else if (exit_reason != -EFAULT) {
3075                 vcpu->stat.exit_null++;
3076                 return 0;
3077         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3078                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3079                 vcpu->run->s390_ucontrol.trans_exc_code =
3080                                                 current->thread.gmap_addr;
3081                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3082                 return -EREMOTE;
3083         } else if (current->thread.gmap_pfault) {
3084                 trace_kvm_s390_major_guest_pfault(vcpu);
3085                 current->thread.gmap_pfault = 0;
3086                 if (kvm_arch_setup_async_pf(vcpu))
3087                         return 0;
3088                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3089         }
3090         return vcpu_post_run_fault_in_sie(vcpu);
3091 }
3092
3093 static int __vcpu_run(struct kvm_vcpu *vcpu)
3094 {
3095         int rc, exit_reason;
3096
3097         /*
3098          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3099          * ning the guest), so that memslots (and other stuff) are protected
3100          */
3101         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3102
3103         do {
3104                 rc = vcpu_pre_run(vcpu);
3105                 if (rc)
3106                         break;
3107
3108                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3109                 /*
3110                  * As PF_VCPU will be used in fault handler, between
3111                  * guest_enter and guest_exit should be no uaccess.
3112                  */
3113                 local_irq_disable();
3114                 guest_enter_irqoff();
3115                 __disable_cpu_timer_accounting(vcpu);
3116                 local_irq_enable();
3117                 exit_reason = sie64a(vcpu->arch.sie_block,
3118                                      vcpu->run->s.regs.gprs);
3119                 local_irq_disable();
3120                 __enable_cpu_timer_accounting(vcpu);
3121                 guest_exit_irqoff();
3122                 local_irq_enable();
3123                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3124
3125                 rc = vcpu_post_run(vcpu, exit_reason);
3126         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3127
3128         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3129         return rc;
3130 }
3131
3132 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3133 {
3134         struct runtime_instr_cb *riccb;
3135         struct gs_cb *gscb;
3136
3137         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3138         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3139         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3140         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3141         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3142                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3143         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3144                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3145                 /* some control register changes require a tlb flush */
3146                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3147         }
3148         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3149                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3150                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3151                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3152                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3153                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3154         }
3155         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3156                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3157                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3158                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3159                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3160                         kvm_clear_async_pf_completion_queue(vcpu);
3161         }
3162         /*
3163          * If userspace sets the riccb (e.g. after migration) to a valid state,
3164          * we should enable RI here instead of doing the lazy enablement.
3165          */
3166         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3167             test_kvm_facility(vcpu->kvm, 64) &&
3168             riccb->valid &&
3169             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3170                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3171                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3172         }
3173         /*
3174          * If userspace sets the gscb (e.g. after migration) to non-zero,
3175          * we should enable GS here instead of doing the lazy enablement.
3176          */
3177         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3178             test_kvm_facility(vcpu->kvm, 133) &&
3179             gscb->gssm &&
3180             !vcpu->arch.gs_enabled) {
3181                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3182                 vcpu->arch.sie_block->ecb |= ECB_GS;
3183                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3184                 vcpu->arch.gs_enabled = 1;
3185         }
3186         save_access_regs(vcpu->arch.host_acrs);
3187         restore_access_regs(vcpu->run->s.regs.acrs);
3188         /* save host (userspace) fprs/vrs */
3189         save_fpu_regs();
3190         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3191         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3192         if (MACHINE_HAS_VX)
3193                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3194         else
3195                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3196         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3197         if (test_fp_ctl(current->thread.fpu.fpc))
3198                 /* User space provided an invalid FPC, let's clear it */
3199                 current->thread.fpu.fpc = 0;
3200         if (MACHINE_HAS_GS) {
3201                 preempt_disable();
3202                 __ctl_set_bit(2, 4);
3203                 if (current->thread.gs_cb) {
3204                         vcpu->arch.host_gscb = current->thread.gs_cb;
3205                         save_gs_cb(vcpu->arch.host_gscb);
3206                 }
3207                 if (vcpu->arch.gs_enabled) {
3208                         current->thread.gs_cb = (struct gs_cb *)
3209                                                 &vcpu->run->s.regs.gscb;
3210                         restore_gs_cb(current->thread.gs_cb);
3211                 }
3212                 preempt_enable();
3213         }
3214
3215         kvm_run->kvm_dirty_regs = 0;
3216 }
3217
3218 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3219 {
3220         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3221         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3222         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3223         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3224         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3225         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3226         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3227         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3228         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3229         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3230         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3231         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3232         save_access_regs(vcpu->run->s.regs.acrs);
3233         restore_access_regs(vcpu->arch.host_acrs);
3234         /* Save guest register state */
3235         save_fpu_regs();
3236         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3237         /* Restore will be done lazily at return */
3238         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3239         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3240         if (MACHINE_HAS_GS) {
3241                 __ctl_set_bit(2, 4);
3242                 if (vcpu->arch.gs_enabled)
3243                         save_gs_cb(current->thread.gs_cb);
3244                 preempt_disable();
3245                 current->thread.gs_cb = vcpu->arch.host_gscb;
3246                 restore_gs_cb(vcpu->arch.host_gscb);
3247                 preempt_enable();
3248                 if (!vcpu->arch.host_gscb)
3249                         __ctl_clear_bit(2, 4);
3250                 vcpu->arch.host_gscb = NULL;
3251         }
3252
3253 }
3254
3255 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3256 {
3257         int rc;
3258         sigset_t sigsaved;
3259
3260         if (kvm_run->immediate_exit)
3261                 return -EINTR;
3262
3263         if (guestdbg_exit_pending(vcpu)) {
3264                 kvm_s390_prepare_debug_exit(vcpu);
3265                 return 0;
3266         }
3267
3268         if (vcpu->sigset_active)
3269                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3270
3271         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3272                 kvm_s390_vcpu_start(vcpu);
3273         } else if (is_vcpu_stopped(vcpu)) {
3274                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3275                                    vcpu->vcpu_id);
3276                 return -EINVAL;
3277         }
3278
3279         sync_regs(vcpu, kvm_run);
3280         enable_cpu_timer_accounting(vcpu);
3281
3282         might_fault();
3283         rc = __vcpu_run(vcpu);
3284
3285         if (signal_pending(current) && !rc) {
3286                 kvm_run->exit_reason = KVM_EXIT_INTR;
3287                 rc = -EINTR;
3288         }
3289
3290         if (guestdbg_exit_pending(vcpu) && !rc)  {
3291                 kvm_s390_prepare_debug_exit(vcpu);
3292                 rc = 0;
3293         }
3294
3295         if (rc == -EREMOTE) {
3296                 /* userspace support is needed, kvm_run has been prepared */
3297                 rc = 0;
3298         }
3299
3300         disable_cpu_timer_accounting(vcpu);
3301         store_regs(vcpu, kvm_run);
3302
3303         if (vcpu->sigset_active)
3304                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3305
3306         vcpu->stat.exit_userspace++;
3307         return rc;
3308 }
3309
3310 /*
3311  * store status at address
3312  * we use have two special cases:
3313  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3314  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3315  */
3316 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3317 {
3318         unsigned char archmode = 1;
3319         freg_t fprs[NUM_FPRS];
3320         unsigned int px;
3321         u64 clkcomp, cputm;
3322         int rc;
3323
3324         px = kvm_s390_get_prefix(vcpu);
3325         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3326                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3327                         return -EFAULT;
3328                 gpa = 0;
3329         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3330                 if (write_guest_real(vcpu, 163, &archmode, 1))
3331                         return -EFAULT;
3332                 gpa = px;
3333         } else
3334                 gpa -= __LC_FPREGS_SAVE_AREA;
3335
3336         /* manually convert vector registers if necessary */
3337         if (MACHINE_HAS_VX) {
3338                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3339                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3340                                      fprs, 128);
3341         } else {
3342                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3343                                      vcpu->run->s.regs.fprs, 128);
3344         }
3345         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3346                               vcpu->run->s.regs.gprs, 128);
3347         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3348                               &vcpu->arch.sie_block->gpsw, 16);
3349         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3350                               &px, 4);
3351         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3352                               &vcpu->run->s.regs.fpc, 4);
3353         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3354                               &vcpu->arch.sie_block->todpr, 4);
3355         cputm = kvm_s390_get_cpu_timer(vcpu);
3356         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3357                               &cputm, 8);
3358         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3359         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3360                               &clkcomp, 8);
3361         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3362                               &vcpu->run->s.regs.acrs, 64);
3363         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3364                               &vcpu->arch.sie_block->gcr, 128);
3365         return rc ? -EFAULT : 0;
3366 }
3367
3368 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3369 {
3370         /*
3371          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3372          * switch in the run ioctl. Let's update our copies before we save
3373          * it into the save area
3374          */
3375         save_fpu_regs();
3376         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3377         save_access_regs(vcpu->run->s.regs.acrs);
3378
3379         return kvm_s390_store_status_unloaded(vcpu, addr);
3380 }
3381
3382 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3383 {
3384         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3385         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3386 }
3387
3388 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3389 {
3390         unsigned int i;
3391         struct kvm_vcpu *vcpu;
3392
3393         kvm_for_each_vcpu(i, vcpu, kvm) {
3394                 __disable_ibs_on_vcpu(vcpu);
3395         }
3396 }
3397
3398 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3399 {
3400         if (!sclp.has_ibs)
3401                 return;
3402         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3403         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3404 }
3405
3406 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3407 {
3408         int i, online_vcpus, started_vcpus = 0;
3409
3410         if (!is_vcpu_stopped(vcpu))
3411                 return;
3412
3413         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3414         /* Only one cpu at a time may enter/leave the STOPPED state. */
3415         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3416         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3417
3418         for (i = 0; i < online_vcpus; i++) {
3419                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3420                         started_vcpus++;
3421         }
3422
3423         if (started_vcpus == 0) {
3424                 /* we're the only active VCPU -> speed it up */
3425                 __enable_ibs_on_vcpu(vcpu);
3426         } else if (started_vcpus == 1) {
3427                 /*
3428                  * As we are starting a second VCPU, we have to disable
3429                  * the IBS facility on all VCPUs to remove potentially
3430                  * oustanding ENABLE requests.
3431                  */
3432                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3433         }
3434
3435         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3436         /*
3437          * Another VCPU might have used IBS while we were offline.
3438          * Let's play safe and flush the VCPU at startup.
3439          */
3440         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3441         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3442         return;
3443 }
3444
3445 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3446 {
3447         int i, online_vcpus, started_vcpus = 0;
3448         struct kvm_vcpu *started_vcpu = NULL;
3449
3450         if (is_vcpu_stopped(vcpu))
3451                 return;
3452
3453         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3454         /* Only one cpu at a time may enter/leave the STOPPED state. */
3455         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3456         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3457
3458         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3459         kvm_s390_clear_stop_irq(vcpu);
3460
3461         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3462         __disable_ibs_on_vcpu(vcpu);
3463
3464         for (i = 0; i < online_vcpus; i++) {
3465                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3466                         started_vcpus++;
3467                         started_vcpu = vcpu->kvm->vcpus[i];
3468                 }
3469         }
3470
3471         if (started_vcpus == 1) {
3472                 /*
3473                  * As we only have one VCPU left, we want to enable the
3474                  * IBS facility for that VCPU to speed it up.
3475                  */
3476                 __enable_ibs_on_vcpu(started_vcpu);
3477         }
3478
3479         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3480         return;
3481 }
3482
3483 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3484                                      struct kvm_enable_cap *cap)
3485 {
3486         int r;
3487
3488         if (cap->flags)
3489                 return -EINVAL;
3490
3491         switch (cap->cap) {
3492         case KVM_CAP_S390_CSS_SUPPORT:
3493                 if (!vcpu->kvm->arch.css_support) {
3494                         vcpu->kvm->arch.css_support = 1;
3495                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3496                         trace_kvm_s390_enable_css(vcpu->kvm);
3497                 }
3498                 r = 0;
3499                 break;
3500         default:
3501                 r = -EINVAL;
3502                 break;
3503         }
3504         return r;
3505 }
3506
3507 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3508                                   struct kvm_s390_mem_op *mop)
3509 {
3510         void __user *uaddr = (void __user *)mop->buf;
3511         void *tmpbuf = NULL;
3512         int r, srcu_idx;
3513         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3514                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3515
3516         if (mop->flags & ~supported_flags)
3517                 return -EINVAL;
3518
3519         if (mop->size > MEM_OP_MAX_SIZE)
3520                 return -E2BIG;
3521
3522         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3523                 tmpbuf = vmalloc(mop->size);
3524                 if (!tmpbuf)
3525                         return -ENOMEM;
3526         }
3527
3528         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3529
3530         switch (mop->op) {
3531         case KVM_S390_MEMOP_LOGICAL_READ:
3532                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3533                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3534                                             mop->size, GACC_FETCH);
3535                         break;
3536                 }
3537                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3538                 if (r == 0) {
3539                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3540                                 r = -EFAULT;
3541                 }
3542                 break;
3543         case KVM_S390_MEMOP_LOGICAL_WRITE:
3544                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3545                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3546                                             mop->size, GACC_STORE);
3547                         break;
3548                 }
3549                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3550                         r = -EFAULT;
3551                         break;
3552                 }
3553                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3554                 break;
3555         default:
3556                 r = -EINVAL;
3557         }
3558
3559         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3560
3561         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3562                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3563
3564         vfree(tmpbuf);
3565         return r;
3566 }
3567
3568 long kvm_arch_vcpu_ioctl(struct file *filp,
3569                          unsigned int ioctl, unsigned long arg)
3570 {
3571         struct kvm_vcpu *vcpu = filp->private_data;
3572         void __user *argp = (void __user *)arg;
3573         int idx;
3574         long r;
3575
3576         switch (ioctl) {
3577         case KVM_S390_IRQ: {
3578                 struct kvm_s390_irq s390irq;
3579
3580                 r = -EFAULT;
3581                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3582                         break;
3583                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3584                 break;
3585         }
3586         case KVM_S390_INTERRUPT: {
3587                 struct kvm_s390_interrupt s390int;
3588                 struct kvm_s390_irq s390irq;
3589
3590                 r = -EFAULT;
3591                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3592                         break;
3593                 if (s390int_to_s390irq(&s390int, &s390irq))
3594                         return -EINVAL;
3595                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3596                 break;
3597         }
3598         case KVM_S390_STORE_STATUS:
3599                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3600                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3601                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3602                 break;
3603         case KVM_S390_SET_INITIAL_PSW: {
3604                 psw_t psw;
3605
3606                 r = -EFAULT;
3607                 if (copy_from_user(&psw, argp, sizeof(psw)))
3608                         break;
3609                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3610                 break;
3611         }
3612         case KVM_S390_INITIAL_RESET:
3613                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3614                 break;
3615         case KVM_SET_ONE_REG:
3616         case KVM_GET_ONE_REG: {
3617                 struct kvm_one_reg reg;
3618                 r = -EFAULT;
3619                 if (copy_from_user(&reg, argp, sizeof(reg)))
3620                         break;
3621                 if (ioctl == KVM_SET_ONE_REG)
3622                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3623                 else
3624                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3625                 break;
3626         }
3627 #ifdef CONFIG_KVM_S390_UCONTROL
3628         case KVM_S390_UCAS_MAP: {
3629                 struct kvm_s390_ucas_mapping ucasmap;
3630
3631                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3632                         r = -EFAULT;
3633                         break;
3634                 }
3635
3636                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3637                         r = -EINVAL;
3638                         break;
3639                 }
3640
3641                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3642                                      ucasmap.vcpu_addr, ucasmap.length);
3643                 break;
3644         }
3645         case KVM_S390_UCAS_UNMAP: {
3646                 struct kvm_s390_ucas_mapping ucasmap;
3647
3648                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3649                         r = -EFAULT;
3650                         break;
3651                 }
3652
3653                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3654                         r = -EINVAL;
3655                         break;
3656                 }
3657
3658                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3659                         ucasmap.length);
3660                 break;
3661         }
3662 #endif
3663         case KVM_S390_VCPU_FAULT: {
3664                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3665                 break;
3666         }
3667         case KVM_ENABLE_CAP:
3668         {
3669                 struct kvm_enable_cap cap;
3670                 r = -EFAULT;
3671                 if (copy_from_user(&cap, argp, sizeof(cap)))
3672                         break;
3673                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3674                 break;
3675         }
3676         case KVM_S390_MEM_OP: {
3677                 struct kvm_s390_mem_op mem_op;
3678
3679                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3680                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3681                 else
3682                         r = -EFAULT;
3683                 break;
3684         }
3685         case KVM_S390_SET_IRQ_STATE: {
3686                 struct kvm_s390_irq_state irq_state;
3687
3688                 r = -EFAULT;
3689                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3690                         break;
3691                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3692                     irq_state.len == 0 ||
3693                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3694                         r = -EINVAL;
3695                         break;
3696                 }
3697                 r = kvm_s390_set_irq_state(vcpu,
3698                                            (void __user *) irq_state.buf,
3699                                            irq_state.len);
3700                 break;
3701         }
3702         case KVM_S390_GET_IRQ_STATE: {
3703                 struct kvm_s390_irq_state irq_state;
3704
3705                 r = -EFAULT;
3706                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3707                         break;
3708                 if (irq_state.len == 0) {
3709                         r = -EINVAL;
3710                         break;
3711                 }
3712                 r = kvm_s390_get_irq_state(vcpu,
3713                                            (__u8 __user *)  irq_state.buf,
3714                                            irq_state.len);
3715                 break;
3716         }
3717         default:
3718                 r = -ENOTTY;
3719         }
3720         return r;
3721 }
3722
3723 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3724 {
3725 #ifdef CONFIG_KVM_S390_UCONTROL
3726         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3727                  && (kvm_is_ucontrol(vcpu->kvm))) {
3728                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3729                 get_page(vmf->page);
3730                 return 0;
3731         }
3732 #endif
3733         return VM_FAULT_SIGBUS;
3734 }
3735
3736 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3737                             unsigned long npages)
3738 {
3739         return 0;
3740 }
3741
3742 /* Section: memory related */
3743 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3744                                    struct kvm_memory_slot *memslot,
3745                                    const struct kvm_userspace_memory_region *mem,
3746                                    enum kvm_mr_change change)
3747 {
3748         /* A few sanity checks. We can have memory slots which have to be
3749            located/ended at a segment boundary (1MB). The memory in userland is
3750            ok to be fragmented into various different vmas. It is okay to mmap()
3751            and munmap() stuff in this slot after doing this call at any time */
3752
3753         if (mem->userspace_addr & 0xffffful)
3754                 return -EINVAL;
3755
3756         if (mem->memory_size & 0xffffful)
3757                 return -EINVAL;
3758
3759         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3760                 return -EINVAL;
3761
3762         return 0;
3763 }
3764
3765 void kvm_arch_commit_memory_region(struct kvm *kvm,
3766                                 const struct kvm_userspace_memory_region *mem,
3767                                 const struct kvm_memory_slot *old,
3768                                 const struct kvm_memory_slot *new,
3769                                 enum kvm_mr_change change)
3770 {
3771         int rc;
3772
3773         /* If the basics of the memslot do not change, we do not want
3774          * to update the gmap. Every update causes several unnecessary
3775          * segment translation exceptions. This is usually handled just
3776          * fine by the normal fault handler + gmap, but it will also
3777          * cause faults on the prefix page of running guest CPUs.
3778          */
3779         if (old->userspace_addr == mem->userspace_addr &&
3780             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3781             old->npages * PAGE_SIZE == mem->memory_size)
3782                 return;
3783
3784         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3785                 mem->guest_phys_addr, mem->memory_size);
3786         if (rc)
3787                 pr_warn("failed to commit memory region\n");
3788         return;
3789 }
3790
3791 static inline unsigned long nonhyp_mask(int i)
3792 {
3793         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3794
3795         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3796 }
3797
3798 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3799 {
3800         vcpu->valid_wakeup = false;
3801 }
3802
3803 static int __init kvm_s390_init(void)
3804 {
3805         int i;
3806
3807         if (!sclp.has_sief2) {
3808                 pr_info("SIE not available\n");
3809                 return -ENODEV;
3810         }
3811
3812         for (i = 0; i < 16; i++)
3813                 kvm_s390_fac_list_mask[i] |=
3814                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3815
3816         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3817 }
3818
3819 static void __exit kvm_s390_exit(void)
3820 {
3821         kvm_exit();
3822 }
3823
3824 module_init(kvm_s390_init);
3825 module_exit(kvm_s390_exit);
3826
3827 /*
3828  * Enable autoloading of the kvm module.
3829  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3830  * since x86 takes a different approach.
3831  */
3832 #include <linux/miscdevice.h>
3833 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3834 MODULE_ALIAS("devname:kvm");