arch/s390/kvm/kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
33 #include <asm/etr.h>
34 #include <asm/pgtable.h>
35 #include <asm/gmap.h>
36 #include <asm/nmi.h>
37 #include <asm/switch_to.h>
38 #include <asm/isc.h>
39 #include <asm/sclp.h>
40 #include <asm/cpacf.h>
42 #include "kvm-s390.h"
43 #include "gaccess.h"
44
45 #define KMSG_COMPONENT "kvm-s390"
46 #undef pr_fmt
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49 #define CREATE_TRACE_POINTS
50 #include "trace.h"
51 #include "trace-s390.h"
52
53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
54 #define LOCAL_IRQS 32
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
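/*
 * VCPU event counters exported by common KVM code as files in the kvm
 * debugfs directory; VCPU_STAT() records the offset of each counter
 * within struct kvm_vcpu together with the KVM_STAT_VCPU type.
 */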
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61         { "userspace_handled", VCPU_STAT(exit_userspace) },
62         { "exit_null", VCPU_STAT(exit_null) },
63         { "exit_validity", VCPU_STAT(exit_validity) },
64         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65         { "exit_external_request", VCPU_STAT(exit_external_request) },
66         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67         { "exit_instruction", VCPU_STAT(exit_instruction) },
68         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
69         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
70         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
71         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
72         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
73         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
74         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
75         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
76         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
77         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
78         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
79         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
80         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
81         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
82         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
83         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
84         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
85         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
86         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
87         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
88         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
89         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
90         { "instruction_spx", VCPU_STAT(instruction_spx) },
91         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
92         { "instruction_stap", VCPU_STAT(instruction_stap) },
93         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
94         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
95         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
96         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
97         { "instruction_essa", VCPU_STAT(instruction_essa) },
98         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
99         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
100         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
101         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
102         { "instruction_sie", VCPU_STAT(instruction_sie) },
103         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
104         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
105         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
106         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
107         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
108         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
109         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
110         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
111         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
112         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
113         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
114         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
115         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
116         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
117         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
118         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
119         { "diagnose_10", VCPU_STAT(diagnose_10) },
120         { "diagnose_44", VCPU_STAT(diagnose_44) },
121         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
122         { "diagnose_258", VCPU_STAT(diagnose_258) },
123         { "diagnose_308", VCPU_STAT(diagnose_308) },
124         { "diagnose_500", VCPU_STAT(diagnose_500) },
125         { NULL }
126 };
127
128 /* upper facilities limit for kvm */
129 unsigned long kvm_s390_fac_list_mask[16] = {
130         0xffe6000000000000UL,
131         0x005e000000000000UL,
132 };
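/*
 * Each u64 in the mask above covers 64 facility bits, so only STFLE
 * facilities 0-127 can currently be offered to guests; the mask is ANDed
 * into a VM's facility mask when the VM is created (kvm_arch_init_vm).
 */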
133
134 unsigned long kvm_s390_fac_list_mask_size(void)
135 {
136         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
137         return ARRAY_SIZE(kvm_s390_fac_list_mask);
138 }
139
140 /* available cpu features supported by kvm */
141 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
142 /* available subfunctions indicated via query / "test bit" */
143 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
144
145 static struct gmap_notifier gmap_notifier;
146 static struct gmap_notifier vsie_gmap_notifier;
147 debug_info_t *kvm_s390_dbf;
148
149 /* Section: not file related */
150 int kvm_arch_hardware_enable(void)
151 {
152         /* every s390 is virtualization enabled ;-) */
153         return 0;
154 }
155
156 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
157                               unsigned long end);
158
159 /*
160  * This callback is executed during stop_machine(). All CPUs are therefore
161  * temporarily stopped. In order not to change guest behavior, we have to
162  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
163  * so a CPU won't be stopped while calculating with the epoch.
164  */
165 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
166                           void *v)
167 {
168         struct kvm *kvm;
169         struct kvm_vcpu *vcpu;
170         int i;
171         unsigned long long *delta = v;
172
173         list_for_each_entry(kvm, &vm_list, vm_list) {
174                 kvm->arch.epoch -= *delta;
175                 kvm_for_each_vcpu(i, vcpu, kvm) {
176                         vcpu->arch.sie_block->epoch -= *delta;
177                         if (vcpu->arch.cputm_enabled)
178                                 vcpu->arch.cputm_start += *delta;
179                 }
180         }
181         return NOTIFY_OK;
182 }
183
184 static struct notifier_block kvm_clock_notifier = {
185         .notifier_call = kvm_clock_sync,
186 };
187
188 int kvm_arch_hardware_setup(void)
189 {
190         gmap_notifier.notifier_call = kvm_gmap_notifier;
191         gmap_register_pte_notifier(&gmap_notifier);
192         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
193         gmap_register_pte_notifier(&vsie_gmap_notifier);
194         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
195                                        &kvm_clock_notifier);
196         return 0;
197 }
198
199 void kvm_arch_hardware_unsetup(void)
200 {
201         gmap_unregister_pte_notifier(&gmap_notifier);
202         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
203         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
204                                          &kvm_clock_notifier);
205 }
206
207 static void allow_cpu_feat(unsigned long nr)
208 {
209         set_bit_inv(nr, kvm_s390_available_cpu_feat);
210 }
211
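/*
 * Query form of PERFORM LOCKED OPERATION: with bit 0x100 ("test bit") set
 * in the function code, PLO only reports via the condition code whether
 * subfunction 'nr' is installed (cc 0 == installed, anything else == not).
 */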
212 static inline int plo_test_bit(unsigned char nr)
213 {
214         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
215         int cc = 3; /* subfunction not available */
216
217         asm volatile(
218                 /* Parameter registers are ignored for "test bit" */
219                 "       plo     0,0,0,0(0)\n"
220                 "       ipm     %0\n"
221                 "       srl     %0,28\n"
222                 : "=d" (cc)
223                 : "d" (r0)
224                 : "cc");
225         return cc == 0;
226 }
227
228 static void kvm_s390_cpu_feat_init(void)
229 {
230         int i;
231
232         for (i = 0; i < 256; ++i) {
233                 if (plo_test_bit(i))
234                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
235         }
236
237         if (test_facility(28)) /* TOD-clock steering */
238                 etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
239
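/*
 * For each installed message-security-assist extension, store the CPACF
 * query result (the bitmap of installed subfunctions) so it can later be
 * reported to user space via the KVM_S390_VM_CPU_MACHINE_SUBFUNC attribute.
 */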
240         if (test_facility(17)) { /* MSA */
241                 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
242                 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
243                 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
244                 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
245                 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
246         }
247         if (test_facility(76)) /* MSA3 */
248                 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
249         if (test_facility(77)) { /* MSA4 */
250                 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
251                 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
252                 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
253                 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
254         }
255         if (test_facility(57)) /* MSA5 */
256                 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
257
258         if (MACHINE_HAS_ESOP)
259                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
260         /*
261          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
262          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
263          */
264         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
265             !test_facility(3))
266                 return;
267         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
268         if (sclp.has_64bscao)
269                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
270         if (sclp.has_siif)
271                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
272         if (sclp.has_gpere)
273                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
274 }
275
276 int kvm_arch_init(void *opaque)
277 {
278         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
279         if (!kvm_s390_dbf)
280                 return -ENOMEM;
281
282         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
283                 debug_unregister(kvm_s390_dbf);
284                 return -ENOMEM;
285         }
286
287         kvm_s390_cpu_feat_init();
288
289         /* Register floating interrupt controller interface. */
290         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
291 }
292
293 void kvm_arch_exit(void)
294 {
295         debug_unregister(kvm_s390_dbf);
296 }
297
298 /* Section: device related */
299 long kvm_arch_dev_ioctl(struct file *filp,
300                         unsigned int ioctl, unsigned long arg)
301 {
302         if (ioctl == KVM_S390_ENABLE_SIE)
303                 return s390_enable_sie();
304         return -EINVAL;
305 }
306
307 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
308 {
309         int r;
310
311         switch (ext) {
312         case KVM_CAP_S390_PSW:
313         case KVM_CAP_S390_GMAP:
314         case KVM_CAP_SYNC_MMU:
315 #ifdef CONFIG_KVM_S390_UCONTROL
316         case KVM_CAP_S390_UCONTROL:
317 #endif
318         case KVM_CAP_ASYNC_PF:
319         case KVM_CAP_SYNC_REGS:
320         case KVM_CAP_ONE_REG:
321         case KVM_CAP_ENABLE_CAP:
322         case KVM_CAP_S390_CSS_SUPPORT:
323         case KVM_CAP_IOEVENTFD:
324         case KVM_CAP_DEVICE_CTRL:
325         case KVM_CAP_ENABLE_CAP_VM:
326         case KVM_CAP_S390_IRQCHIP:
327         case KVM_CAP_VM_ATTRIBUTES:
328         case KVM_CAP_MP_STATE:
329         case KVM_CAP_S390_INJECT_IRQ:
330         case KVM_CAP_S390_USER_SIGP:
331         case KVM_CAP_S390_USER_STSI:
332         case KVM_CAP_S390_SKEYS:
333         case KVM_CAP_S390_IRQ_STATE:
334                 r = 1;
335                 break;
336         case KVM_CAP_S390_MEM_OP:
337                 r = MEM_OP_MAX_SIZE;
338                 break;
339         case KVM_CAP_NR_VCPUS:
340         case KVM_CAP_MAX_VCPUS:
341                 r = KVM_S390_BSCA_CPU_SLOTS;
342                 if (sclp.has_esca && sclp.has_64bscao)
343                         r = KVM_S390_ESCA_CPU_SLOTS;
344                 break;
345         case KVM_CAP_NR_MEMSLOTS:
346                 r = KVM_USER_MEM_SLOTS;
347                 break;
348         case KVM_CAP_S390_COW:
349                 r = MACHINE_HAS_ESOP;
350                 break;
351         case KVM_CAP_S390_VECTOR_REGISTERS:
352                 r = MACHINE_HAS_VX;
353                 break;
354         case KVM_CAP_S390_RI:
355                 r = test_facility(64);
356                 break;
357         default:
358                 r = 0;
359         }
360         return r;
361 }
362
363 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
364                                         struct kvm_memory_slot *memslot)
365 {
366         gfn_t cur_gfn, last_gfn;
367         unsigned long address;
368         struct gmap *gmap = kvm->arch.gmap;
369
370         /* Loop over all guest pages */
371         last_gfn = memslot->base_gfn + memslot->npages;
372         for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
373                 address = gfn_to_hva_memslot(memslot, cur_gfn);
374
375                 if (test_and_clear_guest_dirty(gmap->mm, address))
376                         mark_page_dirty(kvm, cur_gfn);
377                 if (fatal_signal_pending(current))
378                         return;
379                 cond_resched();
380         }
381 }
382
383 /* Section: vm related */
384 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
385
386 /*
387  * Get (and clear) the dirty memory log for a memory slot.
388  */
389 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
390                                struct kvm_dirty_log *log)
391 {
392         int r;
393         unsigned long n;
394         struct kvm_memslots *slots;
395         struct kvm_memory_slot *memslot;
396         int is_dirty = 0;
397
398         mutex_lock(&kvm->slots_lock);
399
400         r = -EINVAL;
401         if (log->slot >= KVM_USER_MEM_SLOTS)
402                 goto out;
403
404         slots = kvm_memslots(kvm);
405         memslot = id_to_memslot(slots, log->slot);
406         r = -ENOENT;
407         if (!memslot->dirty_bitmap)
408                 goto out;
409
410         kvm_s390_sync_dirty_log(kvm, memslot);
411         r = kvm_get_dirty_log(kvm, log, &is_dirty);
412         if (r)
413                 goto out;
414
415         /* Clear the dirty log */
416         if (is_dirty) {
417                 n = kvm_dirty_bitmap_bytes(memslot);
418                 memset(memslot->dirty_bitmap, 0, n);
419         }
420         r = 0;
421 out:
422         mutex_unlock(&kvm->slots_lock);
423         return r;
424 }
425
426 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
427 {
428         int r;
429
430         if (cap->flags)
431                 return -EINVAL;
432
433         switch (cap->cap) {
434         case KVM_CAP_S390_IRQCHIP:
435                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
436                 kvm->arch.use_irqchip = 1;
437                 r = 0;
438                 break;
439         case KVM_CAP_S390_USER_SIGP:
440                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
441                 kvm->arch.user_sigp = 1;
442                 r = 0;
443                 break;
444         case KVM_CAP_S390_VECTOR_REGISTERS:
445                 mutex_lock(&kvm->lock);
446                 if (kvm->created_vcpus) {
447                         r = -EBUSY;
448                 } else if (MACHINE_HAS_VX) {
449                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
450                         set_kvm_facility(kvm->arch.model.fac_list, 129);
451                         r = 0;
452                 } else
453                         r = -EINVAL;
454                 mutex_unlock(&kvm->lock);
455                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
456                          r ? "(not available)" : "(success)");
457                 break;
458         case KVM_CAP_S390_RI:
459                 r = -EINVAL;
460                 mutex_lock(&kvm->lock);
461                 if (kvm->created_vcpus) {
462                         r = -EBUSY;
463                 } else if (test_facility(64)) {
464                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
465                         set_kvm_facility(kvm->arch.model.fac_list, 64);
466                         r = 0;
467                 }
468                 mutex_unlock(&kvm->lock);
469                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
470                          r ? "(not available)" : "(success)");
471                 break;
472         case KVM_CAP_S390_USER_STSI:
473                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
474                 kvm->arch.user_stsi = 1;
475                 r = 0;
476                 break;
477         default:
478                 r = -EINVAL;
479                 break;
480         }
481         return r;
482 }
483
484 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
485 {
486         int ret;
487
488         switch (attr->attr) {
489         case KVM_S390_VM_MEM_LIMIT_SIZE:
490                 ret = 0;
491                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
492                          kvm->arch.mem_limit);
493                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
494                         ret = -EFAULT;
495                 break;
496         default:
497                 ret = -ENXIO;
498                 break;
499         }
500         return ret;
501 }
502
503 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
504 {
505         int ret;
506         unsigned int idx;
507         switch (attr->attr) {
508         case KVM_S390_VM_MEM_ENABLE_CMMA:
509                 ret = -ENXIO;
510                 if (!sclp.has_cmma)
511                         break;
512
513                 ret = -EBUSY;
514                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
515                 mutex_lock(&kvm->lock);
516                 if (!kvm->created_vcpus) {
517                         kvm->arch.use_cmma = 1;
518                         ret = 0;
519                 }
520                 mutex_unlock(&kvm->lock);
521                 break;
522         case KVM_S390_VM_MEM_CLR_CMMA:
523                 ret = -ENXIO;
524                 if (!sclp.has_cmma)
525                         break;
526                 ret = -EINVAL;
527                 if (!kvm->arch.use_cmma)
528                         break;
529
530                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
531                 mutex_lock(&kvm->lock);
532                 idx = srcu_read_lock(&kvm->srcu);
533                 s390_reset_cmma(kvm->arch.gmap->mm);
534                 srcu_read_unlock(&kvm->srcu, idx);
535                 mutex_unlock(&kvm->lock);
536                 ret = 0;
537                 break;
538         case KVM_S390_VM_MEM_LIMIT_SIZE: {
539                 unsigned long new_limit;
540
541                 if (kvm_is_ucontrol(kvm))
542                         return -EINVAL;
543
544                 if (get_user(new_limit, (u64 __user *)attr->addr))
545                         return -EFAULT;
546
547                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
548                     new_limit > kvm->arch.mem_limit)
549                         return -E2BIG;
550
551                 if (!new_limit)
552                         return -EINVAL;
553
554                 /* gmap_create takes last usable address */
555                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
556                         new_limit -= 1;
557
558                 ret = -EBUSY;
559                 mutex_lock(&kvm->lock);
560                 if (!kvm->created_vcpus) {
561                         /* gmap_create will round the limit up */
562                         struct gmap *new = gmap_create(current->mm, new_limit);
563
564                         if (!new) {
565                                 ret = -ENOMEM;
566                         } else {
567                                 gmap_remove(kvm->arch.gmap);
568                                 new->private = kvm;
569                                 kvm->arch.gmap = new;
570                                 ret = 0;
571                         }
572                 }
573                 mutex_unlock(&kvm->lock);
574                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
575                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
576                          (void *) kvm->arch.gmap->asce);
577                 break;
578         }
579         default:
580                 ret = -ENXIO;
581                 break;
582         }
583         return ret;
584 }
585
586 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
587
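/*
 * Enable or disable AES/DEA protected-key (key wrapping) support. Enabling
 * generates a fresh random wrapping key mask; afterwards every VCPU's crypto
 * control block is refreshed and the VCPU is kicked out of SIE so the change
 * takes effect immediately.
 */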
588 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
589 {
590         struct kvm_vcpu *vcpu;
591         int i;
592
593         if (!test_kvm_facility(kvm, 76))
594                 return -EINVAL;
595
596         mutex_lock(&kvm->lock);
597         switch (attr->attr) {
598         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
599                 get_random_bytes(
600                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
601                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
602                 kvm->arch.crypto.aes_kw = 1;
603                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
604                 break;
605         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
606                 get_random_bytes(
607                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
608                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
609                 kvm->arch.crypto.dea_kw = 1;
610                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
611                 break;
612         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
613                 kvm->arch.crypto.aes_kw = 0;
614                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
615                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
616                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
617                 break;
618         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
619                 kvm->arch.crypto.dea_kw = 0;
620                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
621                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
622                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
623                 break;
624         default:
625                 mutex_unlock(&kvm->lock);
626                 return -ENXIO;
627         }
628
629         kvm_for_each_vcpu(i, vcpu, kvm) {
630                 kvm_s390_vcpu_crypto_setup(vcpu);
631                 exit_sie(vcpu);
632         }
633         mutex_unlock(&kvm->lock);
634         return 0;
635 }
636
637 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
638 {
639         u8 gtod_high;
640
641         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
642                                            sizeof(gtod_high)))
643                 return -EFAULT;
644
645         if (gtod_high != 0)
646                 return -EINVAL;
647         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
648
649         return 0;
650 }
651
652 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
653 {
654         u64 gtod;
655
656         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
657                 return -EFAULT;
658
659         kvm_s390_set_tod_clock(kvm, gtod);
660         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
661         return 0;
662 }
663
664 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
665 {
666         int ret;
667
668         if (attr->flags)
669                 return -EINVAL;
670
671         switch (attr->attr) {
672         case KVM_S390_VM_TOD_HIGH:
673                 ret = kvm_s390_set_tod_high(kvm, attr);
674                 break;
675         case KVM_S390_VM_TOD_LOW:
676                 ret = kvm_s390_set_tod_low(kvm, attr);
677                 break;
678         default:
679                 ret = -ENXIO;
680                 break;
681         }
682         return ret;
683 }
684
685 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
686 {
687         u8 gtod_high = 0;
688
689         if (copy_to_user((void __user *)attr->addr, &gtod_high,
690                                          sizeof(gtod_high)))
691                 return -EFAULT;
692         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
693
694         return 0;
695 }
696
697 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
698 {
699         u64 gtod;
700
701         gtod = kvm_s390_get_tod_clock_fast(kvm);
702         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
703                 return -EFAULT;
704         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
705
706         return 0;
707 }
708
709 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
710 {
711         int ret;
712
713         if (attr->flags)
714                 return -EINVAL;
715
716         switch (attr->attr) {
717         case KVM_S390_VM_TOD_HIGH:
718                 ret = kvm_s390_get_tod_high(kvm, attr);
719                 break;
720         case KVM_S390_VM_TOD_LOW:
721                 ret = kvm_s390_get_tod_low(kvm, attr);
722                 break;
723         default:
724                 ret = -ENXIO;
725                 break;
726         }
727         return ret;
728 }
729
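/*
 * Set the guest CPU model (cpuid, IBC and facility list), only allowed
 * before the first VCPU is created. The requested IBC value is clamped to
 * the range the machine reports via SCLP: bits 16-27 of sclp.ibc give the
 * lowest and bits 0-11 the highest ("unblocked") supported IBC.
 */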
730 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
731 {
732         struct kvm_s390_vm_cpu_processor *proc;
733         u16 lowest_ibc, unblocked_ibc;
734         int ret = 0;
735
736         mutex_lock(&kvm->lock);
737         if (kvm->created_vcpus) {
738                 ret = -EBUSY;
739                 goto out;
740         }
741         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
742         if (!proc) {
743                 ret = -ENOMEM;
744                 goto out;
745         }
746         if (!copy_from_user(proc, (void __user *)attr->addr,
747                             sizeof(*proc))) {
748                 kvm->arch.model.cpuid = proc->cpuid;
749                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
750                 unblocked_ibc = sclp.ibc & 0xfff;
751                 if (lowest_ibc) {
752                         if (proc->ibc > unblocked_ibc)
753                                 kvm->arch.model.ibc = unblocked_ibc;
754                         else if (proc->ibc < lowest_ibc)
755                                 kvm->arch.model.ibc = lowest_ibc;
756                         else
757                                 kvm->arch.model.ibc = proc->ibc;
758                 }
759                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
760                        S390_ARCH_FAC_LIST_SIZE_BYTE);
761         } else
762                 ret = -EFAULT;
763         kfree(proc);
764 out:
765         mutex_unlock(&kvm->lock);
766         return ret;
767 }
768
769 static int kvm_s390_set_processor_feat(struct kvm *kvm,
770                                        struct kvm_device_attr *attr)
771 {
772         struct kvm_s390_vm_cpu_feat data;
773         int ret = -EBUSY;
774
775         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
776                 return -EFAULT;
777         if (!bitmap_subset((unsigned long *) data.feat,
778                            kvm_s390_available_cpu_feat,
779                            KVM_S390_VM_CPU_FEAT_NR_BITS))
780                 return -EINVAL;
781
782         mutex_lock(&kvm->lock);
783         if (!atomic_read(&kvm->online_vcpus)) {
784                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
785                             KVM_S390_VM_CPU_FEAT_NR_BITS);
786                 ret = 0;
787         }
788         mutex_unlock(&kvm->lock);
789         return ret;
790 }
791
792 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
793                                           struct kvm_device_attr *attr)
794 {
795         /*
796          * Once supported by kernel + hw, we have to store the subfunctions
797          * in kvm->arch and remember that user space configured them.
798          */
799         return -ENXIO;
800 }
801
802 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
803 {
804         int ret = -ENXIO;
805
806         switch (attr->attr) {
807         case KVM_S390_VM_CPU_PROCESSOR:
808                 ret = kvm_s390_set_processor(kvm, attr);
809                 break;
810         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
811                 ret = kvm_s390_set_processor_feat(kvm, attr);
812                 break;
813         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
814                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
815                 break;
816         }
817         return ret;
818 }
819
820 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
821 {
822         struct kvm_s390_vm_cpu_processor *proc;
823         int ret = 0;
824
825         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
826         if (!proc) {
827                 ret = -ENOMEM;
828                 goto out;
829         }
830         proc->cpuid = kvm->arch.model.cpuid;
831         proc->ibc = kvm->arch.model.ibc;
832         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
833                S390_ARCH_FAC_LIST_SIZE_BYTE);
834         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
835                 ret = -EFAULT;
836         kfree(proc);
837 out:
838         return ret;
839 }
840
841 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
842 {
843         struct kvm_s390_vm_cpu_machine *mach;
844         int ret = 0;
845
846         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
847         if (!mach) {
848                 ret = -ENOMEM;
849                 goto out;
850         }
851         get_cpu_id((struct cpuid *) &mach->cpuid);
852         mach->ibc = sclp.ibc;
853         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
854                S390_ARCH_FAC_LIST_SIZE_BYTE);
855         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
856                S390_ARCH_FAC_LIST_SIZE_BYTE);
857         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
858                 ret = -EFAULT;
859         kfree(mach);
860 out:
861         return ret;
862 }
863
864 static int kvm_s390_get_processor_feat(struct kvm *kvm,
865                                        struct kvm_device_attr *attr)
866 {
867         struct kvm_s390_vm_cpu_feat data;
868
869         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
870                     KVM_S390_VM_CPU_FEAT_NR_BITS);
871         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
872                 return -EFAULT;
873         return 0;
874 }
875
876 static int kvm_s390_get_machine_feat(struct kvm *kvm,
877                                      struct kvm_device_attr *attr)
878 {
879         struct kvm_s390_vm_cpu_feat data;
880
881         bitmap_copy((unsigned long *) data.feat,
882                     kvm_s390_available_cpu_feat,
883                     KVM_S390_VM_CPU_FEAT_NR_BITS);
884         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
885                 return -EFAULT;
886         return 0;
887 }
888
889 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
890                                           struct kvm_device_attr *attr)
891 {
892         /*
893          * Once we can actually configure subfunctions (kernel + hw support),
894          * we have to check if they were already set by user space, if so copy
895          * them from kvm->arch.
896          */
897         return -ENXIO;
898 }
899
900 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
901                                         struct kvm_device_attr *attr)
902 {
903         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
904             sizeof(struct kvm_s390_vm_cpu_subfunc)))
905                 return -EFAULT;
906         return 0;
907 }
908 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
909 {
910         int ret = -ENXIO;
911
912         switch (attr->attr) {
913         case KVM_S390_VM_CPU_PROCESSOR:
914                 ret = kvm_s390_get_processor(kvm, attr);
915                 break;
916         case KVM_S390_VM_CPU_MACHINE:
917                 ret = kvm_s390_get_machine(kvm, attr);
918                 break;
919         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
920                 ret = kvm_s390_get_processor_feat(kvm, attr);
921                 break;
922         case KVM_S390_VM_CPU_MACHINE_FEAT:
923                 ret = kvm_s390_get_machine_feat(kvm, attr);
924                 break;
925         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
926                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
927                 break;
928         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
929                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
930                 break;
931         }
932         return ret;
933 }
934
935 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
936 {
937         int ret;
938
939         switch (attr->group) {
940         case KVM_S390_VM_MEM_CTRL:
941                 ret = kvm_s390_set_mem_control(kvm, attr);
942                 break;
943         case KVM_S390_VM_TOD:
944                 ret = kvm_s390_set_tod(kvm, attr);
945                 break;
946         case KVM_S390_VM_CPU_MODEL:
947                 ret = kvm_s390_set_cpu_model(kvm, attr);
948                 break;
949         case KVM_S390_VM_CRYPTO:
950                 ret = kvm_s390_vm_set_crypto(kvm, attr);
951                 break;
952         default:
953                 ret = -ENXIO;
954                 break;
955         }
956
957         return ret;
958 }
959
960 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
961 {
962         int ret;
963
964         switch (attr->group) {
965         case KVM_S390_VM_MEM_CTRL:
966                 ret = kvm_s390_get_mem_control(kvm, attr);
967                 break;
968         case KVM_S390_VM_TOD:
969                 ret = kvm_s390_get_tod(kvm, attr);
970                 break;
971         case KVM_S390_VM_CPU_MODEL:
972                 ret = kvm_s390_get_cpu_model(kvm, attr);
973                 break;
974         default:
975                 ret = -ENXIO;
976                 break;
977         }
978
979         return ret;
980 }
981
982 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
983 {
984         int ret;
985
986         switch (attr->group) {
987         case KVM_S390_VM_MEM_CTRL:
988                 switch (attr->attr) {
989                 case KVM_S390_VM_MEM_ENABLE_CMMA:
990                 case KVM_S390_VM_MEM_CLR_CMMA:
991                         ret = sclp.has_cmma ? 0 : -ENXIO;
992                         break;
993                 case KVM_S390_VM_MEM_LIMIT_SIZE:
994                         ret = 0;
995                         break;
996                 default:
997                         ret = -ENXIO;
998                         break;
999                 }
1000                 break;
1001         case KVM_S390_VM_TOD:
1002                 switch (attr->attr) {
1003                 case KVM_S390_VM_TOD_LOW:
1004                 case KVM_S390_VM_TOD_HIGH:
1005                         ret = 0;
1006                         break;
1007                 default:
1008                         ret = -ENXIO;
1009                         break;
1010                 }
1011                 break;
1012         case KVM_S390_VM_CPU_MODEL:
1013                 switch (attr->attr) {
1014                 case KVM_S390_VM_CPU_PROCESSOR:
1015                 case KVM_S390_VM_CPU_MACHINE:
1016                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1017                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1018                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1019                         ret = 0;
1020                         break;
1021                 /* configuring subfunctions is not supported yet */
1022                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1023                 default:
1024                         ret = -ENXIO;
1025                         break;
1026                 }
1027                 break;
1028         case KVM_S390_VM_CRYPTO:
1029                 switch (attr->attr) {
1030                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1031                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1032                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1033                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1034                         ret = 0;
1035                         break;
1036                 default:
1037                         ret = -ENXIO;
1038                         break;
1039                 }
1040                 break;
1041         default:
1042                 ret = -ENXIO;
1043                 break;
1044         }
1045
1046         return ret;
1047 }
1048
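/*
 * KVM_S390_GET_SKEYS: read the guest storage keys for a range of guest
 * frames into a user buffer. If the guest never enabled storage keys,
 * KVM_S390_GET_SKEYS_NONE is returned instead of copying anything.
 */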
1049 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1050 {
1051         uint8_t *keys;
1052         uint64_t hva;
1053         int i, r = 0;
1054
1055         if (args->flags != 0)
1056                 return -EINVAL;
1057
1058         /* Is this guest using storage keys? */
1059         if (!mm_use_skey(current->mm))
1060                 return KVM_S390_GET_SKEYS_NONE;
1061
1062         /* Enforce sane limit on memory allocation */
1063         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1064                 return -EINVAL;
1065
1066         keys = kmalloc_array(args->count, sizeof(uint8_t),
1067                              GFP_KERNEL | __GFP_NOWARN);
1068         if (!keys)
1069                 keys = vmalloc(sizeof(uint8_t) * args->count);
1070         if (!keys)
1071                 return -ENOMEM;
1072
1073         down_read(&current->mm->mmap_sem);
1074         for (i = 0; i < args->count; i++) {
1075                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1076                 if (kvm_is_error_hva(hva)) {
1077                         r = -EFAULT;
1078                         break;
1079                 }
1080
1081                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1082                 if (r)
1083                         break;
1084         }
1085         up_read(&current->mm->mmap_sem);
1086
1087         if (!r) {
1088                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1089                                  sizeof(uint8_t) * args->count);
1090                 if (r)
1091                         r = -EFAULT;
1092         }
1093
1094         kvfree(keys);
1095         return r;
1096 }
1097
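/*
 * KVM_S390_SET_SKEYS: write user-supplied storage keys for a range of guest
 * frames; storage-key handling is enabled for the whole guest on first use
 * via s390_enable_skey(). The lowest bit of each key is reserved and must
 * be zero.
 */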
1098 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1099 {
1100         uint8_t *keys;
1101         uint64_t hva;
1102         int i, r = 0;
1103
1104         if (args->flags != 0)
1105                 return -EINVAL;
1106
1107         /* Enforce sane limit on memory allocation */
1108         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1109                 return -EINVAL;
1110
1111         keys = kmalloc_array(args->count, sizeof(uint8_t),
1112                              GFP_KERNEL | __GFP_NOWARN);
1113         if (!keys)
1114                 keys = vmalloc(sizeof(uint8_t) * args->count);
1115         if (!keys)
1116                 return -ENOMEM;
1117
1118         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1119                            sizeof(uint8_t) * args->count);
1120         if (r) {
1121                 r = -EFAULT;
1122                 goto out;
1123         }
1124
1125         /* Enable storage key handling for the guest */
1126         r = s390_enable_skey();
1127         if (r)
1128                 goto out;
1129
1130         down_read(&current->mm->mmap_sem);
1131         for (i = 0; i < args->count; i++) {
1132                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1133                 if (kvm_is_error_hva(hva)) {
1134                         r = -EFAULT;
1135                         break;
1136                 }
1137
1138                 /* Lowest order bit is reserved */
1139                 if (keys[i] & 0x01) {
1140                         r = -EINVAL;
1141                         break;
1142                 }
1143
1144                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1145                 if (r)
1146                         break;
1147         }
1148         up_read(&current->mm->mmap_sem);
1149 out:
1150         kvfree(keys);
1151         return r;
1152 }
1153
1154 long kvm_arch_vm_ioctl(struct file *filp,
1155                        unsigned int ioctl, unsigned long arg)
1156 {
1157         struct kvm *kvm = filp->private_data;
1158         void __user *argp = (void __user *)arg;
1159         struct kvm_device_attr attr;
1160         int r;
1161
1162         switch (ioctl) {
1163         case KVM_S390_INTERRUPT: {
1164                 struct kvm_s390_interrupt s390int;
1165
1166                 r = -EFAULT;
1167                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1168                         break;
1169                 r = kvm_s390_inject_vm(kvm, &s390int);
1170                 break;
1171         }
1172         case KVM_ENABLE_CAP: {
1173                 struct kvm_enable_cap cap;
1174                 r = -EFAULT;
1175                 if (copy_from_user(&cap, argp, sizeof(cap)))
1176                         break;
1177                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1178                 break;
1179         }
1180         case KVM_CREATE_IRQCHIP: {
1181                 struct kvm_irq_routing_entry routing;
1182
1183                 r = -EINVAL;
1184                 if (kvm->arch.use_irqchip) {
1185                         /* Set up dummy routing. */
1186                         memset(&routing, 0, sizeof(routing));
1187                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1188                 }
1189                 break;
1190         }
1191         case KVM_SET_DEVICE_ATTR: {
1192                 r = -EFAULT;
1193                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1194                         break;
1195                 r = kvm_s390_vm_set_attr(kvm, &attr);
1196                 break;
1197         }
1198         case KVM_GET_DEVICE_ATTR: {
1199                 r = -EFAULT;
1200                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1201                         break;
1202                 r = kvm_s390_vm_get_attr(kvm, &attr);
1203                 break;
1204         }
1205         case KVM_HAS_DEVICE_ATTR: {
1206                 r = -EFAULT;
1207                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1208                         break;
1209                 r = kvm_s390_vm_has_attr(kvm, &attr);
1210                 break;
1211         }
1212         case KVM_S390_GET_SKEYS: {
1213                 struct kvm_s390_skeys args;
1214
1215                 r = -EFAULT;
1216                 if (copy_from_user(&args, argp,
1217                                    sizeof(struct kvm_s390_skeys)))
1218                         break;
1219                 r = kvm_s390_get_skeys(kvm, &args);
1220                 break;
1221         }
1222         case KVM_S390_SET_SKEYS: {
1223                 struct kvm_s390_skeys args;
1224
1225                 r = -EFAULT;
1226                 if (copy_from_user(&args, argp,
1227                                    sizeof(struct kvm_s390_skeys)))
1228                         break;
1229                 r = kvm_s390_set_skeys(kvm, &args);
1230                 break;
1231         }
1232         default:
1233                 r = -ENOTTY;
1234         }
1235
1236         return r;
1237 }
1238
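/*
 * Query the AP (adjunct processor / crypto card) configuration with
 * PQAP(QCI): the QCI function code goes into GR0, the address of the
 * 128-byte result buffer into GR2. kvm_s390_apxa_installed() below uses
 * the result to test for the APXA facility, which selects CRYCB format 2
 * over format 1.
 */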
1239 static int kvm_s390_query_ap_config(u8 *config)
1240 {
1241         u32 fcn_code = 0x04000000UL;
1242         u32 cc = 0;
1243
1244         memset(config, 0, 128);
1245         asm volatile(
1246                 "lgr 0,%1\n"
1247                 "lgr 2,%2\n"
1248                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1249                 "0: ipm %0\n"
1250                 "srl %0,28\n"
1251                 "1:\n"
1252                 EX_TABLE(0b, 1b)
1253                 : "+r" (cc)
1254                 : "r" (fcn_code), "r" (config)
1255                 : "cc", "0", "2", "memory"
1256         );
1257
1258         return cc;
1259 }
1260
1261 static int kvm_s390_apxa_installed(void)
1262 {
1263         u8 config[128];
1264         int cc;
1265
1266         if (test_facility(12)) {
1267                 cc = kvm_s390_query_ap_config(config);
1268
1269                 if (cc)
1270                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1271                 else
1272                         return config[0] & 0x40;
1273         }
1274
1275         return 0;
1276 }
1277
1278 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1279 {
1280         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1281
1282         if (kvm_s390_apxa_installed())
1283                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1284         else
1285                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1286 }
1287
1288 static u64 kvm_s390_get_initial_cpuid(void)
1289 {
1290         struct cpuid cpuid;
1291
1292         get_cpu_id(&cpuid);
1293         cpuid.version = 0xff;
1294         return *((u64 *) &cpuid);
1295 }
1296
1297 static void kvm_s390_crypto_init(struct kvm *kvm)
1298 {
1299         if (!test_kvm_facility(kvm, 76))
1300                 return;
1301
1302         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1303         kvm_s390_set_crycb_format(kvm);
1304
1305         /* Enable AES/DEA protected key functions by default */
1306         kvm->arch.crypto.aes_kw = 1;
1307         kvm->arch.crypto.dea_kw = 1;
1308         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1309                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1310         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1311                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1312 }
1313
1314 static void sca_dispose(struct kvm *kvm)
1315 {
1316         if (kvm->arch.use_esca)
1317                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1318         else
1319                 free_page((unsigned long)(kvm->arch.sca));
1320         kvm->arch.sca = NULL;
1321 }
1322
1323 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1324 {
1325         gfp_t alloc_flags = GFP_KERNEL;
1326         int i, rc;
1327         char debug_name[16];
1328         static unsigned long sca_offset;
1329
1330         rc = -EINVAL;
1331 #ifdef CONFIG_KVM_S390_UCONTROL
1332         if (type & ~KVM_VM_S390_UCONTROL)
1333                 goto out_err;
1334         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1335                 goto out_err;
1336 #else
1337         if (type)
1338                 goto out_err;
1339 #endif
1340
1341         rc = s390_enable_sie();
1342         if (rc)
1343                 goto out_err;
1344
1345         rc = -ENOMEM;
1346
1347         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1348
1349         kvm->arch.use_esca = 0; /* start with basic SCA */
1350         if (!sclp.has_64bscao)
1351                 alloc_flags |= GFP_DMA;
1352         rwlock_init(&kvm->arch.sca_lock);
1353         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1354         if (!kvm->arch.sca)
1355                 goto out_err;
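        /*
         * Stagger the SCA origin of successive VMs within the page in
         * 16-byte steps, presumably to spread the SCAs of different VMs
         * across cache lines; sca_offset is protected by the global kvm_lock.
         */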
1356         spin_lock(&kvm_lock);
1357         sca_offset += 16;
1358         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1359                 sca_offset = 0;
1360         kvm->arch.sca = (struct bsca_block *)
1361                         ((char *) kvm->arch.sca + sca_offset);
1362         spin_unlock(&kvm_lock);
1363
1364         sprintf(debug_name, "kvm-%u", current->pid);
1365
1366         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1367         if (!kvm->arch.dbf)
1368                 goto out_err;
1369
1370         kvm->arch.sie_page2 =
1371              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1372         if (!kvm->arch.sie_page2)
1373                 goto out_err;
1374
1375         /* Populate the facility mask initially. */
1376         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1377                S390_ARCH_FAC_LIST_SIZE_BYTE);
1378         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1379                 if (i < kvm_s390_fac_list_mask_size())
1380                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1381                 else
1382                         kvm->arch.model.fac_mask[i] = 0UL;
1383         }
1384
1385         /* Populate the facility list initially. */
1386         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1387         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1388                S390_ARCH_FAC_LIST_SIZE_BYTE);
1389
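        /*
         * Facility 74 (store-hypervisor-information) is handled by KVM's
         * STHYI emulation (see the sthyi rate limit above), so always offer it.
         */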
1390         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1391         set_kvm_facility(kvm->arch.model.fac_list, 74);
1392
1393         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1394         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1395
1396         kvm_s390_crypto_init(kvm);
1397
1398         spin_lock_init(&kvm->arch.float_int.lock);
1399         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1400                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1401         init_waitqueue_head(&kvm->arch.ipte_wq);
1402         mutex_init(&kvm->arch.ipte_mutex);
1403
1404         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1405         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1406
1407         if (type & KVM_VM_S390_UCONTROL) {
1408                 kvm->arch.gmap = NULL;
1409                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1410         } else {
1411                 if (sclp.hamax == U64_MAX)
1412                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1413                 else
1414                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1415                                                     sclp.hamax + 1);
1416                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1417                 if (!kvm->arch.gmap)
1418                         goto out_err;
1419                 kvm->arch.gmap->private = kvm;
1420                 kvm->arch.gmap->pfault_enabled = 0;
1421         }
1422
1423         kvm->arch.css_support = 0;
1424         kvm->arch.use_irqchip = 0;
1425         kvm->arch.epoch = 0;
1426
1427         spin_lock_init(&kvm->arch.start_stop_lock);
1428         kvm_s390_vsie_init(kvm);
1429         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1430
1431         return 0;
1432 out_err:
1433         free_page((unsigned long)kvm->arch.sie_page2);
1434         debug_unregister(kvm->arch.dbf);
1435         sca_dispose(kvm);
1436         KVM_EVENT(3, "creation of vm failed: %d", rc);
1437         return rc;
1438 }
1439
1440 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1441 {
1442         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1443         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1444         kvm_s390_clear_local_irqs(vcpu);
1445         kvm_clear_async_pf_completion_queue(vcpu);
1446         if (!kvm_is_ucontrol(vcpu->kvm))
1447                 sca_del_vcpu(vcpu);
1448
1449         if (kvm_is_ucontrol(vcpu->kvm))
1450                 gmap_remove(vcpu->arch.gmap);
1451
1452         if (vcpu->kvm->arch.use_cmma)
1453                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1454         free_page((unsigned long)(vcpu->arch.sie_block));
1455
1456         kvm_vcpu_uninit(vcpu);
1457         kmem_cache_free(kvm_vcpu_cache, vcpu);
1458 }
1459
1460 static void kvm_free_vcpus(struct kvm *kvm)
1461 {
1462         unsigned int i;
1463         struct kvm_vcpu *vcpu;
1464
1465         kvm_for_each_vcpu(i, vcpu, kvm)
1466                 kvm_arch_vcpu_destroy(vcpu);
1467
1468         mutex_lock(&kvm->lock);
1469         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1470                 kvm->vcpus[i] = NULL;
1471
1472         atomic_set(&kvm->online_vcpus, 0);
1473         mutex_unlock(&kvm->lock);
1474 }
1475
1476 void kvm_arch_destroy_vm(struct kvm *kvm)
1477 {
1478         kvm_free_vcpus(kvm);
1479         sca_dispose(kvm);
1480         debug_unregister(kvm->arch.dbf);
1481         free_page((unsigned long)kvm->arch.sie_page2);
1482         if (!kvm_is_ucontrol(kvm))
1483                 gmap_remove(kvm->arch.gmap);
1484         kvm_s390_destroy_adapters(kvm);
1485         kvm_s390_clear_float_irqs(kvm);
1486         kvm_s390_vsie_destroy(kvm);
1487         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1488 }
1489
1490 /* Section: vcpu related */
1491 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1492 {
1493         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1494         if (!vcpu->arch.gmap)
1495                 return -ENOMEM;
1496         vcpu->arch.gmap->private = vcpu->kvm;
1497
1498         return 0;
1499 }
1500
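/*
 * The SCA (system control area) is shared with the SIE hardware and holds
 * one entry per VCPU plus the mcn bitmap of entries in use. The basic SCA
 * (bsca) is limited to KVM_S390_BSCA_CPU_SLOTS VCPUs; the extended SCA
 * (esca) raises that limit to KVM_S390_ESCA_CPU_SLOTS and is switched to
 * on demand by sca_switch_to_extended().
 */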
1501 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1502 {
1503         read_lock(&vcpu->kvm->arch.sca_lock);
1504         if (vcpu->kvm->arch.use_esca) {
1505                 struct esca_block *sca = vcpu->kvm->arch.sca;
1506
1507                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1508                 sca->cpu[vcpu->vcpu_id].sda = 0;
1509         } else {
1510                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1511
1512                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1513                 sca->cpu[vcpu->vcpu_id].sda = 0;
1514         }
1515         read_unlock(&vcpu->kvm->arch.sca_lock);
1516 }
1517
1518 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1519 {
1520         read_lock(&vcpu->kvm->arch.sca_lock);
1521         if (vcpu->kvm->arch.use_esca) {
1522                 struct esca_block *sca = vcpu->kvm->arch.sca;
1523
1524                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1525                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1526                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1527                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1528                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1529         } else {
1530                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1531
1532                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1533                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1534                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1535                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1536         }
1537         read_unlock(&vcpu->kvm->arch.sca_lock);
1538 }
1539
1540 /* Basic SCA to Extended SCA data copy routines */
1541 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1542 {
1543         d->sda = s->sda;
1544         d->sigp_ctrl.c = s->sigp_ctrl.c;
1545         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1546 }
1547
1548 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1549 {
1550         int i;
1551
1552         d->ipte_control = s->ipte_control;
1553         d->mcn[0] = s->mcn;
1554         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1555                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1556 }
1557
1558 static int sca_switch_to_extended(struct kvm *kvm)
1559 {
1560         struct bsca_block *old_sca = kvm->arch.sca;
1561         struct esca_block *new_sca;
1562         struct kvm_vcpu *vcpu;
1563         unsigned int vcpu_idx;
1564         u32 scaol, scaoh;
1565
1566         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1567         if (!new_sca)
1568                 return -ENOMEM;
1569
1570         scaoh = (u32)((u64)(new_sca) >> 32);
1571         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1572
1573         kvm_s390_vcpu_block_all(kvm);
1574         write_lock(&kvm->arch.sca_lock);
1575
1576         sca_copy_b_to_e(new_sca, old_sca);
1577
1578         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1579                 vcpu->arch.sie_block->scaoh = scaoh;
1580                 vcpu->arch.sie_block->scaol = scaol;
1581                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1582         }
1583         kvm->arch.sca = new_sca;
1584         kvm->arch.use_esca = 1;
1585
1586         write_unlock(&kvm->arch.sca_lock);
1587         kvm_s390_vcpu_unblock_all(kvm);
1588
1589         free_page((unsigned long)old_sca);
1590
1591         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1592                  old_sca, kvm->arch.sca);
1593         return 0;
1594 }
1595
1596 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1597 {
1598         int rc;
1599
1600         if (id < KVM_S390_BSCA_CPU_SLOTS)
1601                 return true;
1602         if (!sclp.has_esca || !sclp.has_64bscao)
1603                 return false;
1604
1605         mutex_lock(&kvm->lock);
1606         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1607         mutex_unlock(&kvm->lock);
1608
1609         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1610 }
1611
1612 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1613 {
1614         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1615         kvm_clear_async_pf_completion_queue(vcpu);
1616         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1617                                     KVM_SYNC_GPRS |
1618                                     KVM_SYNC_ACRS |
1619                                     KVM_SYNC_CRS |
1620                                     KVM_SYNC_ARCH0 |
1621                                     KVM_SYNC_PFAULT;
1622         if (test_kvm_facility(vcpu->kvm, 64))
1623                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1624         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1625          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1626          */
1627         if (MACHINE_HAS_VX)
1628                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1629         else
1630                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1631
1632         if (kvm_is_ucontrol(vcpu->kvm))
1633                 return __kvm_ucontrol_vcpu_init(vcpu);
1634
1635         return 0;
1636 }
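/*
 * Note on the sync-reg interface set up above: the KVM_SYNC_* bits in
 * run->kvm_valid_regs advertise which parts of run->s.regs the kernel keeps
 * up to date across KVM_RUN, and run->kvm_dirty_regs lets userspace push its
 * own changes back in (see sync_regs()/store_regs() further down).  A hedged
 * userspace sketch, assuming an illustrative vcpu_fd and the mmap'ed
 * struct kvm_run *run:
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 */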
1637
1638 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1639 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1640 {
1641         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1642         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1643         vcpu->arch.cputm_start = get_tod_clock_fast();
1644         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1645 }
1646
1647 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1648 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1649 {
1650         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1651         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1652         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1653         vcpu->arch.cputm_start = 0;
1654         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1655 }
1656
1657 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1658 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1659 {
1660         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1661         vcpu->arch.cputm_enabled = true;
1662         __start_cpu_timer_accounting(vcpu);
1663 }
1664
1665 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1666 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1667 {
1668         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1669         __stop_cpu_timer_accounting(vcpu);
1670         vcpu->arch.cputm_enabled = false;
1671 }
1672
1673 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1674 {
1675         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1676         __enable_cpu_timer_accounting(vcpu);
1677         preempt_enable();
1678 }
1679
1680 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1681 {
1682         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1683         __disable_cpu_timer_accounting(vcpu);
1684         preempt_enable();
1685 }
1686
1687 /* set the cpu timer - may only be called from the VCPU thread itself */
1688 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1689 {
1690         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1691         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1692         if (vcpu->arch.cputm_enabled)
1693                 vcpu->arch.cputm_start = get_tod_clock_fast();
1694         vcpu->arch.sie_block->cputm = cputm;
1695         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1696         preempt_enable();
1697 }
1698
1699 /* update and get the cpu timer - can also be called from other VCPU threads */
1700 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1701 {
1702         unsigned int seq;
1703         __u64 value;
1704
1705         if (unlikely(!vcpu->arch.cputm_enabled))
1706                 return vcpu->arch.sie_block->cputm;
1707
1708         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1709         do {
1710                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1711                 /*
1712                  * If the writer would ever execute a read in the critical
1713                  * section, e.g. in irq context, we have a deadlock.
1714                  */
1715                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1716                 value = vcpu->arch.sie_block->cputm;
1717                 /* if cputm_start is 0, accounting is being started/stopped */
1718                 if (likely(vcpu->arch.cputm_start))
1719                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1720         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1721         preempt_enable();
1722         return value;
1723 }
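/*
 * The cpu timer above is published through a seqcount with a single writer:
 * only the VCPU thread updates cputm/cputm_start, bracketed by
 * raw_write_seqcount_begin()/raw_write_seqcount_end() (see
 * kvm_s390_set_cpu_timer() and the __start/__stop helpers), while other
 * threads sample it with the retry loop in kvm_s390_get_cpu_timer().  A
 * minimal sketch of that reader pattern, assuming a seqcount_t sc guarding
 * a shared u64 val (the "& ~1" also forces a retry while a write is in
 * flight):
 *
 *	unsigned int seq;
 *	u64 snapshot;
 *
 *	do {
 *		seq = raw_read_seqcount(&sc);
 *		snapshot = val;
 *	} while (read_seqcount_retry(&sc, seq & ~1));
 */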
1724
1725 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1726 {
1727         /* Save host register state */
1728         save_fpu_regs();
1729         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1730         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1731
1732         if (MACHINE_HAS_VX)
1733                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1734         else
1735                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1736         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1737         if (test_fp_ctl(current->thread.fpu.fpc))
1738                 /* User space provided an invalid FPC, let's clear it */
1739                 current->thread.fpu.fpc = 0;
1740
1741         save_access_regs(vcpu->arch.host_acrs);
1742         restore_access_regs(vcpu->run->s.regs.acrs);
1743         gmap_enable(vcpu->arch.enabled_gmap);
1744         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1745         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1746                 __start_cpu_timer_accounting(vcpu);
1747         vcpu->cpu = cpu;
1748 }
1749
1750 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1751 {
1752         vcpu->cpu = -1;
1753         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1754                 __stop_cpu_timer_accounting(vcpu);
1755         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1756         vcpu->arch.enabled_gmap = gmap_get_enabled();
1757         gmap_disable(vcpu->arch.enabled_gmap);
1758
1759         /* Save guest register state */
1760         save_fpu_regs();
1761         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1762
1763         /* Restore host register state */
1764         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1765         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1766
1767         save_access_regs(vcpu->run->s.regs.acrs);
1768         restore_access_regs(vcpu->arch.host_acrs);
1769 }
1770
1771 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1772 {
1773         /* this equals initial cpu reset in POP, but we don't switch to ESA */
1774         vcpu->arch.sie_block->gpsw.mask = 0UL;
1775         vcpu->arch.sie_block->gpsw.addr = 0UL;
1776         kvm_s390_set_prefix(vcpu, 0);
1777         kvm_s390_set_cpu_timer(vcpu, 0);
1778         vcpu->arch.sie_block->ckc       = 0UL;
1779         vcpu->arch.sie_block->todpr     = 0;
1780         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1781         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1782         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1783         /* make sure the new fpc will be lazily loaded */
1784         save_fpu_regs();
1785         current->thread.fpu.fpc = 0;
1786         vcpu->arch.sie_block->gbea = 1;
1787         vcpu->arch.sie_block->pp = 0;
1788         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1789         kvm_clear_async_pf_completion_queue(vcpu);
1790         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1791                 kvm_s390_vcpu_stop(vcpu);
1792         kvm_s390_clear_local_irqs(vcpu);
1793 }
1794
1795 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1796 {
1797         mutex_lock(&vcpu->kvm->lock);
1798         preempt_disable();
1799         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1800         preempt_enable();
1801         mutex_unlock(&vcpu->kvm->lock);
1802         if (!kvm_is_ucontrol(vcpu->kvm)) {
1803                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1804                 sca_add_vcpu(vcpu);
1805         }
1806         /* make vcpu_load load the right gmap on the first trigger */
1807         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1808 }
1809
1810 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1811 {
1812         if (!test_kvm_facility(vcpu->kvm, 76))
1813                 return;
1814
1815         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1816
1817         if (vcpu->kvm->arch.crypto.aes_kw)
1818                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1819         if (vcpu->kvm->arch.crypto.dea_kw)
1820                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1821
1822         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1823 }
1824
1825 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1826 {
1827         free_page(vcpu->arch.sie_block->cbrlo);
1828         vcpu->arch.sie_block->cbrlo = 0;
1829 }
1830
1831 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1832 {
1833         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1834         if (!vcpu->arch.sie_block->cbrlo)
1835                 return -ENOMEM;
1836
1837         vcpu->arch.sie_block->ecb2 |= 0x80;
1838         vcpu->arch.sie_block->ecb2 &= ~0x08;
1839         return 0;
1840 }
1841
1842 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1843 {
1844         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1845
1846         vcpu->arch.sie_block->ibc = model->ibc;
1847         if (test_kvm_facility(vcpu->kvm, 7))
1848                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1849 }
1850
1851 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1852 {
1853         int rc = 0;
1854
1855         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1856                                                     CPUSTAT_SM |
1857                                                     CPUSTAT_STOPPED);
1858
1859         if (test_kvm_facility(vcpu->kvm, 78))
1860                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1861         else if (test_kvm_facility(vcpu->kvm, 8))
1862                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1863
1864         kvm_s390_vcpu_setup_model(vcpu);
1865
1866         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1867         if (MACHINE_HAS_ESOP)
1868                 vcpu->arch.sie_block->ecb |= 0x02;
1869         if (test_kvm_facility(vcpu->kvm, 9))
1870                 vcpu->arch.sie_block->ecb |= 0x04;
1871         if (test_kvm_facility(vcpu->kvm, 73))
1872                 vcpu->arch.sie_block->ecb |= 0x10;
1873
1874         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1875                 vcpu->arch.sie_block->ecb2 |= 0x08;
1876         vcpu->arch.sie_block->eca = 0x1002000U;
1877         if (sclp.has_cei)
1878                 vcpu->arch.sie_block->eca |= 0x80000000U;
1879         if (sclp.has_ib)
1880                 vcpu->arch.sie_block->eca |= 0x40000000U;
1881         if (sclp.has_siif)
1882                 vcpu->arch.sie_block->eca |= 1;
1883         if (sclp.has_sigpif)
1884                 vcpu->arch.sie_block->eca |= 0x10000000U;
1885         if (test_kvm_facility(vcpu->kvm, 64))
1886                 vcpu->arch.sie_block->ecb3 |= 0x01;
1887         if (test_kvm_facility(vcpu->kvm, 129)) {
1888                 vcpu->arch.sie_block->eca |= 0x00020000;
1889                 vcpu->arch.sie_block->ecd |= 0x20000000;
1890         }
1891         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1892         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1893         if (test_kvm_facility(vcpu->kvm, 74))
1894                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1895
1896         if (vcpu->kvm->arch.use_cmma) {
1897                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1898                 if (rc)
1899                         return rc;
1900         }
1901         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1902         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1903
1904         kvm_s390_vcpu_crypto_setup(vcpu);
1905
1906         return rc;
1907 }
1908
1909 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1910                                       unsigned int id)
1911 {
1912         struct kvm_vcpu *vcpu;
1913         struct sie_page *sie_page;
1914         int rc = -EINVAL;
1915
1916         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1917                 goto out;
1918
1919         rc = -ENOMEM;
1920
1921         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1922         if (!vcpu)
1923                 goto out;
1924
1925         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1926         if (!sie_page)
1927                 goto out_free_cpu;
1928
1929         vcpu->arch.sie_block = &sie_page->sie_block;
1930         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1931
1932         /* the real guest size will always be smaller than msl */
1933         vcpu->arch.sie_block->mso = 0;
1934         vcpu->arch.sie_block->msl = sclp.hamax;
1935
1936         vcpu->arch.sie_block->icpua = id;
1937         spin_lock_init(&vcpu->arch.local_int.lock);
1938         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1939         vcpu->arch.local_int.wq = &vcpu->wq;
1940         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1941         seqcount_init(&vcpu->arch.cputm_seqcount);
1942
1943         rc = kvm_vcpu_init(vcpu, kvm, id);
1944         if (rc)
1945                 goto out_free_sie_block;
1946         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1947                  vcpu->arch.sie_block);
1948         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1949
1950         return vcpu;
1951 out_free_sie_block:
1952         free_page((unsigned long)(vcpu->arch.sie_block));
1953 out_free_cpu:
1954         kmem_cache_free(kvm_vcpu_cache, vcpu);
1955 out:
1956         return ERR_PTR(rc);
1957 }
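/*
 * kvm_arch_vcpu_create() above is reached through the KVM_CREATE_VCPU ioctl
 * on a VM fd.  A hedged userspace sketch (vm_fd and create_vcpu() are
 * illustrative; the ioctl itself is the regular KVM ABI):
 */
#if 0	/* illustrative userspace example, not built as part of this file */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_vcpu(int vm_fd, unsigned int id)
{
	/* fails (errno == EINVAL) once the (E)SCA cannot hold another vcpu */
	return ioctl(vm_fd, KVM_CREATE_VCPU, id);
}
#endif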
1958
1959 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1960 {
1961         return kvm_s390_vcpu_has_irq(vcpu, 0);
1962 }
1963
1964 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1965 {
1966         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1967         exit_sie(vcpu);
1968 }
1969
1970 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1971 {
1972         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1973 }
1974
1975 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1976 {
1977         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1978         exit_sie(vcpu);
1979 }
1980
1981 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1982 {
1983         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1984 }
1985
1986 /*
1987  * Kick a guest cpu out of SIE and wait until SIE is not running.
1988  * If the CPU is not running (e.g. waiting as idle) the function will
1989  * return immediately. */
1990 void exit_sie(struct kvm_vcpu *vcpu)
1991 {
1992         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1993         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1994                 cpu_relax();
1995 }
1996
1997 /* Kick a guest cpu out of SIE to process a request synchronously */
1998 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1999 {
2000         kvm_make_request(req, vcpu);
2001         kvm_s390_vcpu_request(vcpu);
2002 }
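/*
 * Typical flow through the request machinery above, e.g. for disabling IBS
 * (a sketch of existing call paths, not additional code):
 *
 *	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
 *		-> kvm_make_request()		sets the request bit
 *		-> kvm_s390_vcpu_request()	sets PROG_REQUEST, kicks via exit_sie()
 *	the VCPU drops out of SIE; on the next entry vcpu_pre_run() calls
 *	kvm_s390_handle_requests(), which clears CPUSTAT_IBS and acknowledges
 *	the request through kvm_s390_vcpu_request_handled().
 */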
2003
2004 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2005                               unsigned long end)
2006 {
2007         struct kvm *kvm = gmap->private;
2008         struct kvm_vcpu *vcpu;
2009         unsigned long prefix;
2010         int i;
2011
2012         if (gmap_is_shadow(gmap))
2013                 return;
2014         if (start >= 1UL << 31)
2015                 /* We are only interested in prefix pages */
2016                 return;
2017         kvm_for_each_vcpu(i, vcpu, kvm) {
2018                 /* match against both prefix pages */
2019                 prefix = kvm_s390_get_prefix(vcpu);
2020                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2021                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2022                                    start, end);
2023                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2024                 }
2025         }
2026 }
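/*
 * The prefix area spans two pages, hence the "prefix + 2*PAGE_SIZE - 1"
 * upper bound above: a vcpu with prefix 0x8000, for instance, gets a
 * KVM_REQ_MMU_RELOAD for any notification overlapping guest absolute
 * 0x8000 - 0x9fff.
 */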
2027
2028 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2029 {
2030         /* kvm common code refers to this, but never calls it */
2031         BUG();
2032         return 0;
2033 }
2034
2035 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2036                                            struct kvm_one_reg *reg)
2037 {
2038         int r = -EINVAL;
2039
2040         switch (reg->id) {
2041         case KVM_REG_S390_TODPR:
2042                 r = put_user(vcpu->arch.sie_block->todpr,
2043                              (u32 __user *)reg->addr);
2044                 break;
2045         case KVM_REG_S390_EPOCHDIFF:
2046                 r = put_user(vcpu->arch.sie_block->epoch,
2047                              (u64 __user *)reg->addr);
2048                 break;
2049         case KVM_REG_S390_CPU_TIMER:
2050                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2051                              (u64 __user *)reg->addr);
2052                 break;
2053         case KVM_REG_S390_CLOCK_COMP:
2054                 r = put_user(vcpu->arch.sie_block->ckc,
2055                              (u64 __user *)reg->addr);
2056                 break;
2057         case KVM_REG_S390_PFTOKEN:
2058                 r = put_user(vcpu->arch.pfault_token,
2059                              (u64 __user *)reg->addr);
2060                 break;
2061         case KVM_REG_S390_PFCOMPARE:
2062                 r = put_user(vcpu->arch.pfault_compare,
2063                              (u64 __user *)reg->addr);
2064                 break;
2065         case KVM_REG_S390_PFSELECT:
2066                 r = put_user(vcpu->arch.pfault_select,
2067                              (u64 __user *)reg->addr);
2068                 break;
2069         case KVM_REG_S390_PP:
2070                 r = put_user(vcpu->arch.sie_block->pp,
2071                              (u64 __user *)reg->addr);
2072                 break;
2073         case KVM_REG_S390_GBEA:
2074                 r = put_user(vcpu->arch.sie_block->gbea,
2075                              (u64 __user *)reg->addr);
2076                 break;
2077         default:
2078                 break;
2079         }
2080
2081         return r;
2082 }
2083
2084 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2085                                            struct kvm_one_reg *reg)
2086 {
2087         int r = -EINVAL;
2088         __u64 val;
2089
2090         switch (reg->id) {
2091         case KVM_REG_S390_TODPR:
2092                 r = get_user(vcpu->arch.sie_block->todpr,
2093                              (u32 __user *)reg->addr);
2094                 break;
2095         case KVM_REG_S390_EPOCHDIFF:
2096                 r = get_user(vcpu->arch.sie_block->epoch,
2097                              (u64 __user *)reg->addr);
2098                 break;
2099         case KVM_REG_S390_CPU_TIMER:
2100                 r = get_user(val, (u64 __user *)reg->addr);
2101                 if (!r)
2102                         kvm_s390_set_cpu_timer(vcpu, val);
2103                 break;
2104         case KVM_REG_S390_CLOCK_COMP:
2105                 r = get_user(vcpu->arch.sie_block->ckc,
2106                              (u64 __user *)reg->addr);
2107                 break;
2108         case KVM_REG_S390_PFTOKEN:
2109                 r = get_user(vcpu->arch.pfault_token,
2110                              (u64 __user *)reg->addr);
2111                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2112                         kvm_clear_async_pf_completion_queue(vcpu);
2113                 break;
2114         case KVM_REG_S390_PFCOMPARE:
2115                 r = get_user(vcpu->arch.pfault_compare,
2116                              (u64 __user *)reg->addr);
2117                 break;
2118         case KVM_REG_S390_PFSELECT:
2119                 r = get_user(vcpu->arch.pfault_select,
2120                              (u64 __user *)reg->addr);
2121                 break;
2122         case KVM_REG_S390_PP:
2123                 r = get_user(vcpu->arch.sie_block->pp,
2124                              (u64 __user *)reg->addr);
2125                 break;
2126         case KVM_REG_S390_GBEA:
2127                 r = get_user(vcpu->arch.sie_block->gbea,
2128                              (u64 __user *)reg->addr);
2129                 break;
2130         default:
2131                 break;
2132         }
2133
2134         return r;
2135 }
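/*
 * The two handlers above back the generic KVM_GET_ONE_REG/KVM_SET_ONE_REG
 * vcpu ioctls.  A hedged userspace sketch (vcpu_fd is illustrative; the
 * struct and register id are the regular KVM ABI):
 */
#if 0	/* illustrative userspace example, not built as part of this file */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_guest_cpu_timer(int vcpu_fd, __u64 value)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_CPU_TIMER,
		.addr = (__u64)(unsigned long)&value,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
#endif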
2136
2137 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2138 {
2139         kvm_s390_vcpu_initial_reset(vcpu);
2140         return 0;
2141 }
2142
2143 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2144 {
2145         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2146         return 0;
2147 }
2148
2149 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2150 {
2151         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2152         return 0;
2153 }
2154
2155 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2156                                   struct kvm_sregs *sregs)
2157 {
2158         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2159         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2160         restore_access_regs(vcpu->run->s.regs.acrs);
2161         return 0;
2162 }
2163
2164 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2165                                   struct kvm_sregs *sregs)
2166 {
2167         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2168         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2169         return 0;
2170 }
2171
2172 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2173 {
2174         /* make sure the new values will be lazily loaded */
2175         save_fpu_regs();
2176         if (test_fp_ctl(fpu->fpc))
2177                 return -EINVAL;
2178         current->thread.fpu.fpc = fpu->fpc;
2179         if (MACHINE_HAS_VX)
2180                 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2181         else
2182                 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2183         return 0;
2184 }
2185
2186 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2187 {
2188         /* make sure we have the latest values */
2189         save_fpu_regs();
2190         if (MACHINE_HAS_VX)
2191                 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2192         else
2193                 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2194         fpu->fpc = current->thread.fpu.fpc;
2195         return 0;
2196 }
2197
2198 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2199 {
2200         int rc = 0;
2201
2202         if (!is_vcpu_stopped(vcpu))
2203                 rc = -EBUSY;
2204         else {
2205                 vcpu->run->psw_mask = psw.mask;
2206                 vcpu->run->psw_addr = psw.addr;
2207         }
2208         return rc;
2209 }
2210
2211 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2212                                   struct kvm_translation *tr)
2213 {
2214         return -EINVAL; /* not implemented yet */
2215 }
2216
2217 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2218                               KVM_GUESTDBG_USE_HW_BP | \
2219                               KVM_GUESTDBG_ENABLE)
2220
2221 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2222                                         struct kvm_guest_debug *dbg)
2223 {
2224         int rc = 0;
2225
2226         vcpu->guest_debug = 0;
2227         kvm_s390_clear_bp_data(vcpu);
2228
2229         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2230                 return -EINVAL;
2231         if (!sclp.has_gpere)
2232                 return -EINVAL;
2233
2234         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2235                 vcpu->guest_debug = dbg->control;
2236                 /* enforce guest PER */
2237                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2238
2239                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2240                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2241         } else {
2242                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2243                 vcpu->arch.guestdbg.last_bp = 0;
2244         }
2245
2246         if (rc) {
2247                 vcpu->guest_debug = 0;
2248                 kvm_s390_clear_bp_data(vcpu);
2249                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2250         }
2251
2252         return rc;
2253 }
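/*
 * kvm_arch_vcpu_ioctl_set_guest_debug() above is driven by the
 * KVM_SET_GUEST_DEBUG vcpu ioctl.  A hedged userspace sketch enabling
 * single-stepping (vcpu_fd is illustrative; the flags and struct are the
 * regular KVM ABI):
 */
#if 0	/* illustrative userspace example, not built as part of this file */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_singlestep(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}
#endif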
2254
2255 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2256                                     struct kvm_mp_state *mp_state)
2257 {
2258         /* CHECK_STOP and LOAD are not supported yet */
2259         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2260                                        KVM_MP_STATE_OPERATING;
2261 }
2262
2263 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2264                                     struct kvm_mp_state *mp_state)
2265 {
2266         int rc = 0;
2267
2268         /* user space knows about this interface - let it control the state */
2269         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2270
2271         switch (mp_state->mp_state) {
2272         case KVM_MP_STATE_STOPPED:
2273                 kvm_s390_vcpu_stop(vcpu);
2274                 break;
2275         case KVM_MP_STATE_OPERATING:
2276                 kvm_s390_vcpu_start(vcpu);
2277                 break;
2278         case KVM_MP_STATE_LOAD:
2279         case KVM_MP_STATE_CHECK_STOP:
2280                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2281         default:
2282                 rc = -ENXIO;
2283         }
2284
2285         return rc;
2286 }
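/*
 * The MP state handlers above sit behind KVM_GET_MP_STATE/KVM_SET_MP_STATE.
 * A hedged userspace sketch stopping a vcpu (vcpu_fd is illustrative):
 */
#if 0	/* illustrative userspace example, not built as part of this file */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int stop_vcpu(int vcpu_fd)
{
	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };

	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
}
#endif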
2287
2288 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2289 {
2290         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2291 }
2292
2293 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2294 {
2295 retry:
2296         kvm_s390_vcpu_request_handled(vcpu);
2297         if (!vcpu->requests)
2298                 return 0;
2299         /*
2300          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2301          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2302          * This ensures that the ipte instruction for this request has
2303          * already finished. We might race against a second unmapper that
2304          * wants to set the blocking bit. Let's just retry the request loop.
2305          */
2306         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2307                 int rc;
2308                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2309                                           kvm_s390_get_prefix(vcpu),
2310                                           PAGE_SIZE * 2, PROT_WRITE);
2311                 if (rc)
2312                         return rc;
2313                 goto retry;
2314         }
2315
2316         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2317                 vcpu->arch.sie_block->ihcpu = 0xffff;
2318                 goto retry;
2319         }
2320
2321         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2322                 if (!ibs_enabled(vcpu)) {
2323                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2324                         atomic_or(CPUSTAT_IBS,
2325                                         &vcpu->arch.sie_block->cpuflags);
2326                 }
2327                 goto retry;
2328         }
2329
2330         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2331                 if (ibs_enabled(vcpu)) {
2332                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2333                         atomic_andnot(CPUSTAT_IBS,
2334                                           &vcpu->arch.sie_block->cpuflags);
2335                 }
2336                 goto retry;
2337         }
2338
2339         /* nothing to do, just clear the request */
2340         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2341
2342         return 0;
2343 }
2344
2345 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2346 {
2347         struct kvm_vcpu *vcpu;
2348         int i;
2349
2350         mutex_lock(&kvm->lock);
2351         preempt_disable();
2352         kvm->arch.epoch = tod - get_tod_clock();
2353         kvm_s390_vcpu_block_all(kvm);
2354         kvm_for_each_vcpu(i, vcpu, kvm)
2355                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2356         kvm_s390_vcpu_unblock_all(kvm);
2357         preempt_enable();
2358         mutex_unlock(&kvm->lock);
2359 }
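/*
 * The epoch set above is the difference between the requested guest TOD and
 * the current host TOD; SIE then presents host TOD + epoch to the guest.
 * Worked example (illustrative values): if the host TOD reads 0x1000 when
 * userspace asks for a guest TOD of 0x1800, the epoch becomes 0x800; once
 * the host TOD has advanced to 0x1234, the guest sees
 * 0x1234 + 0x800 = 0x1a34, i.e. its clock has kept ticking from 0x1800.
 */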
2360
2361 /**
2362  * kvm_arch_fault_in_page - fault-in guest page if necessary
2363  * @vcpu: The corresponding virtual cpu
2364  * @gpa: Guest physical address
2365  * @writable: Whether the page should be writable or not
2366  *
2367  * Make sure that a guest page has been faulted-in on the host.
2368  *
2369  * Return: Zero on success, negative error code otherwise.
2370  */
2371 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2372 {
2373         return gmap_fault(vcpu->arch.gmap, gpa,
2374                           writable ? FAULT_FLAG_WRITE : 0);
2375 }
2376
2377 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2378                                       unsigned long token)
2379 {
2380         struct kvm_s390_interrupt inti;
2381         struct kvm_s390_irq irq;
2382
2383         if (start_token) {
2384                 irq.u.ext.ext_params2 = token;
2385                 irq.type = KVM_S390_INT_PFAULT_INIT;
2386                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2387         } else {
2388                 inti.type = KVM_S390_INT_PFAULT_DONE;
2389                 inti.parm64 = token;
2390                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2391         }
2392 }
2393
2394 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2395                                      struct kvm_async_pf *work)
2396 {
2397         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2398         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2399 }
2400
2401 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2402                                  struct kvm_async_pf *work)
2403 {
2404         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2405         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2406 }
2407
2408 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2409                                struct kvm_async_pf *work)
2410 {
2411         /* s390 will always inject the page directly */
2412 }
2413
2414 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2415 {
2416         /*
2417          * s390 will always inject the page directly,
2418          * but we still want check_async_completion to clean up
2419          */
2420         return true;
2421 }
2422
2423 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2424 {
2425         hva_t hva;
2426         struct kvm_arch_async_pf arch;
2427         int rc;
2428
2429         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2430                 return 0;
2431         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2432             vcpu->arch.pfault_compare)
2433                 return 0;
2434         if (psw_extint_disabled(vcpu))
2435                 return 0;
2436         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2437                 return 0;
2438         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2439                 return 0;
2440         if (!vcpu->arch.gmap->pfault_enabled)
2441                 return 0;
2442
2443         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2444         hva += current->thread.gmap_addr & ~PAGE_MASK;
2445         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2446                 return 0;
2447
2448         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2449         return rc;
2450 }
2451
2452 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2453 {
2454         int rc, cpuflags;
2455
2456         /*
2457          * On s390 notifications for arriving pages will be delivered directly
2458          * to the guest but the housekeeping for completed pfaults is
2459          * handled outside the worker.
2460          */
2461         kvm_check_async_pf_completion(vcpu);
2462
2463         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2464         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2465
2466         if (need_resched())
2467                 schedule();
2468
2469         if (test_cpu_flag(CIF_MCCK_PENDING))
2470                 s390_handle_mcck();
2471
2472         if (!kvm_is_ucontrol(vcpu->kvm)) {
2473                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2474                 if (rc)
2475                         return rc;
2476         }
2477
2478         rc = kvm_s390_handle_requests(vcpu);
2479         if (rc)
2480                 return rc;
2481
2482         if (guestdbg_enabled(vcpu)) {
2483                 kvm_s390_backup_guest_per_regs(vcpu);
2484                 kvm_s390_patch_guest_per_regs(vcpu);
2485         }
2486
2487         vcpu->arch.sie_block->icptcode = 0;
2488         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2489         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2490         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2491
2492         return 0;
2493 }
2494
2495 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2496 {
2497         struct kvm_s390_pgm_info pgm_info = {
2498                 .code = PGM_ADDRESSING,
2499         };
2500         u8 opcode, ilen;
2501         int rc;
2502
2503         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2504         trace_kvm_s390_sie_fault(vcpu);
2505
2506         /*
2507          * We want to inject an addressing exception, which is defined as a
2508          * suppressing or terminating exception. However, since we came here
2509          * by a DAT access exception, the PSW still points to the faulting
2510          * instruction since DAT exceptions are nullifying. So we've got
2511          * to look up the current opcode to get the length of the instruction
2512          * to be able to forward the PSW.
2513          */
2514         rc = read_guest_instr(vcpu, &opcode, 1);
2515         ilen = insn_length(opcode);
2516         if (rc < 0) {
2517                 return rc;
2518         } else if (rc) {
2519                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2520                  * Forward by arbitrary ilc, injection will take care of
2521                  * nullification if necessary.
2522                  */
2523                 pgm_info = vcpu->arch.pgm;
2524                 ilen = 4;
2525         }
2526         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2527         kvm_s390_forward_psw(vcpu, ilen);
2528         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2529 }
2530
2531 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2532 {
2533         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2534                    vcpu->arch.sie_block->icptcode);
2535         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2536
2537         if (guestdbg_enabled(vcpu))
2538                 kvm_s390_restore_guest_per_regs(vcpu);
2539
2540         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2541         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2542
2543         if (vcpu->arch.sie_block->icptcode > 0) {
2544                 int rc = kvm_handle_sie_intercept(vcpu);
2545
2546                 if (rc != -EOPNOTSUPP)
2547                         return rc;
2548                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2549                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2550                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2551                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2552                 return -EREMOTE;
2553         } else if (exit_reason != -EFAULT) {
2554                 vcpu->stat.exit_null++;
2555                 return 0;
2556         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2557                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2558                 vcpu->run->s390_ucontrol.trans_exc_code =
2559                                                 current->thread.gmap_addr;
2560                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2561                 return -EREMOTE;
2562         } else if (current->thread.gmap_pfault) {
2563                 trace_kvm_s390_major_guest_pfault(vcpu);
2564                 current->thread.gmap_pfault = 0;
2565                 if (kvm_arch_setup_async_pf(vcpu))
2566                         return 0;
2567                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2568         }
2569         return vcpu_post_run_fault_in_sie(vcpu);
2570 }
2571
2572 static int __vcpu_run(struct kvm_vcpu *vcpu)
2573 {
2574         int rc, exit_reason;
2575
2576         /*
2577          * We try to hold kvm->srcu during most of vcpu_run (except when running
2578          * the guest), so that memslots (and other stuff) are protected
2579          */
2580         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2581
2582         do {
2583                 rc = vcpu_pre_run(vcpu);
2584                 if (rc)
2585                         break;
2586
2587                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2588                 /*
2589                  * As PF_VCPU will be used in the fault handler, there must be
2590                  * no uaccess between guest_enter and guest_exit.
2591                  */
2592                 local_irq_disable();
2593                 __kvm_guest_enter();
2594                 __disable_cpu_timer_accounting(vcpu);
2595                 local_irq_enable();
2596                 exit_reason = sie64a(vcpu->arch.sie_block,
2597                                      vcpu->run->s.regs.gprs);
2598                 local_irq_disable();
2599                 __enable_cpu_timer_accounting(vcpu);
2600                 __kvm_guest_exit();
2601                 local_irq_enable();
2602                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2603
2604                 rc = vcpu_post_run(vcpu, exit_reason);
2605         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2606
2607         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2608         return rc;
2609 }
2610
2611 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2612 {
2613         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2614         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2615         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2616                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2617         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2618                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2619                 /* some control register changes require a tlb flush */
2620                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2621         }
2622         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2623                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2624                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2625                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2626                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2627                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2628         }
2629         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2630                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2631                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2632                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2633                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2634                         kvm_clear_async_pf_completion_queue(vcpu);
2635         }
2636         kvm_run->kvm_dirty_regs = 0;
2637 }
2638
2639 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2640 {
2641         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2642         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2643         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2644         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2645         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2646         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2647         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2648         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2649         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2650         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2651         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2652         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2653 }
2654
2655 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2656 {
2657         int rc;
2658         sigset_t sigsaved;
2659
2660         if (guestdbg_exit_pending(vcpu)) {
2661                 kvm_s390_prepare_debug_exit(vcpu);
2662                 return 0;
2663         }
2664
2665         if (vcpu->sigset_active)
2666                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2667
2668         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2669                 kvm_s390_vcpu_start(vcpu);
2670         } else if (is_vcpu_stopped(vcpu)) {
2671                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2672                                    vcpu->vcpu_id);
2673                 return -EINVAL;
2674         }
2675
2676         sync_regs(vcpu, kvm_run);
2677         enable_cpu_timer_accounting(vcpu);
2678
2679         might_fault();
2680         rc = __vcpu_run(vcpu);
2681
2682         if (signal_pending(current) && !rc) {
2683                 kvm_run->exit_reason = KVM_EXIT_INTR;
2684                 rc = -EINTR;
2685         }
2686
2687         if (guestdbg_exit_pending(vcpu) && !rc)  {
2688                 kvm_s390_prepare_debug_exit(vcpu);
2689                 rc = 0;
2690         }
2691
2692         if (rc == -EREMOTE) {
2693                 /* userspace support is needed, kvm_run has been prepared */
2694                 rc = 0;
2695         }
2696
2697         disable_cpu_timer_accounting(vcpu);
2698         store_regs(vcpu, kvm_run);
2699
2700         if (vcpu->sigset_active)
2701                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2702
2703         vcpu->stat.exit_userspace++;
2704         return rc;
2705 }
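/*
 * kvm_arch_vcpu_ioctl_run() above sits behind the KVM_RUN vcpu ioctl.  A
 * hedged userspace sketch of the usual run loop (kvm_fd/vcpu_fd/run_loop()
 * are illustrative; the ioctls, mmap layout and exit reason are the regular
 * KVM ABI):
 */
#if 0	/* illustrative userspace example, not built as part of this file */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int run_loop(int kvm_fd, int vcpu_fd)
{
	long run_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	struct kvm_run *run;

	if (run_size < 0)
		return -1;
	run = mmap(NULL, run_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   vcpu_fd, 0);
	if (run == MAP_FAILED)
		return -1;
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;	/* e.g. errno == EINTR on a signal */
		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
			break;		/* unhandled intercept, see vcpu_post_run() */
		/* handle the other exit reasons here */
	}
	return 0;
}
#endif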
2706
2707 /*
2708  * store status at address
2709  * we have two special cases:
2710  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2711  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2712  */
2713 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2714 {
2715         unsigned char archmode = 1;
2716         freg_t fprs[NUM_FPRS];
2717         unsigned int px;
2718         u64 clkcomp, cputm;
2719         int rc;
2720
2721         px = kvm_s390_get_prefix(vcpu);
2722         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2723                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2724                         return -EFAULT;
2725                 gpa = 0;
2726         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2727                 if (write_guest_real(vcpu, 163, &archmode, 1))
2728                         return -EFAULT;
2729                 gpa = px;
2730         } else
2731                 gpa -= __LC_FPREGS_SAVE_AREA;
2732
2733         /* manually convert vector registers if necessary */
2734         if (MACHINE_HAS_VX) {
2735                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2736                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2737                                      fprs, 128);
2738         } else {
2739                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2740                                      vcpu->run->s.regs.fprs, 128);
2741         }
2742         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2743                               vcpu->run->s.regs.gprs, 128);
2744         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2745                               &vcpu->arch.sie_block->gpsw, 16);
2746         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2747                               &px, 4);
2748         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2749                               &vcpu->run->s.regs.fpc, 4);
2750         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2751                               &vcpu->arch.sie_block->todpr, 4);
2752         cputm = kvm_s390_get_cpu_timer(vcpu);
2753         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2754                               &cputm, 8);
2755         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2756         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2757                               &clkcomp, 8);
2758         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2759                               &vcpu->run->s.regs.acrs, 64);
2760         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2761                               &vcpu->arch.sie_block->gcr, 128);
2762         return rc ? -EFAULT : 0;
2763 }
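/*
 * Summary of the save area written above (sizes in bytes, taken from the
 * write_guest_abs() calls): fp/converted-vector regs 128, gprs 128, psw 16,
 * prefix 4, fpc 4, todpr 4, cpu timer 8, clock comparator (ckc >> 8) 8,
 * acrs 64, crs 128, plus the architecture mode byte at absolute/real
 * address 163 for the NOADDR/PREFIXED special cases.
 */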
2764
2765 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2766 {
2767         /*
2768          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2769          * copying in vcpu load/put. Let's update our copies before we save
2770          * them into the save area.
2771          */
2772         save_fpu_regs();
2773         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2774         save_access_regs(vcpu->run->s.regs.acrs);
2775
2776         return kvm_s390_store_status_unloaded(vcpu, addr);
2777 }
2778
2779 /*
2780  * store additional status at address
2781  */
2782 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2783                                         unsigned long gpa)
2784 {
2785         /* Only bits 0-53 are used for address formation */
2786         if (!(gpa & ~0x3ff))
2787                 return 0;
2788
2789         return write_guest_abs(vcpu, gpa & ~0x3ff,
2790                                (void *)&vcpu->run->s.regs.vrs, 512);
2791 }
2792
2793 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2794 {
2795         if (!test_kvm_facility(vcpu->kvm, 129))
2796                 return 0;
2797
2798         /*
2799          * The guest VXRS are in the host VXRS due to the lazy
2800          * copying in vcpu load/put. We can simply call save_fpu_regs()
2801          * to save the current register state because we are in the
2802          * middle of a load/put cycle.
2803          *
2804          * Let's update our copies before we save it into the save area.
2805          */
2806         save_fpu_regs();
2807
2808         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2809 }
2810
2811 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2812 {
2813         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2814         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2815 }
2816
2817 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2818 {
2819         unsigned int i;
2820         struct kvm_vcpu *vcpu;
2821
2822         kvm_for_each_vcpu(i, vcpu, kvm) {
2823                 __disable_ibs_on_vcpu(vcpu);
2824         }
2825 }
2826
2827 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2828 {
2829         if (!sclp.has_ibs)
2830                 return;
2831         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2832         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2833 }
2834
2835 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2836 {
2837         int i, online_vcpus, started_vcpus = 0;
2838
2839         if (!is_vcpu_stopped(vcpu))
2840                 return;
2841
2842         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2843         /* Only one cpu at a time may enter/leave the STOPPED state. */
2844         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2845         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2846
2847         for (i = 0; i < online_vcpus; i++) {
2848                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2849                         started_vcpus++;
2850         }
2851
2852         if (started_vcpus == 0) {
2853                 /* we're the only active VCPU -> speed it up */
2854                 __enable_ibs_on_vcpu(vcpu);
2855         } else if (started_vcpus == 1) {
2856                 /*
2857                  * As we are starting a second VCPU, we have to disable
2858                  * the IBS facility on all VCPUs to remove potentially
2859                  * outstanding ENABLE requests.
2860                  */
2861                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2862         }
2863
2864         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2865         /*
2866          * Another VCPU might have used IBS while we were offline.
2867          * Let's play safe and flush the VCPU at startup.
2868          */
2869         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2870         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2871         return;
2872 }
2873
2874 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2875 {
2876         int i, online_vcpus, started_vcpus = 0;
2877         struct kvm_vcpu *started_vcpu = NULL;
2878
2879         if (is_vcpu_stopped(vcpu))
2880                 return;
2881
2882         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2883         /* Only one cpu at a time may enter/leave the STOPPED state. */
2884         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2885         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2886
2887         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2888         kvm_s390_clear_stop_irq(vcpu);
2889
2890         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2891         __disable_ibs_on_vcpu(vcpu);
2892
2893         for (i = 0; i < online_vcpus; i++) {
2894                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2895                         started_vcpus++;
2896                         started_vcpu = vcpu->kvm->vcpus[i];
2897                 }
2898         }
2899
2900         if (started_vcpus == 1) {
2901                 /*
2902                  * As we only have one VCPU left, we want to enable the
2903                  * IBS facility for that VCPU to speed it up.
2904                  */
2905                 __enable_ibs_on_vcpu(started_vcpu);
2906         }
2907
2908         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2909         return;
2910 }
2911
2912 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2913                                      struct kvm_enable_cap *cap)
2914 {
2915         int r;
2916
2917         if (cap->flags)
2918                 return -EINVAL;
2919
2920         switch (cap->cap) {
2921         case KVM_CAP_S390_CSS_SUPPORT:
2922                 if (!vcpu->kvm->arch.css_support) {
2923                         vcpu->kvm->arch.css_support = 1;
2924                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2925                         trace_kvm_s390_enable_css(vcpu->kvm);
2926                 }
2927                 r = 0;
2928                 break;
2929         default:
2930                 r = -EINVAL;
2931                 break;
2932         }
2933         return r;
2934 }
2935
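/*
 * KVM_S390_MEM_OP backend: validate the flags and transfer size,
 * optionally allocate a bounce buffer, then either only check that the
 * guest range is accessible (F_CHECK_ONLY) or copy the data between the
 * userspace buffer and guest memory via read_guest()/write_guest().
 * If the access failed with a program exception (r > 0) and
 * F_INJECT_EXCEPTION was requested, that exception is injected into the
 * VCPU before returning.
 */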
2936 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2937                                   struct kvm_s390_mem_op *mop)
2938 {
2939         void __user *uaddr = (void __user *)mop->buf;
2940         void *tmpbuf = NULL;
2941         int r, srcu_idx;
2942         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2943                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2944
2945         if (mop->flags & ~supported_flags)
2946                 return -EINVAL;
2947
2948         if (mop->size > MEM_OP_MAX_SIZE)
2949                 return -E2BIG;
2950
2951         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2952                 tmpbuf = vmalloc(mop->size);
2953                 if (!tmpbuf)
2954                         return -ENOMEM;
2955         }
2956
2957         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2958
2959         switch (mop->op) {
2960         case KVM_S390_MEMOP_LOGICAL_READ:
2961                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2962                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2963                                             mop->size, GACC_FETCH);
2964                         break;
2965                 }
2966                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2967                 if (r == 0) {
2968                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2969                                 r = -EFAULT;
2970                 }
2971                 break;
2972         case KVM_S390_MEMOP_LOGICAL_WRITE:
2973                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2974                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2975                                             mop->size, GACC_STORE);
2976                         break;
2977                 }
2978                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2979                         r = -EFAULT;
2980                         break;
2981                 }
2982                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2983                 break;
2984         default:
2985                 r = -EINVAL;
2986         }
2987
2988         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2989
2990         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2991                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2992
2993         vfree(tmpbuf);
2994         return r;
2995 }
2996
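/*
 * VCPU ioctl dispatcher: interrupt injection, store status, initial PSW
 * and reset, one-reg access, ucontrol address space mapping and faulting,
 * capability enabling, guest memory ops and irq state save/restore.
 */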
2997 long kvm_arch_vcpu_ioctl(struct file *filp,
2998                          unsigned int ioctl, unsigned long arg)
2999 {
3000         struct kvm_vcpu *vcpu = filp->private_data;
3001         void __user *argp = (void __user *)arg;
3002         int idx;
3003         long r;
3004
3005         switch (ioctl) {
3006         case KVM_S390_IRQ: {
3007                 struct kvm_s390_irq s390irq;
3008
3009                 r = -EFAULT;
3010                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3011                         break;
3012                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3013                 break;
3014         }
3015         case KVM_S390_INTERRUPT: {
3016                 struct kvm_s390_interrupt s390int;
3017                 struct kvm_s390_irq s390irq;
3018
3019                 r = -EFAULT;
3020                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3021                         break;
3022                 if (s390int_to_s390irq(&s390int, &s390irq))
3023                         return -EINVAL;
3024                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3025                 break;
3026         }
3027         case KVM_S390_STORE_STATUS:
3028                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3029                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3030                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3031                 break;
3032         case KVM_S390_SET_INITIAL_PSW: {
3033                 psw_t psw;
3034
3035                 r = -EFAULT;
3036                 if (copy_from_user(&psw, argp, sizeof(psw)))
3037                         break;
3038                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3039                 break;
3040         }
3041         case KVM_S390_INITIAL_RESET:
3042                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3043                 break;
3044         case KVM_SET_ONE_REG:
3045         case KVM_GET_ONE_REG: {
3046                 struct kvm_one_reg reg;
3047                 r = -EFAULT;
3048                 if (copy_from_user(&reg, argp, sizeof(reg)))
3049                         break;
3050                 if (ioctl == KVM_SET_ONE_REG)
3051                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3052                 else
3053                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3054                 break;
3055         }
3056 #ifdef CONFIG_KVM_S390_UCONTROL
3057         case KVM_S390_UCAS_MAP: {
3058                 struct kvm_s390_ucas_mapping ucasmap;
3059
3060                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3061                         r = -EFAULT;
3062                         break;
3063                 }
3064
3065                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3066                         r = -EINVAL;
3067                         break;
3068                 }
3069
3070                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3071                                      ucasmap.vcpu_addr, ucasmap.length);
3072                 break;
3073         }
3074         case KVM_S390_UCAS_UNMAP: {
3075                 struct kvm_s390_ucas_mapping ucasmap;
3076
3077                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3078                         r = -EFAULT;
3079                         break;
3080                 }
3081
3082                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3083                         r = -EINVAL;
3084                         break;
3085                 }
3086
3087                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3088                         ucasmap.length);
3089                 break;
3090         }
3091 #endif
3092         case KVM_S390_VCPU_FAULT: {
3093                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3094                 break;
3095         }
3096         case KVM_ENABLE_CAP:
3097         {
3098                 struct kvm_enable_cap cap;
3099                 r = -EFAULT;
3100                 if (copy_from_user(&cap, argp, sizeof(cap)))
3101                         break;
3102                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3103                 break;
3104         }
3105         case KVM_S390_MEM_OP: {
3106                 struct kvm_s390_mem_op mem_op;
3107
3108                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3109                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3110                 else
3111                         r = -EFAULT;
3112                 break;
3113         }
3114         case KVM_S390_SET_IRQ_STATE: {
3115                 struct kvm_s390_irq_state irq_state;
3116
3117                 r = -EFAULT;
3118                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3119                         break;
3120                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3121                     irq_state.len == 0 ||
3122                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3123                         r = -EINVAL;
3124                         break;
3125                 }
3126                 r = kvm_s390_set_irq_state(vcpu,
3127                                            (void __user *) irq_state.buf,
3128                                            irq_state.len);
3129                 break;
3130         }
3131         case KVM_S390_GET_IRQ_STATE: {
3132                 struct kvm_s390_irq_state irq_state;
3133
3134                 r = -EFAULT;
3135                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3136                         break;
3137                 if (irq_state.len == 0) {
3138                         r = -EINVAL;
3139                         break;
3140                 }
3141                 r = kvm_s390_get_irq_state(vcpu,
3142                                            (__u8 __user *)  irq_state.buf,
3143                                            irq_state.len);
3144                 break;
3145         }
3146         default:
3147                 r = -ENOTTY;
3148         }
3149         return r;
3150 }
3151
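/*
 * mmap() fault handler for the VCPU fd: for user-controlled (ucontrol)
 * VMs, the page at KVM_S390_SIE_PAGE_OFFSET maps the VCPU's SIE control
 * block; all other offsets fault with SIGBUS.
 */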
3152 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3153 {
3154 #ifdef CONFIG_KVM_S390_UCONTROL
3155         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3156                  && (kvm_is_ucontrol(vcpu->kvm))) {
3157                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3158                 get_page(vmf->page);
3159                 return 0;
3160         }
3161 #endif
3162         return VM_FAULT_SIGBUS;
3163 }
3164
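/* No arch-specific per-memslot data needs to be allocated on s390. */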
3165 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3166                             unsigned long npages)
3167 {
3168         return 0;
3169 }
3170
3171 /* Section: memory related */
3172 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3173                                    struct kvm_memory_slot *memslot,
3174                                    const struct kvm_userspace_memory_region *mem,
3175                                    enum kvm_mr_change change)
3176 {
3177         /* A few sanity checks. We can have memory slots which have to be
3178            located/ended at a segment boundary (1MB). The memory in userland is
3179            ok to be fragmented into various different vmas. It is okay to mmap()
3180            and munmap() stuff in this slot after doing this call at any time */
3181
3182         if (mem->userspace_addr & 0xffffful)
3183                 return -EINVAL;
3184
3185         if (mem->memory_size & 0xffffful)
3186                 return -EINVAL;
3187
3188         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3189                 return -EINVAL;
3190
3191         return 0;
3192 }
3193
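/*
 * Commit a memslot update: unless the slot's userspace address, guest
 * address and size are all unchanged (see the comment below), map the
 * range into the guest address space via gmap_map_segment().
 */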
3194 void kvm_arch_commit_memory_region(struct kvm *kvm,
3195                                 const struct kvm_userspace_memory_region *mem,
3196                                 const struct kvm_memory_slot *old,
3197                                 const struct kvm_memory_slot *new,
3198                                 enum kvm_mr_change change)
3199 {
3200         int rc;
3201
3202         /* If the basics of the memslot do not change, we do not want
3203          * to update the gmap. Every update causes several unnecessary
3204          * segment translation exceptions. This is usually handled just
3205          * fine by the normal fault handler + gmap, but it will also
3206          * cause faults on the prefix page of running guest CPUs.
3207          */
3208         if (old->userspace_addr == mem->userspace_addr &&
3209             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3210             old->npages * PAGE_SIZE == mem->memory_size)
3211                 return;
3212
3213         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3214                 mem->guest_phys_addr, mem->memory_size);
3215         if (rc)
3216                 pr_warn("failed to commit memory region\n");
3217         return;
3218 }
3219
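/*
 * Build the facility mask for facility-list doubleword i: the 2-bit
 * value selected from sclp.hmfai for this doubleword determines how much
 * of the 48-bit all-ones mask is kept - 0 keeps all 48 bits, and each
 * increment drops another 16 bits.
 */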
3220 static inline unsigned long nonhyp_mask(int i)
3221 {
3222         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3223
3224         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3225 }
3226
3227 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3228 {
3229         vcpu->valid_wakeup = false;
3230 }
3231
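/*
 * Module init: refuse to load when the SIEF2 facility (and thus SIE) is
 * not available, then restrict kvm_s390_fac_list_mask to facilities the
 * host actually reports via stfle, filtered through nonhyp_mask(), before
 * registering with the generic KVM code.
 */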
3232 static int __init kvm_s390_init(void)
3233 {
3234         int i;
3235
3236         if (!sclp.has_sief2) {
3237                 pr_info("SIE not available\n");
3238                 return -ENODEV;
3239         }
3240
3241         for (i = 0; i < 16; i++)
3242                 kvm_s390_fac_list_mask[i] |=
3243                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3244
3245         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3246 }
3247
3248 static void __exit kvm_s390_exit(void)
3249 {
3250         kvm_exit();
3251 }
3252
3253 module_init(kvm_s390_init);
3254 module_exit(kvm_s390_exit);
3255
3256 /*
3257  * Enable autoloading of the kvm module.
3258  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3259  * since x86 takes a different approach.
3260  */
3261 #include <linux/miscdevice.h>
3262 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3263 MODULE_ALIAS("devname:kvm");