]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge tag 'kvm-s390-next-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorPaolo Bonzini <pbonzini@redhat.com>
Wed, 28 Jun 2017 20:39:02 +0000 (22:39 +0200)
committerPaolo Bonzini <pbonzini@redhat.com>
Wed, 28 Jun 2017 20:39:02 +0000 (22:39 +0200)
KVM: s390: fixes and features for 4.13

- initial machine check forwarding
- migration support for the CMMA page hinting information
- cleanups
- fixes

arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
tools/kvm/kvm_stat/kvm_stat
tools/kvm/kvm_stat/kvm_stat.txt
virt/kvm/kvm_main.c

index 6e3095d1bad4f87d49b36de3ca82e3fa589d3a84..03df7c1da581a7317c4e566aa0db5e28f86134c6 100644 (file)
@@ -189,6 +189,7 @@ struct vcpu_svm {
        struct nested_state nested;
 
        bool nmi_singlestep;
+       u64 nmi_singlestep_guest_rflags;
 
        unsigned int3_injected;
        unsigned long int3_rip;
@@ -963,6 +964,18 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
        set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
+static void disable_nmi_singlestep(struct vcpu_svm *svm)
+{
+       svm->nmi_singlestep = false;
+       if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
+               /* Clear our flags if they were not set by the guest */
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+                       svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+                       svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
+       }
+}
+
 /* Note:
  * This hash table is used to map VM_ID to a struct kvm_arch,
  * when handling AMD IOMMU GALOG notification to schedule in
@@ -1712,11 +1725,24 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 {
-       return to_svm(vcpu)->vmcb->save.rflags;
+       struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long rflags = svm->vmcb->save.rflags;
+
+       if (svm->nmi_singlestep) {
+               /* Hide our flags if they were not set by the guest */
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+                       rflags &= ~X86_EFLAGS_TF;
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+                       rflags &= ~X86_EFLAGS_RF;
+       }
+       return rflags;
 }
 
 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
+       if (to_svm(vcpu)->nmi_singlestep)
+               rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+
        /*
         * Any change of EFLAGS.VM is accompanied by a reload of SS
         * (caused by either a task switch or an inter-privilege IRET),
@@ -2111,10 +2137,7 @@ static int db_interception(struct vcpu_svm *svm)
        }
 
        if (svm->nmi_singlestep) {
-               svm->nmi_singlestep = false;
-               if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
-                       svm->vmcb->save.rflags &=
-                               ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+               disable_nmi_singlestep(svm);
        }
 
        if (svm->vcpu.guest_debug &
@@ -2533,6 +2556,31 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
        return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
 }
 
+/* DB exceptions for our internal use must not cause vmexit */
+static int nested_svm_intercept_db(struct vcpu_svm *svm)
+{
+       unsigned long dr6;
+
+       /* if we're not singlestepping, it's not ours */
+       if (!svm->nmi_singlestep)
+               return NESTED_EXIT_DONE;
+
+       /* if it's not a singlestep exception, it's not ours */
+       if (kvm_get_dr(&svm->vcpu, 6, &dr6))
+               return NESTED_EXIT_DONE;
+       if (!(dr6 & DR6_BS))
+               return NESTED_EXIT_DONE;
+
+       /* if the guest is singlestepping, it should get the vmexit */
+       if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
+               disable_nmi_singlestep(svm);
+               return NESTED_EXIT_DONE;
+       }
+
+       /* it's ours, the nested hypervisor must not see this one */
+       return NESTED_EXIT_HOST;
+}
+
 static int nested_svm_exit_special(struct vcpu_svm *svm)
 {
        u32 exit_code = svm->vmcb->control.exit_code;
@@ -2588,8 +2636,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
        }
        case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
                u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
-               if (svm->nested.intercept_exceptions & excp_bits)
-                       vmexit = NESTED_EXIT_DONE;
+               if (svm->nested.intercept_exceptions & excp_bits) {
+                       if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+                               vmexit = nested_svm_intercept_db(svm);
+                       else
+                               vmexit = NESTED_EXIT_DONE;
+               }
                /* async page fault always cause vmexit */
                else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
                         svm->apf_reason != 0)
@@ -4626,10 +4678,17 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
            == HF_NMI_MASK)
                return; /* IRET will cause a vm exit */
 
+       if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+               return; /* STGI will cause a vm exit */
+
+       if (svm->nested.exit_required)
+               return; /* we're not going to run the guest yet */
+
        /*
         * Something prevents NMI from been injected. Single step over possible
         * problem (IRET or exception injection or interrupt shadow)
         */
+       svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
        svm->nmi_singlestep = true;
        svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 }
@@ -4770,6 +4829,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
        if (unlikely(svm->nested.exit_required))
                return;
 
+       /*
+        * Disable singlestep if we're injecting an interrupt/exception.
+        * We don't want our modified rflags to be pushed on the stack where
+        * we might not be able to easily reset them if we disabled NMI
+        * singlestep later.
+        */
+       if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
+               /*
+                * Event injection happens before external interrupts cause a
+                * vmexit and interrupts are disabled here, so smp_send_reschedule
+                * is enough to force an immediate vmexit.
+                */
+               disable_nmi_singlestep(svm);
+               smp_send_reschedule(vcpu->cpu);
+       }
+
        pre_svm_run(svm);
 
        sync_lapic_to_cr8(vcpu);
index c6dec552b28fa7e48e312d5b7727cc2e9843965d..e8b61ad84a8efc3820f88943addd615bda325887 100644 (file)
@@ -7653,7 +7653,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
        unsigned long type, types;
        gva_t gva;
        struct x86_exception e;
-       int vpid;
+       struct {
+               u64 vpid;
+               u64 gla;
+       } operand;
 
        if (!(vmx->nested.nested_vmx_secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_VPID) ||
@@ -7683,17 +7686,28 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
-                               sizeof(u32), &e)) {
+       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+                               sizeof(operand), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
+       if (operand.vpid >> 16) {
+               nested_vmx_failValid(vcpu,
+                       VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
 
        switch (type) {
        case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+               if (is_noncanonical_address(operand.gla)) {
+                       nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+                       return kvm_skip_emulated_instruction(vcpu);
+               }
+               /* fall through */
        case VMX_VPID_EXTENT_SINGLE_CONTEXT:
        case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
-               if (!vpid) {
+               if (!operand.vpid) {
                        nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                        return kvm_skip_emulated_instruction(vcpu);
index 2cf5176bbeee88feca35f6dadca99f3a53ab3eb2..dd8f00cfb8b482b71b60529fac0618363dc8e540 100755 (executable)
@@ -662,7 +662,7 @@ class TracepointProvider(Provider):
         self.setup_traces()
         self.fields = self._fields
 
-    def read(self):
+    def read(self, by_guest=0):
         """Returns 'event name: current value' for all enabled events."""
         ret = defaultdict(int)
         for group in self.group_leaders:
@@ -681,12 +681,14 @@ class TracepointProvider(Provider):
 class DebugfsProvider(Provider):
     """Provides data from the files that KVM creates in the kvm debugfs
     folder."""
-    def __init__(self, pid, fields_filter):
+    def __init__(self, pid, fields_filter, include_past):
         self.update_fields(fields_filter)
         self._baseline = {}
         self.do_read = True
         self.paths = []
         self.pid = pid
+        if include_past:
+            self.restore()
 
     def get_available_fields(self):
         """"Returns a list of available fields.
@@ -729,8 +731,15 @@ class DebugfsProvider(Provider):
             self.do_read = True
         self.reset()
 
-    def read(self, reset=0):
-        """Returns a dict with format:'file name / field -> current value'."""
+    def read(self, reset=0, by_guest=0):
+        """Returns a dict with format:'file name / field -> current value'.
+
+        Parameter 'reset':
+          0   plain read
+          1   reset field counts to 0
+          2   restore the original field counts
+
+        """
         results = {}
 
         # If no debugfs filtering support is available, then don't read.
@@ -747,12 +756,22 @@ class DebugfsProvider(Provider):
             for field in self._fields:
                 value = self.read_field(field, path)
                 key = path + field
-                if reset:
+                if reset == 1:
                     self._baseline[key] = value
+                if reset == 2:
+                    self._baseline[key] = 0
                 if self._baseline.get(key, -1) == -1:
                     self._baseline[key] = value
-                results[field] = (results.get(field, 0) + value -
-                                  self._baseline.get(key, 0))
+                increment = (results.get(field, 0) + value -
+                             self._baseline.get(key, 0))
+                if by_guest:
+                    pid = key.split('-')[0]
+                    if pid in results:
+                        results[pid] += increment
+                    else:
+                        results[pid] = increment
+                else:
+                    results[field] = increment
 
         return results
 
@@ -771,6 +790,11 @@ class DebugfsProvider(Provider):
         self._baseline = {}
         self.read(1)
 
+    def restore(self):
+        """Reset field counters"""
+        self._baseline = {}
+        self.read(2)
+
 
 class Stats(object):
     """Manages the data providers and the data they provide.
@@ -791,7 +815,8 @@ class Stats(object):
         providers = []
 
         if options.debugfs:
-            providers.append(DebugfsProvider(options.pid, options.fields))
+            providers.append(DebugfsProvider(options.pid, options.fields,
+                                             options.dbgfs_include_past))
         if options.tracepoints or not providers:
             providers.append(TracepointProvider(options.pid, options.fields))
 
@@ -832,18 +857,44 @@ class Stats(object):
             for provider in self.providers:
                 provider.pid = self._pid_filter
 
-    def get(self):
+    def get(self, by_guest=0):
         """Returns a dict with field -> (value, delta to last value) of all
         provider data."""
         for provider in self.providers:
-            new = provider.read()
-            for key in provider.fields:
+            new = provider.read(by_guest=by_guest)
+            for key in new if by_guest else provider.fields:
                 oldval = self.values.get(key, (0, 0))[0]
                 newval = new.get(key, 0)
                 newdelta = newval - oldval
                 self.values[key] = (newval, newdelta)
         return self.values
 
+    def toggle_display_guests(self, to_pid):
+        """Toggle between collection of stats by individual event and by
+        guest pid
+
+        Events reported by DebugfsProvider change when switching to/from
+        reading by guest values. Hence we have to remove the excess event
+        names from self.values.
+
+        """
+        if any(isinstance(ins, TracepointProvider) for ins in self.providers):
+            return 1
+        if to_pid:
+            for provider in self.providers:
+                if isinstance(provider, DebugfsProvider):
+                    for key in provider.fields:
+                        if key in self.values.keys():
+                            del self.values[key]
+        else:
+            oldvals = self.values.copy()
+            for key in oldvals:
+                if key.isdigit():
+                    del self.values[key]
+        # Update oldval (see get())
+        self.get(to_pid)
+        return 0
+
 DELAY_DEFAULT = 3.0
 MAX_GUEST_NAME_LEN = 48
 MAX_REGEX_LEN = 44
@@ -859,6 +910,7 @@ class Tui(object):
         self._delay_initial = 0.25
         self._delay_regular = DELAY_DEFAULT
         self._sorting = SORT_DEFAULT
+        self._display_guests = 0
 
     def __enter__(self):
         """Initialises curses for later use.  Based on curses.wrapper
@@ -1007,8 +1059,12 @@ class Tui(object):
             if len(regex) > MAX_REGEX_LEN:
                 regex = regex[:MAX_REGEX_LEN] + '...'
             self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
+        if self._display_guests:
+            col_name = 'Guest Name'
+        else:
+            col_name = 'Event'
         self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
-                           ('Event', 'Total', '%Total', 'CurAvg/s'),
+                           (col_name, 'Total', '%Total', 'CurAvg/s'),
                            curses.A_STANDOUT)
         self.screen.addstr(4, 1, 'Collecting data...')
         self.screen.refresh()
@@ -1017,7 +1073,7 @@ class Tui(object):
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
-        stats = self.stats.get()
+        stats = self.stats.get(self._display_guests)
 
         def sortCurAvg(x):
             # sort by current events if available
@@ -1045,6 +1101,8 @@ class Tui(object):
                 break
             if values[0] is not None:
                 cur = int(round(values[1] / sleeptime)) if values[1] else ''
+                if self._display_guests:
+                    key = self.get_gname_from_pid(key)
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                    (key, values[0], values[0] * 100 / total,
                                     cur))
@@ -1053,9 +1111,26 @@ class Tui(object):
             self.screen.addstr(4, 1, 'No matching events reported yet')
         self.screen.refresh()
 
+    def show_msg(self, text):
+        """Display message centered text and exit on key press"""
+        hint = 'Press any key to continue'
+        curses.cbreak()
+        self.screen.erase()
+        (x, term_width) = self.screen.getmaxyx()
+        row = 2
+        for line in text:
+            start = (term_width - len(line)) / 2
+            self.screen.addstr(row, start, line)
+            row += 1
+        self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint,
+                           curses.A_STANDOUT)
+        self.screen.getkey()
+
     def show_help_interactive(self):
         """Display help with list of interactive commands"""
-        msg = ('   c     clear filter',
+        msg = ('   b     toggle events by guests (debugfs only, honors'
+               ' filters)',
+               '   c     clear filter',
                '   f     filter by regular expression',
                '   g     filter by guest name',
                '   h     display interactive commands reference',
@@ -1195,7 +1270,7 @@ class Tui(object):
                                'This might limit the shown data to the trace '
                                'statistics.')
             self.screen.addstr(5, 0, msg)
-            self.print_all_gnames()
+            self.print_all_gnames(7)
             curses.echo()
             self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
             gname = self.screen.getstr()
@@ -1236,6 +1311,14 @@ class Tui(object):
             sleeptime = self._delay_regular
             try:
                 char = self.screen.getkey()
+                if char == 'b':
+                    self._display_guests = not self._display_guests
+                    if self.stats.toggle_display_guests(self._display_guests):
+                        self.show_msg(['Command not available with tracepoints'
+                                       ' enabled', 'Restart with debugfs only '
+                                       '(see option \'-d\') and try again!'])
+                        self._display_guests = not self._display_guests
+                    self.refresh_header()
                 if char == 'c':
                     self.stats.fields_filter = DEFAULT_REGEX
                     self.refresh_header(0)
@@ -1270,6 +1353,8 @@ class Tui(object):
                     sleeptime = self._delay_initial
                 if char == 'x':
                     self.update_drilldown()
+                    # prevents display of current values on next refresh
+                    self.stats.get()
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1337,6 +1422,7 @@ Requirements:
   the large number of files that are possibly opened.
 
 Interactive Commands:
+   b     toggle events by guests (debugfs only, honors filters)
    c     clear filter
    f     filter by regular expression
    g     filter by guest name
@@ -1381,6 +1467,13 @@ Press any other key to refresh statistics immediately.
                          dest='once',
                          help='run in batch mode for one second',
                          )
+    optparser.add_option('-i', '--debugfs-include-past',
+                         action='store_true',
+                         default=False,
+                         dest='dbgfs_include_past',
+                         help='include all available data on past events for '
+                              'debugfs',
+                         )
     optparser.add_option('-l', '--log',
                          action='store_true',
                          default=False,
index e24ac464d3417750bc869196bd1a008c4d77f180..e5cf836be8a1848bb82f39cfa3c7c75dcc67b4fa 100644 (file)
@@ -29,6 +29,8 @@ meaning of events.
 INTERACTIVE COMMANDS
 --------------------
 [horizontal]
+*b*::  toggle events by guests (debugfs only, honors filters)
+
 *c*::  clear filter
 
 *f*::  filter by regular expression
@@ -70,6 +72,10 @@ OPTIONS
 --debugfs::
        retrieve statistics from debugfs
 
+-i::
+--debugfs-include-past::
+       include all available data on past events for debugfs
+
 -p<pid>::
 --pid=<pid>::
        limit statistics to one virtual machine (pid)
index f0fe9d02f6bb2c47d909cf493ad6e19778cc666c..19f0ecb9b93e23501af3f5c21e2c971cebfd3c6a 100644 (file)
@@ -73,17 +73,17 @@ MODULE_LICENSE("GPL");
 
 /* Architectures should define their poll value according to the halt latency */
 unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
-module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns, uint, 0644);
 EXPORT_SYMBOL_GPL(halt_poll_ns);
 
 /* Default doubles per-vcpu halt_poll_ns. */
 unsigned int halt_poll_ns_grow = 2;
-module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns_grow, uint, 0644);
 EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
 
 /* Default resets per-vcpu halt_poll_ns . */
 unsigned int halt_poll_ns_shrink;
-module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns_shrink, uint, 0644);
 EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
 
 /*
@@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
                return PTR_ERR(file);
        }
 
+       /*
+        * Don't call kvm_put_kvm anymore at this point; file->f_op is
+        * already set, with ->release() being kvm_vm_release().  In error
+        * cases it will be called by the final fput(file) and will take
+        * care of doing kvm_put_kvm(kvm).
+        */
        if (kvm_create_vm_debugfs(kvm, r) < 0) {
                put_unused_fd(r);
                fput(file);