Merge tag 'kvm-s390-next-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Paolo Bonzini <pbonzini@redhat.com>

Wed, 28 Jun 2017 20:39:02 +0000 (22:39 +0200)

committer Paolo Bonzini <pbonzini@redhat.com>

Wed, 28 Jun 2017 20:39:02 +0000 (22:39 +0200)
author Paolo Bonzini <pbonzini@redhat.com>
Wed, 28 Jun 2017 20:39:02 +0000 (22:39 +0200)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 28 Jun 2017 20:39:02 +0000 (22:39 +0200)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c

index 6e3095d1bad4f87d49b36de3ca82e3fa589d3a84..03df7c1da581a7317c4e566aa0db5e28f86134c6 100644 (file)
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -189,6 +189,7 @@ struct vcpu_svm {
         struct nested_state nested;
  
         bool nmi_singlestep;
+       u64 nmi_singlestep_guest_rflags;
  
         unsigned int3_injected;
         unsigned long int3_rip;
@@ -963,6 +964,18 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
  }
  
+static void disable_nmi_singlestep(struct vcpu_svm *svm)
+{
+       svm->nmi_singlestep = false;
+       if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
+               /* Clear our flags if they were not set by the guest */
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+                       svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+                       svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
+       }
+}
+
  /* Note:
   * This hash table is used to map VM_ID to a struct kvm_arch,
   * when handling AMD IOMMU GALOG notification to schedule in
@@ -1712,11 +1725,24 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
  
  static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
  {
-       return to_svm(vcpu)->vmcb->save.rflags;
+       struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long rflags = svm->vmcb->save.rflags;
+
+       if (svm->nmi_singlestep) {
+               /* Hide our flags if they were not set by the guest */
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+                       rflags &= ~X86_EFLAGS_TF;
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+                       rflags &= ~X86_EFLAGS_RF;
+       }
+       return rflags;
  }
  
  static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
  {
+       if (to_svm(vcpu)->nmi_singlestep)
+               rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+
         /*
          * Any change of EFLAGS.VM is accompanied by a reload of SS
          * (caused by either a task switch or an inter-privilege IRET),
@@ -2111,10 +2137,7 @@ static int db_interception(struct vcpu_svm *svm)
         }
  
         if (svm->nmi_singlestep) {
-               svm->nmi_singlestep = false;
-               if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
-                       svm->vmcb->save.rflags &=
-                               ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+               disable_nmi_singlestep(svm);
         }
  
         if (svm->vcpu.guest_debug &
@@ -2533,6 +2556,31 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
         return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
  }
  
+/* DB exceptions for our internal use must not cause vmexit */
+static int nested_svm_intercept_db(struct vcpu_svm *svm)
+{
+       unsigned long dr6;
+
+       /* if we're not singlestepping, it's not ours */
+       if (!svm->nmi_singlestep)
+               return NESTED_EXIT_DONE;
+
+       /* if it's not a singlestep exception, it's not ours */
+       if (kvm_get_dr(&svm->vcpu, 6, &dr6))
+               return NESTED_EXIT_DONE;
+       if (!(dr6 & DR6_BS))
+               return NESTED_EXIT_DONE;
+
+       /* if the guest is singlestepping, it should get the vmexit */
+       if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
+               disable_nmi_singlestep(svm);
+               return NESTED_EXIT_DONE;
+       }
+
+       /* it's ours, the nested hypervisor must not see this one */
+       return NESTED_EXIT_HOST;
+}
+
  static int nested_svm_exit_special(struct vcpu_svm *svm)
  {
         u32 exit_code = svm->vmcb->control.exit_code;
@@ -2588,8 +2636,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
         }
         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
                 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
-               if (svm->nested.intercept_exceptions & excp_bits)
-                       vmexit = NESTED_EXIT_DONE;
+               if (svm->nested.intercept_exceptions & excp_bits) {
+                       if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+                               vmexit = nested_svm_intercept_db(svm);
+                       else
+                               vmexit = NESTED_EXIT_DONE;
+               }
                 /* async page fault always cause vmexit */
                 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
                          svm->apf_reason != 0)
@@ -4626,10 +4678,17 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
             == HF_NMI_MASK)
                 return; /* IRET will cause a vm exit */
  
+       if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+               return; /* STGI will cause a vm exit */
+
+       if (svm->nested.exit_required)
+               return; /* we're not going to run the guest yet */
+
         /*
          * Something prevents NMI from being injected. Single step over possible
          * problem (IRET or exception injection or interrupt shadow)
          */
+       svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
         svm->nmi_singlestep = true;
         svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
  }
@@ -4770,6 +4829,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
         if (unlikely(svm->nested.exit_required))
                 return;
  
+       /*
+        * Disable singlestep if we're injecting an interrupt/exception.
+        * We don't want our modified rflags to be pushed on the stack where
+        * we might not be able to easily reset them if we disabled NMI
+        * singlestep later.
+        */
+       if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
+               /*
+                * Event injection happens before external interrupts cause a
+                * vmexit and interrupts are disabled here, so smp_send_reschedule
+                * is enough to force an immediate vmexit.
+                */
+               disable_nmi_singlestep(svm);
+               smp_send_reschedule(vcpu->cpu);
+       }
+
         pre_svm_run(svm);
  
         sync_lapic_to_cr8(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index c6dec552b28fa7e48e312d5b7727cc2e9843965d..e8b61ad84a8efc3820f88943addd615bda325887 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7653,7 +7653,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
         unsigned long type, types;
         gva_t gva;
         struct x86_exception e;
-       int vpid;
+       struct {
+               u64 vpid;
+               u64 gla;
+       } operand;
  
         if (!(vmx->nested.nested_vmx_secondary_ctls_high &
               SECONDARY_EXEC_ENABLE_VPID) ||
@@ -7683,17 +7686,28 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
         if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                         vmx_instruction_info, false, &gva))
                 return 1;
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
-                               sizeof(u32), &e)) {
+       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+                               sizeof(operand), &e)) {
                 kvm_inject_page_fault(vcpu, &e);
                 return 1;
         }
+       if (operand.vpid >> 16) {
+               nested_vmx_failValid(vcpu,
+                       VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
  
         switch (type) {
         case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+               if (is_noncanonical_address(operand.gla)) {
+                       nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+                       return kvm_skip_emulated_instruction(vcpu);
+               }
+               /* fall through */
         case VMX_VPID_EXTENT_SINGLE_CONTEXT:
         case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
-               if (!vpid) {
+               if (!operand.vpid) {
                         nested_vmx_failValid(vcpu,
                                 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                         return kvm_skip_emulated_instruction(vcpu);
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat

index 2cf5176bbeee88feca35f6dadca99f3a53ab3eb2..dd8f00cfb8b482b71b60529fac0618363dc8e540 100755 (executable)
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -662,7 +662,7 @@ class TracepointProvider(Provider):
          self.setup_traces()
          self.fields = self._fields
  
-    def read(self):
+    def read(self, by_guest=0):
          """Returns 'event name: current value' for all enabled events."""
          ret = defaultdict(int)
          for group in self.group_leaders:
@@ -681,12 +681,14 @@ class TracepointProvider(Provider):
  class DebugfsProvider(Provider):
      """Provides data from the files that KVM creates in the kvm debugfs
      folder."""
-    def __init__(self, pid, fields_filter):
+    def __init__(self, pid, fields_filter, include_past):
          self.update_fields(fields_filter)
          self._baseline = {}
          self.do_read = True
          self.paths = []
          self.pid = pid
+        if include_past:
+            self.restore()
  
      def get_available_fields(self):
          """"Returns a list of available fields.
@@ -729,8 +731,15 @@ class DebugfsProvider(Provider):
              self.do_read = True
          self.reset()
  
-    def read(self, reset=0):
-        """Returns a dict with format:'file name / field -> current value'."""
+    def read(self, reset=0, by_guest=0):
+        """Returns a dict with format:'file name / field -> current value'.
+
+        Parameter 'reset':
+          0   plain read
+          1   reset field counts to 0
+          2   restore the original field counts
+
+        """
          results = {}
  
          # If no debugfs filtering support is available, then don't read.
@@ -747,12 +756,22 @@ class DebugfsProvider(Provider):
              for field in self._fields:
                  value = self.read_field(field, path)
                  key = path + field
-                if reset:
+                if reset == 1:
                      self._baseline[key] = value
+                if reset == 2:
+                    self._baseline[key] = 0
                  if self._baseline.get(key, -1) == -1:
                      self._baseline[key] = value
-                results[field] = (results.get(field, 0) + value -
-                                  self._baseline.get(key, 0))
+                increment = (results.get(field, 0) + value -
+                             self._baseline.get(key, 0))
+                if by_guest:
+                    pid = key.split('-')[0]
+                    if pid in results:
+                        results[pid] += increment
+                    else:
+                        results[pid] = increment
+                else:
+                    results[field] = increment
  
          return results
  
@@ -771,6 +790,11 @@ class DebugfsProvider(Provider):
          self._baseline = {}
          self.read(1)
  
+    def restore(self):
+        """Reset field counters"""
+        self._baseline = {}
+        self.read(2)
+
  
  class Stats(object):
      """Manages the data providers and the data they provide.
@@ -791,7 +815,8 @@ class Stats(object):
          providers = []
  
          if options.debugfs:
-            providers.append(DebugfsProvider(options.pid, options.fields))
+            providers.append(DebugfsProvider(options.pid, options.fields,
+                                             options.dbgfs_include_past))
          if options.tracepoints or not providers:
              providers.append(TracepointProvider(options.pid, options.fields))
  
@@ -832,18 +857,44 @@ class Stats(object):
              for provider in self.providers:
                  provider.pid = self._pid_filter
  
-    def get(self):
+    def get(self, by_guest=0):
          """Returns a dict with field -> (value, delta to last value) of all
          provider data."""
          for provider in self.providers:
-            new = provider.read()
-            for key in provider.fields:
+            new = provider.read(by_guest=by_guest)
+            for key in new if by_guest else provider.fields:
                  oldval = self.values.get(key, (0, 0))[0]
                  newval = new.get(key, 0)
                  newdelta = newval - oldval
                  self.values[key] = (newval, newdelta)
          return self.values
  
+    def toggle_display_guests(self, to_pid):
+        """Toggle between collection of stats by individual event and by
+        guest pid
+
+        Events reported by DebugfsProvider change when switching to/from
+        reading by guest values. Hence we have to remove the excess event
+        names from self.values.
+
+        """
+        if any(isinstance(ins, TracepointProvider) for ins in self.providers):
+            return 1
+        if to_pid:
+            for provider in self.providers:
+                if isinstance(provider, DebugfsProvider):
+                    for key in provider.fields:
+                        if key in self.values.keys():
+                            del self.values[key]
+        else:
+            oldvals = self.values.copy()
+            for key in oldvals:
+                if key.isdigit():
+                    del self.values[key]
+        # Update oldval (see get())
+        self.get(to_pid)
+        return 0
+
  DELAY_DEFAULT = 3.0
  MAX_GUEST_NAME_LEN = 48
  MAX_REGEX_LEN = 44
@@ -859,6 +910,7 @@ class Tui(object):
          self._delay_initial = 0.25
          self._delay_regular = DELAY_DEFAULT
          self._sorting = SORT_DEFAULT
+        self._display_guests = 0
  
      def __enter__(self):
          """Initialises curses for later use.  Based on curses.wrapper
@@ -1007,8 +1059,12 @@ class Tui(object):
              if len(regex) > MAX_REGEX_LEN:
                  regex = regex[:MAX_REGEX_LEN] + '...'
              self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
+        if self._display_guests:
+            col_name = 'Guest Name'
+        else:
+            col_name = 'Event'
          self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
-                           ('Event', 'Total', '%Total', 'CurAvg/s'),
+                           (col_name, 'Total', '%Total', 'CurAvg/s'),
                             curses.A_STANDOUT)
          self.screen.addstr(4, 1, 'Collecting data...')
          self.screen.refresh()
@@ -1017,7 +1073,7 @@ class Tui(object):
          row = 3
          self.screen.move(row, 0)
          self.screen.clrtobot()
-        stats = self.stats.get()
+        stats = self.stats.get(self._display_guests)
  
          def sortCurAvg(x):
              # sort by current events if available
@@ -1045,6 +1101,8 @@ class Tui(object):
                  break
              if values[0] is not None:
                  cur = int(round(values[1] / sleeptime)) if values[1] else ''
+                if self._display_guests:
+                    key = self.get_gname_from_pid(key)
                  self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                     (key, values[0], values[0] * 100 / total,
                                      cur))
@@ -1053,9 +1111,26 @@ class Tui(object):
              self.screen.addstr(4, 1, 'No matching events reported yet')
          self.screen.refresh()
  
+    def show_msg(self, text):
+        """Display message centered text and exit on key press"""
+        hint = 'Press any key to continue'
+        curses.cbreak()
+        self.screen.erase()
+        (x, term_width) = self.screen.getmaxyx()
+        row = 2
+        for line in text:
+            start = (term_width - len(line)) / 2
+            self.screen.addstr(row, start, line)
+            row += 1
+        self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint,
+                           curses.A_STANDOUT)
+        self.screen.getkey()
+
      def show_help_interactive(self):
          """Display help with list of interactive commands"""
-        msg = ('   c     clear filter',
+        msg = ('   b     toggle events by guests (debugfs only, honors'
+               ' filters)',
+               '   c     clear filter',
                 '   f     filter by regular expression',
                 '   g     filter by guest name',
                 '   h     display interactive commands reference',
@@ -1195,7 +1270,7 @@ class Tui(object):
                                 'This might limit the shown data to the trace '
                                 'statistics.')
              self.screen.addstr(5, 0, msg)
-            self.print_all_gnames()
+            self.print_all_gnames(7)
              curses.echo()
              self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
              gname = self.screen.getstr()
@@ -1236,6 +1311,14 @@ class Tui(object):
              sleeptime = self._delay_regular
              try:
                  char = self.screen.getkey()
+                if char == 'b':
+                    self._display_guests = not self._display_guests
+                    if self.stats.toggle_display_guests(self._display_guests):
+                        self.show_msg(['Command not available with tracepoints'
+                                       ' enabled', 'Restart with debugfs only '
+                                       '(see option \'-d\') and try again!'])
+                        self._display_guests = not self._display_guests
+                    self.refresh_header()
                  if char == 'c':
                      self.stats.fields_filter = DEFAULT_REGEX
                      self.refresh_header(0)
@@ -1270,6 +1353,8 @@ class Tui(object):
                      sleeptime = self._delay_initial
                  if char == 'x':
                      self.update_drilldown()
+                    # prevents display of current values on next refresh
+                    self.stats.get()
              except KeyboardInterrupt:
                  break
              except curses.error:
@@ -1337,6 +1422,7 @@ Requirements:
    the large number of files that are possibly opened.
  
  Interactive Commands:
+   b     toggle events by guests (debugfs only, honors filters)
     c     clear filter
     f     filter by regular expression
     g     filter by guest name
@@ -1381,6 +1467,13 @@ Press any other key to refresh statistics immediately.
                           dest='once',
                           help='run in batch mode for one second',
                           )
+    optparser.add_option('-i', '--debugfs-include-past',
+                         action='store_true',
+                         default=False,
+                         dest='dbgfs_include_past',
+                         help='include all available data on past events for '
+                              'debugfs',
+                         )
      optparser.add_option('-l', '--log',
                           action='store_true',
                           default=False,
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt

index e24ac464d3417750bc869196bd1a008c4d77f180..e5cf836be8a1848bb82f39cfa3c7c75dcc67b4fa 100644 (file)
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -29,6 +29,8 @@ meaning of events.
  INTERACTIVE COMMANDS
  --------------------
  [horizontal]
+*b*::  toggle events by guests (debugfs only, honors filters)
+
  *c*::  clear filter
  
  *f*::  filter by regular expression
@@ -70,6 +72,10 @@ OPTIONS
  --debugfs::
         retrieve statistics from debugfs
  
+-i::
+--debugfs-include-past::
+       include all available data on past events for debugfs
+
  -p<pid>::
  --pid=<pid>::
         limit statistics to one virtual machine (pid)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index f0fe9d02f6bb2c47d909cf493ad6e19778cc666c..19f0ecb9b93e23501af3f5c21e2c971cebfd3c6a 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -73,17 +73,17 @@ MODULE_LICENSE("GPL");
  
  /* Architectures should define their poll value according to the halt latency */
  unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
-module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns, uint, 0644);
  EXPORT_SYMBOL_GPL(halt_poll_ns);
  
  /* Default doubles per-vcpu halt_poll_ns. */
  unsigned int halt_poll_ns_grow = 2;
-module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns_grow, uint, 0644);
  EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
  
  /* Default resets per-vcpu halt_poll_ns . */
  unsigned int halt_poll_ns_shrink;
-module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns_shrink, uint, 0644);
  EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
  
  /*
@@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
                 return PTR_ERR(file);
         }
  
+       /*
+        * Don't call kvm_put_kvm anymore at this point; file->f_op is
+        * already set, with ->release() being kvm_vm_release().  In error
+        * cases it will be called by the final fput(file) and will take
+        * care of doing kvm_put_kvm(kvm).
+        */
         if (kvm_create_vm_debugfs(kvm, r) < 0) {
                 put_unused_fd(r);
                 fput(file);
author	Paolo Bonzini <pbonzini@redhat.com>
	Wed, 28 Jun 2017 20:39:02 +0000 (22:39 +0200)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Wed, 28 Jun 2017 20:39:02 +0000 (22:39 +0200)
arch/x86/kvm/svm.c		patch | blob | history
arch/x86/kvm/vmx.c		patch | blob | history
tools/kvm/kvm_stat/kvm_stat		patch | blob | history
tools/kvm/kvm_stat/kvm_stat.txt		patch | blob | history
virt/kvm/kvm_main.c		patch | blob | history