struct nested_state nested;
bool nmi_singlestep;
+ u64 nmi_singlestep_guest_rflags;
unsigned int3_injected;
unsigned long int3_rip;
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
+/*
+ * Stop the single-stepping that was armed for NMI injection.
+ *
+ * Unless userspace single-stepping (KVM_GUESTDBG_SINGLESTEP) is active,
+ * TF and RF are removed from the VMCB rflags again, but only those of
+ * the two bits that were clear in the snapshot kept in
+ * nmi_singlestep_guest_rflags, i.e. the ones not set by the guest.
+ */
+static void disable_nmi_singlestep(struct vcpu_svm *svm)
+{
+	/* TF/RF bits that the guest itself did not have set */
+	u64 ours = (X86_EFLAGS_TF | X86_EFLAGS_RF) &
+		   ~svm->nmi_singlestep_guest_rflags;
+
+	svm->nmi_singlestep = false;
+
+	/* Leave rflags alone while userspace single-stepping is active */
+	if (svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		return;
+
+	svm->vmcb->save.rflags &= ~ours;
+}
+
/* Note:
* This hash table is used to map VM_ID to a struct kvm_arch,
* when handling AMD IOMMU GALOG notification to schedule in
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
- return to_svm(vcpu)->vmcb->save.rflags;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long rflags = svm->vmcb->save.rflags;
+
+ if (svm->nmi_singlestep) {
+ /*
+ * Hide our flags if they were not set by the guest: while NMI
+ * single-stepping is active the VMCB rflags have TF and RF
+ * forced on, so report each bit only if it was already set in
+ * the snapshot saved in nmi_singlestep_guest_rflags.
+ */
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+ rflags &= ~X86_EFLAGS_TF;
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+ rflags &= ~X86_EFLAGS_RF;
+ }
+ return rflags;
}
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ if (to_svm(vcpu)->nmi_singlestep)
+ rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+
/*
* Any change of EFLAGS.VM is accompanied by a reload of SS
* (caused by either a task switch or an inter-privilege IRET),
}
if (svm->nmi_singlestep) {
- svm->nmi_singlestep = false;
- if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
- svm->vmcb->save.rflags &=
- ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ disable_nmi_singlestep(svm);
}
if (svm->vcpu.guest_debug &
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
+/*
+ * Decide whether an intercepted #DB is forwarded to the nested
+ * hypervisor (NESTED_EXIT_DONE) or kept by the host (NESTED_EXIT_HOST).
+ * DB exceptions for our internal use must not cause vmexit.
+ */
+static int nested_svm_intercept_db(struct vcpu_svm *svm)
+{
+	unsigned long dr6;
+
+	/*
+	 * It can only be ours if we are singlestepping, kvm_get_dr()
+	 * returned 0 for DR6, and DR6 flags a singlestep exception (BS).
+	 */
+	if (!svm->nmi_singlestep ||
+	    kvm_get_dr(&svm->vcpu, 6, &dr6) ||
+	    !(dr6 & DR6_BS))
+		return NESTED_EXIT_DONE;
+
+	/* it's ours, the nested hypervisor must not see this one */
+	if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+		return NESTED_EXIT_HOST;
+
+	/* the guest is singlestepping as well, it should get the vmexit */
+	disable_nmi_singlestep(svm);
+	return NESTED_EXIT_DONE;
+}
+
static int nested_svm_exit_special(struct vcpu_svm *svm)
{
u32 exit_code = svm->vmcb->control.exit_code;
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (svm->nested.intercept_exceptions & excp_bits)
- vmexit = NESTED_EXIT_DONE;
+ if (svm->nested.intercept_exceptions & excp_bits) {
+ if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+ vmexit = nested_svm_intercept_db(svm);
+ else
+ vmexit = NESTED_EXIT_DONE;
+ }
/* async page fault always cause vmexit */
else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
svm->apf_reason != 0)
== HF_NMI_MASK)
return; /* IRET will cause a vm exit */
+ if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+ return; /* STGI will cause a vm exit */
+
+ if (svm->nested.exit_required)
+ return; /* we're not going to run the guest yet */
+
/*
* Something prevents NMI from been injected. Single step over possible
* problem (IRET or exception injection or interrupt shadow)
*/
+ svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
}
if (unlikely(svm->nested.exit_required))
return;
+ /*
+ * Disable singlestep if we're injecting an interrupt/exception.
+ * We don't want our modified rflags to be pushed on the stack where
+ * we might not be able to easily reset them if we disabled NMI
+ * singlestep later.
+ */
+ if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
+ /*
+ * Event injection happens before external interrupts cause a
+ * vmexit and interrupts are disabled here, so smp_send_reschedule
+ * is enough to force an immediate vmexit.
+ */
+ disable_nmi_singlestep(svm);
+ smp_send_reschedule(vcpu->cpu);
+ }
+
pre_svm_run(svm);
sync_lapic_to_cr8(vcpu);
self.setup_traces()
self.fields = self._fields
- def read(self):
+ def read(self, by_guest=0):
"""Returns 'event name: current value' for all enabled events."""
ret = defaultdict(int)
for group in self.group_leaders:
class DebugfsProvider(Provider):
"""Provides data from the files that KVM creates in the kvm debugfs
folder."""
- def __init__(self, pid, fields_filter):
+ def __init__(self, pid, fields_filter, include_past):
+ """Set up the provider for the given pid and fields filter.
+
+ If 'include_past' is true, restore() is called so that the
+ baselines are 0 and the reported counts are not taken relative
+ to the values found at start-up.
+
+ """
self.update_fields(fields_filter)
self._baseline = {}
self.do_read = True
self.paths = []
self.pid = pid
+ if include_past:
+ self.restore()
def get_available_fields(self):
""""Returns a list of available fields.
self.do_read = True
self.reset()
- def read(self, reset=0):
- """Returns a dict with format:'file name / field -> current value'."""
+ def read(self, reset=0, by_guest=0):
+ """Returns a dict with format:'file name / field -> current value'.
+
+ Parameter 'reset':
+ 0 plain read
+ 1 reset field counts to 0
+ 2 restore the original field counts
+
+ """
results = {}
# If no debugfs filtering support is available, then don't read.
for field in self._fields:
value = self.read_field(field, path)
key = path + field
- if reset:
+ if reset == 1:
self._baseline[key] = value
+ if reset == 2:
+ self._baseline[key] = 0
if self._baseline.get(key, -1) == -1:
self._baseline[key] = value
- results[field] = (results.get(field, 0) + value -
- self._baseline.get(key, 0))
+ increment = (results.get(field, 0) + value -
+ self._baseline.get(key, 0))
+ if by_guest:
+ pid = key.split('-')[0]
+ if pid in results:
+ results[pid] += increment
+ else:
+ results[pid] = increment
+ else:
+ results[field] = increment
return results
self._baseline = {}
self.read(1)
+ def restore(self):
+ """Restore the original field counts.
+
+ Clears the baselines and re-reads with reset=2, which sets every
+ baseline to 0, so later reads are no longer taken relative to an
+ earlier snapshot.
+
+ """
+ self._baseline = {}
+ self.read(2)
+
class Stats(object):
"""Manages the data providers and the data they provide.
providers = []
if options.debugfs:
- providers.append(DebugfsProvider(options.pid, options.fields))
+ providers.append(DebugfsProvider(options.pid, options.fields,
+ options.dbgfs_include_past))
if options.tracepoints or not providers:
providers.append(TracepointProvider(options.pid, options.fields))
for provider in self.providers:
provider.pid = self._pid_filter
- def get(self):
+ def get(self, by_guest=0):
"""Returns a dict with field -> (value, delta to last value) of all
provider data."""
for provider in self.providers:
- new = provider.read()
- for key in provider.fields:
+ new = provider.read(by_guest=by_guest)
+ # In by_guest mode DebugfsProvider.read() keys its result by
+ # guest pid instead of field name, so walk the returned keys.
+ for key in new if by_guest else provider.fields:
oldval = self.values.get(key, (0, 0))[0]
newval = new.get(key, 0)
newdelta = newval - oldval
self.values[key] = (newval, newdelta)
return self.values
+ def toggle_display_guests(self, to_pid):
+ """Toggle between collection of stats by individual event and by
+ guest pid
+
+ Events reported by DebugfsProvider change when switching to/from
+ reading by guest values. Hence we have to remove the excess event
+ names from self.values.
+
+ """
+ if any(isinstance(ins, TracepointProvider) for ins in self.providers):
+ return 1
+ if to_pid:
+ for provider in self.providers:
+ if isinstance(provider, DebugfsProvider):
+ for key in provider.fields:
+ if key in self.values.keys():
+ del self.values[key]
+ else:
+ oldvals = self.values.copy()
+ for key in oldvals:
+ if key.isdigit():
+ del self.values[key]
+ # Update oldval (see get())
+ self.get(to_pid)
+ return 0
+
DELAY_DEFAULT = 3.0
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
self._delay_initial = 0.25
self._delay_regular = DELAY_DEFAULT
self._sorting = SORT_DEFAULT
+ self._display_guests = 0
def __enter__(self):
"""Initialises curses for later use. Based on curses.wrapper
if len(regex) > MAX_REGEX_LEN:
regex = regex[:MAX_REGEX_LEN] + '...'
self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
+ if self._display_guests:
+ col_name = 'Guest Name'
+ else:
+ col_name = 'Event'
self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
- ('Event', 'Total', '%Total', 'CurAvg/s'),
+ (col_name, 'Total', '%Total', 'CurAvg/s'),
curses.A_STANDOUT)
self.screen.addstr(4, 1, 'Collecting data...')
self.screen.refresh()
row = 3
self.screen.move(row, 0)
self.screen.clrtobot()
- stats = self.stats.get()
+ stats = self.stats.get(self._display_guests)
def sortCurAvg(x):
# sort by current events if available
break
if values[0] is not None:
cur = int(round(values[1] / sleeptime)) if values[1] else ''
+ if self._display_guests:
+ key = self.get_gname_from_pid(key)
self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
(key, values[0], values[0] * 100 / total,
cur))
self.screen.addstr(4, 1, 'No matching events reported yet')
self.screen.refresh()
+ def show_msg(self, text):
+ """Display message centered text and exit on key press"""
+ hint = 'Press any key to continue'
+ curses.cbreak()
+ self.screen.erase()
+ (x, term_width) = self.screen.getmaxyx()
+ row = 2
+ for line in text:
+ start = (term_width - len(line)) / 2
+ self.screen.addstr(row, start, line)
+ row += 1
+ self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint,
+ curses.A_STANDOUT)
+ self.screen.getkey()
+
def show_help_interactive(self):
"""Display help with list of interactive commands"""
- msg = (' c clear filter',
+ msg = (' b toggle events by guests (debugfs only, honors'
+ ' filters)',
+ ' c clear filter',
' f filter by regular expression',
' g filter by guest name',
' h display interactive commands reference',
'This might limit the shown data to the trace '
'statistics.')
self.screen.addstr(5, 0, msg)
- self.print_all_gnames()
+ self.print_all_gnames(7)
curses.echo()
self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
gname = self.screen.getstr()
sleeptime = self._delay_regular
try:
char = self.screen.getkey()
+ if char == 'b':
+ self._display_guests = not self._display_guests
+ if self.stats.toggle_display_guests(self._display_guests):
+ self.show_msg(['Command not available with tracepoints'
+ ' enabled', 'Restart with debugfs only '
+ '(see option \'-d\') and try again!'])
+ self._display_guests = not self._display_guests
+ self.refresh_header()
if char == 'c':
self.stats.fields_filter = DEFAULT_REGEX
self.refresh_header(0)
sleeptime = self._delay_initial
if char == 'x':
self.update_drilldown()
+ # prevents display of current values on next refresh
+ self.stats.get()
except KeyboardInterrupt:
break
except curses.error:
the large number of files that are possibly opened.
Interactive Commands:
+ b toggle events by guests (debugfs only, honors filters)
c clear filter
f filter by regular expression
g filter by guest name
dest='once',
help='run in batch mode for one second',
)
+ optparser.add_option('-i', '--debugfs-include-past',
+ action='store_true',
+ default=False,
+ dest='dbgfs_include_past',
+ help='include all available data on past events for '
+ 'debugfs',
+ )
optparser.add_option('-l', '--log',
action='store_true',
default=False,