From 698eff6355f735d46d1b7113df8b422874cd7988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Mar 2017 12:48:18 +0100 Subject: [PATCH] sched/clock, x86/perf: Fix "perf test tsc" People reported that commit: 5680d8094ffa ("sched/clock: Provide better clock continuity") broke "perf test tsc". That commit added another offset to the reported clock value; so take that into account when computing the provided offset values. Reported-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 9 ++++++--- arch/x86/include/asm/timer.h | 2 ++ arch/x86/kernel/tsc.c | 4 ++-- include/linux/sched/clock.h | 13 +++++++------ kernel/sched/clock.c | 22 +++++++++++----------- 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2aa1ad194db2..580b60f5ac83 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2256,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { struct cyc2ns_data *data; + u64 offset; userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; @@ -2263,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event, !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); userpg->pmc_width = x86_pmu.cntval_bits; - if (!sched_clock_stable()) + if (!using_native_sched_clock() || !sched_clock_stable()) return; data = cyc2ns_read_begin(); + offset = data->cyc2ns_offset + __sched_clock_offset; + /* * Internal timekeeping for enabled/running/stopped times * is always in the local_clock domain. @@ -2275,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time = 1; userpg->time_mult = data->cyc2ns_mul; userpg->time_shift = data->cyc2ns_shift; - userpg->time_offset = data->cyc2ns_offset - now; + userpg->time_offset = offset - now; /* * cap_user_time_zero doesn't make sense when we're using a different @@ -2283,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event, */ if (!event->attr.use_clockid) { userpg->cap_user_time_zero = 1; - userpg->time_zero = data->cyc2ns_offset; + userpg->time_zero = offset; } cyc2ns_read_end(data); diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index a04eabd43d06..27e9f9d769b8 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void); extern int no_timer_check; +extern bool using_native_sched_clock(void); + /* * We use the full linear equation: f(x) = a + b*x, in order to allow * a continuous function in the face of dynamic freq changes. diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c73a7f9e881a..714dfba6a1e7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -328,7 +328,7 @@ unsigned long long sched_clock(void) return paravirt_sched_clock(); } -static inline bool using_native_sched_clock(void) +bool using_native_sched_clock(void) { return pv_time_ops.sched_clock == native_sched_clock; } @@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void) unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); -static inline bool using_native_sched_clock(void) { return true; } +bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 4a68c6791207..34fe92ce1ebd 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -54,15 +54,16 @@ static inline u64 local_clock(void) } #else extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ extern int sched_clock_stable(void); extern void clear_sched_clock_stable(void); +/* + * When sched_clock_stable(), __sched_clock_offset provides the offset + * between local_clock() and sched_clock(). + */ +extern u64 __sched_clock_offset; + + extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index fec0f58c8dee..24a3e01bf8cb 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); static int __sched_clock_stable_early = 1; /* - * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset + * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset */ -static __read_mostly u64 raw_offset; -static __read_mostly u64 gtod_offset; +__read_mostly u64 __sched_clock_offset; +static __read_mostly u64 __gtod_offset; struct sched_clock_data { u64 tick_raw; @@ -131,11 +131,11 @@ static void __set_sched_clock_stable(void) /* * Attempt to make the (initial) unstable->stable transition continuous. */ - raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw); + __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw); printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); static_branch_enable(&__sched_clock_stable); tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -161,11 +161,11 @@ static void __clear_sched_clock_stable(void) * * Still do what we can. */ - gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod); + __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod); printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -238,7 +238,7 @@ static u64 sched_clock_local(struct sched_clock_data *scd) * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + gtod_offset + delta; + clock = scd->tick_gtod + __gtod_offset + delta; min_clock = wrap_max(scd->tick_gtod, old_clock); max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); @@ -324,7 +324,7 @@ u64 sched_clock_cpu(int cpu) u64 clock; if (sched_clock_stable()) - return sched_clock() + raw_offset; + return sched_clock() + __sched_clock_offset; if (unlikely(!sched_clock_running)) return 0ull; -- 2.45.2