From 7bde2d50014d3c5110b1db9a8e2659b6fa5f6b4a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 3 Aug 2017 19:03:14 -0700 Subject: [PATCH] cpufreq: intel_pstate: Improve IO performance with per-core P-states In the current implementation, the response latency between seeing SCHED_CPUFREQ_IOWAIT set and the actual P-state adjustment can be up to 10ms. It can be reduced by bumping up the P-state to the max at the time SCHED_CPUFREQ_IOWAIT is passed to intel_pstate_update_util(). With this change, the IO performance improves significantly. For a simple "grep -r . linux" (Here linux is the kernel source folder) with caches dropped every time on a Broadwell Xeon workstation with per-core P-states, the user and system time is shorter by as much as 30% - 40%. The same performance difference was not observed on clients that don't support per-core P-state. Signed-off-by: Srinivas Pandruvada [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2eac2adff11b..532e261b2cab 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1526,6 +1526,15 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, if (flags & SCHED_CPUFREQ_IOWAIT) { cpu->iowait_boost = int_tofp(1); + cpu->last_update = time; + /* + * The last time the busy was 100% so P-state was max anyway + * so avoid overhead of computation. + */ + if (fp_toint(cpu->sample.busy_scaled) == 100) + return; + + goto set_pstate; } else if (cpu->iowait_boost) { /* Clear iowait_boost if the CPU may have been idle. */ delta_ns = time - cpu->last_update; @@ -1537,6 +1546,7 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL) return; +set_pstate: if (intel_pstate_sample(cpu, time)) { int target_pstate; -- 2.45.2