diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 824c48d..5fa6ee2 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -54,7 +54,7 @@ CONFIG_HIGHPTE=y CONFIG_X86_CHECK_BIOS_CORRUPTION=y # CONFIG_MTRR_SANITIZER is not set CONFIG_EFI=y -CONFIG_HZ_100=y +CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y # CONFIG_COMPAT_VDSO is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index b8c4f66..d28bdab 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -52,7 +52,7 @@ CONFIG_NUMA=y CONFIG_X86_CHECK_BIOS_CORRUPTION=y # CONFIG_MTRR_SANITIZER is not set CONFIG_EFI=y -CONFIG_HZ_100=y +CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y # CONFIG_COMPAT_VDSO is not set diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 460efa2..18da4f8 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -30,8 +30,8 @@ struct cs_dbs_tuners { }; /* Conservative governor macros */ -#define DEF_FREQUENCY_UP_THRESHOLD (63) -#define DEF_FREQUENCY_DOWN_THRESHOLD (26) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_DOWN_THRESHOLD (20) #define DEF_FREQUENCY_STEP (5) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 7b3187a..3a1f49f 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -20,7 +20,7 @@ #include "cpufreq_ondemand.h" /* On-demand governor macros */ -#define DEF_FREQUENCY_UP_THRESHOLD (63) +#define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_UP_THRESHOLD (95) @@ -129,7 +129,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) } /* - * Every sampling_rate, we check, if current idle time is less than 37% + * Every sampling_rate, we check, if current idle time is less than 20% * (default), then we try to increase frequency. Else, we adjust the frequency * proportional to load. */ diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index ecde22d..1532e4e 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -4,8 +4,8 @@ choice prompt "Timer frequency" - default HZ_250 - default HZ_100 if SCHED_MUQSS + default HZ_128 if SCHED_MUQSS + default HZ_250 if !SCHED_MUQSS help Allows the configuration of the timer frequency. It is customary to have the timer interrupt run at 1000 Hz but 100 Hz may be more @@ -24,6 +24,16 @@ choice with lots of processors that may show reduced performance if too many timer interrupts are occurring. + config HZ_128 + bool "128 HZ" + help + 128 Hz is a suitable choice in combination with MuQSS which does + not rely on ticks for rescheduling interrupts, and is not Hz limited + for timeouts and sleeps from both the kernel and userspace. + This allows us to benefit from the lower overhead and higher + throughput of fewer timer ticks and the microoptimisation of Hz + divisions being a power of 2. + config HZ_250 bool "250 HZ" help @@ -51,6 +61,7 @@ endchoice config HZ int default 100 if HZ_100 + default 128 if HZ_128 default 250 if HZ_250 default 300 if HZ_300 default 1000 if HZ_1000 diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c index 2fa9683..c2f04da 100644 --- a/kernel/sched/MuQSS.c +++ b/kernel/sched/MuQSS.c @@ -137,7 +137,7 @@ void print_scheduler_version(void) { - printk(KERN_INFO "MuQSS CPU scheduler v0.140 by Con Kolivas.\n"); + printk(KERN_INFO "MuQSS CPU scheduler v0.144 by Con Kolivas.\n"); } /* @@ -2349,6 +2349,76 @@ static void account_task_cpu(struct rq *rq, struct task_struct *p) p->last_ran = rq->niffies; } +static inline int hrexpiry_enabled(struct rq *rq) +{ + if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized)) + return 0; + return hrtimer_is_hres_active(&rq->hrexpiry_timer); +} + +/* + * Use HR-timers to deliver accurate preemption points. + */ +static inline void hrexpiry_clear(struct rq *rq) +{ + if (!hrexpiry_enabled(rq)) + return; + if (hrtimer_active(&rq->hrexpiry_timer)) + hrtimer_cancel(&rq->hrexpiry_timer); +} + +/* + * High-resolution time_slice expiry. + * Runs from hardirq context with interrupts disabled. + */ +static enum hrtimer_restart hrexpiry(struct hrtimer *timer) +{ + struct rq *rq = container_of(timer, struct rq, hrexpiry_timer); + struct task_struct *p; + + /* This can happen during CPU hotplug / resume */ + if (unlikely(cpu_of(rq) != smp_processor_id())) + goto out; + + /* + * We're doing this without the runqueue lock but this should always + * be run on the local CPU. Time slice should run out in __schedule + * but we set it to zero here in case niffies is slightly less. + */ + p = rq->curr; + p->time_slice = 0; + __set_tsk_resched(p); +out: + return HRTIMER_NORESTART; +} + +/* + * Called to set the hrexpiry timer state. + * + * called with irqs disabled from the local CPU only + */ +static void hrexpiry_start(struct rq *rq, u64 delay) +{ + if (!hrexpiry_enabled(rq)) + return; + + hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay), + HRTIMER_MODE_REL_PINNED); +} + +static void init_rq_hrexpiry(struct rq *rq) +{ + hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rq->hrexpiry_timer.function = hrexpiry; +} + +static inline int rq_dither(struct rq *rq) +{ + if (!hrexpiry_enabled(rq)) + return HALF_JIFFY_US; + return 0; +} + /* * wake_up_new_task - wake up a newly created task for the first time. * @@ -2417,8 +2487,14 @@ void wake_up_new_task(struct task_struct *p) * usually avoids a lot of COW overhead. */ __set_tsk_resched(rq_curr); - } else + } else { + /* + * Adjust the hrexpiry since rq_curr will keep + * running and its timeslice has been shortened. + */ + hrexpiry_start(rq, US_TO_NS(rq_curr->time_slice)); try_preempt(p, new_rq); + } } } else { time_slice_expired(p, new_rq); @@ -3100,87 +3176,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } -#ifdef CONFIG_HIGH_RES_TIMERS -static inline int hrexpiry_enabled(struct rq *rq) -{ - if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized)) - return 0; - return hrtimer_is_hres_active(&rq->hrexpiry_timer); -} - -/* - * Use HR-timers to deliver accurate preemption points. - */ -static void hrexpiry_clear(struct rq *rq) -{ - if (!hrexpiry_enabled(rq)) - return; - if (hrtimer_active(&rq->hrexpiry_timer)) - hrtimer_cancel(&rq->hrexpiry_timer); -} - -/* - * High-resolution time_slice expiry. - * Runs from hardirq context with interrupts disabled. - */ -static enum hrtimer_restart hrexpiry(struct hrtimer *timer) -{ - struct rq *rq = container_of(timer, struct rq, hrexpiry_timer); - struct task_struct *p; - - /* This can happen during CPU hotplug / resume */ - if (unlikely(cpu_of(rq) != smp_processor_id())) - goto out; - - /* - * We're doing this without the runqueue lock but this should always - * be run on the local CPU. Time slice should run out in __schedule - * but we set it to zero here in case niffies is slightly less. - */ - p = rq->curr; - p->time_slice = 0; - __set_tsk_resched(p); -out: - return HRTIMER_NORESTART; -} - -/* - * Called to set the hrexpiry timer state. - * - * called with irqs disabled from the local CPU only - */ -static void hrexpiry_start(struct rq *rq, u64 delay) -{ - if (!hrexpiry_enabled(rq)) - return; - - hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay), - HRTIMER_MODE_REL_PINNED); -} - -static void init_rq_hrexpiry(struct rq *rq) -{ - hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - rq->hrexpiry_timer.function = hrexpiry; -} - -static inline int rq_dither(struct rq *rq) -{ - if (!hrexpiry_enabled(rq)) - return HALF_JIFFY_US; - return 0; -} -#else /* CONFIG_HIGH_RES_TIMERS */ -static inline void init_rq_hrexpiry(struct rq *rq) -{ -} - -static inline int rq_dither(struct rq *rq) -{ - return HALF_JIFFY_US; -} -#endif /* CONFIG_HIGH_RES_TIMERS */ - /* * Functions to test for when SCHED_ISO tasks have used their allocated * quota as real time scheduling and convert them back to SCHED_NORMAL. All @@ -3483,16 +3478,15 @@ static inline void check_deadline(struct task_struct *p, struct rq *rq) static inline struct task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) { + struct rq *locked = NULL, *chosen = NULL; struct task_struct *edt = idle; - struct rq *locked = NULL; int i, best_entries = 0; u64 best_key = ~0ULL; for (i = 0; i < num_possible_cpus(); i++) { struct rq *other_rq = rq_order(rq, i); int entries = other_rq->sl->entries; - struct task_struct *p; - u64 key; + skiplist_node *next; /* * Check for queued entres lockless first. The local runqueue @@ -3526,35 +3520,47 @@ static inline struct task_struct continue; } } - key = other_rq->node.next[0]->key; - /* Reevaluate key after locking */ - if (unlikely(key >= best_key)) { - /* This will always be when rq != other_rq */ - unlock_rq(other_rq); - continue; - } - p = other_rq->node.next[0]->value; - if (!smt_schedule(p, rq)) { - if (i) - unlock_rq(other_rq); - continue; - } + next = &other_rq->node; + /* + * In interactive mode we check beyond the best entry on other + * runqueues if we can't get the best for smt or affinity + * reasons. + */ + while ((next = next->next[0]) != &other_rq->node) { + struct task_struct *p; + u64 key = next->key; - /* Make sure affinity is ok */ - if (i) { - if (needs_other_cpu(p, cpu)) { - unlock_rq(other_rq); + /* Reevaluate key after locking */ + if (key >= best_key) + break; + + p = next->value; + if (!smt_schedule(p, rq)) { + if (i && !sched_interactive) + break; continue; } - if (locked) - unlock_rq(locked); - locked = other_rq; - } - best_entries = entries; - best_key = key; - edt = p; + /* Make sure affinity is ok */ + if (i) { + if (needs_other_cpu(p, cpu)) { + if (sched_interactive) + continue; + break; + } + /* From this point on p is the best so far */ + if (locked) + unlock_rq(locked); + chosen = locked = other_rq; + } + best_entries = entries; + best_key = key; + edt = p; + break; + } + if (i && other_rq != chosen) + unlock_rq(other_rq); } if (likely(edt != idle)) @@ -3632,12 +3638,10 @@ static inline void schedule_debug(struct task_struct *prev) */ static inline void set_rq_task(struct rq *rq, struct task_struct *p) { -#ifdef CONFIG_HIGH_RES_TIMERS if (p == rq->idle || p->policy == SCHED_FIFO) hrexpiry_clear(rq); else hrexpiry_start(rq, US_TO_NS(p->time_slice)); -#endif /* CONFIG_HIGH_RES_TIMERS */ if (rq->clock - rq->last_tick > HALF_JIFFY_NS) rq->dither = 0; else