From 6b45f1f363d7f6959b648cf49252f378022d11c6 Mon Sep 17 00:00:00 2001
From: Con Kolivas
Date: Fri, 21 Oct 2016 19:49:03 +1100
Subject: [PATCH 81/89] Remove the last remnants of the global runqueue, moving
 all variables to per-runqueue except for the cpu idle map.

---
 kernel/sched/MuQSS.c | 161 ++++++++++++++++++---------------------------------
 kernel/sched/MuQSS.h |   3 +
 2 files changed, 60 insertions(+), 104 deletions(-)

diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
index 99f02a4..475e7fc 100644
--- a/kernel/sched/MuQSS.c
+++ b/kernel/sched/MuQSS.c
@@ -162,6 +162,8 @@ int sched_iso_cpu __read_mostly = 70;
  */
 static int prio_ratios[NICE_WIDTH] __read_mostly;
 
+static cpumask_t cpu_idle_map;
+
 /*
  * The quota handed out to tasks of all priority levels when refilling their
  * time_slice.
@@ -171,27 +173,6 @@ static inline int timeslice(void)
 	return MS_TO_US(rr_interval);
 }
 
-/*
- * The global runqueue data that all CPUs work off. Contains either atomic
- * variables and a cpu bitmap set atomically.
- */
-struct global_rq {
-#ifdef CONFIG_SMP
-	atomic_t nr_running ____cacheline_aligned_in_smp;
-	atomic_t nr_uninterruptible ____cacheline_aligned_in_smp;
-	atomic64_t nr_switches ____cacheline_aligned_in_smp;
-	atomic_t qnr ____cacheline_aligned_in_smp; /* queued not running */
-#else
-	atomic_t nr_running ____cacheline_aligned;
-	atomic_t nr_uninterruptible ____cacheline_aligned;
-	atomic64_t nr_switches ____cacheline_aligned;
-	atomic_t qnr ____cacheline_aligned; /* queued not running */
-#endif
-#ifdef CONFIG_SMP
-	cpumask_t cpu_idle_map;
-#endif
-};
-
 #ifdef CONFIG_SMP
 /*
  * We add the notion of a root-domain which will be used to define per-domain
@@ -224,13 +205,6 @@ static struct root_domain def_root_domain;
 
 #endif /* CONFIG_SMP */
 
-/* There can be only one */
-#ifdef CONFIG_SMP
-static struct global_rq grq ____cacheline_aligned_in_smp;
-#else
-static struct global_rq grq ____cacheline_aligned;
-#endif
-
 static DEFINE_MUTEX(sched_hotcpu_mutex);
 
 /* cpus with isolated domains */
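With struct global_rq gone, the counters it carried become plain per-runqueue
fields (see the MuQSS.h hunk at the end of this patch), updated under each
CPU's own rq lock instead of bouncing one shared cacheline of atomics. One
subtlety: a per-CPU nr_uninterruptible is not meaningful on its own, because a
task increments the counter of the runqueue it sleeps from but may be woken
and decremented via a different runqueue (see the ttwu_do_activate hunk
below), so individual values can go negative and only the sum across CPUs is
well defined. A stand-alone sketch of that, with illustrative names rather
than kernel code:

#include <stdio.h>

#define NR_CPUS 2

static long nr_uninterruptible[NR_CPUS];	/* stand-in for rq->nr_uninterruptible */

int main(void)
{
	long sum = 0;
	int i;

	nr_uninterruptible[0]++;	/* task enters uninterruptible sleep via CPU 0's rq */
	nr_uninterruptible[1]--;	/* its wakeup is accounted on CPU 1's rq */

	for (i = 0; i < NR_CPUS; i++)
		sum += nr_uninterruptible[i];

	/* prints cpu0=1 cpu1=-1 sum=0: each entry is skewed, the total is right */
	printf("cpu0=%ld cpu1=%ld sum=%ld\n",
	       nr_uninterruptible[0], nr_uninterruptible[1], sum);
	return 0;
}

This is the usual sharded-counter trade: cheap rq-local updates on the fast
path, with the cost pushed onto the rare readers that sum the shards
(nr_active() later in this patch).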
@@ -780,6 +754,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	if (unlikely(task_on_rq_migrating(prev))) {
 		sched_info_dequeued(rq, prev);
+		rq->nr_running--;
 		/*
 		 * We move the ownership of prev to the new cpu now. ttwu can't
 		 * activate prev to the wrong cpu since it has to grab this
@@ -790,6 +765,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 
 		raw_spin_lock(&prev->pi_lock);
 		rq = __task_rq_lock(prev);
+		rq->nr_running++;
 		/* Check that someone else hasn't already queued prev */
 		if (likely(!task_queued(prev))) {
 			enqueue_task(rq, prev, 0);
@@ -991,26 +967,6 @@ static inline int task_timeslice(struct task_struct *p)
 	return (rr_interval * task_prio_ratio(p) / 128);
 }
 
-/*
- * qnr is the "queued but not running" count which is the total number of
- * tasks on the global runqueue list waiting for cpu time but not actually
- * currently running on a cpu.
- */
-static inline void inc_qnr(void)
-{
-	atomic_inc(&grq.qnr);
-}
-
-static inline void dec_qnr(void)
-{
-	atomic_dec(&grq.qnr);
-}
-
-static inline int queued_notrunning(void)
-{
-	return atomic_read(&grq.qnr);
-}
-
 #ifdef CONFIG_SMP
 /* Entered with rq locked */
 static inline void resched_if_idle(struct rq *rq)
@@ -1115,7 +1071,7 @@ static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask)
 static inline void set_cpuidle_map(int cpu)
 {
 	if (likely(cpu_online(cpu)))
-		atomic_set_cpu(cpu, &grq.cpu_idle_map);
+		atomic_set_cpu(cpu, &cpu_idle_map);
 }
 
 static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask)
@@ -1125,12 +1081,12 @@ static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask)
 
 static inline void clear_cpuidle_map(int cpu)
 {
-	atomic_clear_cpu(cpu, &grq.cpu_idle_map);
+	atomic_clear_cpu(cpu, &cpu_idle_map);
 }
 
 static bool suitable_idle_cpus(struct task_struct *p)
 {
-	return (cpumask_intersects(&p->cpus_allowed, &grq.cpu_idle_map));
+	return (cpumask_intersects(&p->cpus_allowed, &cpu_idle_map));
 }
 
 /*
@@ -1261,7 +1217,7 @@ static struct rq *resched_best_idle(struct task_struct *p, int cpu)
 	struct rq *rq;
 	int best_cpu;
 
-	cpumask_and(&tmpmask, &p->cpus_allowed, &grq.cpu_idle_map);
+	cpumask_and(&tmpmask, &p->cpus_allowed, &cpu_idle_map);
 	best_cpu = best_mask_cpu(cpu, task_rq(p), &tmpmask);
 	rq = cpu_rq(best_cpu);
 	if (!smt_schedule(p, rq))
@@ -1373,12 +1329,11 @@ static void activate_task(struct task_struct *p, struct rq *rq)
 
 	p->prio = effective_prio(p);
 	if (task_contributes_to_load(p))
-		atomic_dec(&grq.nr_uninterruptible);
+		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
-	atomic_inc(&grq.nr_running);
-	inc_qnr();
+	rq->nr_running++;
 }
 
 /*
@@ -1388,10 +1343,10 @@ static void activate_task(struct task_struct *p, struct rq *rq)
 static inline void deactivate_task(struct task_struct *p, struct rq *rq)
 {
 	if (task_contributes_to_load(p))
-		atomic_inc(&grq.nr_uninterruptible);
+		rq->nr_uninterruptible++;
 
 	p->on_rq = 0;
-	atomic_dec(&grq.nr_running);
+	rq->nr_running--;
 	sched_info_dequeued(rq, p);
 }
 
@@ -1459,11 +1414,12 @@ static inline void take_task(struct rq *rq, int cpu, struct task_struct *p)
 
 	dequeue_task(p_rq, p, DEQUEUE_SAVE);
 	if (p_rq != rq) {
+		p_rq->nr_running--;
 		sched_info_dequeued(p_rq, p);
+		rq->nr_running++;
 		sched_info_queued(rq, p);
 	}
 	set_task_cpu(p, cpu);
-	dec_qnr();
 }
 
 /*
@@ -1476,7 +1432,6 @@ static inline void return_task(struct task_struct *p, struct rq *rq,
 	if (deactivate)
 		deactivate_task(p, rq);
 	else {
-		inc_qnr();
 #ifdef CONFIG_SMP
 		/*
 		 * set_task_cpu was called on the running task that doesn't
@@ -1798,7 +1753,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
 
 #ifdef CONFIG_SMP
 	if (p->sched_contributes_to_load)
-		atomic_dec(&grq.nr_uninterruptible);
+		rq->nr_uninterruptible--;
 #endif
 
 	ttwu_activate(rq, p);
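Because there is no longer a global queued-but-not-running count, a task's
contribution to nr_running has to travel with it whenever it changes
runqueues: finish_lock_switch() above decrements the old rq and increments
the rq the task is re-locked on, and take_task() does the same when pulling a
task across CPUs. The write-side pattern, reduced to a stand-alone toy (names
are illustrative; in the real code each adjustment happens with the relevant
rq->lock held):

#include <stdio.h>

struct toy_rq {
	int nr_running;
};

/* Move one task's worth of accounting from src to dst. */
static void migrate_task_count(struct toy_rq *src, struct toy_rq *dst)
{
	src->nr_running--;	/* task leaves the old CPU's queue */
	dst->nr_running++;	/* task appears on the new CPU's queue */
}

int main(void)
{
	struct toy_rq rq0 = { .nr_running = 3 }, rq1 = { .nr_running = 1 };

	migrate_task_count(&rq0, &rq1);
	printf("rq0=%d rq1=%d\n", rq0.nr_running, rq1.nr_running); /* rq0=2 rq1=2 */
	return 0;
}

If either half of the pair is missed, the per-CPU counters drift, and
everything derived from them (nr_running(), nr_active(), the load average)
drifts with them.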
@@ -2682,22 +2637,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
 }
 
 /*
- * nr_running, nr_uninterruptible and nr_context_switches:
- *
- * externally visible scheduler statistics: current number of runnable
- * threads, total number of context switches performed since bootup.
- */
-unsigned long nr_running(void)
-{
-	return atomic_read(&grq.nr_running);
-}
-
-static unsigned long nr_uninterruptible(void)
-{
-	return atomic_read(&grq.nr_uninterruptible);
-}
-
-/*
  * Check if only the current task is running on the cpu.
  *
  * Caution: this function does not check that the caller has disabled
@@ -2721,9 +2660,31 @@ bool single_task_running(void)
 }
 EXPORT_SYMBOL(single_task_running);
 
+/*
+ * nr_running, nr_uninterruptible and nr_context_switches:
+ *
+ * externally visible scheduler statistics: current number of runnable
+ * threads, total number of context switches performed since bootup.
+ */
 unsigned long long nr_context_switches(void)
 {
-	return (unsigned long long)atomic64_read(&grq.nr_switches);
+	long long sum = 0;
+	int i;
+
+	for_each_possible_cpu(i)
+		sum += cpu_rq(i)->nr_switches;
+
+	return sum;
+}
+
+unsigned long nr_running(void)
+{
+	long i, sum = 0;
+
+	for_each_online_cpu(i)
+		sum += cpu_rq(i)->nr_running;
+
+	return sum;
 }
 
 unsigned long nr_iowait(void)
@@ -2744,7 +2705,14 @@ unsigned long nr_iowait_cpu(int cpu)
 
 unsigned long nr_active(void)
 {
-	return nr_running() + nr_uninterruptible();
+	long i, sum = 0;
+
+	for_each_online_cpu(i) {
+		sum += cpu_rq(i)->nr_running;
+		sum += cpu_rq(i)->nr_uninterruptible;
+	}
+
+	return sum;
 }
 
 /*
@@ -3846,9 +3814,6 @@ static void wake_smt_siblings(struct rq *this_rq)
 {
 	int other_cpu;
 
-	if (!queued_notrunning())
-		return;
-
 	for_each_cpu(other_cpu, &this_rq->thread_mask) {
 		struct rq *rq;
 
@@ -4012,23 +3977,16 @@ static void __sched notrace __schedule(bool preempt)
 		return_task(prev, rq, cpu, deactivate);
 	}
 
-	if (unlikely(!queued_notrunning())) {
-		next = idle;
-		schedstat_inc(rq, sched_goidle);
+	next = earliest_deadline_task(rq, cpu, idle);
+	if (likely(next->prio != PRIO_LIMIT)) {
+		clear_cpuidle_map(cpu);
+		next->last_ran = niffies;
+	} else {
 		set_cpuidle_map(cpu);
 		update_load_avg(rq);
-	} else {
-		next = earliest_deadline_task(rq, cpu, idle);
-		if (likely(next->prio != PRIO_LIMIT))
-			clear_cpuidle_map(cpu);
-		else {
-			set_cpuidle_map(cpu);
-			update_load_avg(rq);
-		}
 	}
 
 	set_rq_task(rq, next);
-	next->last_ran = niffies;
 
 	if (likely(prev != next)) {
 		/*
 			check_siblings(rq);
 		else
 			wake_siblings(rq);
-		atomic64_inc(&grq.nr_switches);
+		rq->nr_switches++;
 		rq->curr = next;
 		++*switch_count;
 		trace_sched_switch(preempt, prev, next);
 
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
-	} else {
-		check_siblings(rq);
+	} else
 		rq_unlock_irq(rq);
-	}
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
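The statistics above flip from O(1) reads of global atomics to O(nr_cpus)
sums over plain per-CPU fields: writers stay on their own cacheline and
readers pay at read time, accepting a slightly stale total since nothing
orders the reads against concurrent updates. A minimal stand-alone version of
the read side, with a toy array standing in for cpu_rq():

#include <stdio.h>

#define NR_CPUS 4

struct toy_rq {
	int nr_running;
	long long nr_switches;
};

static struct toy_rq toy_rq[NR_CPUS];	/* stand-in for the per-CPU runqueues */

static long long toy_nr_context_switches(void)
{
	long long sum = 0;
	int i;

	/* like the for_each_possible_cpu() loop above: no locking, each
	 * addend is a momentary snapshot of that CPU's counter */
	for (i = 0; i < NR_CPUS; i++)
		sum += toy_rq[i].nr_switches;

	return sum;
}

int main(void)
{
	toy_rq[0].nr_switches = 10;
	toy_rq[2].nr_switches = 5;
	printf("%lld\n", toy_nr_context_switches());	/* prints 15 */
	return 0;
}

Note that nr_context_switches() walks possible CPUs while nr_running() walks
online ones, presumably because accumulated switch counts should survive a
CPU going offline, whereas an offline CPU contributes nothing runnable.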
@@ -7468,7 +7424,7 @@ static const cpumask_t *thread_cpumask(int cpu)
 /* All this CPU's SMT siblings are idle */
 static bool siblings_cpu_idle(struct rq *rq)
 {
-	return cpumask_subset(&rq->thread_mask, &grq.cpu_idle_map);
+	return cpumask_subset(&rq->thread_mask, &cpu_idle_map);
 }
 #endif
 #ifdef CONFIG_SCHED_MC
@@ -7479,7 +7435,7 @@ static const cpumask_t *core_cpumask(int cpu)
 /* All this CPU's shared cache siblings are idle */
 static bool cache_cpu_idle(struct rq *rq)
 {
-	return cpumask_subset(&rq->core_mask, &grq.cpu_idle_map);
+	return cpumask_subset(&rq->core_mask, &cpu_idle_map);
 }
 #endif
 
@@ -7660,15 +7616,11 @@ void __init sched_init(void)
 	for (i = 1 ; i < NICE_WIDTH ; i++)
 		prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;
 
-	atomic_set(&grq.nr_running, 0);
-	atomic_set(&grq.nr_uninterruptible, 0);
-	atomic64_set(&grq.nr_switches, 0);
 	skiplist_node_init(&init_task.node);
 
 #ifdef CONFIG_SMP
 	init_defrootdomain();
-	atomic_set(&grq.qnr, 0);
-	cpumask_clear(&grq.cpu_idle_map);
+	cpumask_clear(&cpu_idle_map);
 #else
 	uprq = &per_cpu(runqueues, 0);
 #endif
@@ -7682,6 +7634,7 @@ void __init sched_init(void)
 #endif /* CONFIG_CGROUP_SCHED */
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
+		rq->nr_running = rq->nr_uninterruptible = rq->nr_switches = 0;
 		skiplist_init(&rq->node);
 		rq->sl = new_skiplist(&rq->node);
 		raw_spin_lock_init(&rq->lock);
diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h
index 4e3115d..10a12b3 100644
--- a/kernel/sched/MuQSS.h
+++ b/kernel/sched/MuQSS.h
@@ -17,6 +17,9 @@ struct rq {
 	struct task_struct *curr, *idle, *stop;
 	struct mm_struct *prev_mm;
 
+	long nr_uninterruptible;
+	s64 nr_switches;
+	int nr_running;
 
 	raw_spinlock_t lock;
-- 
2.7.4
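The one piece of global state the patch keeps is cpu_idle_map, since wake-up
placement (resched_best_idle(), suitable_idle_cpus(), the SMT/MC sibling
checks) inherently needs a cross-CPU view of which CPUs are idle, and it is
maintained with atomic bit set/clear so readers never take another CPU's rq
lock. A rough user-space analogue of that access pattern, using one bit per
CPU in a plain atomic word rather than the kernel's cpumask API:

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong idle_map;	/* bit n set => CPU n currently idle */

static void set_cpu_idle(int cpu)
{
	atomic_fetch_or(&idle_map, 1UL << cpu);
}

static void clear_cpu_idle(int cpu)
{
	atomic_fetch_and(&idle_map, ~(1UL << cpu));
}

/* like suitable_idle_cpus(): does the task's affinity mask intersect
 * the set of currently idle CPUs? */
static int any_suitable_idle(unsigned long affinity)
{
	return (atomic_load(&idle_map) & affinity) != 0;
}

int main(void)
{
	set_cpu_idle(1);
	set_cpu_idle(3);
	printf("%d\n", any_suitable_idle(1UL << 3));	/* 1: CPU 3 is idle */
	clear_cpu_idle(3);
	printf("%d\n", any_suitable_idle(1UL << 3));	/* 0 */
	return 0;
}

The map is advisory: a CPU may stop being idle between the read and the
resched it triggers, which is tolerable because the worst case is a wasted
wakeup rather than a correctness problem.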