From 6b45f1f363d7f6959b648cf49252f378022d11c6 Mon Sep 17 00:00:00 2001
From: Con Kolivas
Date: Fri, 21 Oct 2016 19:49:03 +1100
Subject: [PATCH 81/89] Remove the last remnants of the global runqueue, moving
 all variables to per-runqueue except for the cpu idle map.

---
 kernel/sched/MuQSS.c | 161 ++++++++++++++++++---------------------------------
 kernel/sched/MuQSS.h |   3 +
 2 files changed, 60 insertions(+), 104 deletions(-)

diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
index 99f02a4..475e7fc 100644
--- a/kernel/sched/MuQSS.c
+++ b/kernel/sched/MuQSS.c
@@ -162,6 +162,8 @@ int sched_iso_cpu __read_mostly = 70;
  */
 static int prio_ratios[NICE_WIDTH] __read_mostly;
 
+static cpumask_t cpu_idle_map;
+
 /*
  * The quota handed out to tasks of all priority levels when refilling their
  * time_slice.
@@ -171,27 +173,6 @@ static inline int timeslice(void)
 	return MS_TO_US(rr_interval);
 }
 
-/*
- * The global runqueue data that all CPUs work off. Contains either atomic
- * variables and a cpu bitmap set atomically.
- */
-struct global_rq {
-#ifdef CONFIG_SMP
-	atomic_t nr_running ____cacheline_aligned_in_smp;
-	atomic_t nr_uninterruptible ____cacheline_aligned_in_smp;
-	atomic64_t nr_switches ____cacheline_aligned_in_smp;
-	atomic_t qnr ____cacheline_aligned_in_smp; /* queued not running */
-#else
-	atomic_t nr_running ____cacheline_aligned;
-	atomic_t nr_uninterruptible ____cacheline_aligned;
-	atomic64_t nr_switches ____cacheline_aligned;
-	atomic_t qnr ____cacheline_aligned; /* queued not running */
-#endif
-#ifdef CONFIG_SMP
-	cpumask_t cpu_idle_map;
-#endif
-};
-
 #ifdef CONFIG_SMP
 /*
  * We add the notion of a root-domain which will be used to define per-domain
@@ -224,13 +205,6 @@ static struct root_domain def_root_domain;
 
 #endif /* CONFIG_SMP */
 
-/* There can be only one */
-#ifdef CONFIG_SMP
-static struct global_rq grq ____cacheline_aligned_in_smp;
-#else
-static struct global_rq grq ____cacheline_aligned;
-#endif
-
 static DEFINE_MUTEX(sched_hotcpu_mutex);
 
 /* cpus with isolated domains */
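With struct global_rq gone, the counters it carried become plain per-runqueue
fields (see the MuQSS.h hunk at the end of this patch), updated under each
CPU's own rq lock instead of bouncing one shared cacheline of atomics. One
subtlety: a per-CPU nr_uninterruptible is not meaningful on its own, because a
task increments the counter of the runqueue it sleeps from but may be woken
and decremented via a different runqueue (see the ttwu_do_activate hunk
below), so individual values can go negative and only the sum across CPUs is
well defined. A stand-alone sketch of that, with illustrative names rather
than kernel code:

#include <stdio.h>

#define NR_CPUS 2

static long nr_uninterruptible[NR_CPUS];	/* stand-in for rq->nr_uninterruptible */

int main(void)
{
	long sum = 0;
	int i;

	nr_uninterruptible[0]++;	/* task enters uninterruptible sleep via CPU 0's rq */
	nr_uninterruptible[1]--;	/* its wakeup is accounted on CPU 1's rq */

	for (i = 0; i < NR_CPUS; i++)
		sum += nr_uninterruptible[i];

	/* prints cpu0=1 cpu1=-1 sum=0: each entry is skewed, the total is right */
	printf("cpu0=%ld cpu1=%ld sum=%ld\n",
	       nr_uninterruptible[0], nr_uninterruptible[1], sum);
	return 0;
}

This is the usual sharded-counter trade: cheap rq-local updates on the fast
path, with the cost pushed onto the rare readers that sum the shards
(nr_active() later in this patch).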
@@ -780,6 +754,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	if (unlikely(task_on_rq_migrating(prev))) {
 		sched_info_dequeued(rq, prev);
+		rq->nr_running--;
 		/*
 		 * We move the ownership of prev to the new cpu now. ttwu can't
 		 * activate prev to the wrong cpu since it has to grab this
@@ -790,6 +765,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 
 		raw_spin_lock(&prev->pi_lock);
 		rq = __task_rq_lock(prev);
+		rq->nr_running++;
 		/* Check that someone else hasn't already queued prev */
 		if (likely(!task_queued(prev))) {
 			enqueue_task(rq, prev, 0);
@@ -991,26 +967,6 @@ static inline int task_timeslice(struct task_struct *p)
 	return (rr_interval * task_prio_ratio(p) / 128);
 }
 
-/*
- * qnr is the "queued but not running" count which is the total number of
- * tasks on the global runqueue list waiting for cpu time but not actually
- * currently running on a cpu.
- */
-static inline void inc_qnr(void)
-{
-	atomic_inc(&grq.qnr);
-}
-
-static inline void dec_qnr(void)
-{
-	atomic_dec(&grq.qnr);
-}
-
-static inline int queued_notrunning(void)
-{
-	return atomic_read(&grq.qnr);
-}
-
 #ifdef CONFIG_SMP
 /* Entered with rq locked */
 static inline void resched_if_idle(struct rq *rq)
@@ -1115,7 +1071,7 @@ static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask)
 static inline void set_cpuidle_map(int cpu)
 {
 	if (likely(cpu_online(cpu)))
-		atomic_set_cpu(cpu, &grq.cpu_idle_map);
+		atomic_set_cpu(cpu, &cpu_idle_map);
 }
 
 static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask)
@@ -1125,12 +1081,12 @@ static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask)
 
 static inline void clear_cpuidle_map(int cpu)
 {
-	atomic_clear_cpu(cpu, &grq.cpu_idle_map);
+	atomic_clear_cpu(cpu, &cpu_idle_map);
 }
 
 static bool suitable_idle_cpus(struct task_struct *p)
 {
-	return (cpumask_intersects(&p->cpus_allowed, &grq.cpu_idle_map));
+	return (cpumask_intersects(&p->cpus_allowed, &cpu_idle_map));
 }
 
 /*
@@ -1261,7 +1217,7 @@ static struct rq *resched_best_idle(struct task_struct *p, int cpu)
 	struct rq *rq;
 	int best_cpu;
 
-	cpumask_and(&tmpmask, &p->cpus_allowed, &grq.cpu_idle_map);
+	cpumask_and(&tmpmask, &p->cpus_allowed, &cpu_idle_map);
 	best_cpu = best_mask_cpu(cpu, task_rq(p), &tmpmask);
 	rq = cpu_rq(best_cpu);
 	if (!smt_schedule(p, rq))
@@ -1373,12 +1329,11 @@ static void activate_task(struct task_struct *p, struct rq *rq)
 
 	p->prio = effective_prio(p);
 	if (task_contributes_to_load(p))
-		atomic_dec(&grq.nr_uninterruptible);
+		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
-	atomic_inc(&grq.nr_running);
-	inc_qnr();
+	rq->nr_running++;
 }
 
 /*
@@ -1388,10 +1343,10 @@ static void activate_task(struct task_struct *p, struct rq *rq)
 static inline void deactivate_task(struct task_struct *p, struct rq *rq)
 {
 	if (task_contributes_to_load(p))
-		atomic_inc(&grq.nr_uninterruptible);
+		rq->nr_uninterruptible++;
 
 	p->on_rq = 0;
-	atomic_dec(&grq.nr_running);
+	rq->nr_running--;
 	sched_info_dequeued(rq, p);
 }
 
@@ -1459,11 +1414,12 @@ static inline void take_task(struct rq *rq, int cpu, struct task_struct *p)
 
 	dequeue_task(p_rq, p, DEQUEUE_SAVE);
 	if (p_rq != rq) {
+		p_rq->nr_running--;
 		sched_info_dequeued(p_rq, p);
+		rq->nr_running++;
 		sched_info_queued(rq, p);
 	}
 	set_task_cpu(p, cpu);
-	dec_qnr();
 }
 
 /*
@@ -1476,7 +1432,6 @@ static inline void return_task(struct task_struct *p, struct rq *rq,
 	if (deactivate)
 		deactivate_task(p, rq);
 	else {
-		inc_qnr();
 #ifdef CONFIG_SMP
 		/*
 		 * set_task_cpu was called on the running task that doesn't
@@ -1798,7 +1753,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
 
 #ifdef CONFIG_SMP
 	if (p->sched_contributes_to_load)
-		atomic_dec(&grq.nr_uninterruptible);
+		rq->nr_uninterruptible--;
 #endif
 
 	ttwu_activate(rq, p);
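Because there is no longer a global queued-but-not-running count, a task's
contribution to nr_running has to travel with it whenever it changes
runqueues: finish_lock_switch() above decrements the old rq and increments
the rq the task is re-locked on, and take_task() does the same when pulling a
task across CPUs. The write-side pattern, reduced to a stand-alone toy (names
are illustrative; in the real code each adjustment happens with the relevant
rq->lock held):

#include <stdio.h>

struct toy_rq {
	int nr_running;
};

/* Move one task's worth of accounting from src to dst. */
static void migrate_task_count(struct toy_rq *src, struct toy_rq *dst)
{
	src->nr_running--;	/* task leaves the old CPU's queue */
	dst->nr_running++;	/* task appears on the new CPU's queue */
}

int main(void)
{
	struct toy_rq rq0 = { .nr_running = 3 }, rq1 = { .nr_running = 1 };

	migrate_task_count(&rq0, &rq1);
	printf("rq0=%d rq1=%d\n", rq0.nr_running, rq1.nr_running); /* rq0=2 rq1=2 */
	return 0;
}

If either half of the pair is missed, the per-CPU counters drift, and
everything derived from them (nr_running(), nr_active(), the load average)
drifts with them.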
@@ -2682,22 +2637,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
 }
 
 /*
- * nr_running, nr_uninterruptible and nr_context_switches:
- *
- * externally visible scheduler statistics: current number of runnable
- * threads, total number of context switches performed since bootup.
- */
-unsigned long nr_running(void)
-{
-	return atomic_read(&grq.nr_running);
-}
-
-static unsigned long nr_uninterruptible(void)
-{
-	return atomic_read(&grq.nr_uninterruptible);
-}
-
-/*
  * Check if only the current task is running on the cpu.
  *
  * Caution: this function does not check that the caller has disabled
@@ -2721,9 +2660,31 @@ bool single_task_running(void)
 }
 EXPORT_SYMBOL(single_task_running);
 
+/*
+ * nr_running, nr_uninterruptible and nr_context_switches:
+ *
+ * externally visible scheduler statistics: current number of runnable
+ * threads, total number of context switches performed since bootup.
+ */
 unsigned long long nr_context_switches(void)
 {
-	return (unsigned long long)atomic64_read(&grq.nr_switches);
+	long long sum = 0;
+	int i;
+
+	for_each_possible_cpu(i)
+		sum += cpu_rq(i)->nr_switches;
+
+	return sum;
+}
+
+unsigned long nr_running(void)
+{
+	long i, sum = 0;
+
+	for_each_online_cpu(i)
+		sum += cpu_rq(i)->nr_running;
+
+	return sum;
 }
 
 unsigned long nr_iowait(void)
@@ -2744,7 +2705,14 @@ unsigned long nr_iowait_cpu(int cpu)
 
 unsigned long nr_active(void)
 {
-	return nr_running() + nr_uninterruptible();
+	long i, sum = 0;
+
+	for_each_online_cpu(i) {
+		sum += cpu_rq(i)->nr_running;
+		sum += cpu_rq(i)->nr_uninterruptible;
+	}
+
+	return sum;
 }
 
 /*
@@ -3846,9 +3814,6 @@ static void wake_smt_siblings(struct rq *this_rq)
 {
 	int other_cpu;
 
-	if (!queued_notrunning())
-		return;
-
 	for_each_cpu(other_cpu, &this_rq->thread_mask) {
 		struct rq *rq;
 
@@ -4012,23 +3977,16 @@ static void __sched notrace __schedule(bool preempt)
 		return_task(prev, rq, cpu, deactivate);
 	}
 
-	if (unlikely(!queued_notrunning())) {
-		next = idle;
-		schedstat_inc(rq, sched_goidle);
+	next = earliest_deadline_task(rq, cpu, idle);
+	if (likely(next->prio != PRIO_LIMIT)) {
+		clear_cpuidle_map(cpu);
+		next->last_ran = niffies;
+	} else {
 		set_cpuidle_map(cpu);
 		update_load_avg(rq);
-	} else {
-		next = earliest_deadline_task(rq, cpu, idle);
-		if (likely(next->prio != PRIO_LIMIT))
-			clear_cpuidle_map(cpu);
-		else {
-			set_cpuidle_map(cpu);
-			update_load_avg(rq);
-		}
 	}
 
 	set_rq_task(rq, next);
-	next->last_ran = niffies;
 
 	if (likely(prev != next)) {
 		/*
 			check_siblings(rq);
 		else
 			wake_siblings(rq);
-		atomic64_inc(&grq.nr_switches);
+		rq->nr_switches++;
 		rq->curr = next;
 		++*switch_count;
 		trace_sched_switch(preempt, prev, next);
 
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
-	} else {
-		check_siblings(rq);
+	} else
 		rq_unlock_irq(rq);
-	}
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
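The statistics above flip from O(1) reads of global atomics to O(nr_cpus)
sums over plain per-CPU fields: writers stay on their own cacheline and
readers pay at read time, accepting a slightly stale total since nothing
orders the reads against concurrent updates. A minimal stand-alone version of
the read side, with a toy array standing in for cpu_rq():

#include <stdio.h>

#define NR_CPUS 4

struct toy_rq {
	int nr_running;
	long long nr_switches;
};

static struct toy_rq toy_rq[NR_CPUS];	/* stand-in for the per-CPU runqueues */

static long long toy_nr_context_switches(void)
{
	long long sum = 0;
	int i;

	/* like the for_each_possible_cpu() loop above: no locking, each
	 * addend is a momentary snapshot of that CPU's counter */
	for (i = 0; i < NR_CPUS; i++)
		sum += toy_rq[i].nr_switches;

	return sum;
}

int main(void)
{
	toy_rq[0].nr_switches = 10;
	toy_rq[2].nr_switches = 5;
	printf("%lld\n", toy_nr_context_switches());	/* prints 15 */
	return 0;
}

Note that nr_context_switches() walks possible CPUs while nr_running() walks
online ones, presumably because accumulated switch counts should survive a
CPU going offline, whereas an offline CPU contributes nothing runnable.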
@@ -7468,7 +7424,7 @@ static const cpumask_t *thread_cpumask(int cpu)
 /* All this CPU's SMT siblings are idle */
 static bool siblings_cpu_idle(struct rq *rq)
 {
-	return cpumask_subset(&rq->thread_mask, &grq.cpu_idle_map);
+	return cpumask_subset(&rq->thread_mask, &cpu_idle_map);
 }
 #endif
 #ifdef CONFIG_SCHED_MC
@@ -7479,7 +7435,7 @@ static const cpumask_t *core_cpumask(int cpu)
 /* All this CPU's shared cache siblings are idle */
 static bool cache_cpu_idle(struct rq *rq)
 {
-	return cpumask_subset(&rq->core_mask, &grq.cpu_idle_map);
+	return cpumask_subset(&rq->core_mask, &cpu_idle_map);
 }
 #endif
 
@@ -7660,15 +7616,11 @@ void __init sched_init(void)
 	for (i = 1 ; i < NICE_WIDTH ; i++)
 		prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;
 
-	atomic_set(&grq.nr_running, 0);
-	atomic_set(&grq.nr_uninterruptible, 0);
-	atomic64_set(&grq.nr_switches, 0);
 	skiplist_node_init(&init_task.node);
 
 #ifdef CONFIG_SMP
 	init_defrootdomain();
-	atomic_set(&grq.qnr, 0);
-	cpumask_clear(&grq.cpu_idle_map);
+	cpumask_clear(&cpu_idle_map);
 #else
 	uprq = &per_cpu(runqueues, 0);
 #endif
@@ -7682,6 +7634,7 @@ void __init sched_init(void)
 #endif /* CONFIG_CGROUP_SCHED */
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
+		rq->nr_running = rq->nr_uninterruptible = rq->nr_switches = 0;
 		skiplist_init(&rq->node);
 		rq->sl = new_skiplist(&rq->node);
 		raw_spin_lock_init(&rq->lock);
diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h
index 4e3115d..10a12b3 100644
--- a/kernel/sched/MuQSS.h
+++ b/kernel/sched/MuQSS.h
@@ -17,6 +17,9 @@ struct rq {
 	struct task_struct *curr, *idle, *stop;
 	struct mm_struct *prev_mm;
 
+	long nr_uninterruptible;
+	s64 nr_switches;
+	int nr_running;
 
 	raw_spinlock_t lock;
-- 
2.7.4
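The one piece of global state the patch keeps is cpu_idle_map, since wake-up
placement (resched_best_idle(), suitable_idle_cpus(), the SMT/MC sibling
checks) inherently needs a cross-CPU view of which CPUs are idle, and it is
maintained with atomic bit set/clear so readers never take another CPU's rq
lock. A rough user-space analogue of that access pattern, using one bit per
CPU in a plain atomic word rather than the kernel's cpumask API:

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong idle_map;	/* bit n set => CPU n currently idle */

static void set_cpu_idle(int cpu)
{
	atomic_fetch_or(&idle_map, 1UL << cpu);
}

static void clear_cpu_idle(int cpu)
{
	atomic_fetch_and(&idle_map, ~(1UL << cpu));
}

/* like suitable_idle_cpus(): does the task's affinity mask intersect
 * the set of currently idle CPUs? */
static int any_suitable_idle(unsigned long affinity)
{
	return (atomic_load(&idle_map) & affinity) != 0;
}

int main(void)
{
	set_cpu_idle(1);
	set_cpu_idle(3);
	printf("%d\n", any_suitable_idle(1UL << 3));	/* 1: CPU 3 is idle */
	clear_cpu_idle(3);
	printf("%d\n", any_suitable_idle(1UL << 3));	/* 0 */
	return 0;
}

The map is advisory: a CPU may stop being idle between the read and the
resched it triggers, which is tolerable because the worst case is a wasted
wakeup rather than a correctness problem.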