Estimate load per cpu based on the number of tasks soft affined to it.

-ck

---
 kernel/sched/bfs.c |   41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

Index: linux-3.17.2-bfsdev/kernel/sched/bfs.c
===================================================================
--- linux-3.17.2-bfsdev.orig/kernel/sched/bfs.c	2014-11-11 11:28:47.194991886 +1100
+++ linux-3.17.2-bfsdev/kernel/sched/bfs.c	2014-11-11 11:29:14.074993523 +1100
@@ -624,7 +624,7 @@ static bool isoprio_suitable(void)
 /*
  * Adding to the global runqueue. Enter with grq locked.
  */
-static void enqueue_task(struct task_struct *p)
+static void enqueue_task(struct task_struct *p, struct rq *rq)
 {
 	if (!rt_task(p)) {
 		/* Check it hasn't gotten rt from PI */
@@ -636,7 +636,7 @@ static void enqueue_task(struct task_str
 	}
 	__set_bit(p->prio, grq.prio_bitmap);
 	list_add_tail(&p->run_list, grq.queue + p->prio);
-	sched_info_queued(task_rq(p), p);
+	sched_info_queued(rq, p);
 }
 
 /* Only idle task does this as a real time task*/
@@ -1043,7 +1043,9 @@ static void activate_task(struct task_st
 	p->prio = effective_prio(p);
 	if (task_contributes_to_load(p))
 		grq.nr_uninterruptible--;
-	enqueue_task(p);
+	enqueue_task(p, rq);
+	rq->soft_affined++;
+	p->on_rq = 1;
 	grq.nr_running++;
 	inc_qnr();
 }
@@ -1054,10 +1056,12 @@ static inline void clear_sticky(struct t
  * deactivate_task - If it's running, it's not on the grq and we can just
  * decrement the nr_running. Enter with grq locked.
  */
-static inline void deactivate_task(struct task_struct *p)
+static inline void deactivate_task(struct task_struct *p, struct rq *rq)
 {
 	if (task_contributes_to_load(p))
 		grq.nr_uninterruptible++;
+	rq->soft_affined--;
+	p->on_rq = 0;
 	grq.nr_running--;
 	clear_sticky(p);
 }
@@ -1177,13 +1181,13 @@ static inline void take_task(int cpu, st
  * Returns a descheduling task to the grq runqueue unless it is being
  * deactivated.
  */
-static inline void return_task(struct task_struct *p, bool deactivate)
+static inline void return_task(struct task_struct *p, struct rq *rq, bool deactivate)
 {
 	if (deactivate)
-		deactivate_task(p);
+		deactivate_task(p, rq);
 	else {
 		inc_qnr();
-		enqueue_task(p);
+		enqueue_task(p, rq);
 	}
 }
 
@@ -1536,7 +1540,6 @@ static inline void ttwu_activate(struct
 				  bool is_sync)
 {
 	activate_task(p, rq);
-	p->on_rq = 1;
 
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
@@ -1781,7 +1784,6 @@ void wake_up_new_task(struct task_struct
 	p->prio = rq->curr->normal_prio;
 
 	activate_task(p, rq);
-	p->on_rq = 1;
 	trace_sched_wakeup_new(p, 1);
 	if (unlikely(p->policy == SCHED_FIFO))
 		goto after_ts_init;
@@ -2089,18 +2091,15 @@ unsigned long nr_active(void)
 	return nr_running() + nr_uninterruptible();
 }
 
-/* Beyond a task running on this CPU, load is equal everywhere on BFS */
-static inline unsigned long cpu_load(struct rq *rq)
-{
-	return rq->rq_running + ((queued_notrunning() + nr_uninterruptible()) / grq.noc);
-}
-
+/* Beyond a task running on this CPU, load is equal everywhere on BFS, so we
+ * base it on the number of running or queued tasks with their ->rq pointer
+ * set to this cpu as being the CPU they're more likely to run on. */
 void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
 {
 	struct rq *this = this_rq();
 
 	*nr_waiters = atomic_read(&this->nr_iowait);
-	*load = cpu_load(this);
+	*load = this->soft_affined;
 }
 
 /* Variables and functions for calc_load */
@@ -3550,7 +3549,7 @@ need_resched:
 			} else
 				swap_sticky(rq, cpu, prev);
 		}
-		return_task(prev, deactivate);
+		return_task(prev, rq, deactivate);
 	}
 
 	if (unlikely(!queued_notrunning())) {
@@ -3757,7 +3756,7 @@ void rt_mutex_setprio(struct task_struct
 	if (task_running(p) && prio > oldprio)
 		resched_task(p);
 	if (queued) {
-		enqueue_task(p);
+		enqueue_task(p, rq);
 		try_preempt(p, rq);
 	}
 
@@ -3810,7 +3809,7 @@ void set_user_nice(struct task_struct *p
 	p->prio = effective_prio(p);
 
 	if (queued) {
-		enqueue_task(p);
+		enqueue_task(p, rq);
 		if (new_static < old_static)
 			try_preempt(p, rq);
 	} else if (task_running(p)) {
@@ -4128,7 +4127,7 @@ recheck:
 		dequeue_task(p);
 	__setscheduler(p, rq, policy, param->sched_priority);
 	if (queued) {
-		enqueue_task(p);
+		enqueue_task(p, rq);
 		try_preempt(p, rq);
 	}
 	__task_grq_unlock();
@@ -7156,7 +7155,7 @@ void normalize_rt_tasks(void)
 			dequeue_task(p);
 		__setscheduler(p, rq, SCHED_NORMAL, 0);
 		if (queued) {
-			enqueue_task(p);
+			enqueue_task(p, rq);
 			try_preempt(p, rq);
 		}
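For readers following the change: below is a minimal user-space C sketch, separate from the patch itself, of the bookkeeping it introduces. Each runqueue keeps a soft_affined count that activate_task()/deactivate_task() increment and decrement, and the per-CPU load reported via get_iowait_load() becomes simply that count instead of a share of the global queue. The toy_* names are invented for illustration and are not kernel identifiers.

/*
 * Toy model of per-CPU load accounting via a soft_affined counter.
 * Not kernel code; no locking, just the arithmetic of the idea.
 */
#include <stdio.h>

struct toy_rq {
	int cpu;
	unsigned long soft_affined;	/* tasks most likely to run on this CPU */
};

struct toy_task {
	struct toy_rq *rq;		/* runqueue the task is soft affined to */
	int on_rq;
};

/* Mirrors activate_task(): queueing a task counts it against its rq. */
static void toy_activate(struct toy_task *p, struct toy_rq *rq)
{
	p->rq = rq;
	rq->soft_affined++;
	p->on_rq = 1;
}

/* Mirrors deactivate_task(): a sleeping task stops counting as load. */
static void toy_deactivate(struct toy_task *p)
{
	p->rq->soft_affined--;
	p->on_rq = 0;
}

/* Mirrors the new *load value in get_iowait_load(): just the count. */
static unsigned long toy_cpu_load(const struct toy_rq *rq)
{
	return rq->soft_affined;
}

int main(void)
{
	struct toy_rq cpu0 = { .cpu = 0 };
	struct toy_task a = { 0 }, b = { 0 };

	toy_activate(&a, &cpu0);
	toy_activate(&b, &cpu0);
	printf("cpu0 load: %lu\n", toy_cpu_load(&cpu0));	/* prints 2 */
	toy_deactivate(&b);
	printf("cpu0 load: %lu\n", toy_cpu_load(&cpu0));	/* prints 1 */
	return 0;
}

The design point the sketch highlights: the old cpu_load() divided the global queued/uninterruptible count evenly across CPUs, so every CPU reported nearly the same load; counting tasks whose ->rq pointer names this CPU gives a cheap, per-CPU estimate updated at enqueue/dequeue time.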