Index: linux-2.6.31-bfs/kernel/sched_bfs.c =================================================================== --- linux-2.6.31-bfs.orig/kernel/sched_bfs.c 2009-09-17 15:14:42.134194042 +1000 +++ linux-2.6.31-bfs/kernel/sched_bfs.c 2009-09-17 17:43:24.098898625 +1000 @@ -183,6 +183,7 @@ struct rq { struct task_struct *curr, *idle; struct mm_struct *prev_mm; + struct list_head queue; /* Place to store currently running task */ /* Stored data about rq->curr to work outside grq lock */ unsigned long rq_deadline; @@ -477,9 +478,10 @@ static inline void finish_lock_switch(st /* * A task that is queued will be on the grq run list. * A task that is not running or queued will not be on the grq run list. - * A task that is currently running will have ->oncpu set. - * The only time a task will be both queued and running by these definitions - * is during schedule, and all under grq_lock so it should never be seen. + * A task that is currently running will have ->oncpu set and be queued + * temporarily in its own rq queue. + * A task that is running and no longer queued will be seen only on + * context switch exit. */ static inline int task_queued(struct task_struct *p) @@ -487,6 +489,11 @@ static inline int task_queued(struct tas return (!list_empty(&p->run_list)); } +static inline int task_queued_only(struct task_struct *p) +{ + return (!list_empty(&p->run_list) && !task_running(p)); +} + /* * Removing from the global runqueue. Enter with grq locked. */ @@ -660,6 +667,7 @@ static inline void take_task(struct rq * { set_task_cpu(p, rq->cpu); dequeue_task(p); + list_add(&p->run_list, &rq->queue); } /* @@ -668,6 +676,7 @@ static inline void take_task(struct rq * */ static inline void return_task(struct task_struct *p, int deactivate) { + list_del_init(&p->run_list); if (deactivate) deactivate_task(p); else @@ -1042,7 +1051,12 @@ static int try_to_wake_up(struct task_st if (!(old_state & state)) goto out_unlock; - if (task_queued(p) || task_running(p)) + /* + * Note this catches tasks that are running and queued, but returns + * false during the context switch when they're running and no + * longer queued. + */ + if (task_queued(p)) goto out_running; activate_task(p, rq); @@ -2874,7 +2888,7 @@ void rt_mutex_setprio(struct task_struct rq = time_task_grq_lock(p, &flags); oldprio = p->prio; - queued = task_queued(p); + queued = task_queued_only(p); if (queued) dequeue_task(p); p->prio = prio; @@ -2924,7 +2938,7 @@ void set_user_nice(struct task_struct *p p->static_prio = new_static; goto out_unlock; } - queued = task_queued(p); + queued = task_queued_only(p); /* * If p is actually running, we don't need to do anything when * changing the priority because the grq is unaffected. @@ -3069,7 +3083,7 @@ static inline struct task_struct *find_p /* Actually do priority change: must hold grq lock. */ static void __setscheduler(struct task_struct *p, int policy, int prio) { - BUG_ON(task_queued(p)); + BUG_ON(task_queued_only(p)); p->policy = policy; p->rt_priority = prio; @@ -3212,7 +3226,7 @@ recheck: goto recheck; } update_rq_clock(rq); - queued = task_queued(p); + queued = task_queued_only(p); if (queued) dequeue_task(p); oldprio = p->prio; @@ -4047,7 +4061,7 @@ int set_cpus_allowed_ptr(struct task_str goto out; } - queued = task_queued(p); + queued = task_queued_only(p); cpumask_copy(&p->cpus_allowed, new_mask); p->rt_nr_cpus_allowed = cpumask_weight(new_mask); @@ -5922,6 +5936,7 @@ void __init sched_init(void) struct rq *rq; rq = cpu_rq(i); + INIT_LIST_HEAD(&rq->queue); rq->rq_deadline = 0; rq->rq_prio = 0; rq->cpu = i; @@ -6026,7 +6041,7 @@ void normalize_rt_tasks(void) rq = __task_grq_lock(p); update_rq_clock(rq); - queued = task_queued(p); + queued = task_queued_only(p); if (queued) dequeue_task(p); __setscheduler(p, SCHED_NORMAL, 0);