/*
 * Patch summary: unified diff against kernel/sched_bfs.c (paths in the
 * header name a linux-2.6.31-bfs tree).
 *
 * What the hunks below change:
 *
 *  - struct rq gains a list_head member "queue", described in the patch as
 *    a place to store the currently running task. sched_init() initialises
 *    it with INIT_LIST_HEAD() for each rq.
 *
 *  - take_task() adds p->run_list to rq->queue right after dequeue_task(p);
 *    return_task() starts with list_del_init(&p->run_list). As a result a
 *    running task has a non-empty run_list, so task_queued() is true for it.
 *
 *  - New helper task_queued_only(): true when p->run_list is non-empty and
 *    task_running(p) is false.
 *
 *  - These sites switch from task_queued() to task_queued_only():
 *    rt_mutex_setprio(), set_user_nice(), the BUG_ON() in __setscheduler(),
 *    the hunk after the "recheck:" label (its enclosing function name is
 *    not visible in this diff), set_cpus_allowed_ptr() and
 *    normalize_rt_tasks().
 *
 *  - try_to_wake_up() now tests only task_queued(p) instead of
 *    task_queued(p) || task_running(p).
 *
 *  - prio_ratio() is renamed to pratio(); the uses in task_timeslice(),
 *    prio_deadline_diff() and adjust_deadline() are updated to match.
 *
 *  - sched_fork() halves rq->rq_time_slice of task_rq(current) instead of
 *    current->time_slice, then sets p->time_slice = rq->rq_time_slice
 *    unconditionally.
 *    NOTE(review): the removed code gave the child a time_slice of 1 when
 *    halving produced 0, and 0 when the parent's slice was not positive.
 *    The new code copies the halved (possibly 0) or non-positive value
 *    as-is. The added comment says a negative value is equivalent to 0;
 *    confirm the loss of the floor of 1 is intended.
 *
 * NOTE(review): as stored here the file header and hunks are run together
 * on a few long lines. Presumably the original line breaks (including any
 * blank context lines) must be restored before a patch tool can apply this.
 */
Index: linux-2.6.31-bfs/kernel/sched_bfs.c =================================================================== --- linux-2.6.31-bfs.orig/kernel/sched_bfs.c 2009-09-17 20:16:56.962856486 +1000 +++ linux-2.6.31-bfs/kernel/sched_bfs.c 2009-09-17 21:45:16.286856583 +1000 @@ -183,6 +183,7 @@ struct rq { struct task_struct *curr, *idle; struct mm_struct *prev_mm; + struct list_head queue; /* Place to store currently running task */ /* Stored data about rq->curr to work outside grq lock */ unsigned long rq_deadline; @@ -477,9 +478,10 @@ static inline void finish_lock_switch(st /* * A task that is queued will be on the grq run list. * A task that is not running or queued will not be on the grq run list. - * A task that is currently running will have ->oncpu set. - * The only time a task will be both queued and running by these definitions - * is during schedule, and all under grq_lock so it should never be seen. + * A task that is currently running will have ->oncpu set and be queued + * temporarily in its own rq queue. + * A task that is running and no longer queued will be seen only on + * context switch exit. */ static inline int task_queued(struct task_struct *p) @@ -487,6 +489,11 @@ static inline int task_queued(struct tas return (!list_empty(&p->run_list)); } +static inline int task_queued_only(struct task_struct *p) +{ + return (!list_empty(&p->run_list) && !task_running(p)); +} + /* * Removing from the global runqueue. Enter with grq locked. 
*/ @@ -545,7 +552,7 @@ static inline void requeue_task(struct t sched_info_queued(p); } -static inline int prio_ratio(struct task_struct *p) +static inline int pratio(struct task_struct *p) { return prio_ratios[TASK_USER_PRIO(p)]; } @@ -557,7 +564,7 @@ static inline int prio_ratio(struct task */ static inline int task_timeslice(struct task_struct *p) { - return (rr_interval * prio_ratio(p) / 100); + return (rr_interval * pratio(p) / 100); } /* @@ -660,6 +667,7 @@ static inline void take_task(struct rq * { set_task_cpu(p, rq->cpu); dequeue_task(p); + list_add(&p->run_list, &rq->queue); } /* @@ -668,6 +676,7 @@ static inline void take_task(struct rq * */ static inline void return_task(struct task_struct *p, int deactivate) { + list_del_init(&p->run_list); if (deactivate) deactivate_task(p); else @@ -1042,7 +1051,12 @@ static int try_to_wake_up(struct task_st if (!(old_state & state)) goto out_unlock; - if (task_queued(p) || task_running(p)) + /* + * Note this catches tasks that are running and queued, but returns + * false during the context switch when they're running and no + * longer queued. + */ + if (task_queued(p)) goto out_running; activate_task(p, rq); @@ -1093,6 +1107,7 @@ int wake_up_state(struct task_struct *p, void sched_fork(struct task_struct *p, int clone_flags) { int cpu = get_cpu(); + struct rq *rq; #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); @@ -1131,23 +1146,21 @@ void sched_fork(struct task_struct *p, i /* * Share the timeslice between parent and child, thus the * total amount of pending timeslices in the system doesn't change, - * resulting in more scheduling fairness. + * resulting in more scheduling fairness. If it's negative, it won't + * matter since that's the same as being 0. current's time_slice is + * actually in rq_time_slice when it's running. 
*/ local_irq_disable(); - if (current->time_slice > 0) { - current->time_slice /= 2; - if (current->time_slice) - p->time_slice = current->time_slice; - else - p->time_slice = 1; + rq = task_rq(current); + if (likely(rq->rq_time_slice > 0)) { + rq->rq_time_slice /= 2; /* * The remainder of the first timeslice might be recovered by * the parent if the child exits early enough. */ p->first_time_slice = 1; - } else - p->time_slice = 0; - + } + p->time_slice = rq->rq_time_slice; local_irq_enable(); out: put_cpu(); @@ -2126,7 +2139,7 @@ EXPORT_SYMBOL(sub_preempt_count); */ static inline int prio_deadline_diff(struct task_struct *p) { - return (prio_ratio(p) * rr_interval * HZ / 1000 / 100) ? : 1; + return (pratio(p) * rr_interval * HZ / 1000 / 100) ? : 1; } static inline int longest_deadline(void) @@ -2874,7 +2887,7 @@ void rt_mutex_setprio(struct task_struct rq = time_task_grq_lock(p, &flags); oldprio = p->prio; - queued = task_queued(p); + queued = task_queued_only(p); if (queued) dequeue_task(p); p->prio = prio; @@ -2896,7 +2909,7 @@ void rt_mutex_setprio(struct task_struct */ static void adjust_deadline(struct task_struct *p, int new_prio) { - p->deadline += (prio_ratios[USER_PRIO(new_prio)] - prio_ratio(p)) * + p->deadline += (prio_ratios[USER_PRIO(new_prio)] - pratio(p)) * rr_interval * HZ / 1000 / 100; } @@ -2924,7 +2937,7 @@ void set_user_nice(struct task_struct *p p->static_prio = new_static; goto out_unlock; } - queued = task_queued(p); + queued = task_queued_only(p); /* * If p is actually running, we don't need to do anything when * changing the priority because the grq is unaffected. @@ -3069,7 +3082,7 @@ static inline struct task_struct *find_p /* Actually do priority change: must hold grq lock. 
*/ static void __setscheduler(struct task_struct *p, int policy, int prio) { - BUG_ON(task_queued(p)); + BUG_ON(task_queued_only(p)); p->policy = policy; p->rt_priority = prio; @@ -3212,7 +3225,7 @@ recheck: goto recheck; } update_rq_clock(rq); - queued = task_queued(p); + queued = task_queued_only(p); if (queued) dequeue_task(p); oldprio = p->prio; @@ -4047,7 +4060,7 @@ int set_cpus_allowed_ptr(struct task_str goto out; } - queued = task_queued(p); + queued = task_queued_only(p); cpumask_copy(&p->cpus_allowed, new_mask); p->rt_nr_cpus_allowed = cpumask_weight(new_mask); @@ -5922,6 +5935,7 @@ void __init sched_init(void) struct rq *rq; rq = cpu_rq(i); + INIT_LIST_HEAD(&rq->queue); rq->rq_deadline = 0; rq->rq_prio = 0; rq->cpu = i; @@ -6026,7 +6040,7 @@ void normalize_rt_tasks(void) rq = __task_grq_lock(p); update_rq_clock(rq); - queued = task_queued(p); + queued = task_queued_only(p); if (queued) dequeue_task(p); __setscheduler(p, SCHED_NORMAL, 0);