Uniprocessor optimisations. There is only one runqueue so make looking it up cheap. Yet more tweaking of try_preempt. Trivial documentation and indentation changes. Microoptimise locking in sched_exit nr_uninterruptible should be a long. Revert the exponential deadline changes; they caused behavioural regressions. Bring back expired deadlines; removing them caused regressions. --- init/main.c | 2 - kernel/sched_bfs.c | 68 +++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 57 insertions(+), 13 deletions(-) Index: linux-2.6.31-bfs/init/main.c =================================================================== --- linux-2.6.31-bfs.orig/init/main.c 2009-10-06 12:27:56.984739467 +1100 +++ linux-2.6.31-bfs/init/main.c 2009-10-06 12:28:22.411739927 +1100 @@ -843,7 +843,7 @@ static noinline int init_post(void) system_state = SYSTEM_RUNNING; numa_default_policy(); - printk(KERN_INFO"Running BFS CPU scheduler v0.302 by Con Kolivas.\n"); + printk(KERN_INFO"Running BFS CPU scheduler v0.303 by Con Kolivas.\n"); if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) printk(KERN_WARNING "Warning: unable to open an initial console.\n"); Index: linux-2.6.31-bfs/kernel/sched_bfs.c =================================================================== --- linux-2.6.31-bfs.orig/kernel/sched_bfs.c 2009-10-06 12:27:56.969740075 +1100 +++ linux-2.6.31-bfs/kernel/sched_bfs.c 2009-10-06 12:28:22.419740036 +1100 @@ -148,6 +148,11 @@ int rr_interval __read_mostly = 6; int sched_iso_cpu __read_mostly = 70; /* + * The relative length of deadline for each priority(nice) level. + */ +static int prio_ratios[PRIO_RANGE] __read_mostly; + +/* * The quota handed out to tasks of all priority levels when refilling their * time_slice. */ @@ -300,10 +305,18 @@ static inline int cpu_of(struct rq *rq) #define for_each_domain(cpu, __sd) \ for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) +#ifdef CONFIG_SMP #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) #define this_rq() (&__get_cpu_var(runqueues)) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) +#else /* CONFIG_SMP */ +static struct rq *uprq; +#define cpu_rq(cpu) (uprq) +#define this_rq() (uprq) +#define task_rq(p) (uprq) +#define cpu_curr(cpu) ((uprq)->curr) +#endif #include "sched_stats.h" @@ -564,6 +577,11 @@ static inline void requeue_task(struct t sched_info_queued(p); } +static inline int task_prio_ratio(struct task_struct *p) +{ + return prio_ratios[TASK_USER_PRIO(p)]; +} + /* * task_timeslice - all tasks of all priorities get the exact same timeslice * length. CPU distribution is handled by giving different deadlines to @@ -571,7 +589,7 @@ static inline void requeue_task(struct t */ static inline int task_timeslice(struct task_struct *p) { - return (TASK_USER_PRIO(p) + 1) * rr_interval; + return (rr_interval * task_prio_ratio(p) / 100); } #ifdef CONFIG_SMP @@ -926,11 +944,14 @@ unsigned long wait_task_inactive(struct * work out! In the unlikely event rq is dereferenced * since we're lockless, grab it again. */ +#ifdef CONFIG_SMP retry_rq: rq = task_rq(p); if (unlikely(!rq)) goto retry_rq; - +#else /* CONFIG_SMP */ + rq = task_rq(p); +#endif /* * If the task is actively running on another CPU * still, just relax and busy-wait without holding @@ -1064,20 +1085,27 @@ static void try_preempt(struct task_stru offset_deadline = -cache_distance(this_rq, rq, p); if (rq_prio != PRIO_LIMIT) offset_deadline += rq->rq_deadline; + else + if (rq == this_rq) { + /* this_rq is idle, use that over everything */ + highest_prio_rq = rq; + goto out; + } - if (rq_prio > highest_prio || (rq_prio == highest_prio && + if (rq_prio > highest_prio || (time_after(offset_deadline, latest_deadline) || - (this_rq == rq && offset_deadline == latest_deadline)))) { + (offset_deadline == latest_deadline && this_rq == rq))) { latest_deadline = offset_deadline; highest_prio = rq_prio; highest_prio_rq = rq; } } - if (p->prio > highest_prio || (p->policy == SCHED_NORMAL && - p->prio == highest_prio && !time_before(p->deadline, latest_deadline))) + if (p->prio > highest_prio || (p->prio == highest_prio && + p->policy == SCHED_NORMAL && !time_before(p->deadline, latest_deadline))) return; +out: /* p gets to preempt highest_prio_rq->curr */ resched_task(highest_prio_rq->curr); return; @@ -1262,17 +1290,18 @@ void wake_up_new_task(struct task_struct rq = task_grq_lock(p, &flags); ; parent = p->parent; BUG_ON(p->state != TASK_RUNNING); + /* Unnecessary but small chance that the parent changed cpus */ set_task_cpu(p, task_cpu(parent)); activate_task(p, rq); trace_sched_wakeup_new(rq, p, 1); if (!(clone_flags & CLONE_VM) && rq->curr == parent && - !suitable_idle_cpus(p)) { + !suitable_idle_cpus(p)) { /* * The VM isn't cloned, so we're in a good position to * do child-runs-first in anticipation of an exec. This * usually avoids a lot of COW overhead. */ - resched_task(parent); + resched_task(parent); } else try_preempt(p, rq); task_grq_unlock(&flags); @@ -1297,10 +1326,10 @@ void sched_exit(struct task_struct *p) int *par_tslice, *p_tslice; parent = p->parent; - rq = task_grq_lock(parent, &flags); par_tslice = &parent->time_slice; p_tslice = &p->time_slice; + rq = task_grq_lock(parent, &flags); /* The real time_slice of the "curr" task is on the rq var.*/ if (p == rq->curr) p_tslice = &rq->rq_time_slice; @@ -1537,7 +1566,7 @@ unsigned long nr_running(void) unsigned long nr_uninterruptible(void) { - unsigned long nu = grq.nr_uninterruptible; + long nu = grq.nr_uninterruptible; if (unlikely(nu < 0)) nu = 0; @@ -1780,7 +1809,7 @@ update_cpu_clock(struct rq *rq, struct t * Return any ns on the sched_clock that have not yet been accounted in * @p in case that task is currently running. * - * Called with task_grq_lock() held on @rq. + * Called with task_grq_lock() held. */ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) { @@ -2147,7 +2176,7 @@ EXPORT_SYMBOL(sub_preempt_count); */ static inline int prio_deadline_diff(int user_prio) { - return (user_prio + 1) * rr_interval * HZ / 500; + return (prio_ratios[user_prio] * rr_interval * HZ / (1000 * 100)) ? : 1; } static inline int task_deadline_diff(struct task_struct *p) @@ -2232,6 +2261,15 @@ retry: dl = p->deadline + cache_distance(task_rq(p), rq, p); /* + * Look for tasks with old deadlines and pick them in FIFO + * order, taking the first one found. + */ + if (time_before(dl, jiffies)) { + edt = p; + goto out_take; + } + + /* * No rt tasks. Find the earliest deadline task. Now we're in * O(n) territory. This is what we silenced the compiler for: * edt will always start as idle. @@ -6007,9 +6045,15 @@ void __init sched_init(void) int i; struct rq *rq; + prio_ratios[0] = 100; + for (i = 1 ; i < PRIO_RANGE ; i++) + prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; + spin_lock_init(&grq.lock); #ifdef CONFIG_SMP init_defrootdomain(); +#else + uprq = &per_cpu(runqueues, 0); #endif for_each_possible_cpu(i) { rq = cpu_rq(i);