Make sure we disable preemption in try_to_wake_up and when changing the cpu in set_cpus_allowed_ptr. Rework how rr_interval scales with the number of CPUs so that it does not grow quite so quickly: it now increases by only 50% for every doubling of CPUs, for better interactivity on multicore machines. Throughput did not appear to decrease measurably with this change. -ck --- include/linux/sched.h | 2 +- kernel/sched_bfs.c | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) Index: linux-2.6.33-bfs/include/linux/sched.h =================================================================== --- linux-2.6.33-bfs.orig/include/linux/sched.h 2010-05-18 17:30:14.587299206 +1000 +++ linux-2.6.33-bfs/include/linux/sched.h 2010-05-18 17:30:42.824299835 +1000 @@ -1612,7 +1612,7 @@ static inline void tsk_cpus_current(stru static inline void print_scheduler_version(void) { - printk(KERN_INFO"BFS CPU scheduler v0.316 by Con Kolivas.\n"); + printk(KERN_INFO"BFS CPU scheduler v0.318 by Con Kolivas.\n"); } static inline int iso_task(struct task_struct *p) Index: linux-2.6.33-bfs/kernel/sched_bfs.c =================================================================== --- linux-2.6.33-bfs.orig/kernel/sched_bfs.c 2010-05-18 17:30:14.562298019 +1000 +++ linux-2.6.33-bfs/kernel/sched_bfs.c 2010-05-18 17:32:19.458298645 +1000 @@ -1260,6 +1260,8 @@ static int try_to_wake_up(struct task_st unsigned long flags; struct rq *rq; + get_cpu(); + /* This barrier is undocumented, probably for p->state? くそ */ smp_wmb(); @@ -1294,6 +1296,8 @@ out_running: p->state = TASK_RUNNING; out_unlock: task_grq_unlock(&flags); + put_cpu(); + return success; } @@ -4331,8 +4335,11 @@ int set_cpus_allowed_ptr(struct task_str /* Task is running on the wrong cpu now, reschedule it. 
*/ set_tsk_need_resched(p); running_wrong = 1; - } else + } else { + get_cpu(); set_task_cpu(p, cpumask_any_and(cpu_active_mask, new_mask)); + put_cpu(); + } out: if (queued) @@ -6270,7 +6277,7 @@ static int cache_cpu_idle(unsigned long void __init sched_init_smp(void) { struct sched_domain *sd; - int cpu; + int cpu, i, cpu_scale; cpumask_var_t non_isolated_cpus; @@ -6309,7 +6316,13 @@ void __init sched_init_smp(void) * allowing us to increase the base rr_interval, but in a non linear * fashion. */ - rr_interval *= 1 + ilog2(num_online_cpus()); + cpu_scale = ilog2(num_online_cpus()); + rr_interval *= 100; + for (i = 0; i < cpu_scale; i++) { + rr_interval *= 3; + rr_interval /= 2; + } + rr_interval /= 100; grq_lock_irq(); /*