---
 Documentation/sysctl/kernel.txt |    2 -
 kernel/sched.c                  |   45 ++++++++++++++++++++--------------------
 kernel/softirq.c                |    2 -
 kernel/sysctl.c                 |   12 ++++++----
 4 files changed, 32 insertions(+), 29 deletions(-)

Index: linux-2.6.20.7-sd/kernel/sched.c
===================================================================
--- linux-2.6.20.7-sd.orig/kernel/sched.c	2007-04-19 12:17:46.000000000 +1000
+++ linux-2.6.20.7-sd/kernel/sched.c	2007-04-20 00:45:05.000000000 +1000
@@ -53,6 +53,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kprobes.h>
 #include <linux/delayacct.h>
+#include <linux/log2.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -79,7 +80,7 @@
 /* Some helpers for converting to/from various scales.*/
 #define NS_TO_JIFFIES(TIME)	((TIME) / (1000000000 / HZ))
 #define JIFFIES_TO_NS(TIME)	((TIME) * (1000000000 / HZ))
-#define MS_TO_NS(TIME)		((TIME) * 1000000)
+#define MS_TO_US(TIME)		((TIME) * 1000)
 /* Can return 0 */
 #define MS_TO_JIFFIES(TIME)	((TIME) * HZ / 1000)
 #define JIFFIES_TO_MS(TIME)	((TIME) * 1000 / HZ)
@@ -91,9 +92,8 @@
  * Value is in ms and set to a minimum of 8ms. Scales with number of cpus.
  * Tunable via /proc interface.
  */
-int rr_interval __read_mostly;
+int rr_interval __read_mostly = 8;
 
-#define RR_INTERVAL		8
 #define DEF_TIMESLICE		(rr_interval * 20)
 
 /*
@@ -975,23 +975,20 @@ static int effective_prio(struct task_st
  * tick still. Below nice 0 they get progressively larger.
  * ie nice -6..0 = rr_interval. nice -10 = 2.5 * rr_interval
  * nice -20 = 10 * rr_interval. nice 1-19 = rr_interval / 2.
- * Value returned is in nanoseconds.
+ * Value returned is in microseconds.
  */
 static unsigned int rr_quota(struct task_struct *p)
 {
 	int nice = TASK_NICE(p), rr = rr_interval;
 
-	/* Ensure that rr_interval is at least 1 tick */
-	if (unlikely(!MS_TO_JIFFIES(rr)))
-		rr = rr_interval = JIFFIES_TO_MS(1) ? : 1;
 	if (!rt_task(p)) {
 		if (nice < -6) {
 			rr *= nice * nice;
 			rr /= 40;
-		} else if (nice > 0 && (rr * HZ / 1000 / 2) > 0)
-			rr /= 2;
+		} else if (nice > 0)
+			rr = rr / 2 ? : 1;
 	}
-	return MS_TO_NS(rr);
+	return MS_TO_US(rr);
 }
 
 /*
@@ -3010,16 +3007,17 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 /*
  * This is called on clock ticks and on context switches.
  * Bank in p->sched_time the ns elapsed since the last tick or switch.
- * CPU scheduler quota accounting is also performed here.
+ * CPU scheduler quota accounting is also performed here in microseconds.
  * The value returned from sched_clock() occasionally gives bogus values so
  * some sanity checking is required.
  */
-static inline void
+static void
 update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now,
 		 int tick)
 {
 	cputime64_t time_diff = now - p->last_ran;
-	unsigned int min_diff = 1000;
+	const unsigned int min_diff = 1000;
+	int us_time_diff;
 
 	if (tick) {
 		/*
@@ -3038,8 +3036,11 @@ update_cpu_clock(struct task_struct *p, 
 		if (time_diff > JIFFIES_TO_NS(1) || time_diff < min_diff)
 			time_diff = min_diff;
 	}
+	/* time_slice accounting is done in usecs to avoid overflow on 32bit */
+	us_time_diff = time_diff;
+	us_time_diff /= 1000;
 	if (p != rq->idle && p->policy != SCHED_FIFO)
-		p->time_slice -= time_diff;
+		p->time_slice -= us_time_diff;
 	p->sched_time += time_diff;
 	p->last_ran = rq->most_recent_timestamp = now;
 }
@@ -3140,8 +3141,7 @@ void account_steal_time(struct task_stru
 static void task_expired_entitlement(struct rq *rq, struct task_struct *p)
 {
 	struct prio_array *old_array;
-	int overrun;
-	int old_prio;
+	int overrun, old_prio;
 
 	if (unlikely(p->first_time_slice))
 		p->first_time_slice = 0;
@@ -6826,6 +6826,13 @@ void __init sched_init_smp(void)
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
 		BUG();
+
+	/*
+	 * Assume that every added cpu gives us slightly less overall latency,
+	 * allowing us to increase the base rr_interval, but in a non-linear
+	 * fashion: it is scaled by 1 + ilog2 of the number of online cpus.
+	 */
+	rr_interval *= 1 + ilog2(num_online_cpus());
 }
 #else
 void __init sched_init_smp(void)
@@ -6846,7 +6853,6 @@ int in_sched_functions(unsigned long add
 void __init sched_init(void)
 {
 	int i, j, k;
-	unsigned int rr_us = 0, rr_inc = RR_INTERVAL * 1000;
 
 	/* Generate the priority matrix */
 	for (i = 0; i < PRIO_RANGE; i++) {
@@ -6897,12 +6903,7 @@ void __init sched_init(void)
 			__set_bit(MAX_PRIO, array->prio_bitmap);
 		}
 
-		/* Every added cpu increases the rr_interval */
-		rr_us += rr_inc;
-		rr_inc /= 2;
 	}
-	rr_interval = rr_us / 1000;
-
 	set_load_weight(&init_task);
 
 #ifdef CONFIG_SMP
Index: linux-2.6.20.7-sd/kernel/sysctl.c
===================================================================
--- linux-2.6.20.7-sd.orig/kernel/sysctl.c	2007-04-19 09:51:30.000000000 +1000
+++ linux-2.6.20.7-sd/kernel/sysctl.c	2007-04-20 00:45:59.000000000 +1000
@@ -229,11 +229,13 @@ static void register_proc_table(ctl_tabl
 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
 #endif
 
-/* Constants for minimum and maximum testing in vm_table.
+
+/* Constants for minimum and maximum testing.
    We use these as one-element integer vectors. */
-static int  __read_mostly zero;
-static int  __read_mostly one = 1;
-static int  __read_mostly one_hundred = 100;
+static int __read_mostly zero;
+static int __read_mostly one = 1;
+static int __read_mostly one_hundred = 100;
+static int __read_mostly five_thousand = 5000;
 
 
 /* The default sysctl tables: */
@@ -693,7 +695,7 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec_minmax,
 		.strategy	= &sysctl_intvec,
 		.extra1		= &one,
-		.extra2		= &one_hundred,
+		.extra2		= &five_thousand,
 	},
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
Index: linux-2.6.20.7-sd/Documentation/sysctl/kernel.txt
===================================================================
--- linux-2.6.20.7-sd.orig/Documentation/sysctl/kernel.txt	2007-04-19 09:51:30.000000000 +1000
+++ linux-2.6.20.7-sd/Documentation/sysctl/kernel.txt	2007-04-20 00:45:05.000000000 +1000
@@ -298,7 +298,7 @@ overall. This value is in milliseconds a
 depends on the number of cpus available at scheduler initialisation
 with a minimum of 8.
 
-Valid values are from 1-100.
+Valid values are from 1 to 5000.
 
 ==============================================================
 
Index: linux-2.6.20.7-sd/kernel/softirq.c
===================================================================
--- linux-2.6.20.7-sd.orig/kernel/softirq.c	2007-02-05 22:52:04.000000000 +1100
+++ linux-2.6.20.7-sd/kernel/softirq.c	2007-04-20 00:45:05.000000000 +1000
@@ -469,7 +469,7 @@ void __init softirq_init(void)
 
 static int ksoftirqd(void * __bind_cpu)
 {
-	set_user_nice(current, 19);
+	set_user_nice(current, 15);
 	current->flags |= PF_NOFREEZE;
 
 	set_current_state(TASK_INTERRUPTIBLE);
