include/linux/sched.h | 2 +- include/linux/sysctl.h | 1 + kernel/sched.c | 42 ++++++++++++++++++++++++++++++++++-------- kernel/sysctl.c | 8 ++++++++ 4 files changed, 44 insertions(+), 9 deletions(-) Index: linux-2.6.17-rc5-ck1/include/linux/sched.h =================================================================== --- linux-2.6.17-rc5-ck1.orig/include/linux/sched.h 2006-05-25 12:57:50.000000000 +1000 +++ linux-2.6.17-rc5-ck1/include/linux/sched.h 2006-05-25 12:57:50.000000000 +1000 @@ -202,7 +202,7 @@ extern void show_stack(struct task_struc void io_schedule(void); long io_schedule_timeout(long timeout); -extern int sched_interactive; +extern int sched_interactive, sched_compute; extern void cpu_init (void); extern void trap_init(void); Index: linux-2.6.17-rc5-ck1/include/linux/sysctl.h =================================================================== --- linux-2.6.17-rc5-ck1.orig/include/linux/sysctl.h 2006-05-25 12:57:50.000000000 +1000 +++ linux-2.6.17-rc5-ck1/include/linux/sysctl.h 2006-05-25 12:57:50.000000000 +1000 @@ -149,6 +149,7 @@ enum KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */ + KERN_COMPUTE=74, /* adjust timeslices for a compute server */ }; Index: linux-2.6.17-rc5-ck1/kernel/sched.c =================================================================== --- linux-2.6.17-rc5-ck1.orig/kernel/sched.c 2006-05-25 12:57:50.000000000 +1000 +++ linux-2.6.17-rc5-ck1/kernel/sched.c 2006-05-25 12:57:50.000000000 +1000 @@ -60,8 +60,17 @@ /* * sched_interactive - sysctl which allows interactive tasks to have bonus * raise its priority. + * sched_compute - sysctl which enables long timeslices and delayed preemption + * for compute server usage. 
*/ int sched_interactive __read_mostly = 1; +int sched_compute __read_mostly = 0; + +/* + * CACHE_DELAY is how long, in scheduler ticks, preemption is delayed in + * sched_compute mode. It works out to a nominal 10ms. + */ +#define CACHE_DELAY (10 * (HZ) / 1001 + 1) /* * Convert user-nice values [ -20 ... 0 ... 19 ] @@ -92,9 +101,10 @@ int sched_interactive __read_mostly = 1; /* * This is the time all tasks within the same priority round robin. - * Set to a minimum of 6ms. + * Set to a minimum of 6ms; 10 times longer when sched_compute is non-zero. */ -#define RR_INTERVAL ((6 * HZ / 1001) + 1) +#define _RR_INTERVAL ((6 * HZ / 1001) + 1) +#define RR_INTERVAL (_RR_INTERVAL * (1 + 9 * !!sched_compute)) #define DEF_TIMESLICE (RR_INTERVAL * 19) #define task_hot(p, now, sd) ((long long) ((now) - (p)->timestamp) \ @@ -135,6 +145,7 @@ struct runqueue { unsigned long nr_uninterruptible; unsigned long long timestamp_last_tick; + unsigned short cache_ticks, preempted; task_t *curr, *idle; struct mm_struct *prev_mm; unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)]; @@ -737,7 +748,7 @@ static int effective_prio(const task_t * best_bonus = bonus(p); prio = MAX_RT_PRIO + best_bonus; - if (sched_interactive && !batch_task(p)) + if (sched_interactive && !sched_compute && !batch_task(p)) prio -= p->bonus; rr = rr_interval(p); @@ -1208,12 +1219,21 @@ static inline int wake_idle(int cpu, tas #endif /* - * Check to see if p preempts rq->curr and resched if it does. + * Check to see if p preempts rq->curr and resched if it does. In compute + * mode we do not preempt for at least CACHE_DELAY and set rq->preempted.
*/ -static inline void preempt(const task_t *p, runqueue_t *rq) +static void fastcall preempt(const task_t *p, runqueue_t *rq) { - if (TASK_PREEMPTS_CURR(p, rq)) - resched_task(rq->curr); + task_t *curr = rq->curr; + + if (p->prio >= curr->prio) + return; + if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm || + rt_task(p) || curr == rq->idle) { + resched_task(curr); + return; + } + rq->preempted = 1; } /*** @@ -2669,6 +2689,11 @@ void scheduler_tick(void) time_slice_expired(p, rq); goto out_unlock; } + rq->cache_ticks++; + if (rq->preempted && rq->cache_ticks >= CACHE_DELAY) { + set_tsk_need_resched(p); + goto out_unlock; + } expired_balance = 0; out_unlock: spin_unlock(&rq->lock); @@ -2993,6 +3018,7 @@ switch_tasks: sched_info_switch(prev, next); if (likely(prev != next)) { + rq->preempted = rq->cache_ticks = 0; next->timestamp = now; rq->nr_switches++; rq->curr = next; @@ -6031,7 +6057,7 @@ void __init sched_init(void) rq = cpu_rq(i); spin_lock_init(&rq->lock); - rq->nr_running = 0; + rq->nr_running = rq->cache_ticks = rq->preempted = 0; #ifdef CONFIG_SMP rq->sd = NULL; Index: linux-2.6.17-rc5-ck1/kernel/sysctl.c =================================================================== --- linux-2.6.17-rc5-ck1.orig/kernel/sysctl.c 2006-05-25 12:57:50.000000000 +1000 +++ linux-2.6.17-rc5-ck1/kernel/sysctl.c 2006-05-25 12:57:50.000000000 +1000 @@ -631,6 +631,14 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = KERN_COMPUTE, + .procname = "compute", + .data = &sched_compute, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) { .ctl_name = KERN_UNKNOWN_NMI_PANIC,