/*
 * Scheduler patch against linux-2.6.0-test2 (new tree directory is named
 * linux-2.6.0-test2-O10). It touches include/linux/sched.h and
 * kernel/sched.c.
 *
 * What the hunks below do, in order:
 *
 *  - sched.h: adds an int field named activated to struct task_struct.
 *
 *  - sched.c tunables: adds AVG_TIMESLICE, TIMESLICE_GRANULARITY
 *    (HZ/40, or 1 when that is zero) and MAX_BONUS; changes CHILD_PENALTY
 *    from 50 to 90; redefines MAX_SLEEP_AVG as AVG_TIMESLICE * MAX_BONUS
 *    and STARVATION_LIMIT as MAX_SLEEP_AVG (both were 10*HZ).
 *    AVG_TIMESLICE is defined textually above MIN_TIMESLICE and
 *    MAX_TIMESLICE; that is fine for the preprocessor because macros are
 *    expanded at the point of use.
 *
 *  - adds TASK_PREEMPTS_CURR(p, rq): true when p->prio is numerically
 *    lower than rq->curr->prio, or when the two are equal and
 *    p->time_slice is more than twice rq->curr->time_slice. The wakeup
 *    hunk (context label repeat_lock_task) switches its preemption test
 *    from the plain prio comparison to this macro.
 *
 *  - moves the sleep_avg / prio recalculation out of activate_task() into
 *    a new static recalc_task_prio(p). With sleep_time computed as
 *    jiffies - p->last_run - 1, and only when that is positive, it clears
 *    p->activated and then either
 *      (a) for tasks with p->mm set that slept longer than HZ, sets
 *          sleep_avg to MAX_SLEEP_AVG * (MAX_BONUS - 1) / MAX_BONUS - 1, or
 *      (b) otherwise raises sleep_avg to the next MAX_SLEEP_AVG / MAX_BONUS
 *          step, clamped to MAX_SLEEP_AVG.
 *    p->prio = effective_prio(p) is then executed unconditionally.
 *
 *  - activate_task(): when p->last_run is non-zero it sets p->activated
 *    to 1 and calls recalc_task_prio(p); otherwise it sets p->last_run to
 *    jiffies. __activate_task(p, rq) is called in both cases.
 *
 *  - wake_up_forked_process(): the parent and child penalties are applied
 *    to sleep_avg after scaling it to MAX_BONUS units (integer division)
 *    and the result is scaled back; p->last_run is set to 0, which makes
 *    the first activate_task() of the child take the else branch above.
 *
 *  - scheduler_tick(): adds a branch that dequeues and re-enqueues the
 *    task on rq->active and sets need_resched when
 *    (task_timeslice(p) - p->time_slice) is a multiple of
 *    TIMESLICE_GRANULARITY, p->time_slice > MIN_TIMESLICE and
 *    p->array == rq->active.
 *
 *  - hunk at label need_resched: after prev->last_run = jiffies,
 *    decrements prev->sleep_avg when prev->time_slice still equals
 *    task_timeslice(prev) and sleep_avg is non-zero.
 *
 *  - hunk at label pick_next_task: when next->activated is set, clears
 *    it, dequeues next from its array, calls recalc_task_prio(next) and
 *    enqueues it on the same array again.
 *
 * NOTE(review): in this copy the patch text is folded onto a few very long
 * lines, so the line counts in the hunk headers cannot be matched against
 * physical lines. Presumably the original line breaks must be restored
 * before this can be applied - confirm against the upstream posting.
 *
 * NOTE(review): the added prev->sleep_avg decrement sits before
 * spin_lock_irq(&rq->lock) in its hunk - confirm that updating sleep_avg
 * outside the runqueue lock is intended.
 */
--- linux-2.6.0-test2/include/linux/sched.h 2003-07-28 10:39:10.000000000 +1000 +++ linux-2.6.0-test2-O10/include/linux/sched.h 2003-07-28 10:44:50.000000000 +1000 @@ -340,6 +340,7 @@ struct task_struct { unsigned long sleep_avg; unsigned long last_run; + int activated; unsigned long policy; unsigned long cpus_allowed; --- linux-2.6.0-test2/kernel/sched.c 2003-07-28 10:39:10.000000000 +1000 +++ linux-2.6.0-test2-O10/kernel/sched.c 2003-07-28 10:44:50.000000000 +1000 @@ -58,6 +58,8 @@ #define USER_PRIO(p) ((p)-MAX_RT_PRIO) #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) +#define AVG_TIMESLICE (MIN_TIMESLICE + ((MAX_TIMESLICE - MIN_TIMESLICE) *\ + (MAX_PRIO-1-NICE_TO_PRIO(0))/(MAX_USER_PRIO - 1))) /* * These are the 'tuning knobs' of the scheduler: @@ -68,13 +70,15 @@ */ #define MIN_TIMESLICE ( 10 * HZ / 1000) #define MAX_TIMESLICE (200 * HZ / 1000) -#define CHILD_PENALTY 50 +#define TIMESLICE_GRANULARITY (HZ/40 ?: 1) +#define CHILD_PENALTY 90 #define PARENT_PENALTY 100 #define EXIT_WEIGHT 3 #define PRIO_BONUS_RATIO 25 +#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) #define INTERACTIVE_DELTA 2 -#define MAX_SLEEP_AVG (10*HZ) -#define STARVATION_LIMIT (10*HZ) +#define MAX_SLEEP_AVG (AVG_TIMESLICE * MAX_BONUS) +#define STARVATION_LIMIT (MAX_SLEEP_AVG) #define NODE_THRESHOLD 125 /* @@ -115,6 +119,11 @@ #define TASK_INTERACTIVE(p) \ ((p)->prio <= (p)->static_prio - DELTA(p)) +#define TASK_PREEMPTS_CURR(p, rq) \ + ((p)->prio < (rq)->curr->prio || \ + ((p)->prio == (rq)->curr->prio && \ + (p)->time_slice > (rq)->curr->time_slice * 2)) + /* * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ] * to time slice values. @@ -339,42 +348,56 @@ static inline void __activate_task(task_ nr_running_inc(rq); } -/* - * activate_task - move a task to the runqueue and do priority recalculation - * - * Update all the scheduling statistics stuff. (sleep average - * calculation, priority modifiers, etc.) 
- */ -static inline void activate_task(task_t *p, runqueue_t *rq) +static void recalc_task_prio(task_t *p) { long sleep_time = jiffies - p->last_run - 1; if (sleep_time > 0) { - int sleep_avg; + p->activated = 0; /* - * This code gives a bonus to interactive tasks. - * - * The boost works by updating the 'average sleep time' - * value here, based on ->last_run. The more time a task - * spends sleeping, the higher the average gets - and the - * higher the priority boost gets as well. + * User tasks that sleep a long time are categorised as + * idle and will get just under interactive status to + * prevent them suddenly becoming cpu hogs and starving + * other processes. */ - sleep_avg = p->sleep_avg + sleep_time; + if (p->mm && sleep_time > HZ) + p->sleep_avg = MAX_SLEEP_AVG * + (MAX_BONUS - 1) / MAX_BONUS - 1; + else { - /* - * 'Overflow' bonus ticks go to the waker as well, so the - * ticks are not lost. This has the effect of further - * boosting tasks that are related to maximum-interactive - * tasks. - */ - if (sleep_avg > MAX_SLEEP_AVG) - sleep_avg = MAX_SLEEP_AVG; - if (p->sleep_avg != sleep_avg) { - p->sleep_avg = sleep_avg; - p->prio = effective_prio(p); + /* + * Processes that sleep get pushed to one higher + * priority each time they sleep greater than + * one tick. -ck + */ + p->sleep_avg = (p->sleep_avg * MAX_BONUS / + MAX_SLEEP_AVG + 1) * + MAX_SLEEP_AVG / MAX_BONUS; + + if (p->sleep_avg > MAX_SLEEP_AVG) + p->sleep_avg = MAX_SLEEP_AVG; } } + p->prio = effective_prio(p); + +} + + +/* + * activate_task - move a task to the runqueue and do priority recalculation + * + * Update all the scheduling statistics stuff. (sleep average + * calculation, priority modifiers, etc.) 
+ */ +static inline void activate_task(task_t *p, runqueue_t *rq) +{ + if (likely(p->last_run)){ + p->activated = 1; + recalc_task_prio(p); + } else + p->last_run = jiffies; + __activate_task(p, rq); } @@ -501,7 +524,7 @@ repeat_lock_task: __activate_task(p, rq); else { activate_task(p, rq); - if (p->prio < rq->curr->prio) + if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); } success = 1; @@ -550,9 +573,13 @@ void wake_up_forked_process(task_t * p) * and children as well, to keep max-interactive tasks * from forking tasks that are max-interactive. */ - current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100; - p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100; + current->sleep_avg = current->sleep_avg * MAX_BONUS / MAX_SLEEP_AVG * + PARENT_PENALTY / 100 * MAX_SLEEP_AVG / + MAX_BONUS; + p->sleep_avg = p->sleep_avg * MAX_BONUS / MAX_SLEEP_AVG * + CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS; p->prio = effective_prio(p); + p->last_run = 0; set_task_cpu(p, smp_processor_id()); if (unlikely(!current->array)) @@ -1246,6 +1273,17 @@ void scheduler_tick(int user_ticks, int enqueue_task(p, rq->expired); } else enqueue_task(p, rq->active); + } else if (!((task_timeslice(p) - p->time_slice) % + TIMESLICE_GRANULARITY) && (p->time_slice > MIN_TIMESLICE) && + (p->array == rq->active)) { + /* + * Running user tasks get requeued with their remaining + * timeslice after TIMESLICE_GRANULARITY provided they have at + * least MIN_TIMESLICE to go. + */ + dequeue_task(p, rq->active); + set_tsk_need_resched(p); + enqueue_task(p, rq->active); } out_unlock: spin_unlock(&rq->lock); @@ -1285,6 +1323,13 @@ need_resched: release_kernel_lock(prev); prev->last_run = jiffies; + /* + * If a task has run less than one tick make sure it is still + * charged one sleep_avg for running. 
+ */ + if (unlikely((task_timeslice(prev) == prev->time_slice) && + prev->sleep_avg)) + prev->sleep_avg--; spin_lock_irq(&rq->lock); /* @@ -1332,6 +1377,13 @@ pick_next_task: queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); + if (next->activated) { + next->activated = 0; + array = next->array; + dequeue_task(next, array); + recalc_task_prio(next); + enqueue_task(next, array); + } switch_tasks: prefetch(next); clear_tsk_need_resched(prev);