--- linux-2.6.0-test2/include/linux/sched.h 2003-07-28 20:46:40.000000000 +1000 +++ linux-2.6.0-test2-O12/include/linux/sched.h 2003-07-30 10:12:39.000000000 +1000 @@ -340,6 +340,8 @@ struct task_struct { unsigned long sleep_avg; unsigned long last_run; + unsigned long interactive_credit; + int activated; unsigned long policy; unsigned long cpus_allowed; --- linux-2.6.0-test2/kernel/sched.c 2003-07-28 20:46:40.000000000 +1000 +++ linux-2.6.0-test2-O12/kernel/sched.c 2003-08-01 12:36:47.000000000 +1000 @@ -58,6 +58,8 @@ #define USER_PRIO(p) ((p)-MAX_RT_PRIO) #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) +#define AVG_TIMESLICE (MIN_TIMESLICE + ((MAX_TIMESLICE - MIN_TIMESLICE) *\ + (MAX_PRIO-1-NICE_TO_PRIO(0))/(MAX_USER_PRIO - 1))) /* * These are the 'tuning knobs' of the scheduler: @@ -68,13 +70,15 @@ */ #define MIN_TIMESLICE ( 10 * HZ / 1000) #define MAX_TIMESLICE (200 * HZ / 1000) -#define CHILD_PENALTY 50 +#define TIMESLICE_GRANULARITY (HZ/40 ?: 1) +#define CHILD_PENALTY 90 #define PARENT_PENALTY 100 #define EXIT_WEIGHT 3 #define PRIO_BONUS_RATIO 25 +#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) #define INTERACTIVE_DELTA 2 -#define MAX_SLEEP_AVG (10*HZ) -#define STARVATION_LIMIT (10*HZ) +#define MAX_SLEEP_AVG (AVG_TIMESLICE * MAX_BONUS) +#define STARVATION_LIMIT (MAX_SLEEP_AVG) #define NODE_THRESHOLD 125 /* @@ -115,6 +119,14 @@ #define TASK_INTERACTIVE(p) \ ((p)->prio <= (p)->static_prio - DELTA(p)) +#define JUST_INTERACTIVE_SLEEP(p) \ + (MAX_SLEEP_AVG - (DELTA(p) * AVG_TIMESLICE)) + +#define TASK_PREEMPTS_CURR(p, rq) \ + ((p)->prio < (rq)->curr->prio || \ + ((p)->prio == (rq)->curr->prio && \ + (p)->time_slice > (rq)->curr->time_slice * 2)) + /* * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ] * to time slice values. @@ -339,42 +351,65 @@ static inline void __activate_task(task_ nr_running_inc(rq); } -/* - * activate_task - move a task to the runqueue and do priority recalculation - * - * Update all the scheduling statistics stuff. (sleep average - * calculation, priority modifiers, etc.) - */ -static inline void activate_task(task_t *p, runqueue_t *rq) +static void recalc_task_prio(task_t *p) { long sleep_time = jiffies - p->last_run - 1; if (sleep_time > 0) { - int sleep_avg; + p->activated = 0; /* - * This code gives a bonus to interactive tasks. - * - * The boost works by updating the 'average sleep time' - * value here, based on ->last_run. The more time a task - * spends sleeping, the higher the average gets - and the - * higher the priority boost gets as well. + * User tasks that sleep a long time are categorised as + * idle and will get just interactive status to stay active & + * prevent them suddenly becoming cpu hogs and starving + * other processes. */ - sleep_avg = p->sleep_avg + sleep_time; + if (p->mm && sleep_time > HZ) + p->sleep_avg = JUST_INTERACTIVE_SLEEP(p); + else { + /* + * Processes with credit get pushed to one higher + * priority each time they sleep greater than + * one tick. -ck + */ + if (p->interactive_credit) + p->sleep_avg = (p->sleep_avg * MAX_BONUS / + MAX_SLEEP_AVG + 1) * + MAX_SLEEP_AVG / MAX_BONUS; + else { + /* + * The rest earn sleep_avg according to their sleep + * time up to a maximum of their timeslice size. + */ + if (sleep_time > task_timeslice(p)) + sleep_time = task_timeslice(p); + p->sleep_avg += sleep_time; + } - /* - * 'Overflow' bonus ticks go to the waker as well, so the - * ticks are not lost. This has the effect of further - * boosting tasks that are related to maximum-interactive - * tasks. - */ - if (sleep_avg > MAX_SLEEP_AVG) - sleep_avg = MAX_SLEEP_AVG; - if (p->sleep_avg != sleep_avg) { - p->sleep_avg = sleep_avg; - p->prio = effective_prio(p); + /* + * Fully interactive tasks gain interactive credits + * to cash in when needed. + */ + if (p->sleep_avg > MAX_SLEEP_AVG){ + p->sleep_avg = MAX_SLEEP_AVG; + p->interactive_credit++; + } } } + p->prio = effective_prio(p); +} + +/* + * activate_task - move a task to the runqueue and do priority recalculation + * + * Update all the scheduling statistics stuff. (sleep average + * calculation, priority modifiers, etc.) + */ +static inline void activate_task(task_t *p, runqueue_t *rq) +{ + p->activated = 1; + recalc_task_prio(p); + __activate_task(p, rq); } @@ -501,7 +536,7 @@ repeat_lock_task: __activate_task(p, rq); else { activate_task(p, rq); - if (p->prio < rq->curr->prio) + if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); } success = 1; @@ -550,9 +585,14 @@ void wake_up_forked_process(task_t * p) * and children as well, to keep max-interactive tasks * from forking tasks that are max-interactive. */ - current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100; - p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100; + current->sleep_avg = current->sleep_avg * MAX_BONUS / MAX_SLEEP_AVG * + PARENT_PENALTY / 100 * MAX_SLEEP_AVG / + MAX_BONUS; + p->sleep_avg = p->sleep_avg * MAX_BONUS / MAX_SLEEP_AVG * + CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS; p->prio = effective_prio(p); + p->last_run = jiffies; + p->interactive_credit = 0; set_task_cpu(p, smp_processor_id()); if (unlikely(!current->array)) @@ -1215,8 +1255,9 @@ void scheduler_tick(int user_ticks, int * it possible for interactive tasks to use up their * timeslices at their highest priority levels. */ - if (p->sleep_avg) - p->sleep_avg--; + if (p->sleep_avg && (!p->interactive_credit || !(jiffies % + (MAX_BONUS + 1 - (p->sleep_avg * MAX_BONUS / MAX_SLEEP_AVG))))) + p->sleep_avg--; if (unlikely(rt_task(p))) { /* * RR tasks need a special form of timeslice management. @@ -1239,13 +1280,31 @@ void scheduler_tick(int user_ticks, int p->prio = effective_prio(p); p->time_slice = task_timeslice(p); p->first_time_slice = 0; + /* + * This drop in interactive_credit is really just a + * sanity check to make sure tasks that only slept once + * for long enough dont act like interactive tasks + */ + if (p->interactive_credit) + p->interactive_credit--; - if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { + if (p->mm && (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq))) { if (!rq->expired_timestamp) rq->expired_timestamp = jiffies; enqueue_task(p, rq->expired); } else enqueue_task(p, rq->active); + } else if (p->mm && !((task_timeslice(p) - p->time_slice) % + TIMESLICE_GRANULARITY) && (p->time_slice > MIN_TIMESLICE) && + (p->array == rq->active)) { + /* + * Running user tasks get requeued with their remaining + * timeslice after TIMESLICE_GRANULARITY provided they have at + * least MIN_TIMESLICE to go. + */ + dequeue_task(p, rq->active); + set_tsk_need_resched(p); + enqueue_task(p, rq->active); } out_unlock: spin_unlock(&rq->lock); @@ -1285,6 +1344,13 @@ need_resched: release_kernel_lock(prev); prev->last_run = jiffies; + /* + * If a task has run less than one tick make sure it is still + * charged one sleep_avg for running. + */ + if (unlikely((task_timeslice(prev) == prev->time_slice) && + prev->sleep_avg)) + prev->sleep_avg--; spin_lock_irq(&rq->lock); /* @@ -1332,6 +1398,13 @@ pick_next_task: queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); + if (next->activated) { + next->activated = 0; + array = next->array; + dequeue_task(next, array); + recalc_task_prio(next); + enqueue_task(next, array); + } switch_tasks: prefetch(next); clear_tsk_need_resched(prev);