fs/proc/array.c | 4 - include/linux/sched.h | 2 kernel/sched.c | 117 ++++++++++++++++++++++++++------------------------ 3 files changed, 64 insertions(+), 59 deletions(-) Index: linux-2.6.14-ck9/fs/proc/array.c =================================================================== --- linux-2.6.14-ck9.orig/fs/proc/array.c +++ linux-2.6.14-ck9/fs/proc/array.c @@ -165,7 +165,7 @@ static inline char * task_state(struct t read_lock(&tasklist_lock); buffer += sprintf(buffer, "State:\t%s\n" - "Burst:\t%d\n" + "Bonus:\t%d\n" "Tgid:\t%d\n" "Pid:\t%d\n" "PPid:\t%d\n" @@ -173,7 +173,7 @@ static inline char * task_state(struct t "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), - p->burst, + p->bonus, p->tgid, p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, pid_alive(p) && p->ptrace ? p->parent->pid : 0, Index: linux-2.6.14-ck9/include/linux/sched.h =================================================================== --- linux-2.6.14-ck9.orig/include/linux/sched.h +++ linux-2.6.14-ck9/include/linux/sched.h @@ -670,7 +670,7 @@ struct task_struct { unsigned long long timestamp; unsigned long runtime, totalrun, ns_debit; - unsigned int burst; + unsigned int bonus; unsigned int slice, time_slice; unsigned long long sched_time; /* sched_clock time spent running */ Index: linux-2.6.14-ck9/kernel/sched.c =================================================================== --- linux-2.6.14-ck9.orig/kernel/sched.c +++ linux-2.6.14-ck9/kernel/sched.c @@ -16,9 +16,9 @@ * by Davide Libenzi, preemptible kernel bits by Robert Love. * 2003-09-03 Interactivity tuning by Con Kolivas. * 2004-04-02 Scheduler domains code by Nick Piggin - * 2005-11-02 New staircase scheduling policy by Con Kolivas with help + * 2005-11-08 New staircase scheduling policy by Con Kolivas with help * from William Lee Irwin III, Zwane Mwaikambo & Peter Williams. 
- * Staircase v12.2 + * Staircase v13 */ #include @@ -633,30 +633,11 @@ static inline void __activate_idle_task( } /* - * burst - extra intervals an interactive task can run for at best priority - * instead of descending priorities. + * Bonus - How much higher than its base priority an interactive task can run. */ -static inline unsigned int burst(task_t *p) +static inline unsigned int bonus(task_t *p) { - if (likely(!rt_task(p))) { - unsigned int task_user_prio = TASK_USER_PRIO(p); - return 39 - task_user_prio; - } else - return p->burst; -} - -static void inc_burst(task_t *p) -{ - unsigned int best_burst; - best_burst = burst(p); - if (p->burst < best_burst) - p->burst++; -} - -static void dec_burst(task_t *p) -{ - if (p->burst) - p->burst--; + return TASK_USER_PRIO(p); } static inline unsigned int rr_interval(task_t * p) @@ -671,33 +652,61 @@ static inline unsigned int rr_interval(t /* * slice - the duration a task runs before getting requeued at its best - * priority and has its burst decremented. + * priority and has its bonus decremented. */ static inline unsigned int slice(task_t *p) { unsigned int slice, rr; + slice = rr = rr_interval(p); if (likely(!rt_task(p))) - slice += burst(p) * rr; + slice += (39 - TASK_USER_PRIO(p)) * rr; return slice; } /* - * sched_interactive - sysctl which allows interactive tasks to have bursts + * We increase our bonus by sleeping more than the time we ran. + * The ratio of sleep to run gives us the cpu% that we last ran and determines + * the maximum bonus we can acquire. 
+ */ +static void inc_bonus(task_t *p, unsigned long totalrun, unsigned long sleep) +{ + unsigned int best_bonus; + + best_bonus = sleep / (totalrun + 1); + if (p->bonus >= best_bonus) + return; + + p->bonus++; + best_bonus = bonus(p); + if (p->bonus > best_bonus) + p->bonus = best_bonus; +} + +static void dec_bonus(task_t *p) +{ + if (p->bonus) + p->bonus--; +} + +/* + * sched_interactive - sysctl which allows interactive tasks to have their bonus + * raise their priority. */ int sched_interactive = 1; /* - * effective_prio - dynamic priority dependent on burst. + * effective_prio - dynamic priority dependent on bonus. * The priority normally decreases by one each RR_INTERVAL. - * As the burst increases the priority stays at the top "stair" or + * As the bonus increases the initial priority starts at a higher "stair" or * priority for longer. */ static int effective_prio(task_t *p) { int prio; - unsigned int full_slice, used_slice, first_slice; - unsigned int best_burst, rr; + unsigned int full_slice, used_slice = 0; + unsigned int best_bonus, rr; + if (rt_task(p)) return p->prio; if (batch_task(p)) { @@ -722,20 +731,17 @@ static int effective_prio(task_t *p) return MAX_RT_PRIO; } - best_burst = burst(p); full_slice = slice(p); + if (full_slice > p->slice) + used_slice = full_slice - p->slice; + + best_bonus = bonus(p); + prio = MAX_RT_PRIO + best_bonus; + if (sched_interactive && !sched_compute) + prio -= p->bonus; + rr = rr_interval(p); - used_slice = full_slice - p->slice; - if (p->burst > best_burst) - p->burst = best_burst; - first_slice = rr; - if (sched_interactive && !sched_compute && p->mm) - first_slice *= (p->burst + 1); - prio = MAX_PRIO - 2 - best_burst; - - if (used_slice < first_slice) - return prio; - prio += 1 + (used_slice - first_slice) / rr; + prio += used_slice / rr; if (prio >= MAX_PRIO - 2) prio = MAX_PRIO - 2; return prio; @@ -747,7 +753,7 @@ static void continue_slice(task_t *p) if (total_run >= p->slice) { p->totalrun -= JIFFIES_TO_NS(p->slice); 
- dec_burst(p); + dec_bonus(p); } else { unsigned int remainder; p->slice -= total_run; @@ -769,16 +775,13 @@ static inline void recalc_task_prio(task /* * Priority is elevated back to best by amount of sleep_time. - * sleep_time is scaled down by number of tasks currently running. */ - if (rq_running > 1) - sleep_time /= rq_running; p->totalrun += p->runtime; if (NS_TO_JIFFIES(p->totalrun) >= p->slice && NS_TO_JIFFIES(sleep_time) < p->slice) { p->flags &= ~PF_NONSLEEP; - dec_burst(p); + dec_bonus(p); p->totalrun -= JIFFIES_TO_NS(p->slice); if (sleep_time > p->totalrun) p->totalrun = 0; @@ -800,7 +803,7 @@ static inline void recalc_task_prio(task if (sleep_time >= p->totalrun) { if (!(p->flags & PF_NONSLEEP)) - inc_burst(p); + inc_bonus(p, p->totalrun, sleep_time); p->totalrun = 0; goto out; } @@ -820,6 +823,8 @@ out: static void activate_task(task_t *p, runqueue_t *rq, int local) { unsigned long long now = sched_clock(); + unsigned long rr = rr_interval(p); + #ifdef CONFIG_SMP if (!local) { /* Compensate for drifting sched_clock */ @@ -829,7 +834,7 @@ static void activate_task(task_t *p, run } #endif p->slice = slice(p); - p->time_slice = rr_interval(p); + p->time_slice = p->slice % rr ? : rr; recalc_task_prio(p, now, rq->nr_running); p->flags &= ~PF_NONSLEEP; p->prio = effective_prio(p); @@ -1454,9 +1459,9 @@ void fastcall wake_up_new_task(task_t *p cpu = task_cpu(p); /* - * Forked process gets no burst to prevent fork bombs. + * Forked process gets no bonus to prevent fork bombs. */ - p->burst = 0; + p->bonus = 0; if (likely(cpu == this_cpu)) { current->flags |= PF_NONSLEEP; @@ -2599,10 +2604,10 @@ void scheduler_tick(void) goto out_unlock; p->ns_debit %= NSJIFFY; /* - * Tasks lose burst each time they use up a full slice(). + * Tasks lose bonus each time they use up a full slice(). 
*/ if (!--p->slice) { - dec_burst(p); + dec_bonus(p); p->slice = slice(p); time_slice_expired(p, rq); p->totalrun = 0; @@ -3435,8 +3440,8 @@ void set_user_nice(task_t *p, long nice) delta = new_prio - old_prio; p->static_prio = NICE_TO_PRIO(nice); p->prio += delta; - if (p->burst > burst(p)) - p->burst = burst(p); + if (p->bonus > bonus(p)) + p->bonus= bonus(p); if (queued) { enqueue_task(p, rq);