--- linux-2.6.0-test11-ht3/kernel/sched.c 2003-12-15 23:38:33.250059542 +1100 +++ linux-2.6.0-test11-ck2/kernel/sched.c 2003-12-15 23:53:57.186115402 +1100 @@ -141,7 +141,7 @@ INTERACTIVE_DELTA) #define TASK_INTERACTIVE(p) \ - ((p)->prio <= (p)->static_prio - DELTA(p)) + ((p)->prio <= (p)->static_prio - DELTA(p) && !batch_task(p)) #define JUST_INTERACTIVE_SLEEP(p) \ (JIFFIES_TO_NS(MAX_SLEEP_AVG * \ @@ -153,8 +153,13 @@ #define LOW_CREDIT(p) \ ((p)->interactive_credit < -CREDIT_LIMIT) +/* + * Batch tasks are preempted by any priority normal tasks and never + * preempt other tasks. + */ #define TASK_PREEMPTS_CURR(p, rq) \ - ((p)->prio < (rq)->curr->prio) + ((((p)->prio < (rq)->curr->prio) || (batch_task((rq)->curr))) && \ + (!batch_task(p))) /* * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ] @@ -172,6 +177,13 @@ static inline unsigned int task_timeslice(task_t *p) { + /* + * Batch tasks get much longer timeslices to optimise cpu throughput. + * Since they yield to any other tasks this is not a problem. + */ + if (unlikely(batch_task(p))) + return BASE_TIMESLICE(p) * 10; + return BASE_TIMESLICE(p); } @@ -199,10 +211,10 @@ struct prio_array { struct runqueue { spinlock_t lock; unsigned long nr_running, nr_switches, expired_timestamp, - nr_uninterruptible; + nr_uninterruptible, nr_batch; task_t *curr, *idle; struct mm_struct *prev_mm; - prio_array_t *active, *expired, arrays[2]; + prio_array_t *active, *expired, *batch, arrays[3]; int prev_cpu_load[NR_CPUS]; unsigned long cpu; #ifdef CONFIG_NUMA @@ -274,9 +286,11 @@ __init void node_nr_running_init(void) #else /* !CONFIG_NUMA */ -# define nr_running_init(rq) do { } while (0) -# define nr_running_inc(rq) do { (rq)->nr_running++; } while (0) -# define nr_running_dec(rq) do { (rq)->nr_running--; } while (0) +# define nr_running_init(rq) do { } while (0) +# define nr_running_inc(rq) do { (rq)->nr_running++; } while (0) +# define nr_running_dec(rq) do { (rq)->nr_running--; } while (0) +# define nr_batch_inc(rq) do { (rq)->nr_batch++; } while (0) +# define nr_batch_dec(rq) do { (rq)->nr_batch--; } while (0) #endif /* CONFIG_NUMA */ @@ -359,12 +373,13 @@ static inline void enqueue_task(struct t */ static int effective_prio(task_t *p) { - int bonus, prio; + int prio, bonus = 0; if (rt_task(p)) return p->prio; - bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; + if (likely(!batch_task(p))) + bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; prio = p->static_prio - bonus; if (prio < MAX_RT_PRIO) @@ -379,7 +394,12 @@ static int effective_prio(task_t *p) */ static inline void __activate_task(task_t *p, runqueue_t *rq) { - enqueue_task(p, rq->active); + if (unlikely(batch_task(p))) { + enqueue_task(p, rq->batch); + nr_batch_inc(rq); + } else + enqueue_task(p, rq->active); + nr_running_inc(rq); } @@ -503,6 +523,8 @@ static inline void activate_task(task_t static inline void deactivate_task(struct task_struct *p, runqueue_t *rq) { nr_running_dec(rq); + if (unlikely(batch_task(p))) + nr_batch_dec(rq); if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; dequeue_task(p, p->array); @@ -711,10 +733,20 @@ void wake_up_forked_process(task_t * p) else { p->prio = current->prio; list_add_tail(&p->run_list, ¤t->run_list); - p->array = current->array; - p->array->nr_active++; + if (unlikely(batch_task(p))) { + p->array = task_rq(current)->batch; + p->array->nr_active++; + nr_batch_inc(rq); + } else { + p->array = current->array; + p->array->nr_active++; + } nr_running_inc(rq); } + + if (unlikely(batch_task(current) && !batch_task(p))) + resched_task(current); + task_rq_unlock(rq, &flags); } @@ -1139,7 +1171,12 @@ static inline void pull_task(runqueue_t nr_running_dec(src_rq); set_task_cpu(p, this_cpu); nr_running_inc(this_rq); - enqueue_task(p, this_rq->active); + if (unlikely(batch_task(p))) { + nr_batch_dec(src_rq); + nr_batch_inc(this_rq); + enqueue_task(p, this_rq->batch); + } else + enqueue_task(p, this_rq->active); /* * Note that idle threads have a prio of MAX_PRIO, for this test * to be always true for them. @@ -1399,13 +1436,23 @@ void scheduler_tick(int user_ticks, int cpustat->iowait += sys_ticks; else cpustat->idle += sys_ticks; + + if (rq->nr_batch) { + resched_task(p); + goto out; + } rebalance_tick(rq, 1); return; } - if (TASK_NICE(p) > 0) - cpustat->nice += user_ticks; - else - cpustat->user += user_ticks; + + if (unlikely(batch_task(p))) { + cpustat->idle += user_ticks; + } else { + if (TASK_NICE(p) > 0) + cpustat->nice += user_ticks; + else + cpustat->user += user_ticks; + } cpustat->system += sys_ticks; /* Task might have expired already, but not scheduled off yet */ @@ -1444,12 +1491,17 @@ void scheduler_tick(int user_ticks, int p->time_slice = task_timeslice(p); p->first_time_slice = 0; - if (!rq->expired_timestamp) + if (!rq->expired_timestamp && !batch_task(p)) rq->expired_timestamp = jiffies; - if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { - enqueue_task(p, rq->expired); - } else - enqueue_task(p, rq->active); + + if (unlikely(batch_task(p))) + enqueue_task(p, rq->batch); + else { + if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) + enqueue_task(p, rq->expired); + else + enqueue_task(p, rq->active); + } } else { /* * Prevent a too long timeslice allowing a task to monopolize @@ -1570,16 +1622,60 @@ pick_next_task: /* * Switch the active and expired arrays. */ - rq->active = rq->expired; - rq->expired = array; + if (likely(rq->expired->nr_active)) { + rq->active = rq->expired; + rq->expired = array; + rq->expired_timestamp = 0; + } else { + /* + * Switch to the batch array if there are no + * normal tasks left waiting. + */ + rq->active = rq->batch; + rq->batch = array; + } + array = rq->active; - rq->expired_timestamp = 0; } idx = sched_find_first_bit(array->bitmap); queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); + /* + * If a batch process is on the active array and normal processes + * are still running move the batch process to the batch + * array. + */ + if (unlikely(batch_task(next))) { + if (rq->nr_running > rq->nr_batch) { + dequeue_task(next, array); + enqueue_task(next, rq->batch); + goto pick_next_task; + } +#ifdef CONFIG_SMP + /* + * Check for hyper-threading and do not schedule a batch task + * if the sibling is running a normal task. + */ + if (ht_active) { + runqueue_t *htrq; + htrq = cpu_rq(cpu_sibling_map[(rq->cpu)]); + if (htrq->nr_running > htrq->nr_batch) { + dequeue_task(next, array); + enqueue_task(next, rq->batch); + next = rq->idle; + goto switch_tasks; + } + } + } else if (ht_active) { + runqueue_t *htrq; + htrq = cpu_rq(cpu_sibling_map[(rq->cpu)]); + if (batch_task(htrq->curr)) + resched_task(htrq->curr); +#endif + } + if (next->activated > 0) { unsigned long long delta = now - next->timestamp; @@ -1901,10 +1997,11 @@ void set_user_nice(task_t *p, long nice) enqueue_task(p, array); /* * If the task increased its priority or is running and - * lowered its priority, then reschedule its CPU: + * lowered its priority or is batch, then reschedule its CPU: */ - if (delta < 0 || (delta > 0 && task_running(rq, p))) - resched_task(rq->curr); + if (delta < 0 || ((delta > 0 || batch_task(p)) && + task_running(rq, p))) + resched_task(rq->curr); } out_unlock: task_rq_unlock(rq, &flags); @@ -2042,8 +2139,8 @@ static int setscheduler(pid_t pid, int p else { retval = -EINVAL; if (policy != SCHED_FIFO && policy != SCHED_RR && - policy != SCHED_NORMAL) - goto out_unlock; + policy != SCHED_NORMAL && policy !=SCHED_BATCH) + goto out_unlock; } /* @@ -2053,8 +2150,9 @@ static int setscheduler(pid_t pid, int p retval = -EINVAL; if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1) goto out_unlock; - if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0)) - goto out_unlock; + if ((policy == SCHED_NORMAL || policy == SCHED_BATCH) != + (lp.sched_priority == 0)) + goto out_unlock; retval = -EPERM; if ((policy == SCHED_FIFO || policy == SCHED_RR) && @@ -2075,7 +2173,7 @@ static int setscheduler(pid_t pid, int p p->policy = policy; p->rt_priority = lp.sched_priority; oldprio = p->prio; - if (policy != SCHED_NORMAL) + if (policy != SCHED_NORMAL && policy != SCHED_BATCH) p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority; else p->prio = p->static_prio; @@ -2087,9 +2185,9 @@ static int setscheduler(pid_t pid, int p * this runqueue and our priority is higher than the current's */ if (rq->curr == p) { - if (p->prio > oldprio) + if (p->prio > oldprio || batch_task(p)) resched_task(rq->curr); - } else if (p->prio < rq->curr->prio) + } else if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); } @@ -2295,7 +2393,10 @@ asmlinkage long sys_sched_yield(void) */ if (likely(!rt_task(current))) { dequeue_task(current, array); - enqueue_task(current, rq->expired); + if (unlikely(batch_task(current))) + enqueue_task(current, rq->batch); + else + enqueue_task(current, rq->expired); } else { list_del(¤t->run_list); list_add_tail(¤t->run_list, array->queue + current->prio); @@ -2380,6 +2481,7 @@ asmlinkage long sys_sched_get_priority_m ret = MAX_USER_RT_PRIO-1; break; case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; break; } @@ -2403,6 +2505,7 @@ asmlinkage long sys_sched_get_priority_m ret = 1; break; case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; } return ret; @@ -2836,12 +2939,13 @@ void __init sched_init(void) rq->cpu = (unsigned long)(i); rq->active = rq->arrays; rq->expired = rq->arrays + 1; + rq->batch = rq->arrays + 2; spin_lock_init(&rq->lock); INIT_LIST_HEAD(&rq->migration_queue); atomic_set(&rq->nr_iowait, 0); nr_running_init(rq); - for (j = 0; j < 2; j++) { + for (j = 0; j < 3; j++) { array = rq->arrays + j; for (k = 0; k < MAX_PRIO; k++) { INIT_LIST_HEAD(array->queue + k); --- linux-2.6.0-test11-ht3/include/linux/sched.h 2003-12-15 23:33:47.962495605 +1100 +++ linux-2.6.0-test11-ck2/include/linux/sched.h 2003-12-15 23:40:32.717778686 +1100 @@ -126,6 +126,7 @@ extern unsigned long nr_iowait(void); #define SCHED_NORMAL 0 #define SCHED_FIFO 1 #define SCHED_RR 2 +#define SCHED_BATCH 3 struct sched_param { int sched_priority; @@ -285,6 +286,7 @@ struct signal_struct { #define MAX_PRIO (MAX_RT_PRIO + 40) #define rt_task(p) ((p)->prio < MAX_RT_PRIO) +#define batch_task(p) ((p)->policy == SCHED_BATCH) /* * Some day this will be a full-fledged user tracking system..