SCHED_BATCH scheduling policy, diffed against linux-2.6.0-test11-ht1.

Summary of what the hunks below do:
  * include/linux/sched.h: define SCHED_BATCH (3) and batch_task(p), which
    tests p->policy == SCHED_BATCH.
  * struct runqueue: add a third priority array pointer (rq->batch, backed by
    arrays[2]) and an nr_batch counter; sched_init() sets up all three arrays.
  * TASK_INTERACTIVE() is never true for a batch task, effective_prio() pins
    batch tasks to MAX_PRIO-1, and task_timeslice() returns
    BASE_TIMESLICE(p) * 10 for them.
  * __activate_task(), pull_task() and sys_sched_yield() queue batch tasks on
    rq->batch; nr_batch is kept in step in __activate_task(),
    deactivate_task(), wake_up_forked_process() and pull_task().
  * wake_up_cpu() and setscheduler() never reschedule a CPU on behalf of a
    batch task; wake_up_cpu() always reschedules a CPU whose current task is
    batch when a non-batch task wakes.
  * scheduler_tick() accounts a batch task's user ticks as idle time,
    reschedules a running batch task when nr_running > nr_batch, and
    reschedules the idle task when only batch tasks are runnable.
  * schedule() switches to the batch array when the expired array is empty at
    array-switch time, and moves a batch task found on the active array back
    to rq->batch while nr_running > nr_batch.
  * setscheduler() and sys_sched_get_priority_{max,min}() accept SCHED_BATCH
    with the same priority rules as SCHED_NORMAL (sched_priority must be 0).

NOTE(review): nr_batch_inc()/nr_batch_dec() are only added in the
  !CONFIG_NUMA branch visible in this patch; confirm CONFIG_NUMA builds also
  get definitions.
NOTE(review): in wake_up_forked_process() the child is linked into current's
  run list, but a batch child gets p->array = rq->batch; confirm these always
  refer to the same array, otherwise nr_active is adjusted on the wrong one.
NOTE(review): tab/blank-line layout of context lines is inferred from the hunk
  line counts; if a hunk is rejected for whitespace, retry with `patch -l`.

--- linux-2.6.0-test11-ht1/kernel/sched.c	2003-12-05 13:32:59.000000000 +1100
+++ linux-2.6.0-test11-b1/kernel/sched.c	2003-12-06 00:34:12.832783883 +1100
@@ -141,7 +141,7 @@
 		INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
-	((p)->prio <= (p)->static_prio - DELTA(p))
+	((p)->prio <= (p)->static_prio - DELTA(p) && !batch_task(p))
 
 #define JUST_INTERACTIVE_SLEEP(p) \
 	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
@@ -169,6 +169,13 @@
 
 static inline unsigned int task_timeslice(task_t *p)
 {
+	/*
+	 * Batch tasks get much longer timeslices to optimise cpu throughput.
+	 * Since they yield to any other tasks this is not a problem.
+	 */
+	if (unlikely(batch_task(p)))
+		return BASE_TIMESLICE(p) * 10;
+
 	return BASE_TIMESLICE(p);
 }
 
@@ -238,9 +245,9 @@ typedef struct cpu_s {
 struct runqueue {
 	spinlock_t lock;
 	unsigned long nr_running, nr_switches, expired_timestamp,
-			nr_uninterruptible;
+			nr_uninterruptible, nr_batch;
 	struct mm_struct *prev_mm;
-	prio_array_t *active, *expired, arrays[2];
+	prio_array_t *active, *expired, *batch, arrays[3];
 	int prev_cpu_load[NR_CPUS];
 #ifdef CONFIG_NUMA
 	atomic_t *node_nr_running;
@@ -319,9 +326,11 @@ __init void node_nr_running_init(void)
 
 #else /* !CONFIG_NUMA */
 
-# define nr_running_init(rq)   do { } while (0)
-# define nr_running_inc(rq)    do { (rq)->nr_running++; } while (0)
-# define nr_running_dec(rq)    do { (rq)->nr_running--; } while (0)
+# define nr_running_init(rq)	do { } while (0)
+# define nr_running_inc(rq)	do { (rq)->nr_running++; } while (0)
+# define nr_running_dec(rq)	do { (rq)->nr_running--; } while (0)
+# define nr_batch_inc(rq)	do { (rq)->nr_batch++; } while (0)
+# define nr_batch_dec(rq)	do { (rq)->nr_batch--; } while (0)
 
 #endif /* CONFIG_NUMA */
 
@@ -414,7 +423,7 @@ static int effective_prio(task_t *p)
 	prio = p->static_prio - bonus;
 	if (prio < MAX_RT_PRIO)
 		prio = MAX_RT_PRIO;
-	if (prio > MAX_PRIO-1)
+	if (prio > MAX_PRIO-1 || batch_task(p))
 		prio = MAX_PRIO-1;
 	return prio;
 }
@@ -424,7 +433,12 @@ static int effective_prio(task_t *p)
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
-	enqueue_task(p, rq->active);
+	if (unlikely(batch_task(p))){
+		enqueue_task(p, rq->batch);
+		nr_batch_inc(rq);
+	} else
+		enqueue_task(p, rq->active);
+
 	nr_running_inc(rq);
 }
 
@@ -548,6 +562,8 @@ static inline void activate_task(task_t
 static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
 	nr_running_dec(rq);
+	if (unlikely(batch_task(p)))
+		nr_batch_dec(rq);
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible++;
 	dequeue_task(p, p->array);
@@ -675,16 +691,22 @@ static void wake_up_cpu(runqueue_t *rq,
 		return resched_cpu(curr_cpu->cpu);
 	}
 
-	if (p->prio < cpu_curr_ptr(cpu)->prio)
-		return resched_task(cpu_curr_ptr(cpu));
+	/*
+	 * SCHED_BATCH tasks never preempt but are always preempted by
+	 * other policy tasks
+	 */
+	if (!batch_task(p) && (p->prio < cpu_curr_ptr(cpu)->prio ||
+		batch_task(cpu_curr_ptr(cpu))))
+			return resched_task(cpu_curr_ptr(cpu));
 
 	for_each_sibling(idx, rq) {
 		curr_cpu = rq->cpu + idx;
 		if (!task_allowed(p, curr_cpu->cpu))
 			continue;
 		curr = curr_cpu->curr;
-		if (p->prio < curr->prio)
-			return resched_task(curr);
+		if (!batch_task(p) && (p->prio < curr->prio ||
+			batch_task(curr)))
+				return resched_task(curr);
 	}
 }
 
@@ -797,9 +819,20 @@ void wake_up_forked_process(task_t * p)
 		p->prio = current->prio;
 		list_add_tail(&p->run_list, &current->run_list);
 		p->array = current->array;
-		p->array->nr_active++;
+		if (unlikely(batch_task(p))){
+			p->array = task_rq(current)->batch;
+			p->array->nr_active++;
+			nr_batch_inc(rq);
+		} else {
+			p->array = current->array;
+			p->array->nr_active++;
+		}
 		nr_running_inc(rq);
 	}
+
+	if (unlikely(batch_task(current) && !batch_task(p)))
+		resched_task(current);
+
 	task_rq_unlock(rq, &flags);
 }
 
@@ -1228,7 +1261,13 @@ static inline void pull_task(runqueue_t
 	nr_running_dec(src_rq);
 	set_task_cpu(p, this_cpu);
 	nr_running_inc(this_rq);
-	enqueue_task(p, this_rq->active);
+	if (unlikely(batch_task(p))){
+		nr_batch_dec(src_rq);
+		nr_batch_inc(this_rq);
+		enqueue_task(p, this_rq->batch);
+	} else
+		enqueue_task(p, this_rq->active);
+
 	wake_up_cpu(this_rq, this_cpu, p);
 }
 
@@ -1593,13 +1632,36 @@ void scheduler_tick(int user_ticks, int
 			cpustat->iowait += sys_ticks;
 		else
 			cpustat->idle += sys_ticks;
+
+		/*
+		 * Check to see if batch tasks are waiting to run and
+		 * reschedule the idle task if no normal tasks are
+		 * on this runqueue.
+		 */
+		if (rq->nr_batch && rq->nr_batch == rq->nr_running) {
+			resched_task(p);
+			goto out;
+		}
 		rebalance_tick(rq, cpu, 1);
 		return;
 	}
-	if (TASK_NICE(p) > 0)
-		cpustat->nice += user_ticks;
-	else
-		cpustat->user += user_ticks;
+
+	if (unlikely(batch_task(p))){
+		/*
+		 * SCHED_BATCH tasks are counted as idle time.
+		 */
+		cpustat->idle += user_ticks;
+		if (rq->nr_running > rq->nr_batch){
+			resched_task(p);
+			goto out;
+		}
+	} else {
+		if (TASK_NICE(p) > 0)
+			cpustat->nice += user_ticks;
+		else
+			cpustat->user += user_ticks;
+	}
+
 	cpustat->system += sys_ticks;
 
 	/* Task might have expired already, but not scheduled off yet */
@@ -1772,16 +1834,40 @@ pick_idle:
 		/*
 		 * Switch the active and expired arrays.
 		 */
-		rq->active = rq->expired;
-		rq->expired = array;
+		if (likely(rq->expired->nr_active)){
+			rq->active = rq->expired;
+			rq->expired = array;
+			rq->expired_timestamp = 0;
+		} else {
+			/*
+			 * Switch to the batch array if there are no
+			 * normal tasks left waiting.
+			 */
+			rq->active = rq->batch;
+			rq->batch = array;
+		}
+
 		array = rq->active;
-		rq->expired_timestamp = 0;
 	}
 
 new_array:
 	idx = sched_find_first_bit(array->bitmap);
-  	queue = array->queue + idx;
-  	next = list_entry(queue->next, task_t, run_list);
+	queue = array->queue + idx;
+	next = list_entry(queue->next, task_t, run_list);
+
+	/*
+	 * If a batch process is on the active array and normal processes
+	 * are still running move the batch process to the batch
+	 * array.
+	 */
+	if (unlikely(batch_task(next))){
+		if (rq->nr_running > rq->nr_batch){
+			dequeue_task(next, array);
+			enqueue_task(next, rq->batch);
+			goto pick_next_task;
+		}
+	}
+
 	if ((next != prev) && (rq_nr_cpus(rq) > 1)) {
 		struct list_head *tmp = queue->next;
 
@@ -1793,6 +1879,10 @@ new_array:
 			}
 			idx = find_next_bit(array->bitmap, MAX_PRIO, ++idx);
 			if (idx == MAX_PRIO) {
+				/*
+				 * Batch tasks are not considered for siblings
+				 * on busy shared runqueues.
+				 */
 				if (retry || !rq->expired->nr_active) {
 					goto pick_idle;
 				}
@@ -2134,10 +2224,11 @@ void set_user_nice(task_t *p, long nice)
 		enqueue_task(p, array);
 		/*
 		 * If the task increased its priority or is running and
-		 * lowered its priority, then reschedule its CPU:
+		 * lowered its priority or is batch, then reschedule its CPU:
 		 */
-		if (delta < 0 || (delta > 0 && task_running(p)))
-			resched_task(cpu_curr_ptr(task_cpu(p)));
+		if (delta < 0 || ((delta > 0 || batch_task(p)) &&
+			task_running(p)))
+				resched_task(cpu_curr_ptr(task_cpu(p)));
 	}
 out_unlock:
 	task_rq_unlock(rq, &flags);
@@ -2275,8 +2366,8 @@ static int setscheduler(pid_t pid, int p
 	else {
 		retval = -EINVAL;
 		if (policy != SCHED_FIFO && policy != SCHED_RR &&
-				policy != SCHED_NORMAL)
-			goto out_unlock;
+			policy != SCHED_NORMAL && policy !=SCHED_BATCH)
+				goto out_unlock;
 	}
 
 	/*
@@ -2286,8 +2377,9 @@ static int setscheduler(pid_t pid, int p
 	retval = -EINVAL;
 	if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
 		goto out_unlock;
-	if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
-		goto out_unlock;
+	if ((policy == SCHED_NORMAL || policy == SCHED_BATCH) !=
+		(lp.sched_priority == 0))
+			goto out_unlock;
 
 	retval = -EPERM;
 	if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
@@ -2308,7 +2400,7 @@ static int setscheduler(pid_t pid, int p
 	p->policy = policy;
 	p->rt_priority = lp.sched_priority;
 	oldprio = p->prio;
-	if (policy != SCHED_NORMAL)
+	if (policy != SCHED_NORMAL && policy != SCHED_BATCH)
 		p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
 	else
 		p->prio = p->static_prio;
@@ -2320,10 +2412,11 @@ static int setscheduler(pid_t pid, int p
 		 * this runqueue and our priority is higher than the current's
 		 */
 		if (cpu_curr_ptr(task_cpu(p)) == p) {
-			if (p->prio > oldprio)
+			if (p->prio > oldprio || batch_task(p))
+				resched_task(cpu_curr_ptr(task_cpu(p)));
+		} else if (!batch_task(p) &&
+			p->prio < cpu_curr_ptr(task_cpu(p))->prio)
 				resched_task(cpu_curr_ptr(task_cpu(p)));
-		} else if (p->prio < cpu_curr_ptr(task_cpu(p))->prio)
-			resched_task(cpu_curr_ptr(task_cpu(p)));
 	}
 
 out_unlock:
@@ -2528,7 +2621,10 @@ asmlinkage long sys_sched_yield(void)
 	 */
 	if (likely(!rt_task(current))) {
 		dequeue_task(current, array);
-		enqueue_task(current, rq->expired);
+		if (unlikely(batch_task(current)))
+			enqueue_task(current, rq->batch);
+		else
+			enqueue_task(current, rq->expired);
 	} else {
 		list_del(&current->run_list);
 		list_add_tail(&current->run_list, array->queue + current->prio);
@@ -2613,6 +2709,7 @@ asmlinkage long sys_sched_get_priority_m
 		ret = MAX_USER_RT_PRIO-1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 		break;
 	}
@@ -2636,6 +2733,7 @@ asmlinkage long sys_sched_get_priority_m
 		ret = 1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 	}
 	return ret;
@@ -3080,6 +3178,7 @@ void __init sched_init(void)
 		rq = cpu_rq(i);
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
+		rq->batch = rq->arrays + 2;
 		spin_lock_init(&rq->lock);
 		INIT_LIST_HEAD(migration_queue(i));
 		rq->nr_cpus = 1;
@@ -3087,7 +3186,7 @@ void __init sched_init(void)
 		atomic_set(&rq->nr_iowait, 0);
 		nr_running_init(rq);
 
-		for (j = 0; j < 2; j++) {
+		for (j = 0; j < 3; j++) {
 			array = rq->arrays + j;
 			for (k = 0; k < MAX_PRIO; k++) {
 				INIT_LIST_HEAD(array->queue + k);
--- linux-2.6.0-test11-ht1/include/linux/sched.h	2003-12-05 13:27:40.000000000 +1100
+++ linux-2.6.0-test11-b1/include/linux/sched.h	2003-12-05 21:10:11.000000000 +1100
@@ -126,6 +126,7 @@ extern unsigned long nr_iowait(void);
 #define SCHED_NORMAL		0
 #define SCHED_FIFO		1
 #define SCHED_RR		2
+#define SCHED_BATCH		3
 
 struct sched_param {
 	int sched_priority;
@@ -303,6 +304,7 @@ struct signal_struct {
 #define MAX_PRIO		(MAX_RT_PRIO + 40)
 
 #define rt_task(p)		((p)->prio < MAX_RT_PRIO)
+#define batch_task(p)		((p)->policy == SCHED_BATCH)
 
 /*
  * Some day this will be a full-fledged user tracking system..