include/linux/init_task.h | 4 +-- include/linux/sched.h | 9 +++++--- kernel/sched.c | 50 ++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 52 insertions(+), 11 deletions(-) Index: linux-2.6.16-staircase/include/linux/init_task.h =================================================================== --- linux-2.6.16-staircase.orig/include/linux/init_task.h 2006-04-04 15:58:09.000000000 +1000 +++ linux-2.6.16-staircase/include/linux/init_task.h 2006-04-04 16:07:35.000000000 +1000 @@ -83,8 +83,8 @@ extern struct group_info init_groups; .usage = ATOMIC_INIT(2), \ .flags = 0, \ .lock_depth = -1, \ - .prio = MAX_PRIO-20, \ - .static_prio = MAX_PRIO-20, \ + .prio = MAX_PRIO-21, \ + .static_prio = MAX_PRIO-21, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ .mm = NULL, \ Index: linux-2.6.16-staircase/include/linux/sched.h =================================================================== --- linux-2.6.16-staircase.orig/include/linux/sched.h 2006-04-04 16:07:35.000000000 +1000 +++ linux-2.6.16-staircase/include/linux/sched.h 2006-04-04 16:07:35.000000000 +1000 @@ -162,9 +162,10 @@ extern unsigned long nr_iowait(void); #define SCHED_RR 2 #define SCHED_BATCH 3 #define SCHED_ISO 4 +#define SCHED_IDLEPRIO 5 #define SCHED_MIN 0 -#define SCHED_MAX 4 +#define SCHED_MAX 5 #define SCHED_RANGE(policy) ((policy) >= SCHED_MIN && \ (policy) <= SCHED_MAX) @@ -497,12 +498,14 @@ struct signal_struct { #define MAX_RT_PRIO MAX_USER_RT_PRIO #define ISO_PRIO (MAX_RT_PRIO - 1) -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define MIN_USER_PRIO (MAX_PRIO - 1) +#define MAX_PRIO (MAX_RT_PRIO + 41) +#define MIN_USER_PRIO (MAX_PRIO - 2) +#define IDLEPRIO_PRIO (MAX_PRIO - 1) #define rt_task(p) (unlikely(SCHED_RT((p)->policy))) #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) #define iso_task(p) (unlikely((p)->policy == SCHED_ISO)) +#define idleprio_task(p) (unlikely((p)->policy == SCHED_IDLEPRIO)) /* * Some day this will be a full-fledged user tracking system.. 
Index: linux-2.6.16-staircase/kernel/sched.c =================================================================== --- linux-2.6.16-staircase.orig/kernel/sched.c 2006-04-04 16:07:35.000000000 +1000 +++ linux-2.6.16-staircase/kernel/sched.c 2006-04-04 16:07:35.000000000 +1000 @@ -661,7 +661,7 @@ static inline void dec_bonus(task_t *p) * As the bonus increases the initial priority starts at a higher "stair" or * priority for longer. */ -static int effective_prio(const task_t *p) +static int effective_prio(task_t *p) { int prio; unsigned int full_slice, used_slice = 0; @@ -681,6 +681,19 @@ static int effective_prio(const task_t * return ISO_PRIO; } + if (idleprio_task(p)) { + if (unlikely(p->flags & (PF_NONSLEEP | PF_FREEZE))) { + /* + * If idleprio is waking up from in-kernel activity + * or being frozen, reschedule at a normal priority + * to begin with. + */ + p->time_slice = p->slice % RR_INTERVAL ? : RR_INTERVAL; + return MIN_USER_PRIO; + } + return IDLEPRIO_PRIO; + } + full_slice = slice(p); if (full_slice > p->slice) used_slice = full_slice - p->slice; @@ -2364,7 +2377,7 @@ void account_user_time(struct task_struc /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); - if (TASK_NICE(p) > 0) + if (TASK_NICE(p) > 0 || idleprio_task(p)) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); @@ -2658,11 +2671,22 @@ static int dependent_sleeper(int this_cp if ((jiffies % DEF_TIMESLICE) > (sd->per_cpu_gain * DEF_TIMESLICE / 100)) ret = 1; - } else + else if (idleprio_task(p)) + ret = 1; + } else { if (smt_curr->static_prio < p->static_prio && !TASK_PREEMPTS_CURR(p, smt_rq) && smt_slice(smt_curr, sd) > slice(p)) ret = 1; + else if (idleprio_task(p) && !idleprio_task(smt_curr) && + smt_curr->slice * sd->per_cpu_gain > + slice(smt_curr)) + /* + * With idleprio tasks they run just the last + * per_cpu_gain percent of the smt task's slice. 
+ */ + ret = 1; + } check_smt_task: if ((!smt_curr->mm && smt_curr != smt_rq->idle) || @@ -2682,10 +2706,15 @@ check_smt_task: if ((jiffies % DEF_TIMESLICE) > (sd->per_cpu_gain * DEF_TIMESLICE / 100)) resched_task(smt_curr); + else if (idleprio_task(smt_curr)) + resched_task(smt_curr); } else { if (TASK_PREEMPTS_CURR(p, smt_rq) && smt_slice(p, sd) > slice(smt_curr)) resched_task(smt_curr); + else if (idleprio_task(smt_curr) && !idleprio_task(p) && + p->slice * sd->per_cpu_gain > slice(p)) + resched_task(smt_curr); else wakeup_busy_runqueue(smt_rq); } @@ -3322,8 +3351,9 @@ void set_user_nice(task_t *p, long nice) * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: */ - if (delta < 0 || (delta > 0 && task_running(rq, p))) - resched_task(rq->curr); + if (delta < 0 || ((delta > 0 || idleprio_task(p)) && + task_running(rq, p))) + resched_task(rq->curr); } out_unlock: task_rq_unlock(rq, &flags); @@ -3515,6 +3545,12 @@ recheck: return -EPERM; } + if (!(p->mm) && policy == SCHED_IDLEPRIO) + /* + * Don't allow kernel threads to be SCHED_IDLEPRIO. + */ + return -EINVAL; + retval = security_task_setscheduler(p, policy, param); if (retval) return retval; @@ -3813,7 +3849,7 @@ asmlinkage long sys_sched_yield(void) schedstat_inc(rq, yld_cnt); current->slice = slice(current); current->time_slice = rr_interval(current); - if (likely(!rt_task(current))) + if (likely(!rt_task(current) && !idleprio_task(current))) newprio = MIN_USER_PRIO; requeue_task(current, rq, newprio); @@ -3968,6 +4004,7 @@ asmlinkage long sys_sched_get_priority_m case SCHED_NORMAL: case SCHED_BATCH: case SCHED_ISO: + case SCHED_IDLEPRIO: ret = 0; break; } @@ -3993,6 +4030,7 @@ asmlinkage long sys_sched_get_priority_m case SCHED_NORMAL: case SCHED_BATCH: case SCHED_ISO: + case SCHED_IDLEPRIO: ret = 0; } return ret;