include/linux/init_task.h | 4 +- include/linux/sched.h | 9 ++++-- kernel/sched.c | 65 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 67 insertions(+), 11 deletions(-) Index: linux-2.6.17-rc5-ck2/include/linux/init_task.h =================================================================== --- linux-2.6.17-rc5-ck2.orig/include/linux/init_task.h 2006-06-04 13:14:33.000000000 +1000 +++ linux-2.6.17-rc5-ck2/include/linux/init_task.h 2006-06-04 13:14:34.000000000 +1000 @@ -85,8 +85,8 @@ extern struct group_info init_groups; .usage = ATOMIC_INIT(2), \ .flags = 0, \ .lock_depth = -1, \ - .prio = MAX_PRIO-20, \ - .static_prio = MAX_PRIO-20, \ + .prio = MAX_PRIO-21, \ + .static_prio = MAX_PRIO-21, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ .mm = NULL, \ Index: linux-2.6.17-rc5-ck2/include/linux/sched.h =================================================================== --- linux-2.6.17-rc5-ck2.orig/include/linux/sched.h 2006-06-04 13:14:33.000000000 +1000 +++ linux-2.6.17-rc5-ck2/include/linux/sched.h 2006-06-04 13:14:34.000000000 +1000 @@ -165,9 +165,10 @@ extern unsigned long weighted_cpuload(co #define SCHED_RR 2 #define SCHED_BATCH 3 #define SCHED_ISO 4 +#define SCHED_IDLEPRIO 5 #define SCHED_MIN 0 -#define SCHED_MAX 4 +#define SCHED_MAX 5 #define SCHED_RANGE(policy) ((policy) >= SCHED_MIN && \ (policy) <= SCHED_MAX) @@ -493,12 +494,14 @@ struct signal_struct { #define MAX_RT_PRIO MAX_USER_RT_PRIO #define ISO_PRIO (MAX_RT_PRIO - 1) -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define MIN_USER_PRIO (MAX_PRIO - 1) +#define MAX_PRIO (MAX_RT_PRIO + 41) +#define MIN_USER_PRIO (MAX_PRIO - 2) +#define IDLEPRIO_PRIO (MAX_PRIO - 1) #define rt_task(p) (unlikely(SCHED_RT((p)->policy))) #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) #define iso_task(p) (unlikely((p)->policy == SCHED_ISO)) +#define idleprio_task(p) (unlikely((p)->policy == SCHED_IDLEPRIO)) /* * Some day this will be a full-fledged user tracking system.. 
Index: linux-2.6.17-rc5-ck2/kernel/sched.c =================================================================== --- linux-2.6.17-rc5-ck2.orig/kernel/sched.c 2006-06-04 13:14:32.000000000 +1000 +++ linux-2.6.17-rc5-ck2/kernel/sched.c 2006-06-04 13:14:34.000000000 +1000 @@ -626,6 +626,12 @@ static void set_load_weight(task_t *p) else #endif p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority); + } else if (idleprio_task(p)) { + /* + * We want idleprio_tasks to have a presence on weighting but + * as small as possible + */ + p->load_weight = 1; } else p->load_weight = TASK_LOAD_WEIGHT(p); } @@ -733,13 +739,24 @@ static inline void slice_overrun(struct } while (unlikely(p->totalrun > ns_slice)); } +static inline int idleprio_suitable(const struct task_struct *p) +{ + return (!p->mutexes_held && + !(p->flags & (PF_FREEZE | PF_NONSLEEP))); +} + +static inline int idleprio(const struct task_struct *p) +{ + return (p->prio == IDLEPRIO_PRIO); +} + /* * effective_prio - dynamic priority dependent on bonus. * The priority normally decreases by one each RR_INTERVAL. * As the bonus increases the initial priority starts at a higher "stair" or * priority for longer. */ -static int effective_prio(const task_t *p) +static int effective_prio(task_t *p) { int prio; unsigned int full_slice, used_slice = 0; @@ -759,6 +776,18 @@ static int effective_prio(const task_t * return ISO_PRIO; } + if (idleprio_task(p)) { + if (unlikely(!idleprio_suitable(p))) { + /* + * Tasks holding a semaphore/mutex or being frozen + * run at MIN_USER_PRIO; see idleprio_suitable(). + */ + p->time_slice = p->slice % RR_INTERVAL ? : RR_INTERVAL; + return MIN_USER_PRIO; + } + return IDLEPRIO_PRIO; + } + full_slice = slice(p); if (full_slice > p->slice) used_slice = full_slice - p->slice; @@ -2578,7 +2607,7 @@ void account_user_time(struct task_struc /* Add user time to cpustat. 
*/ tmp = cputime_to_cputime64(cputime); - if (TASK_NICE(p) > 0) + if (TASK_NICE(p) > 0 || idleprio_task(p)) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); @@ -2716,6 +2745,9 @@ void scheduler_tick(void) goto out_unlock; } + if (idleprio_task(p) && !idleprio(p) && idleprio_suitable(p)) + set_tsk_need_resched(p); + debit = ns_diff(rq->timestamp_last_tick, p->timestamp); p->ns_debit += debit; if (p->ns_debit < NSJIFFY) @@ -2859,11 +2891,24 @@ static int dependent_sleeper(int this_cp if ((jiffies % DEF_TIMESLICE) > (sd->per_cpu_gain * DEF_TIMESLICE / 100)) ret = 1; - } else + else if (idleprio(p)) + ret = 1; + } else { if (smt_curr->static_prio < p->static_prio && !TASK_PREEMPTS_CURR(p, smt_rq) && smt_slice(smt_curr, sd) > slice(p)) ret = 1; + else if (idleprio(p) && !idleprio_task(smt_curr) && + smt_curr->slice * sd->per_cpu_gain > + slice(smt_curr)) { + /* + * Hold idleprio tasks back while smt_curr's + * remaining slice * per_cpu_gain exceeds its + * full slice: they only get the tail end. + */ + ret = 1; + } + } unlock: spin_unlock(&smt_rq->lock); @@ -3483,8 +3528,9 @@ void set_user_nice(task_t *p, long nice) * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: */ - if (delta < 0 || (delta > 0 && task_running(rq, p))) - resched_task(rq->curr); + if (delta < 0 || ((delta > 0 || idleprio_task(p)) && + task_running(rq, p))) + resched_task(rq->curr); } out_unlock: task_rq_unlock(rq, &flags); @@ -3677,6 +3723,11 @@ recheck: return -EPERM; } + if (!(p->mm) && policy == SCHED_IDLEPRIO) { + /* Don't allow kernel threads to be SCHED_IDLEPRIO. 
*/ + return -EINVAL; + } + retval = security_task_setscheduler(p, policy, param); if (retval) return retval; @@ -3975,7 +4026,7 @@ asmlinkage long sys_sched_yield(void) schedstat_inc(rq, yld_cnt); current->slice = slice(current); current->time_slice = rr_interval(current); - if (likely(!rt_task(current))) + if (likely(!rt_task(current) && !idleprio(current))) newprio = MIN_USER_PRIO; requeue_task(current, rq, newprio); @@ -4130,6 +4181,7 @@ asmlinkage long sys_sched_get_priority_m case SCHED_NORMAL: case SCHED_BATCH: case SCHED_ISO: + case SCHED_IDLEPRIO: ret = 0; break; } @@ -4155,6 +4207,7 @@ asmlinkage long sys_sched_get_priority_m case SCHED_NORMAL: case SCHED_BATCH: case SCHED_ISO: + case SCHED_IDLEPRIO: ret = 0; } return ret;