 include/linux/init_task.h |    4 +-
 include/linux/sched.h     |    9 +++--
 kernel/sched.c            |   70 ++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 72 insertions(+), 11 deletions(-)

Index: linux-2.6.17-rc5-ck1/include/linux/init_task.h
===================================================================
--- linux-2.6.17-rc5-ck1.orig/include/linux/init_task.h	2006-05-25 12:57:51.000000000 +1000
+++ linux-2.6.17-rc5-ck1/include/linux/init_task.h	2006-05-25 12:57:51.000000000 +1000
@@ -85,8 +85,8 @@ extern struct group_info init_groups;
 	.usage		= ATOMIC_INIT(2),				\
 	.flags		= 0,						\
 	.lock_depth	= -1,						\
-	.prio		= MAX_PRIO-20,					\
-	.static_prio	= MAX_PRIO-20,					\
+	.prio		= MAX_PRIO-21,					\
+	.static_prio	= MAX_PRIO-21,					\
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
 	.mm		= NULL,						\
Index: linux-2.6.17-rc5-ck1/include/linux/sched.h
===================================================================
--- linux-2.6.17-rc5-ck1.orig/include/linux/sched.h	2006-05-25 12:57:51.000000000 +1000
+++ linux-2.6.17-rc5-ck1/include/linux/sched.h	2006-05-25 12:57:51.000000000 +1000
@@ -165,9 +165,10 @@ extern unsigned long weighted_cpuload(co
 #define SCHED_RR		2
 #define SCHED_BATCH		3
 #define SCHED_ISO		4
+#define SCHED_IDLEPRIO		5
 
 #define SCHED_MIN		0
-#define SCHED_MAX		4
+#define SCHED_MAX		5
 
 #define SCHED_RANGE(policy)	((policy) >= SCHED_MIN && \
 					(policy) <= SCHED_MAX)
@@ -493,12 +494,14 @@ struct signal_struct {
 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
 #define ISO_PRIO		(MAX_RT_PRIO - 1)
 
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
-#define MIN_USER_PRIO		(MAX_PRIO - 1)
+#define MAX_PRIO		(MAX_RT_PRIO + 41)
+#define MIN_USER_PRIO		(MAX_PRIO - 2)
+#define IDLEPRIO_PRIO		(MAX_PRIO - 1)
 
 #define rt_task(p)		(unlikely(SCHED_RT((p)->policy)))
 #define batch_task(p)		(unlikely((p)->policy == SCHED_BATCH))
 #define iso_task(p)		(unlikely((p)->policy == SCHED_ISO))
+#define idleprio_task(p)	(unlikely((p)->policy == SCHED_IDLEPRIO))
 
 /*
  * Some day this will be a full-fledged user tracking system..
Index: linux-2.6.17-rc5-ck1/kernel/sched.c
===================================================================
--- linux-2.6.17-rc5-ck1.orig/kernel/sched.c	2006-05-25 12:57:51.000000000 +1000
+++ linux-2.6.17-rc5-ck1/kernel/sched.c	2006-05-25 13:02:13.000000000 +1000
@@ -627,6 +627,12 @@ static void set_load_weight(task_t *p)
 		else
 #endif
 			p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority);
+	} else if (idleprio_task(p)) {
+		/*
+		 * We want idleprio_tasks to have a presence on weighting but
+		 * as small as possible
+		 */
+		p->load_weight = 1;
 	} else
 		p->load_weight = TASK_LOAD_WEIGHT(p);
 }
@@ -734,13 +740,24 @@ static inline void slice_overrun(struct 
 	} while (unlikely(p->totalrun > ns_slice));
 }
 
+static inline int idleprio_suitable(const struct task_struct *p)
+{
+	/* Not suitable while mutexes_held or PF_FREEZE/PF_NONSLEEP is set. */
+	return !(p->mutexes_held || (p->flags & (PF_FREEZE | PF_NONSLEEP)));
+}
+
+static inline int idleprio(const struct task_struct *p)
+{
+	return p->prio == IDLEPRIO_PRIO;	/* 1 iff p->prio is the idleprio level */
+}
+
 /*
  * effective_prio - dynamic priority dependent on bonus.
  * The priority normally decreases by one each RR_INTERVAL.
  * As the bonus increases the initial priority starts at a higher "stair" or
  * priority for longer.
  */
-static int effective_prio(const task_t *p)
+static int effective_prio(task_t *p)
 {
 	int prio;
 	unsigned int full_slice, used_slice = 0;
@@ -760,6 +777,18 @@ static int effective_prio(const task_t *
 			return ISO_PRIO;
 	}
 
+	if (idleprio_task(p)) {
+		if (unlikely(!idleprio_suitable(p))) {
+			/*
+			 * If idleprio tasks are holding a semaphore, mutex,
+			 * or being frozen, schedule at a normal priority.
+			 */
+			p->time_slice = p->slice % RR_INTERVAL ? : RR_INTERVAL;
+			return MIN_USER_PRIO;
+		}
+		return IDLEPRIO_PRIO;
+	}
+
 	full_slice = slice(p);
 	if (full_slice > p->slice)
 		used_slice = full_slice - p->slice;
@@ -2580,7 +2609,7 @@ void account_user_time(struct task_struc
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (TASK_NICE(p) > 0 || idleprio_task(p))
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -2718,6 +2747,9 @@ void scheduler_tick(void)
 		goto out_unlock;
 	}
 
+	if (idleprio_task(p) && !idleprio(p) && idleprio_suitable(p))
+		set_tsk_need_resched(p);
+
 	debit = ns_diff(rq->timestamp_last_tick, p->timestamp);
 	p->ns_debit += debit;
 	if (p->ns_debit < NSJIFFY)
@@ -2875,11 +2907,24 @@ static int dependent_sleeper(int this_cp
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					ret = 1;
-		} else
+			else if (idleprio(p))
+				ret = 1;
+		} else {
 			if (smt_curr->static_prio < p->static_prio &&
 				!TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(smt_curr, sd) > slice(p))
 					ret = 1;
+			else if (idleprio(p) && !idleprio_task(smt_curr) &&
+				smt_curr->slice * sd->per_cpu_gain >
+				slice(smt_curr)) {
+				/*
+				 * With idleprio tasks they run just the last
+				 * per_cpu_gain percent of the smt task's
+				 * slice.
+				 */
+				ret = 1;
+			}
+		}
 
 check_smt_task:
 		if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
@@ -2899,10 +2944,15 @@ check_smt_task:
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					resched_task(smt_curr);
+			else if (idleprio(smt_curr))
+				resched_task(smt_curr);
 		} else {
 			if (TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(p, sd) > slice(smt_curr))
 					resched_task(smt_curr);
+			else if (idleprio(smt_curr) && !idleprio_task(p) &&
+				p->slice * sd->per_cpu_gain > slice(p))
+					resched_task(smt_curr);
 			else
 				wakeup_busy_runqueue(smt_rq);
 		}
@@ -3541,8 +3591,9 @@ void set_user_nice(task_t *p, long nice)
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
 		 */
-		if (delta < 0 || (delta > 0 && task_running(rq, p)))
-			resched_task(rq->curr);
+		if (delta < 0 || ((delta > 0 || idleprio_task(p)) &&
+			task_running(rq, p)))
+				resched_task(rq->curr);
 	}
 out_unlock:
 	task_rq_unlock(rq, &flags);
@@ -3735,6 +3786,11 @@ recheck:
 			return -EPERM;
 	}
 
+	if (!(p->mm) && policy == SCHED_IDLEPRIO) {
+		/* Don't allow kernel threads to be SCHED_IDLEPRIO. */
+		return -EINVAL;
+	}
+
 	retval = security_task_setscheduler(p, policy, param);
 	if (retval)
 		return retval;
@@ -4033,7 +4089,7 @@ asmlinkage long sys_sched_yield(void)
 	schedstat_inc(rq, yld_cnt);
 	current->slice = slice(current);
 	current->time_slice = rr_interval(current);
-	if (likely(!rt_task(current)))
+	if (likely(!rt_task(current) && !idleprio(current)))
 		newprio = MIN_USER_PRIO;
 
 	requeue_task(current, rq, newprio);
@@ -4188,6 +4244,7 @@ asmlinkage long sys_sched_get_priority_m
 	case SCHED_NORMAL:
 	case SCHED_BATCH:
 	case SCHED_ISO:
+	case SCHED_IDLEPRIO:
 		ret = 0;
 		break;
 	}
@@ -4213,6 +4270,7 @@ asmlinkage long sys_sched_get_priority_m
 	case SCHED_NORMAL:
 	case SCHED_BATCH:
 	case SCHED_ISO:
+	case SCHED_IDLEPRIO:
 		ret = 0;
 	}
 	return ret;
