 include/linux/init_task.h |    4 +--
 include/linux/sched.h     |    6 +++--
 kernel/sched.c            |   55 +++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 53 insertions(+), 12 deletions(-)

Index: linux-2.6.15-rc5-ck1/include/linux/init_task.h
===================================================================
--- linux-2.6.15-rc5-ck1.orig/include/linux/init_task.h
+++ linux-2.6.15-rc5-ck1/include/linux/init_task.h
@@ -83,8 +83,8 @@ extern struct group_info init_groups;
 	.usage		= ATOMIC_INIT(2),				\
 	.flags		= 0,						\
 	.lock_depth	= -1,						\
-	.prio		= MAX_PRIO-20,					\
-	.static_prio	= MAX_PRIO-20,					\
+	.prio		= MAX_PRIO-21,					\
+	.static_prio	= MAX_PRIO-21,					\
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
 	.mm		= NULL,						\
Index: linux-2.6.15-rc5-ck1/include/linux/sched.h
===================================================================
--- linux-2.6.15-rc5-ck1.orig/include/linux/sched.h
+++ linux-2.6.15-rc5-ck1/include/linux/sched.h
@@ -158,9 +158,10 @@ extern unsigned long nr_iowait(void);
 #define SCHED_NORMAL		0
 #define SCHED_FIFO		1
 #define SCHED_RR		2
+#define SCHED_BATCH		3
 
 #define SCHED_MIN		0
-#define SCHED_MAX		2
+#define SCHED_MAX		3
 
 #define SCHED_RANGE(policy)	((policy) >= SCHED_MIN && \
 					(policy) <= SCHED_MAX)
@@ -496,9 +497,10 @@ struct signal_struct {
 #define MAX_USER_RT_PRIO	100
 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
 
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
+#define MAX_PRIO		(MAX_RT_PRIO + 41)
 
 #define rt_task(p)		(unlikely((p)->prio < MAX_RT_PRIO))
+#define batch_task(p)		((p)->policy == SCHED_BATCH)
 
 /*
  * Some day this will be a full-fledged user tracking system..
Index: linux-2.6.15-rc5-ck1/kernel/sched.c
===================================================================
--- linux-2.6.15-rc5-ck1.orig/kernel/sched.c
+++ linux-2.6.15-rc5-ck1/kernel/sched.c
@@ -709,6 +709,20 @@ static int effective_prio(task_t *p)
 
 	if (rt_task(p))
 		return p->prio;
+	if (batch_task(p)) {
+		if (unlikely(p->flags & (PF_NONSLEEP | PF_FREEZE))) {
+			/*
+			 * If a batch task is waking up from in-kernel activity
+			 * or is being frozen, reschedule it at a normal
+			 * priority to begin with.
+			 */
+			p->flags |= PF_YIELDED;
+			p->time_slice = p->slice % RR_INTERVAL() ? :
+				RR_INTERVAL();
+			return MAX_PRIO - 2;
+		}
+		return MAX_PRIO - 1;
+	}
 
 	full_slice = slice(p);
 	if (full_slice > p->slice)
@@ -721,8 +735,8 @@ static int effective_prio(task_t *p)
 
 	rr = rr_interval(p);
 	prio += used_slice / rr;
-	if (prio >= MAX_PRIO - 1)
-		prio = MAX_PRIO - 1;
+	if (prio >= MAX_PRIO - 2)
+		prio = MAX_PRIO - 2;
 	return prio;
 }
 
@@ -2444,7 +2458,7 @@ void account_user_time(struct task_struc
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (TASK_NICE(p) > 0 || batch_task(p))
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -2705,11 +2719,22 @@ static inline int dependent_sleeper(int 
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					ret = 1;
-		} else
+			else if (batch_task(p))
+				ret = 1;
+		} else {
 			if (smt_curr->static_prio < p->static_prio &&
 				!TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(smt_curr, sd) > slice(p))
 					ret = 1;
+			else if (batch_task(p) && !batch_task(smt_curr) &&
+				smt_curr->slice * sd->per_cpu_gain >
+				slice(smt_curr))
+			/*
+			 * Batch tasks only run during the last
+			 * per_cpu_gain percent of the smt task's slice.
+			 */
+				ret = 1;
+		}
 
 check_smt_task:
 		if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
@@ -2729,10 +2754,15 @@ check_smt_task:
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					resched_task(smt_curr);
+			else if (batch_task(smt_curr))
+				resched_task(smt_curr);
 		} else {
 			if (TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(p, sd) > slice(smt_curr))
 					resched_task(smt_curr);
+			else if (batch_task(smt_curr) && !batch_task(p) &&
+				p->slice * sd->per_cpu_gain > slice(p))
+					resched_task(smt_curr);
 			else
 				wakeup_busy_runqueue(smt_rq);
 		}
@@ -3385,8 +3415,9 @@ void set_user_nice(task_t *p, long nice)
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
 		 */
-		if (delta < 0 || (delta > 0 && task_running(rq, p)))
-			resched_task(rq->curr);
+		if (delta < 0 || ((delta > 0 || batch_task(p)) &&
+			task_running(rq, p)))
+				resched_task(rq->curr);
 	}
 out_unlock:
 	task_rq_unlock(rq, &flags);
@@ -3564,6 +3595,12 @@ recheck:
 			return -EPERM;
 	}
 
+	if (!(p->mm) && policy == SCHED_BATCH)
+		/*
+		 * Don't allow kernel threads to be SCHED_BATCH.
+		 */
+		return -EINVAL;
+
 	retval = security_task_setscheduler(p, policy, param);
 	if (retval)
 		return retval;
@@ -3858,9 +3895,9 @@ asmlinkage long sys_sched_yield(void)
 	schedstat_inc(rq, yld_cnt);
 	current->slice = slice(current);
 	current->time_slice = rr_interval(current);
-	if (likely(!rt_task(current))) {
+	if (likely(!rt_task(current) && !batch_task(current))) {
 		current->flags |= PF_YIELDED;
-		newprio = MAX_PRIO - 1;
+		newprio = MAX_PRIO - 2;
 	}
 
 	if (newprio != current->prio) {
@@ -4016,6 +4053,7 @@ asmlinkage long sys_sched_get_priority_m
 		ret = MAX_USER_RT_PRIO-1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 		break;
 	}
@@ -4039,6 +4077,7 @@ asmlinkage long sys_sched_get_priority_m
 		ret = 1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 	}
 	return ret;

