---
 include/linux/init_task.h   |    4 
 include/linux/kernel_stat.h |    3 
 include/linux/list.h        |   42 ----
 include/linux/sched.h       |   11 -
 kernel/sched.c              |  381 +++++++++++++++-----------------------------
 kernel/timer.c              |    5 
 6 files changed, 148 insertions(+), 298 deletions(-)

Index: linux-2.6.21-rc4-rsdl/include/linux/kernel_stat.h
===================================================================
--- linux-2.6.21-rc4-rsdl.orig/include/linux/kernel_stat.h	2006-09-21 19:54:58.000000000 +1000
+++ linux-2.6.21-rc4-rsdl/include/linux/kernel_stat.h	2007-03-26 10:14:31.000000000 +1000
@@ -16,11 +16,14 @@
 
 struct cpu_usage_stat {
 	cputime64_t user;
+	cputime64_t user_ns;
 	cputime64_t nice;
+	cputime64_t nice_ns;
 	cputime64_t system;
 	cputime64_t softirq;
 	cputime64_t irq;
 	cputime64_t idle;
+	cputime64_t idle_ns;
 	cputime64_t iowait;
 	cputime64_t steal;
 };
Index: linux-2.6.21-rc4-rsdl/include/linux/sched.h
===================================================================
--- linux-2.6.21-rc4-rsdl.orig/include/linux/sched.h	2007-03-26 10:14:05.000000000 +1000
+++ linux-2.6.21-rc4-rsdl/include/linux/sched.h	2007-03-26 10:15:10.000000000 +1000
@@ -825,18 +825,15 @@ struct task_struct {
 
 	unsigned long policy;
 	cpumask_t cpus_allowed;
-	unsigned int time_slice;
+	int time_slice;
 	/*
 	 * How much this task is entitled to run at the current priority
 	 * before being requeued at a lower priority.
 	 */
+	int quota;
+	/* How much this task receives at each priority level */
 	unsigned int first_time_slice;
 	/* Is this the very first time_slice this task has ever run. */
-	unsigned int quota;
-	/*
-	 * How much this task contributes to the current priority queue
-	 * length
-	 */
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	struct sched_info sched_info;
@@ -891,7 +888,7 @@ struct task_struct {
 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
 
 	unsigned long rt_priority;
-	cputime_t utime, stime;
+	cputime_t utime, utime_ns, stime;
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time;
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
Index: linux-2.6.21-rc4-rsdl/kernel/sched.c
===================================================================
--- linux-2.6.21-rc4-rsdl.orig/kernel/sched.c	2007-03-26 10:14:06.000000000 +1000
+++ linux-2.6.21-rc4-rsdl/kernel/sched.c	2007-03-26 10:15:31.000000000 +1000
@@ -16,8 +16,8 @@
  *		by Davide Libenzi, preemptible kernel bits by Robert Love.
  *  2003-09-03	Interactivity tuning by Con Kolivas.
  *  2004-04-02	Scheduler domains code by Nick Piggin
- *  2007-03-02	Rotating Staircase deadline scheduling policy by Con Kolivas
- *		RSDL v0.33
+ *  2007-03-02	Staircase deadline scheduling policy by Con Kolivas
+ *		SD v0.34
  */
 
 #include <linux/mm.h>
@@ -87,11 +87,20 @@ unsigned long long __attribute__((weak))
 #define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
 #define SCHED_PRIO(p)		((p)+MAX_RT_PRIO)
 
+/*
+ * Some helpers for converting nanosecond timing to jiffy resolution
+ */
+#define NS_TO_JIFFIES(TIME)	((TIME) / (1000000000 / HZ))
+#define JIFFIES_TO_NS(TIME)	((TIME) * (1000000000 / HZ))
+#define JIFFY_NS		JIFFIES_TO_NS(1)
+#define NS_TO_MS(TIME)		((TIME) / 1000000)
+#define MS_TO_NS(TIME)		((TIME) * 1000000)
+
 #define TASK_PREEMPTS_CURR(p, curr)	((p)->prio < (curr)->prio)
 
 /*
  * This is the time all tasks within the same priority round robin.
- * Set to a minimum of 8ms. Scales with number of cpus and rounds with HZ.
+ * Value is in ms and set to a minimum of 8ms. Scales with number of cpus.
  * Tunable via /proc interface.
  */
 int rr_interval __read_mostly;
@@ -126,8 +135,10 @@ struct prio_array {
 
 	DECLARE_BITMAP(prio_bitmap, MAX_PRIO + 1);
 	/*
-	 * The bitmap of priorities queued; The dynamic bits can have
-	 * false positives. Include 1 bit for delimiter.
+	 * The bitmap of priorities queued for this array. While the expired
+	 * array will never have realtime tasks on it, it is simpler to have
+	 * equal sized bitmaps for a cheap array swap. Include 1 bit for
+	 * delimiter.
 	 */
 };
 
@@ -166,12 +177,6 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
-	long prio_quota[PRIO_RANGE];
-	/*
-	 * The quota of ticks the runqueue runs at each dynamic priority
-	 * before cycling to the next priority.
-	 */
-
 	struct prio_array *active, *expired, arrays[2];
 	unsigned long *dyn_bitmap, *exp_bitmap;
 
@@ -626,26 +631,13 @@ static inline int task_queued(struct tas
 	return !list_empty(&task->run_list);
 }
 
-static inline void set_task_entitlement(struct task_struct *p)
-{
-	__set_bit(USER_PRIO(p->prio), p->bitmap);
-	p->time_slice = p->quota;
-}
-
-/*
- * There is no specific hard accounting. The dynamic bits can have
- * false positives. rt_tasks can only be on the active queue.
- */
 static inline void set_dynamic_bit(struct task_struct *p, struct rq *rq)
 {
 	__set_bit(p->prio, p->array->prio_bitmap);
 }
 
 /*
- * Removing from a runqueue. While we don't know with absolute certainty
- * where this task really is, the p->array and p->prio are very likely
- * so we check that queue to see if we can clear that bit to take some
- * load off finding false positives in next_dynamic_task().
+ * Removing from a runqueue.
  */
 static void dequeue_task(struct task_struct *p, struct rq *rq)
 {
@@ -662,6 +654,7 @@ static inline void task_new_array(struct
 {
 	bitmap_zero(p->bitmap, PRIO_RANGE);
 	p->rotation = rq->prio_rotation;
+	p->time_slice = p->quota;
 }
 
 /* Find the first slot from the relevant prio_matrix entry */
@@ -671,13 +664,6 @@ static inline int first_prio_slot(struct
 		prio_matrix[USER_PRIO(p->static_prio)], PRIO_RANGE));
 }
 
-/* Is a dynamic_prio part of the allocated slots for this static_prio */
-static inline int entitled_slot(int static_prio, int dynamic_prio)
-{
-	return !test_bit(USER_PRIO(dynamic_prio),
-		prio_matrix[USER_PRIO(static_prio)]);
-}
-
 /*
  * Find the first unused slot by this task that is also in its prio_matrix
  * level. Ensure that the prio_level is not unnecessarily low by checking
@@ -706,20 +692,19 @@ static void queue_expired(struct task_st
 	task_new_array(p, rq);
 	p->prio = p->normal_prio = first_prio_slot(p);
 	p->time_slice = p->quota;
+	p->rotation = rq->prio_rotation;
 }
 
-#define rq_quota(rq, prio)	((rq)->prio_quota[USER_PRIO(prio)])
-
 /*
- * recalc_task_prio determines what prio a non rt_task will be
+ * recalc_task_prio determines what priority a non rt_task will be
  * queued at. If the task has already been running during this runqueue's
  * major rotation (rq->prio_rotation) then it continues at the same
  * priority if it has tick entitlement left. If it does not have entitlement
  * left, it finds the next priority slot according to its nice value that it
  * has not extracted quota from. If it has not run during this major
- * rotation, it starts at its static priority and has its bitmap quota
+ * rotation, it starts at the next_entitled_slot and has its bitmap quota
  * cleared. If it does not have any slots left it has all its slots reset and
- * is queued on the expired at its static priority.
+ * is queued on the expired at its first_prio_slot.
  */
 static void recalc_task_prio(struct task_struct *p, struct rq *rq)
 {
@@ -728,8 +713,9 @@ static void recalc_task_prio(struct task
 
 	if (p->rotation == rq->prio_rotation) {
 		if (p->array == array) {
-			if (p->time_slice && rq_quota(rq, p->prio))
+			if (p->time_slice > 0)
 				return;
+			p->time_slice = p->quota;
 		} else if (p->array == rq->expired) {
 			queue_expired(p, rq);
 			return;
@@ -743,17 +729,14 @@ static void recalc_task_prio(struct task
 		queue_expired(p, rq);
 		return;
 	}
-	rq_quota(rq, queue_prio) += p->quota;
 	p->prio = p->normal_prio = queue_prio;
 	p->array = array;
-	set_task_entitlement(p);
+	__set_bit(USER_PRIO(p->prio), p->bitmap);
 }
 
 /*
  * Adding to a runqueue. The dynamic priority queue that it is added to is
- * determined by the priority rotation of the runqueue it is being added to
- * and the quota still available in the task in p->bitmap and p->time_slice
- * (see recalc_task_prio above).
+ * determined by recalc_task_prio() above.
  */
 static inline void __enqueue_task(struct task_struct *p, struct rq *rq)
 {
@@ -808,13 +791,14 @@ static void requeue_task(struct task_str
  * task_timeslice - the total duration a task can run during one major
  * rotation.
  */
-static inline unsigned int task_timeslice(struct task_struct *p)
+static inline int task_timeslice(struct task_struct *p)
 {
-	unsigned int slice, rr;
+	int slice, rr;
 
 	slice = rr = p->quota;
+	slice = rr = NS_TO_MS(rr);	/* sum in ms: ns would overflow an int */
 	if (!rt_task(p))
 		slice += (PRIO_RANGE - 1 - TASK_USER_PRIO(p)) * rr;
-	return slice;
+	return slice * HZ / 1000 ? : 1;	/* jiffies, never less than one */
 }
 
@@ -921,22 +905,24 @@ static int effective_prio(struct task_st
 }
 
 /*
- * All tasks have quotas based on rr_interval. From nice 0 to 19 they are
- * all equal to it and below zero they get exponentially larger making their
- * effective quota significantly larger. rt tasks all get rr_interval.
- * ie nice -6..19 = rr_interval. nice -10 = 2.5 * rr_interval
- * nice -20 = 10 * rr_interval. This makes the ratios between -20 and 0
- * similar to the ratios between 0 and +19.
+ * All tasks have quotas based on rr_interval. RT tasks all get rr_interval.
+ * From nice 1 to 19 the quota is halved, but only if the halved value is
+ * still at least one tick. Below nice -6 they get progressively larger.
+ * ie nice -6..0 = rr_interval. nice -10 = 2.5 * rr_interval
+ * nice -20 = 10 * rr_interval. nice 1-19 = rr_interval / 2.
  */
 static unsigned int rr_quota(struct task_struct *p)
 {
 	int nice = TASK_NICE(p), rr = rr_interval;
 
-	if (nice < -6 && !rt_task(p)) {
-		rr *= nice * nice;
-		rr /= 40;
+	if (!rt_task(p)) {
+		if (nice < -6) {
+			rr *= nice * nice;
+			rr /= 40;
+		} else if (nice > 0 && (rr * HZ / 1000 / 2) > 0)
+			rr /= 2;
 	}
-	return rr;
+	return MS_TO_NS(rr);
 }
 
 /*
@@ -1550,7 +1536,7 @@ int fastcall wake_up_state(struct task_s
 	return try_to_wake_up(p, state, 0);
 }
 
-static void task_running_tick(struct rq *rq, struct task_struct *p, int tick);
+static void task_running_tick(struct rq *rq, struct task_struct *p);
 /*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
@@ -1597,7 +1583,9 @@ void fastcall sched_fork(struct task_str
 	 * resulting in more scheduling fairness.
 	 */
 	local_irq_disable();
-	p->time_slice = (current->time_slice + 1) >> 1;
+	if (unlikely(current->time_slice < 2))	/* halves must stay >= 1ns */
+		current->time_slice = 2;
+	p->time_slice = current->time_slice >> 1;
 	/*
 	 * The remainder of the first timeslice might be recovered by
 	 * the parent if the child exits early enough.
@@ -1605,15 +1593,6 @@ void fastcall sched_fork(struct task_str
 	p->first_time_slice = 1;
 	current->time_slice >>= 1;
 	p->timestamp = sched_clock();
-	if (!current->time_slice) {
-		/*
-		 * This case happens when the parent has only a single jiffy
-		 * left from its timeslice. Taking the runqueue lock is not
-		 * a problem.
-		 */
-		current->time_slice = 1;
-		task_running_tick(cpu_rq(cpu), current, 0);
-	}
 	local_irq_enable();
 out:
 	put_cpu();
@@ -2044,6 +2023,8 @@ static inline void enqueue_pulled_task(s
 				queue_expired(p, rq);
 				goto out_queue;
 			}
+			if (p->time_slice < 0)
+				task_new_array(p, rq);
 		} else
 			task_new_array(p, rq);
 	}
@@ -2052,7 +2033,6 @@ static inline void enqueue_pulled_task(s
 		queue_expired(p, rq);
 		goto out_queue;
 	}
-	rq_quota(rq, queue_prio) += p->quota;
 	p->prio = queue_prio;
 out_queue:
 	p->normal_prio = p->prio;
@@ -2998,8 +2978,63 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 static inline void
 update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
 {
-	p->sched_time += now - p->last_ran;
-	p->last_ran = rq->most_recent_timestamp = now;
+	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	cputime64_t time_diff;
+
+	/* Sanity check. It should never go backwards or ruin accounting */
+	if (unlikely(now < p->last_ran))
+		goto out_set;
+	/* All the userspace visible cpu accounting is done here */
+	time_diff = now - p->last_ran;
+	p->sched_time += time_diff;
+	if (p != rq->idle) {
+		cputime_t utime_diff = time_diff;
+
+		if (TASK_NICE(p) > 0) {
+			cpustat->nice_ns = cputime64_add(cpustat->nice_ns,
+							 time_diff);
+			if (cpustat->nice_ns > JIFFY_NS) {
+				cpustat->nice_ns =
+					cputime64_sub(cpustat->nice_ns,
+					JIFFY_NS);
+				cpustat->nice =
+					cputime64_add(cpustat->nice, 1);
+			}
+		} else {
+			cpustat->user_ns = cputime64_add(cpustat->user_ns,
+							 time_diff);
+			if (cpustat->user_ns > JIFFY_NS) {
+				cpustat->user_ns =
+					cputime64_sub(cpustat->user_ns,
+					JIFFY_NS);
+				cpustat ->user =
+					cputime64_add(cpustat->user, 1);
+			}
+		}
+		p->utime_ns = cputime_add(p->utime_ns, utime_diff);
+		if (p->utime_ns > JIFFY_NS) {
+			p->utime_ns = cputime_sub(p->utime_ns, JIFFY_NS);
+			p->utime = cputime_add(p->utime,
+					       jiffies_to_cputime(1));
+		}
+		/* cpu scheduler quota accounting is performed here */
+		if (p->policy != SCHED_FIFO)
+			p->time_slice -= time_diff;
+
+	} else {
+		cpustat->idle_ns = cputime64_add(cpustat->idle_ns, time_diff);
+		if (cpustat->idle_ns > JIFFY_NS) {
+			cpustat->idle_ns = cputime64_sub(cpustat->idle_ns,
+							 JIFFY_NS);
+			cpustat->idle = cputime64_add(cpustat->idle, 1);
+		}
+	}
+out_set:
+	/*
+	 * We still need to set these values even if the clock appeared to
+	 * go backwards in case _this_ is the correct timestamp.
+	 */
+	rq->most_recent_timestamp = p->last_ran = now;
 }
 
 /*
@@ -3064,8 +3099,6 @@ void account_system_time(struct task_str
 		cpustat->system = cputime64_add(cpustat->system, tmp);
 	else if (atomic_read(&rq->nr_iowait) > 0)
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
-	else
-		cpustat->idle = cputime64_add(cpustat->idle, tmp);
 	/* Account for system time used */
 	acct_update_integrals(p);
 }
@@ -3100,7 +3133,6 @@ static void task_expired_entitlement(str
 	struct prio_array *old_array;
 	int old_prio;
 
-	set_tsk_need_resched(p);
 	if (unlikely(p->first_time_slice))
 		p->first_time_slice = 0;
 	if (rt_task(p)) {
@@ -3114,122 +3146,23 @@ static void task_expired_entitlement(str
 	requeue_task(p, rq, old_array, old_prio);
 }
 
-/*
- * A major priority rotation occurs when all priority quotas for this array
- * have been exhausted.
- */
-static inline void major_prio_rotation(struct rq *rq)
-{
-	struct prio_array *new_array = rq->expired;
-
-	rq->expired = rq->active;
-	rq->active = new_array;
-	rq->exp_bitmap = rq->expired->prio_bitmap;
-	rq->dyn_bitmap = rq->active->prio_bitmap;
-	rq->best_static_prio = MAX_PRIO - 1;
-	rq->prio_rotation++;
-}
-
-/*
- * This is the heart of the virtual deadline priority management.
- *
- * We have used up the quota allocated to this priority level so we rotate
- * the prio_level of the runqueue to the next lowest priority. We merge any
- * remaining tasks at this level current_queue with the next priority and
- * reset this level's queue. MAX_PRIO - 1 is a special case where we perform
- * a major rotation.
- */
-static inline void rotate_runqueue_priority(struct rq *rq)
+static void task_running_tick(struct rq *rq, struct task_struct *p)
 {
-	int new_prio_level;
-	struct prio_array *array;
-
-	/*
-	 * Make sure we don't have tasks still on the active array that
-	 * haven't run due to not preempting a lower priority task. This can
-	 * happen on list merging or smp balancing.
-	 */
-	if (unlikely(sched_find_first_bit(rq->dyn_bitmap) < rq->prio_level))
-		return;
-
-	array = rq->active;
-	if (rq->prio_level > MAX_PRIO - 2) {
-		/* Major rotation required */
-		struct prio_array *new_queue = rq->expired;
-
-		/*
-		 * On a major rotation we move everything remaining to best
-		 * priority on the new array. The priority matrix bitmap will
-		 * ensure tasks only get the slots each static priority
-		 * deserves.
-		 */
-		new_prio_level = MAX_RT_PRIO;
-		if (!list_empty(array->queue + rq->prio_level)) {
-			list_splice_tail_init(array->queue + rq->prio_level,
-					 new_queue->queue + new_prio_level);
-		}
-		memset(rq->prio_quota, 0, ARRAY_SIZE(rq->prio_quota));
-		major_prio_rotation(rq);
-	} else {
-		/* Minor rotation */
-		new_prio_level = rq->prio_level + 1;
-		__clear_bit(rq->prio_level, rq->dyn_bitmap);
-		if (!list_empty(array->queue + rq->prio_level)) {
-			list_splice_tail_init(array->queue + rq->prio_level,
-					 array->queue + new_prio_level);
-			__set_bit(new_prio_level, rq->dyn_bitmap);
-		}
-		rq_quota(rq, rq->prio_level) = 0;
-	}
-	rq->prio_level = new_prio_level;
-	/*
-	 * As we are merging to a prio_level that may not have anything in
-	 * its quota we add 1 to ensure the tasks get to run in schedule() to
-	 * add their quota to it.
-	 */
-	rq_quota(rq, new_prio_level) += 1;
-}
-
-static void task_running_tick(struct rq *rq, struct task_struct *p, int tick)
-{
-	if (unlikely(!task_queued(p))) {
-		/* Task has expired but was not scheduled yet */
-		set_tsk_need_resched(p);
-		return;
-	}
 	/* SCHED_FIFO tasks never run out of timeslice. */
 	if (unlikely(p->policy == SCHED_FIFO))
 		return;
 
+	if (p->time_slice > 0)
+		return;
 	spin_lock(&rq->lock);
-	/*
-	 * Accounting is performed by both the task and the runqueue. This
-	 * allows frequently sleeping tasks to get their proper quota of
-	 * cpu as the runqueue will have their quota still available at
-	 * the appropriate priority level. It also means frequently waking
-	 * tasks that might miss the scheduler_tick() will get forced down
-	 * priority regardless.
-	 */
-	if (!--p->time_slice)
-		task_expired_entitlement(rq, p);
-	/*
-	 * If we're actually calling this function not in a scheduler_tick
-	 * we are doing so to fix accounting across fork and should not be
-	 * deducting anything from rq_quota.
-	 */
-	if (!tick)
-		goto out_unlock;
-	/*
-	 * We only employ the deadline mechanism if we run over the quota.
-	 * It allows aliasing problems around the scheduler_tick to be
-	 * less harmful.
-	 */
-	if (!rt_task(p) && --rq_quota(rq, rq->prio_level) < 0) {
-		if (unlikely(p->first_time_slice))
-			p->first_time_slice = 0;
-		rotate_runqueue_priority(rq);
+	if (unlikely(!task_queued(p))) {
+		/* Task has expired but was not scheduled off yet */
 		set_tsk_need_resched(p);
+		goto out_unlock;
 	}
+	/* p->time_slice <= 0 */
+	task_expired_entitlement(rq, p);
+	set_tsk_need_resched(p);
 out_unlock:
 	spin_unlock(&rq->lock);
 }
@@ -3248,7 +3181,7 @@ void scheduler_tick(void)
 	update_cpu_clock(p, rq, now);
 
 	if (p != rq->idle)
-		task_running_tick(rq, p, 1);
+		task_running_tick(rq, p);
 #ifdef CONFIG_SMP
 	update_load(rq);
 	if (time_after_eq(jiffies, rq->next_balance))
@@ -3295,79 +3228,41 @@ EXPORT_SYMBOL(sub_preempt_count);
 #endif
 
 /*
- * If a task is queued at a priority that isn't from its bitmap we exchange
- * by setting one of the entitlement bits.
- */
-static inline void exchange_slot(struct task_struct *p, struct rq *rq)
-{
-	int slot = next_entitled_slot(p, rq);
-
-	if (slot < MAX_PRIO)
-		__set_bit(USER_PRIO(slot), p->bitmap);
-}
-
-/*
- * next_dynamic_task finds the next suitable dynamic task. As the dyn_bitmap
- * contains all the active and expired dynamic tasks sequentially we only
- * need to do one bitmap lookup.
+ * next_dynamic_task finds the next suitable dynamic task.
  */
 static inline struct task_struct *next_dynamic_task(struct rq *rq, int idx)
 {
 	struct task_struct *next;
 	struct list_head *queue;
 	struct prio_array *array = rq->active;
-	int expirations = 0;
 
 retry:
 	if (idx >= MAX_PRIO) {
-		BUG_ON(++expirations > 1);
-		/*
-		 * We have selected a bit from the expired range so there are
-		 * no more tasks in the active array.
-		 */
-		major_prio_rotation(rq);
-		array = rq->active;
+		/* There are no more tasks in the active array. Swap arrays */
+		array = rq->expired;
+		rq->expired = rq->active;
+		rq->active = array;
+		rq->exp_bitmap = rq->expired->prio_bitmap;
+		rq->dyn_bitmap = rq->active->prio_bitmap;
+		rq->best_static_prio = MAX_PRIO - 1;
+		rq->prio_rotation++;
 		idx = find_next_bit(rq->dyn_bitmap, MAX_PRIO, MAX_RT_PRIO);
 	}
-	if (unlikely(list_empty(array->queue + idx))) {
+	queue = array->queue + idx;
+	next = list_entry(queue->next, struct task_struct, run_list);
+	if (unlikely(next->time_slice < 0)) {
 		/*
-		 * This can happen because they are not always cleared on
-		 * dequeue_task since they may have been dequeued while
-		 * waiting on a runqueue and a rotation has occurred in the
-		 * interim. A very rare occurrence.
+		 * This task ran out of time_slice before it hit a
+		 * scheduler_tick, so requeue it at its reassessed
+		 * priority and pick again (possibly selecting the
+		 * same task).
 		 */
-		__clear_bit(idx, rq->dyn_bitmap);
-		idx = find_next_bit(rq->dyn_bitmap, MAX_PRIO, idx + 1);
+		task_expired_entitlement(rq, next);
+		idx = find_next_bit(rq->dyn_bitmap, MAX_PRIO, MAX_RT_PRIO);
 		goto retry;
 	}
-	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
 	rq->prio_level = idx;
-	/*
-	 * When the task is chosen it is checked to see if its quota has been
-	 * added to this runqueue level which is only performed once per
-	 * level per major rotation for each running task.
-	 */
-	if (next->rotation != rq->prio_rotation) {
-			/* Task has moved during major rotation */
-			task_new_array(next, rq);
-			if (!entitled_slot(next->static_prio, idx))
-				exchange_slot(next, rq);
-			set_task_entitlement(next);
-			rq_quota(rq, idx) += next->quota;
-	} else if (!test_bit(USER_PRIO(idx), next->bitmap)) {
-			/* Task has moved during minor rotation */
-			if (!entitled_slot(next->static_prio, idx))
-				exchange_slot(next, rq);
-			set_task_entitlement(next);
-			rq_quota(rq, idx) += next->quota;
-	}
-	/*
-	 * next needs to have its prio and array reset here in case the
-	 * values are wrong due to priority rotation.
-	 */
-	next->prio = idx;
-	next->array = array;
+	next->rotation = rq->prio_rotation;
 	if (next->static_prio < rq->best_static_prio &&
 	    next->policy != SCHED_BATCH)
 		rq->best_static_prio = next->static_prio;
@@ -4832,9 +4727,9 @@ void __cpuinit init_idle(struct task_str
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
 
-	bitmap_zero(idle->bitmap, PRIO_RANGE + 1);
-	idle->timestamp = sched_clock();
-	idle->array = NULL;
+	bitmap_zero(idle->bitmap, PRIO_RANGE);
+	idle->timestamp = idle->last_ran = sched_clock();
+	idle->array = rq->active;
 	idle->prio = idle->normal_prio = NICE_TO_PRIO(0);
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
@@ -6844,14 +6739,12 @@ void __init sched_init(void)
 			/* delimiter for bitsearch */
 			__set_bit(MAX_PRIO, array->prio_bitmap);
 		}
-		for (k = 0; k < PRIO_RANGE; k++)
-			rq->prio_quota[k] = 0;
 
 		/* Every added cpu increases the rr_interval */
 		rr_us += rr_inc;
 		rr_inc /= 2;
 	}
-	rr_interval = rr_us / 1000 ? : 1;
+	rr_interval = rr_us / 1000;
 
 	set_load_weight(&init_task);
 
Index: linux-2.6.21-rc4-rsdl/kernel/timer.c
===================================================================
--- linux-2.6.21-rc4-rsdl.orig/kernel/timer.c	2007-03-17 08:34:41.000000000 +1100
+++ linux-2.6.21-rc4-rsdl/kernel/timer.c	2007-03-26 10:14:31.000000000 +1000
@@ -1196,10 +1196,9 @@ void update_process_times(int user_tick)
 	int cpu = smp_processor_id();
 
 	/* Note: this timer irq context must be accounted for as well. */
-	if (user_tick)
-		account_user_time(p, jiffies_to_cputime(1));
-	else
+	if (!user_tick)
 		account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1));
+	/* User time is accounted for in update_cpu_clock in sched.c */
 	run_local_timers();
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_tick);
Index: linux-2.6.21-rc4-rsdl/include/linux/init_task.h
===================================================================
--- linux-2.6.21-rc4-rsdl.orig/include/linux/init_task.h	2007-03-26 10:14:05.000000000 +1000
+++ linux-2.6.21-rc4-rsdl/include/linux/init_task.h	2007-03-26 10:14:31.000000000 +1000
@@ -109,8 +109,8 @@ extern struct group_info init_groups;
 	.active_mm	= &init_mm,					\
 	.run_list	= LIST_HEAD_INIT(tsk.run_list),			\
 	.ioprio		= 0,						\
-	.time_slice	= HZ,						\
-	.quota		= HZ,						\
+	.time_slice	= 1000000000,						\
+	.quota		= 1000000000,						\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children),		\
 	.ptrace_list	= LIST_HEAD_INIT(tsk.ptrace_list),		\
Index: linux-2.6.21-rc4-rsdl/include/linux/list.h
===================================================================
--- linux-2.6.21-rc4-rsdl.orig/include/linux/list.h	2007-03-17 08:35:02.000000000 +1100
+++ linux-2.6.21-rc4-rsdl/include/linux/list.h	2007-03-26 10:14:31.000000000 +1000
@@ -333,20 +333,6 @@ static inline void __list_splice(struct 
 	at->prev = last;
 }
 
-static inline void __list_splice_tail(struct list_head *list,
-				      struct list_head *head)
-{
-	struct list_head *first = list->next;
-	struct list_head *last = list->prev;
-	struct list_head *at = head->prev;
-
-	first->prev = at;
-	at->next = first;
-
-	last->next = head;
-	head->prev = last;
-}
-
 /**
  * list_splice - join two lists
  * @list: the new list to add.
@@ -359,18 +345,6 @@ static inline void list_splice(struct li
 }
 
 /**
- * list_splice_tail - join two lists at one's tail
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice_tail(struct list_head *list,
-				    struct list_head *head)
-{
-	if (!list_empty(list))
-		__list_splice_tail(list, head);
-}
-
-/**
  * list_splice_init - join two lists and reinitialise the emptied list.
  * @list: the new list to add.
  * @head: the place to add it in the first list.
@@ -443,22 +417,6 @@ static inline void list_splice_init_rcu(
 }
 
 /**
- * list_splice_tail_init - join 2 lists at one's tail & reinitialise emptied
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_tail_init(struct list_head *list,
-					 struct list_head *head)
-{
-	if (!list_empty(list)) {
-		__list_splice_tail(list, head);
-		INIT_LIST_HEAD(list);
-	}
-}
-
-/**
  * list_entry - get the struct for this entry
  * @ptr:	the &struct list_head pointer.
  * @type:	the type of the struct this is embedded in.
