Index: linux-2.6.9-rc2-mm1/kernel/sched.c
===================================================================
--- linux-2.6.9-rc2-mm1.orig/kernel/sched.c	2004-09-19 23:14:54.000000000 +1000
+++ linux-2.6.9-rc2-mm1/kernel/sched.c	2004-09-22 12:29:58.233757719 +1000
@@ -115,6 +115,7 @@ struct runqueue {
 	unsigned long long nr_switches;
 	unsigned long nr_uninterruptible;
 	unsigned long long timestamp_last_tick;
+	unsigned long task_expires;
 	unsigned int cache_ticks, preempted;
 	task_t *curr, *idle;
 	struct mm_struct *prev_mm;
@@ -2048,6 +2049,64 @@ static void slice_expired(task_t *p, run
 	time_slice_expired(p, rq);
 }
 
+static DEFINE_PER_CPU(struct timer_list, time_slice_expiry);
+
+/* Timer callback set by schedule(): expire current's slice or time_slice. */
+static void time_slice_expiry_fn(unsigned long cpu)
+{
+	struct timer_list *rt = &__get_cpu_var(time_slice_expiry);
+	runqueue_t *rq = this_rq();
+	task_t *p = current;
+	unsigned long flags;
+
+	/* CPU hotplug can drag us off cpu: don't run on wrong CPU */
+	if (cpu_is_offline(cpu)) {
+		del_timer(rt);
+		return;
+	}
+	/* scheduler_tick() takes rq->lock with irqs off, so mask them here */
+	spin_lock_irqsave(&rq->lock, flags);
+	/* Disarmed (0) or re-armed for later before we got the rq lock? */
+	if (unlikely(!rq->task_expires ||
+			time_after(rq->task_expires, jiffies)))
+		goto out_unlock;
+	if (!US_TO_JIFFIES(p->slice))
+		slice_expired(p, rq);
+	else
+		time_slice_expired(p, rq);
+out_unlock:
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+/* Arm @cpu's slice timer to fire at @task_expires and record that in @rq. */
+static void
+set_expiry_timer(int cpu, runqueue_t *rq, unsigned long task_expires)
+{
+	struct timer_list *rt = &per_cpu(time_slice_expiry, cpu);
+
+	if (rt->function == NULL) {
+		init_timer(rt);
+		rt->expires = task_expires;
+		rt->data = cpu;
+		rt->function = time_slice_expiry_fn;
+		add_timer_on(rt, cpu);
+	} else if (task_expires != rq->task_expires)
+		mod_timer(rt, task_expires);
+	rq->task_expires = task_expires;	/* also after mod_timer() */
+}
+
+/* Disarm @cpu's slice timer; a zero task_expires makes the handler bail. */
+static void del_expiry_timer(int cpu, runqueue_t *rq)
+{
+	struct timer_list *rt = &per_cpu(time_slice_expiry, cpu);
+
+	rq->task_expires = 0;
+	if (rt->function) {
+		del_timer_sync(rt);
+		rt->function = NULL;
+	}
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -2058,8 +2117,6 @@ void scheduler_tick(int user_ticks, int 
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
-	unsigned long long _decrement;
-	long decrement;
 
 	rq->timestamp_last_tick = sched_clock();
 
@@ -2090,36 +2147,13 @@ void scheduler_tick(int user_ticks, int 
 	else
 		cpustat->user += user_ticks;
 	cpustat->system += sys_ticks;
-	/*
-	 * SCHED_FIFO tasks never run out of timeslice.
-	 */
-	if (unlikely(p->policy == SCHED_FIFO))
-		goto out;
 
 	spin_lock(&rq->lock);
 	rq->cache_ticks++;
 
-	decrement = JIFFIES_TO_US(1);
-	_decrement = rq->timestamp_last_tick - p->timestamp;
-	_decrement = NS_TO_US(_decrement);
-	if (_decrement > 0 && _decrement < decrement)
-		decrement = _decrement;
-	if (p->slice > decrement && US_TO_JIFFIES(p->slice - decrement))
-		p->slice -= decrement;
-	else {
-		slice_expired(p, rq);
-		goto out_unlock;
-	}
-	if (p->time_slice > decrement && 
-		US_TO_JIFFIES(p->time_slice - decrement))
-			p->time_slice -= decrement;
-	else {
-		time_slice_expired(p, rq);
-		goto out_unlock;
-	}
 	if (rq->preempted && rq->cache_ticks >= cache_delay)
 		set_tsk_need_resched(p);
-out_unlock:
+
 	spin_unlock(&rq->lock);
 out:
 	rebalance_tick(cpu, rq, NOT_IDLE);
@@ -2291,19 +2325,12 @@ need_resched:
 	schedstat_inc(rq, sched_cnt);
 	now = sched_clock();
 	prev->runtime = NS_TO_US(now - prev->timestamp) ? : 1;
-	if (prev->mm && prev->policy != SCHED_FIFO &&
-		prev->state == TASK_RUNNING &&
-		prev->timestamp > rq->timestamp_last_tick) {
-			/*
-			 * We have not run through a scheduler_tick and are
-			 * still running so charge us with the runtime.
-			 */
-			if (unlikely(US_TO_JIFFIES(prev->slice - 
-				prev->runtime) < 1))
-					slice_expired(prev, rq);
-			else if (unlikely(US_TO_JIFFIES(prev->time_slice -
-				prev->runtime) < 1))
-					time_slice_expired(prev, rq);
+	if (prev != rq->idle && prev->policy != SCHED_FIFO &&
+		system_state == SYSTEM_RUNNING) {
+			if (prev->runtime >= prev->slice)
+				slice_expired(prev, rq);
+			else if (prev->runtime >= prev->time_slice)
+				time_slice_expired(prev, rq);
 			else {
 				 prev->slice -= prev->runtime;
 				 prev->time_slice -= prev->runtime;
@@ -2375,6 +2402,15 @@ switch_tasks:
 	}
 
 	sched_info_switch(prev, next);
+	if (next == rq->idle || next->policy == SCHED_FIFO)
+		del_expiry_timer(cpu, rq);
+	else {
+		unsigned long task_expires = (next->time_slice ? : 1) - 1;
+		task_expires = US_TO_JIFFIES(task_expires) + jiffies + 1;
+		if (task_expires != rq->task_expires)
+			set_expiry_timer(cpu, rq, task_expires);
+	}
+
 	if (likely(prev != next)) {
 		rq->preempted = 0;
 		rq->cache_ticks = 0;

