Implement no-idle-hz aka dynticks on i386.

Original code by Tony Lindgren <tony@atomide.com> and
Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>.

Rewritten and updated by Con Kolivas <kernel@kolivas.org>

Signed-off-by: Con Kolivas <kernel@kolivas.org>

 arch/i386/Kconfig                   |   21 ++
 arch/i386/defconfig                 |    1 
 arch/i386/kernel/Makefile           |    1 
 arch/i386/kernel/apic.c             |   30 +++
 arch/i386/kernel/dyntick.c          |  286 ++++++++++++++++++++++++++++++++++++
 arch/i386/kernel/irq.c              |    4 
 arch/i386/kernel/process.c          |    4 
 arch/i386/kernel/smp.c              |    8 +
 arch/i386/kernel/time.c             |    6 
 arch/i386/kernel/timers/timer_pit.c |   19 ++
 arch/i386/kernel/timers/timer_pm.c  |    2 
 arch/i386/kernel/timers/timer_tsc.c |   51 ++----
 drivers/acpi/Kconfig                |    2 
 include/asm-i386/apic.h             |    2 
 include/asm-i386/dyntick.h          |   75 +++++++++
 include/asm-i386/timer.h            |   35 ++++
 16 files changed, 512 insertions(+), 35 deletions(-)

Index: linux-2.6.16-rc5-dt/drivers/acpi/Kconfig
===================================================================
--- linux-2.6.16-rc5-dt.orig/drivers/acpi/Kconfig	2006-02-27 16:39:57.000000000 +1100
+++ linux-2.6.16-rc5-dt/drivers/acpi/Kconfig	2006-02-27 20:32:03.000000000 +1100
@@ -297,6 +297,8 @@ config X86_PM_TIMER
 	  voltage scaling, unlike the commonly used Time Stamp Counter
 	  (TSC) timing source.
 
+	  This timer is selected by dyntick (NO_IDLE_HZ).
+
 	  You should nearly always say Y here because many modern
 	  systems require this timer. 
 
Index: linux-2.6.16-rc5-dt/arch/i386/defconfig
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/defconfig	2006-02-27 16:39:55.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/defconfig	2006-02-27 20:31:23.000000000 +1100
@@ -91,6 +91,7 @@ CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
 # CONFIG_HPET_TIMER is not set
 # CONFIG_HPET_EMULATE_RTC is not set
+# CONFIG_NO_IDLE_HZ is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
 CONFIG_SCHED_SMT=y
Index: linux-2.6.16-rc5-dt/arch/i386/Kconfig
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/Kconfig	2006-02-27 16:39:55.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/Kconfig	2006-02-27 20:31:23.000000000 +1100
@@ -173,6 +173,27 @@ config HPET_EMULATE_RTC
 	depends on HPET_TIMER && RTC=y
 	default y
 
+config NO_IDLE_HZ
+	bool "Dynamic Tick Timer - Skip timer ticks during idle"
+	depends on EXPERIMENTAL && X86_32
+	select X86_PM_TIMER
+	select ACPI
+	help
+	  This option enables support for skipping timer ticks when the
+	  processor is idle. Under load, the timer ticks continuously.
+	  This option saves power, as it allows the system to stay in
+	  idle mode longer. Currently the only supported timer is the
+	  ACPI PM timer.
+
+	  Note that you can disable dynamic tick timer either by
+	  passing dyntick=disable command line option, or via sysfs:
+
+	  # echo 0 > /sys/devices/system/timer/timer0/dyntick
+
+	  Most users wishing to lower their power usage while retaining
+	  low latencies will most likely want to say Y here in combination
+	  with a high HZ value (e.g. 1000).
+
 config SMP
 	bool "Symmetric multi-processing support"
 	---help---
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/apic.c
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/apic.c	2006-02-27 16:39:55.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/apic.c	2006-02-27 20:31:23.000000000 +1100
@@ -27,6 +27,7 @@
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
+#include <linux/dyntick.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -36,11 +37,13 @@
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/i8253.h>
+#include <asm/dyntick.h>
 
 #include <mach_apic.h>
 #include <mach_ipi.h>
 
 #include "io_ports.h"
+#include "do_timer.h"
 
 /*
  * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
@@ -938,6 +941,8 @@ void (*wait_timer_tick)(void) __devinitd
 
 #define APIC_DIVISOR 16
 
+static u32 apic_timer_val __read_mostly;
+
 static void __setup_APIC_LVTT(unsigned int clocks)
 {
 	unsigned int lvtt_value, tmp_value, ver;
@@ -961,7 +966,9 @@ static void __setup_APIC_LVTT(unsigned i
 				& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
 				| APIC_TDR_DIV_16);
 
-	apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+	apic_timer_val = clocks / APIC_DIVISOR;
+
+	apic_write_around(APIC_TMICT, apic_timer_val);
 }
 
 static void __devinit setup_APIC_timer(unsigned int clocks)
@@ -981,6 +988,17 @@ static void __devinit setup_APIC_timer(u
 }
 
 /*
+ * Used by NO_IDLE_HZ to skip ticks on idle CPUs. Called with IRQs already
+ * disabled
+ */
+void reprogram_apic_timer(unsigned long count)
+{
+	count = count * apic_timer_val;
+	apic_read(APIC_TMICT);
+	apic_write_around(APIC_TMICT, count);
+}
+
+/*
  * In this function we calibrate APIC bus clocks to the external
  * timer. Unfortunately we cannot use jiffies and the timer irq
  * to calibrate, since some later bootup code depends on getting
@@ -1075,6 +1093,10 @@ void __init setup_boot_APIC_clock(void)
 	 */
 	setup_APIC_timer(calibration_result);
 
+	setup_dyntick_use_apic();
+	set_dyntick_limits((0xFFFFFFFF / calibration_result) * APIC_DIVISOR,
+		2);
+
 	local_irq_restore(flags);
 }
 
@@ -1144,8 +1166,10 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
  * value into /proc/profile.
  */
 
-inline void smp_local_timer_interrupt(struct pt_regs * regs)
+void smp_local_timer_interrupt(struct pt_regs * regs)
 {
+	__dyntick_interrupt(regs);
+
 	profile_tick(CPU_PROFILING, regs);
 #ifdef CONFIG_SMP
 	update_process_times(user_mode_vm(regs));
@@ -1241,6 +1265,7 @@ fastcall void smp_spurious_interrupt(str
 	unsigned long v;
 
 	irq_enter();
+
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
 	 * if it is a vectored one.  Just in case...
@@ -1265,6 +1290,7 @@ fastcall void smp_error_interrupt(struct
 	unsigned long v, v1;
 
 	irq_enter();
+
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/dyntick.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/dyntick.c	2006-02-27 20:32:29.000000000 +1100
@@ -0,0 +1,286 @@
+/*
+ * linux/arch/i386/kernel/dyntick.c
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgren <tony@atomide.com> and
+ * Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>
+ * Rewritten by Con Kolivas <kernel@kolivas.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/version.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/dyntick.h>
+#include <linux/timer.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <asm/apic.h>
+#include <asm/dyntick.h>
+#include <asm/io.h>
+#include <asm/arch_hooks.h>
+#include "do_timer.h"
+
+/*
+ * These handlers deal with all cpus idle on either UP or SMP.
+ */
+static void reprogram_pit_handler(unsigned int skip)
+{
+	reprogram_pit_timer(skip);
+}
+
+static void smp_idle_handler(unsigned int skip)
+{
+	if (skip > PIT_MAX_SKIP) {
+		/*
+		 * The PIT timer skips significantly less duration than the
+		 * APIC timer, so we limit it to PIT_MAX_SKIP. As this is the
+		 * last cpu to fall idle we also reprogram its APIC timer to
+		 * wake at the same time.
+		 */
+		unsigned long next;
+
+		skip = PIT_MAX_SKIP;
+		next = jiffies + skip;
+		dyntick->tick = next;
+		__get_cpu_var(dyn_cpu).next_tick = next;
+	}
+	reprogram_pit_timer(skip);
+}
+
+/*
+ * These reset functions start timers at maximum frequency when the cpus are
+ * busy again or when dynticks are disabled
+ */
+static inline void reset_pit_timer(void)
+{
+	reprogram_pit_timer(1);
+}
+
+static void reset_apic_timer(void)
+{
+	reprogram_apic_timer(1);
+}
+
+/*
+ * Null handlers are used initially while APIC timers are set up as ticks
+ * start before the APIC timer is enabled and do_timer_interrupt_hook
+ * changes its behaviour after they are started
+ */
+static void null_reprogram(unsigned long __unused)
+{
+}
+
+static void null_idle_handler(unsigned int __unused)
+{
+}
+
+static void null_wake(void)
+{
+}
+
+/*
+ * Labels for the different skip mechanisms used
+ */
+enum skip_handler {
+	SKIP_PIT,
+	SKIP_APIC,
+	SKIP_SMP_APIC,
+};
+
+struct dyn_handler {
+	enum skip_handler skip_handler;
+	void (*cpu_wake)(void);
+} dyn_handler = {
+	.skip_handler = SKIP_PIT,
+	.cpu_wake = &null_wake,
+};
+
+/*
+ * The per cpu APIC timer skip function
+ */
+static void apic_reprogram(unsigned long jif_next)
+{
+	reprogram_apic_timer(jif_next - jiffies);
+}
+
+/*
+ * Enable hook: in the UP local APIC case (SKIP_APIC) the local APIC timer
+ * is stopped, leaving the PIT as the tick source. Other modes need nothing.
+ */
+static int arch_enable(void)
+{
+	if (dyn_handler.skip_handler == SKIP_APIC)
+		stop_local_apic();
+	return 0;
+}
+
+static int arch_disable(void)
+{
+	reset_pit_timer();
+	switch (dyn_handler.skip_handler) {
+		case SKIP_PIT:
+			break;
+		case SKIP_APIC:
+			start_local_apic();
+			break;
+		case SKIP_SMP_APIC:
+			reset_apic_timer();
+			break;
+	}
+	return 0;
+}
+
+static struct dyntick_timer arch_dyntick = {
+	.lock			= SPIN_LOCK_UNLOCKED,
+	.arch_reprogram		= &null_reprogram,
+	.arch_all_cpus_idle	= &null_idle_handler,
+	.arch_enable		= &arch_enable,
+	.arch_disable		= &arch_disable,
+};
+
+struct dyntick_timer *dyntick = &arch_dyntick;
+
+/*
+ * Only PIT timer skipping is reliable so this is used on all configurations.
+ * All PIT skipping is done from arch_all_cpus_idle in either UP or SMP.
+ * When local APIC support on UP is enabled, the local APIC timer is disabled
+ * when dynticks is enabled and the PIT timer is used. On SMP each cpu also
+ * skips APIC ticks according to its own next timer interrupt from
+ * arch_reprogram.
+ */
+int __init dyntick_arch_init(void)
+{
+	if (dyn_handler.skip_handler == SKIP_APIC && num_present_cpus()> 1)
+		dyn_handler.skip_handler = SKIP_SMP_APIC;
+
+	switch (dyn_handler.skip_handler) {
+		case SKIP_PIT:
+			printk(KERN_INFO "dyntick: Using PIT "
+				"reprogramming\n");
+			dyntick->arch_all_cpus_idle = &reprogram_pit_handler;
+			set_dyntick_limits(PIT_MAX_SKIP, 1);
+			break;
+		case SKIP_APIC:
+			printk(KERN_INFO "dyntick: Disabling APIC timer, "
+				"using PIT reprogramming\n");
+			dyntick->arch_all_cpus_idle = &reprogram_pit_handler;
+			set_dyntick_limits(PIT_MAX_SKIP, 1);
+			stop_local_apic();
+			break;
+		case SKIP_SMP_APIC:
+			printk(KERN_INFO "dyntick: Using per cpu APIC "
+				"reprogramming, skipping PIT when all cpus "
+				"idle\n");
+			dyntick->arch_reprogram = &apic_reprogram;
+			dyntick->arch_all_cpus_idle = &smp_idle_handler;
+			dyn_handler.cpu_wake = &reset_apic_timer;
+			break;
+	}
+	cpus_clear(nohz_cpu_mask);
+	printk(KERN_INFO "dyntick: Maximum ticks to skip limited to %i\n",
+		dyntick->max_skip);
+
+	return 0;
+}
+
+static int __init dyntick_init(void)
+{
+	dyntick->arch_init = dyntick_arch_init;
+	dyntick_register(&arch_dyntick);
+
+	return 0;
+}
+
+arch_initcall(dyntick_init);
+
+void __init setup_dyntick_use_apic(void)
+{
+	dyn_handler.skip_handler = SKIP_APIC;
+}
+
+/*
+ * When an interrupt occurs on a cpu that is already skipping, that cpu's
+ * timer is restarted at maximum frequency with cpu_wake if needed on SMP.
+ * The nohz_cpu_mask is checked at this point to see if all cpus are idle.
+ * When all cpus are detected as being idle (which is always true on UP when
+ * one is idle), the PIT timer is restarted at maximum frequency, and lost
+ * ticks are accounted for.
+ */
+static void do_dyntick_interrupt(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	dyn_handler.cpu_wake();
+
+	spin_lock(&dyntick->lock);
+	if (clear_nohz_cpu(cpu)) {
+		int lost;
+
+		spin_unlock(&dyntick->lock);
+
+		reset_pit_timer();
+
+		write_seqlock(&xtime_lock);
+		lost = cur_timer->mark_offset();
+		if (lost && in_irq())
+			do_timer(regs);
+		write_sequnlock(&xtime_lock);
+
+		kstat_cpu(0).cpustat.idle += (lost - 1);
+		conditional_run_local_timers();
+	} else
+		spin_unlock(&dyntick->lock);
+}
+
+/*
+ * This is called from all interrupt handlers. It checks per_cpu data first
+ * to see that this cpu is not currently skipping ticks. If it is skipping
+ * ticks it calls do_dyntick_interrupt.
+ */
+inline void __dyntick_interrupt(struct pt_regs *regs)
+{
+	if (test_nohz_cpu())
+		do_dyntick_interrupt(regs);
+}
+
+void dyntick_interrupt(struct pt_regs *regs)
+{
+	preempt_disable();
+	__dyntick_interrupt(regs);
+	preempt_enable_no_resched();
+}
+
+/* Updates the irq idle timestamp when we reprogram it */
+void set_irq_idle_timestamp(const unsigned long next)
+{
+	__get_cpu_var(irq_stat).idle_timestamp = next;
+}
+
+/*
+ * Called from every idle tick.
+ */
+inline void idle_reprogram_timer(void)
+{
+	local_irq_disable();
+	if (!need_resched())
+		timer_dyn_reprogram();
+	local_irq_enable();
+}
+
+void __init dyntick_time_init(struct timer_opts *cur_timer)
+{
+	if (strcmp(cur_timer->name, "pmtmr") == 0) {	/* exact match only */
+		dyntick->state |= dyntick_SUITABLE;
+		printk(KERN_INFO "dyntick: Found suitable timer: %s\n",
+			cur_timer->name);
+	} else
+		printk(KERN_ERR "dyntick: Cannot use timer %s - pmtmr "
+			"failed: ACPI disabled?\n", cur_timer->name);
+}
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/irq.c
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/irq.c	2006-02-27 16:39:55.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/irq.c	2006-02-27 20:31:23.000000000 +1100
@@ -18,6 +18,8 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
+#include <linux/dyntick.h>
+#include <asm/dyntick.h>
 
 DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
 EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -76,6 +78,8 @@ fastcall unsigned int do_IRQ(struct pt_r
 	}
 #endif
 
+	__dyntick_interrupt(regs);
+
 #ifdef CONFIG_4KSTACKS
 
 	curctx = (union irq_ctx *) current_thread_info();
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/Makefile	2006-02-27 16:39:55.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/Makefile	2006-02-27 20:31:23.000000000 +1100
@@ -33,6 +33,7 @@ obj-$(CONFIG_MODULES)		+= module.o
 obj-y				+= sysenter.o vsyscall.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat.o
 obj-$(CONFIG_HPET_TIMER) 	+= time_hpet.o
+obj-$(CONFIG_NO_IDLE_HZ) 	+= dyntick.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_stub.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault.o
 obj-$(CONFIG_VM86)		+= vm86.o
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/process.c	2006-02-27 16:39:55.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/process.c	2006-02-27 20:31:23.000000000 +1100
@@ -39,6 +39,7 @@
 #include <linux/ptrace.h>
 #include <linux/random.h>
 #include <linux/kprobes.h>
+#include <linux/dyntick.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -57,6 +58,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
+#include <asm/dyntick.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -195,6 +197,8 @@ void cpu_idle(void)
 				play_dead();
 
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+			idle_reprogram_timer();
+
 			idle();
 		}
 		preempt_enable_no_resched();
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/smp.c
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/smp.c	2005-10-28 20:21:34.000000000 +1000
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/smp.c	2006-02-27 20:31:23.000000000 +1100
@@ -20,9 +20,11 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
+#include <linux/dyntick.h>
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
+#include <asm/dyntick.h>
 #include <mach_apic.h>
 
 /*
@@ -315,6 +317,8 @@ fastcall void smp_invalidate_interrupt(s
 
 	cpu = get_cpu();
 
+	__dyntick_interrupt(regs);
+
 	if (!cpu_isset(cpu, flush_cpumask))
 		goto out;
 		/* 
@@ -600,6 +604,8 @@ void smp_send_stop(void)
 fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
+
+	dyntick_interrupt(regs);
 }
 
 fastcall void smp_call_function_interrupt(struct pt_regs *regs)
@@ -609,6 +615,7 @@ fastcall void smp_call_function_interrup
 	int wait = call_data->wait;
 
 	ack_APIC_irq();
+
 	/*
 	 * Notify initiating CPU that I've grabbed the data and am
 	 * about to execute the function
@@ -619,6 +626,7 @@ fastcall void smp_call_function_interrup
 	 * At this point the info structure may be out of scope unless wait==1
 	 */
 	irq_enter();
+	__dyntick_interrupt(regs);
 	(*func)(info);
 	irq_exit();
 
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/time.c
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/time.c	2006-02-27 20:31:14.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/time.c	2006-02-27 20:31:23.000000000 +1100
@@ -46,6 +46,7 @@
 #include <linux/bcd.h>
 #include <linux/efi.h>
 #include <linux/mca.h>
+#include <linux/dyntick.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -56,6 +57,7 @@
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/timer.h>
+#include <asm/dyntick.h>
 
 #include "mach_time.h"
 
@@ -434,7 +436,7 @@ static struct sysdev_class timer_sysclas
 
 
 /* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
+struct sys_device device_timer = {
 	.id	= 0,
 	.cls	= &timer_sysclass,
 };
@@ -490,5 +492,7 @@ void __init time_init(void)
 	cur_timer = select_timer();
 	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
 
+	dyntick_time_init(cur_timer);
+
 	time_init_hook();
 }
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_pit.c
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/timers/timer_pit.c	2006-02-27 20:31:14.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_pit.c	2006-02-27 20:31:23.000000000 +1100
@@ -149,6 +149,25 @@ static unsigned long get_offset_pit(void
 	return count;
 }
 
+/*
+ * Reprograms the PIT so the next timer interrupt is jiffies_to_skip ticks
+ * away. PIT timer reprogramming code taken from APM code.
+ * The PIT is a 16-bit timer, so the count is clamped to 0xffff; the clamp
+ * is applied before multiplying by LATCH so large skips cannot overflow.
+ * Called with irqs already disabled.
+ */
+void reprogram_pit_timer(unsigned long jiffies_to_skip)
+{
+	unsigned long skip = 0xffff;
+
+	if (jiffies_to_skip <= 0xffff / LATCH)
+		skip = jiffies_to_skip * LATCH;
+	spin_lock(&i8253_lock);
+	outb_p(0x34, PIT_MODE);		/* binary, mode 2, LSB/MSB, ch 0 */
+	outb_p(skip & 0xff, PIT_CH0);	/* LSB */
+	outb(skip >> 8, PIT_CH0);	/* MSB */
+	spin_unlock(&i8253_lock);
+}
 
 /* tsc timer_opts struct */
 struct timer_opts timer_pit = {
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_pm.c
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/timers/timer_pm.c	2006-02-27 20:31:14.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_pm.c	2006-02-27 20:31:23.000000000 +1100
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/init.h>
+#include <linux/dyntick.h>
 #include <asm/types.h>
 #include <asm/timer.h>
 #include <asm/smp.h>
@@ -128,6 +129,7 @@ pm_good:
 		return -ENODEV;
 
 	init_cpu_khz();
+	set_dyntick_limits(((0xFFFFFF / 1000000) * 286 * HZ) >> 10, 0);
 	return 0;
 }
 
Index: linux-2.6.16-rc5-dt/include/asm-i386/apic.h
===================================================================
--- linux-2.6.16-rc5-dt.orig/include/asm-i386/apic.h	2006-02-27 16:40:24.000000000 +1100
+++ linux-2.6.16-rc5-dt/include/asm-i386/apic.h	2006-02-27 20:31:23.000000000 +1100
@@ -121,6 +121,7 @@ extern void nmi_watchdog_tick (struct pt
 extern int APIC_init_uniprocessor (void);
 extern void disable_APIC_timer(void);
 extern void enable_APIC_timer(void);
+extern void reprogram_apic_timer(unsigned long count);
 
 extern void enable_NMI_through_LVT0 (void * dummy);
 
@@ -139,6 +140,7 @@ void switch_ipi_to_APIC_timer(void *cpum
 
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
+static inline void reprogram_apic_timer(unsigned long count) { }
 
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
Index: linux-2.6.16-rc5-dt/include/asm-i386/dyntick.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16-rc5-dt/include/asm-i386/dyntick.h	2006-02-27 20:31:23.000000000 +1100
@@ -0,0 +1,75 @@
+/*
+ * linux/include/asm-i386/dyntick.h
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgren <tony@atomide.com> and
+ * Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>
+ * Rewritten by Con Kolivas <kernel@kolivas.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_I386_dyntick_H_
+#define _ASM_I386_dyntick_H_
+
+#include <asm/apic.h>
+#include <asm/timer.h>
+
+#ifdef CONFIG_NO_IDLE_HZ
+extern void idle_reprogram_timer(void);
+extern void __dyntick_interrupt(struct pt_regs *regs);
+extern void dyntick_interrupt(struct pt_regs *regs);
+extern void __init setup_dyntick_use_apic(void);
+extern void __init dyntick_time_init(struct timer_opts *cur_timer);
+extern void set_irq_idle_timestamp(const unsigned long next);
+
+#define PIT_MAX_SKIP	(0xffff / (LATCH))
+
+#if (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC))
+extern int using_apic_timer;
+
+static inline void start_local_apic(void)
+{
+	using_apic_timer = 1;
+	enable_APIC_timer();
+}
+
+static inline void stop_local_apic(void)
+{
+	disable_APIC_timer();
+	using_apic_timer = 0;
+}
+#else /* (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)) */
+static inline void start_local_apic(void)
+{
+}
+
+static inline void stop_local_apic(void)
+{
+}
+#endif /* (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)) */
+#else	/* CONFIG_NO_IDLE_HZ */
+static inline void idle_reprogram_timer(void)
+{
+}
+
+static inline void __dyntick_interrupt(struct pt_regs *__unused)
+{
+}
+
+static inline void dyntick_interrupt(struct pt_regs *__unused)
+{
+}
+
+static inline void setup_dyntick_use_apic(void)
+{
+}
+
+static inline void dyntick_time_init(struct timer_opts *__unused)
+{
+}
+#endif	/* CONFIG_NO_IDLE_HZ */
+
+#endif /* _ASM_I386_dyntick_H_ */
Index: linux-2.6.16-rc5-dt/include/asm-i386/timer.h
===================================================================
--- linux-2.6.16-rc5-dt.orig/include/asm-i386/timer.h	2006-02-27 20:31:14.000000000 +1100
+++ linux-2.6.16-rc5-dt/include/asm-i386/timer.h	2006-02-27 20:31:23.000000000 +1100
@@ -1,5 +1,6 @@
 #ifndef _ASMi386_TIMER_H
 #define _ASMi386_TIMER_H
+#include <linux/jiffies.h>
 #include <linux/init.h>
 #include <linux/pm.h>
 
@@ -38,6 +39,7 @@ struct init_timer_opts {
 extern struct timer_opts* __init select_timer(void);
 extern void clock_fallback(void);
 void setup_pit_timer(void);
+extern void reprogram_pit_timer(unsigned long jiffies_to_skip);
 
 /* Modifiers for buggy PIT handling */
 
@@ -67,4 +69,37 @@ extern unsigned long calibrate_tsc_hpet(
 #ifdef CONFIG_X86_PM_TIMER
 extern struct init_timer_opts timer_pmtmr_init;
 #endif
+#ifdef CONFIG_NO_IDLE_HZ
+static inline void tsc_sanity_check(int lost)
+{
+}
+#else /* CONFIG_NO_IDLE_HZ */
+extern void cpufreq_delayed_get(void);
+
+static inline void tsc_sanity_check(int lost)
+{
+	static int lost_count = 0;
+
+	if (lost >= 2) {
+		jiffies_64 += lost-1;
+
+		/* sanity check to ensure we're not always losing ticks */
+		if (lost_count++ > 100) {
+			printk(KERN_WARNING "Losing too many ticks!\n");
+			printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
+			printk(KERN_WARNING "Possible reasons for this are:\n");
+			printk(KERN_WARNING "  You're running with Speedstep,\n");
+			printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
+			printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
+			printk(KERN_WARNING "Falling back to a sane timesource now.\n");
+
+			clock_fallback();
+		}
+		/* ... but give the TSC a fair chance */
+		if (lost_count > 25)
+			cpufreq_delayed_get();
+	} else
+		lost_count = 0;
+}
+#endif /* CONFIG_NO_IDLE_HZ */
 #endif
Index: linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_tsc.c
===================================================================
--- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/timers/timer_tsc.c	2006-02-27 20:31:14.000000000 +1100
+++ linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_tsc.c	2006-02-27 20:31:23.000000000 +1100
@@ -14,6 +14,7 @@
 #include <linux/cpufreq.h>
 #include <linux/string.h>
 #include <linux/jiffies.h>
+#include <linux/dyntick.h>
 
 #include <asm/timer.h>
 #include <asm/io.h>
@@ -32,8 +33,6 @@ static unsigned long hpet_last;
 static struct timer_opts timer_tsc;
 #endif
 
-static inline void cpufreq_delayed_get(void);
-
 int tsc_disable __devinitdata = 0;
 
 static int use_tsc;
@@ -180,10 +179,19 @@ static void delay_tsc(unsigned long loop
 	} while ((now-bclock) < loops);
 }
 
+/* update the monotonic base value */
+static inline void update_monotonic_base(unsigned long long last_offset)
+{
+	unsigned long long this_offset;
+
+	this_offset = ((unsigned long long)last_tsc_high << 32) | last_tsc_low;
+	monotonic_base += cycles_2_ns(this_offset - last_offset);
+}
+
 #ifdef CONFIG_HPET_TIMER
 static int mark_offset_tsc_hpet(void)
 {
-	unsigned long long this_offset, last_offset;
+	unsigned long long last_offset;
 	unsigned long offset, temp, hpet_current;
 	int lost_ticks = 0;
 
@@ -213,9 +221,7 @@ static int mark_offset_tsc_hpet(void)
 	}
 	hpet_last = hpet_current;
 
-	/* update the monotonic base value */
-	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
-	monotonic_base += cycles_2_ns(this_offset - last_offset);
+	update_monotonic_base(last_offset);
 	write_sequnlock(&monotonic_lock);
 
 	/* calculate delay_at_last_interrupt */
@@ -255,7 +261,7 @@ static void handle_cpufreq_delayed_get(v
  * to verify the CPU frequency the timing core thinks the CPU is running
  * at is still correct.
  */
-static inline void cpufreq_delayed_get(void) 
+void cpufreq_delayed_get(void)
 {
 	if (cpufreq_init && !cpufreq_delayed_issched) {
 		cpufreq_delayed_issched = 1;
@@ -339,7 +345,7 @@ static int __init cpufreq_tsc(void)
 core_initcall(cpufreq_tsc);
 
 #else /* CONFIG_CPU_FREQ */
-static inline void cpufreq_delayed_get(void) { return; }
+void cpufreq_delayed_get(void) { return; }
 #endif 
 
 int recalibrate_cpu_khz(void)
@@ -371,8 +377,7 @@ static int mark_offset_tsc(void)
 	int count;
 	int countmp;
 	static int count1 = 0;
-	unsigned long long this_offset, last_offset;
-	static int lost_count = 0;
+	unsigned long long last_offset;
 
 	write_seqlock(&monotonic_lock);
 	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
@@ -439,29 +444,9 @@ static int mark_offset_tsc(void)
 	delta += delay_at_last_interrupt;
 	lost = delta/(1000000/HZ);
 	delay = delta%(1000000/HZ);
-	if (lost >= 2 && detect_lost_ticks) {
-		jiffies_64 += lost-1;
-
-		/* sanity check to ensure we're not always losing ticks */
-		if (lost_count++ > 100) {
-			printk(KERN_WARNING "Losing too many ticks!\n");
-			printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
-			printk(KERN_WARNING "Possible reasons for this are:\n");
-			printk(KERN_WARNING "  You're running with Speedstep,\n");
-			printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
-			printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
-			printk(KERN_WARNING "Falling back to a sane timesource now.\n");
+	tsc_sanity_check(lost);
 
-			clock_fallback();
-		}
-		/* ... but give the TSC a fair chance */
-		if (lost_count > 25)
-			cpufreq_delayed_get();
-	} else
-		lost_count = 0;
-	/* update the monotonic base value */
-	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
-	monotonic_base += cycles_2_ns(this_offset - last_offset);
+	update_monotonic_base(last_offset);
 	write_sequnlock(&monotonic_lock);
 
 	/* calculate delay_at_last_interrupt */
@@ -564,6 +549,8 @@ static int __init init_tsc(char* overrid
 					cpu_khz / 1000, cpu_khz % 1000);
 			}
 			set_cyc2ns_scale(cpu_khz);
+			set_dyntick_limits((0xFFFFFFFF / (cpu_khz * 1000)) *
+				HZ, 0);
 			return 0;
 		}
 	}
