Implement no-idle-hz (aka dynticks) on i386. Original code by Tony Lindgren and Tuukka Tikkanen. Rewritten and updated by Con Kolivas. Signed-off-by: Con Kolivas arch/i386/Kconfig | 21 ++ arch/i386/defconfig | 1 arch/i386/kernel/Makefile | 1 arch/i386/kernel/apic.c | 30 +++ arch/i386/kernel/dyntick.c | 286 ++++++++++++++++++++++++++++++++++++ arch/i386/kernel/irq.c | 4 arch/i386/kernel/process.c | 4 arch/i386/kernel/smp.c | 8 + arch/i386/kernel/time.c | 6 arch/i386/kernel/timers/timer_pit.c | 19 ++ arch/i386/kernel/timers/timer_pm.c | 2 arch/i386/kernel/timers/timer_tsc.c | 51 ++---- drivers/acpi/Kconfig | 2 include/asm-i386/apic.h | 2 include/asm-i386/dyntick.h | 75 +++++++++ include/asm-i386/timer.h | 35 ++++ 16 files changed, 512 insertions(+), 35 deletions(-) Index: linux-2.6.16-rc5-dt/drivers/acpi/Kconfig =================================================================== --- linux-2.6.16-rc5-dt.orig/drivers/acpi/Kconfig 2006-02-27 16:39:57.000000000 +1100 +++ linux-2.6.16-rc5-dt/drivers/acpi/Kconfig 2006-02-27 20:32:03.000000000 +1100 @@ -297,6 +297,8 @@ config X86_PM_TIMER voltage scaling, unlike the commonly used Time Stamp Counter (TSC) timing source. + This timer is selected by dyntick (NO_IDLE_HZ). + You should nearly always say Y here because many modern systems require this timer. 
Index: linux-2.6.16-rc5-dt/arch/i386/defconfig =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/defconfig 2006-02-27 16:39:55.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/defconfig 2006-02-27 20:31:23.000000000 +1100 @@ -91,6 +91,7 @@ CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y # CONFIG_HPET_TIMER is not set # CONFIG_HPET_EMULATE_RTC is not set +# CONFIG_NO_IDLE_HZ is not set CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_SCHED_SMT=y Index: linux-2.6.16-rc5-dt/arch/i386/Kconfig =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/Kconfig 2006-02-27 16:39:55.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/Kconfig 2006-02-27 20:31:23.000000000 +1100 @@ -173,6 +173,27 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && RTC=y default y +config NO_IDLE_HZ + bool "Dynamic Tick Timer - Skip timer ticks during idle" + depends on EXPERIMENTAL && X86_32 + select X86_PM_TIMER + select ACPI + help + This option enables support for skipping timer ticks when the + processor is idle. During system load, timer is continuous. + This option saves power, as it allows the system to stay in + idle mode longer. Currently the only supported timer is ACPI PM + timer. + + Note that you can disable dynamic tick timer either by + passing dyntick=disable command line option, or via sysfs: + + # echo 0 > /sys/devices/system/timer/timer0/dyntick + + Most users wishing to lower their power usage while retaining + low latencies will most likely want to say Y here in combination + with a high HZ value (eg 1000). 
+ config SMP bool "Symmetric multi-processing support" ---help--- Index: linux-2.6.16-rc5-dt/arch/i386/kernel/apic.c =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/apic.c 2006-02-27 16:39:55.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/apic.c 2006-02-27 20:31:23.000000000 +1100 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -36,11 +37,13 @@ #include #include #include +#include #include #include #include "io_ports.h" +#include "do_timer.h" /* * cpu_mask that denotes the CPUs that needs timer interrupt coming in as @@ -938,6 +941,8 @@ void (*wait_timer_tick)(void) __devinitd #define APIC_DIVISOR 16 +static u32 apic_timer_val __read_mostly; + static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; @@ -961,7 +966,9 @@ static void __setup_APIC_LVTT(unsigned i & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + apic_timer_val = clocks / APIC_DIVISOR; + + apic_write_around(APIC_TMICT, apic_timer_val); } static void __devinit setup_APIC_timer(unsigned int clocks) @@ -981,6 +988,17 @@ static void __devinit setup_APIC_timer(u } /* + * Used by NO_IDLE_HZ to skip ticks on idle CPUs. Called with IRQs already + * disabled + */ +void reprogram_apic_timer(unsigned long count) +{ + count = count * apic_timer_val; + apic_read(APIC_TMICT); + apic_write_around(APIC_TMICT, count); +} + +/* * In this function we calibrate APIC bus clocks to the external * timer. 
Unfortunately we cannot use jiffies and the timer irq * to calibrate, since some later bootup code depends on getting @@ -1075,6 +1093,10 @@ void __init setup_boot_APIC_clock(void) */ setup_APIC_timer(calibration_result); + setup_dyntick_use_apic(); + set_dyntick_limits((0xFFFFFFFF / calibration_result) * APIC_DIVISOR, + 2); + local_irq_restore(flags); } @@ -1144,8 +1166,10 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer); * value into /proc/profile. */ -inline void smp_local_timer_interrupt(struct pt_regs * regs) +void smp_local_timer_interrupt(struct pt_regs * regs) { + __dyntick_interrupt(regs); + profile_tick(CPU_PROFILING, regs); #ifdef CONFIG_SMP update_process_times(user_mode_vm(regs)); @@ -1241,6 +1265,7 @@ fastcall void smp_spurious_interrupt(str unsigned long v; irq_enter(); + /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... @@ -1265,6 +1290,7 @@ fastcall void smp_error_interrupt(struct unsigned long v, v1; irq_enter(); + /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); apic_write(APIC_ESR, 0); Index: linux-2.6.16-rc5-dt/arch/i386/kernel/dyntick.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/dyntick.c 2006-02-27 20:32:29.000000000 +1100 @@ -0,0 +1,286 @@ +/* + * linux/arch/i386/kernel/dyntick.c + * + * Copyright (C) 2004 Nokia Corporation + * Written by Tony Lindgen and + * Tuukka Tikkanen + * Rewritten by Con Kolivas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "do_timer.h" + +/* + * These handlers deal with all cpus idle on either UP or SMP. 
+ */ +static void reprogram_pit_handler(unsigned int skip) +{ + reprogram_pit_timer(skip); +} + +static void smp_idle_handler(unsigned int skip) +{ + if (skip > PIT_MAX_SKIP) { + /* + * The PIT timer skips significantly less duration than the + * APIC timer, so we limit it to PIT_MAX_SKIP. As this is the + * last cpu to fall idle we also reprogram its APIC timer to + * wake at the same time. + */ + unsigned long next; + + skip = PIT_MAX_SKIP; + next = jiffies + skip; + dyntick->tick = next; + __get_cpu_var(dyn_cpu).next_tick = next; + } + reprogram_pit_timer(skip); +} + +/* + * These reset functions start timers at maximum frequency when the cpus are + * busy again or when dynticks are disabled + */ +static inline void reset_pit_timer(void) +{ + reprogram_pit_timer(1); +} + +static void reset_apic_timer(void) +{ + reprogram_apic_timer(1); +} + +/* + * Null handlers are used initially while APIC timers are set up as ticks + * start before the APIC timer is enabled and do_timer_interrupt_hook + * changes its behaviour after they are started + */ +static void null_reprogram(unsigned long __unused) +{ +} + +static void null_idle_handler(unsigned int __unused) +{ +} + +static void null_wake(void) +{ +} + +/* + * Labels for the different skip mechanisms used + */ +enum skip_handler { + SKIP_PIT, + SKIP_APIC, + SKIP_SMP_APIC, +}; + +struct dyn_handler { + enum skip_handler skip_handler; + void (*cpu_wake)(void); +} dyn_handler = { + .skip_handler = SKIP_PIT, + .cpu_wake = &null_wake, +}; + +/* + * The per cpu APIC timer skip function + */ +static void apic_reprogram(unsigned long jif_next) +{ + reprogram_apic_timer(jif_next - jiffies); +} + +static int arch_enable(void) +{ + switch (dyn_handler.skip_handler) { + case SKIP_APIC: + stop_local_apic(); + default: + break; + } + return 0; +} + +static int arch_disable(void) +{ + reset_pit_timer(); + switch (dyn_handler.skip_handler) { + case SKIP_PIT: + break; + case SKIP_APIC: + start_local_apic(); + break; + case 
SKIP_SMP_APIC: + reset_apic_timer(); + break; + } + return 0; +} + +static struct dyntick_timer arch_dyntick = { + .lock = SPIN_LOCK_UNLOCKED, + .arch_reprogram = &null_reprogram, + .arch_all_cpus_idle = &null_idle_handler, + .arch_enable = &arch_enable, + .arch_disable = &arch_disable, +}; + +struct dyntick_timer *dyntick = &arch_dyntick; + +/* + * Only PIT timer skipping is reliable so this is used on all configurations. + * All PIT skipping is done from arch_all_cpus_idle in either UP or SMP. + * When local APIC support on UP is enabled, the local APIC timer is disabled + * when dynticks is enabled and the PIT timer is used. On SMP each cpu also + * skips APIC ticks according to its own next timer interrupt from + * arch_reprogram. + */ +int __init dyntick_arch_init(void) +{ + if (dyn_handler.skip_handler == SKIP_APIC && num_present_cpus()> 1) + dyn_handler.skip_handler = SKIP_SMP_APIC; + + switch (dyn_handler.skip_handler) { + case SKIP_PIT: + printk(KERN_INFO "dyntick: Using PIT " + "reprogramming\n"); + dyntick->arch_all_cpus_idle = &reprogram_pit_handler; + set_dyntick_limits(PIT_MAX_SKIP, 1); + break; + case SKIP_APIC: + printk(KERN_INFO "dyntick: Disabling APIC timer, " + "using PIT reprogramming\n"); + dyntick->arch_all_cpus_idle = &reprogram_pit_handler; + set_dyntick_limits(PIT_MAX_SKIP, 1); + stop_local_apic(); + break; + case SKIP_SMP_APIC: + printk(KERN_INFO "dyntick: Using per cpu APIC " + "reprogramming, skipping PIT when all cpus " + "idle\n"); + dyntick->arch_reprogram = &apic_reprogram; + dyntick->arch_all_cpus_idle = &smp_idle_handler; + dyn_handler.cpu_wake = &reset_apic_timer; + break; + } + cpus_clear(nohz_cpu_mask); + printk(KERN_INFO "dyntick: Maximum ticks to skip limited to %i\n", + dyntick->max_skip); + + return 0; +} + +static int __init dyntick_init(void) +{ + dyntick->arch_init = dyntick_arch_init; + dyntick_register(&arch_dyntick); + + return 0; +} + +arch_initcall(dyntick_init); + +void __init setup_dyntick_use_apic(void) +{ + 
dyn_handler.skip_handler = SKIP_APIC; +} + +/* + * When an interrupt occurs on a cpu that is already skipping, that cpu's + * timer is restarted at maximum frequency with cpu_wake if needed on SMP. + * The nohz_cpu_mask is checked at this point to see if all cpus are idle. + * When all cpus are detected as being idle (which is always true on UP when + * one is idle), the PIT timer is restarted at maximum frequency, and lost + * ticks are accounted for. + */ +static void do_dyntick_interrupt(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + dyn_handler.cpu_wake(); + + spin_lock(&dyntick->lock); + if (clear_nohz_cpu(cpu)) { + int lost; + + spin_unlock(&dyntick->lock); + + reset_pit_timer(); + + write_seqlock(&xtime_lock); + lost = cur_timer->mark_offset(); + if (lost && in_irq()) + do_timer(regs); + write_sequnlock(&xtime_lock); + + kstat_cpu(0).cpustat.idle += (lost - 1); + conditional_run_local_timers(); + } else + spin_unlock(&dyntick->lock); +} + +/* + * This is called from all interrupt handlers. It checks per_cpu data first + * to see that this cpu is not currently skipping ticks. If it is skipping + * ticks it calls do_dyntick_interrupt. + */ +inline void __dyntick_interrupt(struct pt_regs *regs) +{ + if (test_nohz_cpu()) + do_dyntick_interrupt(regs); +} + +void dyntick_interrupt(struct pt_regs *regs) +{ + preempt_disable(); + __dyntick_interrupt(regs); + preempt_enable_no_resched(); +} + +/* Updates the irq idle timestamp when we reprogram it */ +void set_irq_idle_timestamp(const unsigned long next) +{ + __get_cpu_var(irq_stat).idle_timestamp = next; +} + +/* + * Called from every idle tick. 
+ */ +inline void idle_reprogram_timer(void) +{ + local_irq_disable(); + if (!need_resched()) + timer_dyn_reprogram(); + local_irq_enable(); +} + +void __init dyntick_time_init(struct timer_opts *cur_timer) +{ + if (strncmp(cur_timer->name, "pmtmr", 3) == 0) { + dyntick->state |= dyntick_SUITABLE; + printk(KERN_INFO "dyntick: Found suitable timer: %s\n", + cur_timer->name); + } else + printk(KERN_ERR "dyntick: Cannot use timer %s - pmtmr " + "failed: ACPI disabled?\n", cur_timer->name); +} Index: linux-2.6.16-rc5-dt/arch/i386/kernel/irq.c =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/irq.c 2006-02-27 16:39:55.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/irq.c 2006-02-27 20:31:23.000000000 +1100 @@ -18,6 +18,8 @@ #include #include #include +#include +#include DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); @@ -76,6 +78,8 @@ fastcall unsigned int do_IRQ(struct pt_r } #endif + __dyntick_interrupt(regs); + #ifdef CONFIG_4KSTACKS curctx = (union irq_ctx *) current_thread_info(); Index: linux-2.6.16-rc5-dt/arch/i386/kernel/Makefile =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/Makefile 2006-02-27 16:39:55.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/Makefile 2006-02-27 20:31:23.000000000 +1100 @@ -33,6 +33,7 @@ obj-$(CONFIG_MODULES) += module.o obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o +obj-$(CONFIG_NO_IDLE_HZ) += dyntick.o obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_DOUBLEFAULT) += doublefault.o obj-$(CONFIG_VM86) += vm86.o Index: linux-2.6.16-rc5-dt/arch/i386/kernel/process.c =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/process.c 2006-02-27 16:39:55.000000000 +1100 +++ 
linux-2.6.16-rc5-dt/arch/i386/kernel/process.c 2006-02-27 20:31:23.000000000 +1100 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -57,6 +58,7 @@ #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -195,6 +197,8 @@ void cpu_idle(void) play_dead(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; + idle_reprogram_timer(); + idle(); } preempt_enable_no_resched(); Index: linux-2.6.16-rc5-dt/arch/i386/kernel/smp.c =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/smp.c 2005-10-28 20:21:34.000000000 +1000 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/smp.c 2006-02-27 20:31:23.000000000 +1100 @@ -20,9 +20,11 @@ #include #include #include +#include #include #include +#include #include /* @@ -315,6 +317,8 @@ fastcall void smp_invalidate_interrupt(s cpu = get_cpu(); + __dyntick_interrupt(regs); + if (!cpu_isset(cpu, flush_cpumask)) goto out; /* @@ -600,6 +604,8 @@ void smp_send_stop(void) fastcall void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); + + dyntick_interrupt(regs); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) @@ -609,6 +615,7 @@ fastcall void smp_call_function_interrup int wait = call_data->wait; ack_APIC_irq(); + /* * Notify initiating CPU that I've grabbed the data and am * about to execute the function @@ -619,6 +626,7 @@ fastcall void smp_call_function_interrup * At this point the info structure may be out of scope unless wait==1 */ irq_enter(); + __dyntick_interrupt(regs); (*func)(info); irq_exit(); Index: linux-2.6.16-rc5-dt/arch/i386/kernel/time.c =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/time.c 2006-02-27 20:31:14.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/time.c 2006-02-27 20:31:23.000000000 +1100 @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include 
#include +#include #include "mach_time.h" @@ -434,7 +436,7 @@ static struct sysdev_class timer_sysclas /* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { +struct sys_device device_timer = { .id = 0, .cls = &timer_sysclass, }; @@ -490,5 +492,7 @@ void __init time_init(void) cur_timer = select_timer(); printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); + dyntick_time_init(cur_timer); + time_init_hook(); } Index: linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_pit.c =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/timers/timer_pit.c 2006-02-27 20:31:14.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_pit.c 2006-02-27 20:31:23.000000000 +1100 @@ -149,6 +149,25 @@ static unsigned long get_offset_pit(void return count; } +/* + * Reprograms the next timer interrupt + * PIT timer reprogramming code taken from APM code. + * Note that PIT timer is a 16-bit timer. + * Called with irqs already disabled. 
+ */ +void reprogram_pit_timer(unsigned long jiffies_to_skip) +{ + int skip = jiffies_to_skip * LATCH; + + if (skip > 0xffff) + skip = 0xffff; + + spin_lock(&i8253_lock); + outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(skip & 0xff, PIT_CH0); /* LSB */ + outb(skip >> 8, PIT_CH0); /* MSB */ + spin_unlock(&i8253_lock); +} /* tsc timer_opts struct */ struct timer_opts timer_pit = { Index: linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_pm.c =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/timers/timer_pm.c 2006-02-27 20:31:14.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_pm.c 2006-02-27 20:31:23.000000000 +1100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +129,7 @@ pm_good: return -ENODEV; init_cpu_khz(); + set_dyntick_limits(((0xFFFFFF / 1000000) * 286 * HZ) >> 10, 0); return 0; } Index: linux-2.6.16-rc5-dt/include/asm-i386/apic.h =================================================================== --- linux-2.6.16-rc5-dt.orig/include/asm-i386/apic.h 2006-02-27 16:40:24.000000000 +1100 +++ linux-2.6.16-rc5-dt/include/asm-i386/apic.h 2006-02-27 20:31:23.000000000 +1100 @@ -121,6 +121,7 @@ extern void nmi_watchdog_tick (struct pt extern int APIC_init_uniprocessor (void); extern void disable_APIC_timer(void); extern void enable_APIC_timer(void); +extern void reprogram_apic_timer(unsigned long count); extern void enable_NMI_through_LVT0 (void * dummy); @@ -139,6 +140,7 @@ void switch_ipi_to_APIC_timer(void *cpum #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } +static inline void reprogram_apic_timer(unsigned long count) { } #endif /* !CONFIG_X86_LOCAL_APIC */ Index: linux-2.6.16-rc5-dt/include/asm-i386/dyntick.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.16-rc5-dt/include/asm-i386/dyntick.h 
2006-02-27 20:31:23.000000000 +1100 @@ -0,0 +1,75 @@ +/* + * linux/include/asm-i386/dyntick.h + * + * Copyright (C) 2004 Nokia Corporation + * Written by Tony Lindgen and + * Tuukka Tikkanen + * Rewritten by Con Kolivas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_I386_dyntick_H_ +#define _ASM_I386_dyntick_H_ + +#include +#include + +#ifdef CONFIG_NO_IDLE_HZ +extern void idle_reprogram_timer(void); +extern void __dyntick_interrupt(struct pt_regs *regs); +extern void dyntick_interrupt(struct pt_regs *regs); +extern void __init setup_dyntick_use_apic(void); +extern void __init dyntick_time_init(struct timer_opts *cur_timer); +extern void set_irq_idle_timestamp(const unsigned long next); + +#define PIT_MAX_SKIP (0xffff / (LATCH)) + +#if (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)) +extern int using_apic_timer; + +static inline void start_local_apic(void) +{ + using_apic_timer = 1; + enable_APIC_timer(); +} + +static inline void stop_local_apic(void) +{ + disable_APIC_timer(); + using_apic_timer = 0; +} +#else /* (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)) */ +static inline void start_local_apic(void) +{ +} + +static inline void stop_local_apic(void) +{ +} +#endif /* (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)) */ +#else /* CONFIG_NO_IDLE_HZ */ +static inline void idle_reprogram_timer(void) +{ +} + +static inline void __dyntick_interrupt(struct pt_regs *__unused) +{ +} + +static inline void dyntick_interrupt(struct pt_regs *__unused) +{ +} + +static inline void setup_dyntick_use_apic(void) +{ +} + +static inline void dyntick_time_init(struct timer_opts *__unused) +{ +} +#endif /* CONFIG_NO_IDLE_HZ */ + +#endif /* _ASM_I386_dyntick_H_ */ Index: linux-2.6.16-rc5-dt/include/asm-i386/timer.h =================================================================== --- 
linux-2.6.16-rc5-dt.orig/include/asm-i386/timer.h 2006-02-27 20:31:14.000000000 +1100 +++ linux-2.6.16-rc5-dt/include/asm-i386/timer.h 2006-02-27 20:31:23.000000000 +1100 @@ -1,5 +1,6 @@ #ifndef _ASMi386_TIMER_H #define _ASMi386_TIMER_H +#include #include #include @@ -38,6 +39,7 @@ struct init_timer_opts { extern struct timer_opts* __init select_timer(void); extern void clock_fallback(void); void setup_pit_timer(void); +extern void reprogram_pit_timer(unsigned long jiffies_to_skip); /* Modifiers for buggy PIT handling */ @@ -67,4 +69,37 @@ extern unsigned long calibrate_tsc_hpet( #ifdef CONFIG_X86_PM_TIMER extern struct init_timer_opts timer_pmtmr_init; #endif +#ifdef CONFIG_NO_IDLE_HZ +static inline void tsc_sanity_check(int lost) +{ +} +#else /* CONFIG_NO_IDLE_HZ */ +extern void cpufreq_delayed_get(void); + +static inline void tsc_sanity_check(int lost) +{ + static int lost_count = 0; + + if (lost >= 2) { + jiffies_64 += lost-1; + + /* sanity check to ensure we're not always losing ticks */ + if (lost_count++ > 100) { + printk(KERN_WARNING "Losing too many ticks!\n"); + printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); + printk(KERN_WARNING "Possible reasons for this are:\n"); + printk(KERN_WARNING " You're running with Speedstep,\n"); + printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); + printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); + printk(KERN_WARNING "Falling back to a sane timesource now.\n"); + + clock_fallback(); + } + /* ... 
but give the TSC a fair chance */ + if (lost_count > 25) + cpufreq_delayed_get(); + } else + lost_count = 0; +} +#endif /* CONFIG_NO_IDLE_HZ */ #endif Index: linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux-2.6.16-rc5-dt.orig/arch/i386/kernel/timers/timer_tsc.c 2006-02-27 20:31:14.000000000 +1100 +++ linux-2.6.16-rc5-dt/arch/i386/kernel/timers/timer_tsc.c 2006-02-27 20:31:23.000000000 +1100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -32,8 +33,6 @@ static unsigned long hpet_last; static struct timer_opts timer_tsc; #endif -static inline void cpufreq_delayed_get(void); - int tsc_disable __devinitdata = 0; static int use_tsc; @@ -180,10 +179,19 @@ static void delay_tsc(unsigned long loop } while ((now-bclock) < loops); } +/* update the monotonic base value */ +static inline void update_monotonic_base(unsigned long long last_offset) +{ + unsigned long long this_offset; + + this_offset = ((unsigned long long)last_tsc_high << 32) | last_tsc_low; + monotonic_base += cycles_2_ns(this_offset - last_offset); +} + #ifdef CONFIG_HPET_TIMER static int mark_offset_tsc_hpet(void) { - unsigned long long this_offset, last_offset; + unsigned long long last_offset; unsigned long offset, temp, hpet_current; int lost_ticks = 0; @@ -213,9 +221,7 @@ static int mark_offset_tsc_hpet(void) } hpet_last = hpet_current; - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); + update_monotonic_base(last_offset); write_sequnlock(&monotonic_lock); /* calculate delay_at_last_interrupt */ @@ -255,7 +261,7 @@ static void handle_cpufreq_delayed_get(v * to verify the CPU frequency the timing core thinks the CPU is running * at is still correct. 
*/ -static inline void cpufreq_delayed_get(void) +void cpufreq_delayed_get(void) { if (cpufreq_init && !cpufreq_delayed_issched) { cpufreq_delayed_issched = 1; @@ -339,7 +345,7 @@ static int __init cpufreq_tsc(void) core_initcall(cpufreq_tsc); #else /* CONFIG_CPU_FREQ */ -static inline void cpufreq_delayed_get(void) { return; } +void cpufreq_delayed_get(void) { return; } #endif int recalibrate_cpu_khz(void) @@ -371,8 +377,7 @@ static int mark_offset_tsc(void) int count; int countmp; static int count1 = 0; - unsigned long long this_offset, last_offset; - static int lost_count = 0; + unsigned long long last_offset; write_seqlock(&monotonic_lock); last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; @@ -439,29 +444,9 @@ static int mark_offset_tsc(void) delta += delay_at_last_interrupt; lost = delta/(1000000/HZ); delay = delta%(1000000/HZ); - if (lost >= 2 && detect_lost_ticks) { - jiffies_64 += lost-1; - - /* sanity check to ensure we're not always losing ticks */ - if (lost_count++ > 100) { - printk(KERN_WARNING "Losing too many ticks!\n"); - printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); - printk(KERN_WARNING "Possible reasons for this are:\n"); - printk(KERN_WARNING " You're running with Speedstep,\n"); - printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); - printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); - printk(KERN_WARNING "Falling back to a sane timesource now.\n"); + tsc_sanity_check(lost); - clock_fallback(); - } - /* ... 
but give the TSC a fair chance */ - if (lost_count > 25) - cpufreq_delayed_get(); - } else - lost_count = 0; - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); + update_monotonic_base(last_offset); write_sequnlock(&monotonic_lock); /* calculate delay_at_last_interrupt */ @@ -564,6 +549,8 @@ static int __init init_tsc(char* overrid cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz); + set_dyntick_limits((0xFFFFFFFF / (cpu_khz * 1000)) * + HZ, 0); return 0; } }