Index: linux-2.6.13-rc6-ck1/arch/i386/Kconfig =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/Kconfig 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/Kconfig 2005-08-14 00:40:54.000000000 +1000 @@ -458,6 +458,41 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && RTC=y default y +config NO_IDLE_HZ + bool "Dynamic Tick Timer - Skip timer ticks during idle" + depends on EXPERIMENTAL + help + This option enables support for skipping timer ticks when the + processor is idle. During system load, timer is continuous. + This option saves power, as it allows the system to stay in + idle mode longer. Currently supported timers are ACPI PM + timer, local APIC timer, and TSC timer. HPET timer is currently + not supported. + + Note that you can disable dynamic tick timer either by + passing dyntick=disable command line option, or via sysfs: + + # echo 0 > /sys/devices/system/dyn_tick/dyn_tick0/enable + +config DYN_TICK_USE_APIC + bool "Use APIC timer instead of PIT timer" + depends on NO_IDLE_HZ + help + This option enables using APIC timer interrupt if your hardware + supports it. APIC timer allows longer sleep periods compared + to PIT timer, however on MOST recent hardware disabling the PIT + timer also disables APIC timer interrupts, and the system won't + run properly. Symptoms include slow system boot, and time running + slow. + + If unsure, do NOT enable this option. 
+ + Note that you can disable apic usage by dynamic tick timer + either by passing dyntick=noapic command line option, or via + sysfs: + + # echo 0 > /sys/devices/system/dyn_tick/dyn_tick0/useapic + config SMP bool "Symmetric multi-processing support" ---help--- Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/apic.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/apic.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/apic.c 2005-08-14 00:40:55.000000000 +1000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -931,6 +932,8 @@ void (*wait_timer_tick)(void) __devinitd #define APIC_DIVISOR 16 +u32 apic_timer_val; + static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; @@ -949,7 +952,9 @@ static void __setup_APIC_LVTT(unsigned i & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + apic_timer_val = clocks / APIC_DIVISOR; + + apic_write_around(APIC_TMICT, apic_timer_val); } static void __devinit setup_APIC_timer(unsigned int clocks) @@ -1062,6 +1067,9 @@ void __init setup_boot_APIC_clock(void) */ setup_APIC_timer(calibration_result); + setup_dyn_tick_use_apic(calibration_result); + if (calibration_result) /* 0 means APIC unusable; never divide by it */ + set_dyn_tick_max_skip((0xFFFFFFFF / calibration_result) * APIC_DIVISOR); local_irq_enable(); } @@ -1200,6 +1208,9 @@ fastcall void smp_apic_timer_interrupt(s * interrupt lock, which is the WrongThing (tm) to do. */ irq_enter(); + + dyn_tick_interrupt(LOCAL_TIMER_VECTOR, regs); + smp_local_timer_interrupt(regs); irq_exit(); } @@ -1212,6 +1223,9 @@ fastcall void smp_spurious_interrupt(str unsigned long v; irq_enter(); + + dyn_tick_interrupt(SPURIOUS_APIC_VECTOR, regs); + /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... 
@@ -1236,6 +1250,9 @@ fastcall void smp_error_interrupt(struct unsigned long v, v1; irq_enter(); + + dyn_tick_interrupt(ERROR_APIC_VECTOR, regs); + /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); apic_write(APIC_ESR, 0); Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/dyn-tick.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/dyn-tick.c 2005-01-12 16:19:45.000000000 +1100 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/dyn-tick.c 2005-08-14 00:40:55.000000000 +1000 @@ -0,0 +1,137 @@ +/* + * linux/arch/i386/kernel/dyn-tick.c + * + * Copyright (C) 2004 Nokia Corporation + * Written by Tony Lindgen and + * Tuukka Tikkanen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +static void arch_reprogram_timer(unsigned long jif_next) +{ + unsigned int skip = jif_next - jiffies; + + if (cpu_has_local_apic()) { + if (dyn_tick->state & DYN_TICK_TIMER_INT) + reprogram_apic_timer(skip); + } else { + if (dyn_tick->state & DYN_TICK_TIMER_INT) + reprogram_pit_timer(skip); + else + disable_pit_timer(); + } + + /* Fixme: Disable NMI Watchdog */ +} + +static void arch_all_cpus_idle(int how_long) +{ + if (cpu_has_local_apic()) + if (dyn_tick->state & DYN_TICK_TIMER_INT) + disable_pit_timer(); +} + +static struct dyn_tick_timer arch_dyn_tick_timer = { + .arch_reprogram_timer = &arch_reprogram_timer, + .arch_all_cpus_idle = &arch_all_cpus_idle, +}; + +static int __init dyn_tick_init(void) +{ + arch_dyn_tick_timer.arch_init = dyn_tick_arch_init; + dyn_tick_register(&arch_dyn_tick_timer); + + return 0; +} + +arch_initcall(dyn_tick_init); + +int __init dyn_tick_arch_init(void) +{ + + if (!(dyn_tick->state & DYN_TICK_USE_APIC) || !cpu_has_local_apic()) + 
dyn_tick->max_skip = 0xffff / LATCH; /* PIT timer length */ + printk(KERN_INFO "dyn-tick: Maximum ticks to skip limited to %i\n", + dyn_tick->max_skip); + + return 0; +} + +/* Functions that need blank prototypes for !CONFIG_NO_IDLE_HZ below here */ +void set_dyn_tick_max_skip(unsigned int max_skip) +{ + if (!dyn_tick->max_skip || max_skip < dyn_tick->max_skip) + dyn_tick->max_skip = max_skip; +} + +void setup_dyn_tick_use_apic(unsigned int calibration_result) +{ + if (calibration_result) + dyn_tick->state |= DYN_TICK_USE_APIC; + else + printk(KERN_INFO "dyn-tick: Cannot use local APIC\n"); +} + +void dyn_tick_interrupt(int irq, struct pt_regs *regs) +{ + int all_were_sleeping = 0; + int cpu = smp_processor_id(); + + if (!cpu_isset(cpu, nohz_cpu_mask)) + return; + + spin_lock(&dyn_tick_lock); + + if (cpus_equal(nohz_cpu_mask, cpu_online_map)) + all_were_sleeping = 1; + cpu_clear(cpu, nohz_cpu_mask); + + if (all_were_sleeping) { + /* Recover jiffies */ + cur_timer->mark_offset(); + if (cpu_has_local_apic()) + if (dyn_tick->state & DYN_TICK_TIMER_INT) + enable_pit_timer(); + } + + spin_unlock(&dyn_tick_lock); + + if (cpu_has_local_apic()) { + /* Fixme: Needs to be more accurate */ + reprogram_apic_timer(1); + } else { + reprogram_pit_timer(1); + } + + conditional_run_local_timers(); + + /* Fixme: Enable NMI watchdog */ +} + + +void dyn_tick_time_init(struct timer_opts *cur_timer) +{ + spin_lock_init(&dyn_tick_lock); + + if (strncmp(cur_timer->name, "tsc", 3) == 0 || + strncmp(cur_timer->name, "pmtmr", 5) == 0) { + dyn_tick->state |= DYN_TICK_SUITABLE; + printk(KERN_INFO "dyn-tick: Found suitable timer: %s\n", + cur_timer->name); + } else + printk(KERN_ERR "dyn-tick: Cannot use timer %s\n", + cur_timer->name); +} Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/io_apic.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/io_apic.c 2005-08-14 00:39:52.000000000 +1000 +++ 
linux-2.6.13-rc6-ck1/arch/i386/kernel/io_apic.c 2005-08-14 00:40:55.000000000 +1000 @@ -1157,6 +1157,7 @@ next: static struct hw_interrupt_type ioapic_level_type; static struct hw_interrupt_type ioapic_edge_type; +static struct hw_interrupt_type ioapic_edge_type_irq0; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -1168,15 +1169,19 @@ static inline void ioapic_register_intr( if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) irq_desc[vector].handler = &ioapic_level_type; - else + else if (vector) irq_desc[vector].handler = &ioapic_edge_type; + else + irq_desc[vector].handler = &ioapic_edge_type_irq0; set_intr_gate(vector, interrupt[vector]); } else { if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) irq_desc[irq].handler = &ioapic_level_type; - else + else if (irq) irq_desc[irq].handler = &ioapic_edge_type; + else + irq_desc[irq].handler = &ioapic_edge_type_irq0; set_intr_gate(vector, interrupt[irq]); } } @@ -1288,7 +1293,7 @@ static void __init setup_ExtINT_IRQ0_pin * The timer IRQ doesn't have to know that behind the * scene we have a 8259A-master in AEOI mode ... 
*/ - irq_desc[0].handler = &ioapic_edge_type; + irq_desc[0].handler = &ioapic_edge_type_irq0; /* * Add it to the IO-APIC irq-routing table: @@ -2014,6 +2019,18 @@ static struct hw_interrupt_type ioapic_l .set_affinity = set_ioapic_affinity, }; +/* Needed to disable PIT interrupts when all CPUs sleep */ +static struct hw_interrupt_type ioapic_edge_type_irq0 = { + .typename = "IO-APIC-edge-irq0", + .startup = startup_edge_ioapic, + .shutdown = shutdown_edge_ioapic, + .enable = unmask_IO_APIC_irq, + .disable = mask_IO_APIC_irq, + .ack = ack_edge_ioapic, + .end = end_edge_ioapic, + .set_affinity = set_ioapic_affinity, +}; + static inline void init_IO_APIC_traps(void) { int irq; Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/irq.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/irq.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/irq.c 2005-08-14 00:40:54.000000000 +1000 @@ -18,6 +18,7 @@ #include #include #include +#include DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); @@ -76,6 +77,8 @@ fastcall unsigned int do_IRQ(struct pt_r } #endif + dyn_tick_interrupt(irq, regs); + #ifdef CONFIG_4KSTACKS curctx = (union irq_ctx *) current_thread_info(); Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/Makefile =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/Makefile 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/Makefile 2005-08-14 00:40:54.000000000 +1000 @@ -32,6 +32,7 @@ obj-$(CONFIG_MODULES) += module.o obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o +obj-$(CONFIG_NO_IDLE_HZ) += dyn-tick.o obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/process.c 
=================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/process.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/process.c 2005-08-14 00:40:55.000000000 +1000 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -200,6 +201,8 @@ void cpu_idle(void) if (cpu_is_offline(cpu)) play_dead(); + dyn_tick_reprogram_timer(); + __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/smp.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/smp.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/smp.c 2005-08-14 00:40:55.000000000 +1000 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -314,6 +315,8 @@ fastcall void smp_invalidate_interrupt(s { unsigned long cpu; + dyn_tick_interrupt(INVALIDATE_TLB_VECTOR, regs); + cpu = get_cpu(); if (!cpu_isset(cpu, flush_cpumask)) @@ -601,6 +604,8 @@ void smp_send_stop(void) fastcall void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); + + dyn_tick_interrupt(RESCHEDULE_VECTOR, regs); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) @@ -610,6 +615,9 @@ fastcall void smp_call_function_interrup int wait = call_data->wait; ack_APIC_irq(); + + dyn_tick_interrupt(CALL_FUNCTION_VECTOR, regs); + /* * Notify initiating CPU that I've grabbed the data and am * about to execute the function Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/time.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/time.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/time.c 2005-08-14 00:40:55.000000000 +1000 @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -253,7 +254,7 @@ EXPORT_SYMBOL(profile_pc); * as well as call the "do_timer()" routine every 
clocktick */ static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) + struct pt_regs *regs) { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { @@ -423,7 +424,7 @@ static struct sysdev_class timer_sysclas /* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { +struct sys_device device_timer = { .id = 0, .cls = &timer_sysclass, }; @@ -479,5 +480,7 @@ void __init time_init(void) cur_timer = select_timer(); printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); + dyn_tick_time_init(cur_timer); + time_init_hook(); } Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/timers/timer_pit.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/timers/timer_pit.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/timers/timer_pit.c 2005-08-14 00:40:55.000000000 +1000 @@ -148,6 +148,38 @@ static unsigned long get_offset_pit(void return count; } +void disable_pit_timer(void) +{ + irq_desc[0].handler->disable(0); +} + +void enable_pit_timer(void) +{ + irq_desc[0].handler->enable(0); +} + +/* + * Reprograms the next timer interrupt + * PIT timer reprogramming code taken from APM code. + * Note that PIT timer is a 16-bit timer, which allows max + * skip of only few seconds. 
+ */ +void reprogram_pit_timer(int jiffies_to_skip) +{ + int skip; + extern spinlock_t i8253_lock; + unsigned long flags; + + skip = jiffies_to_skip * LATCH; + if (skip > 0xffff) + skip = 0xffff; + + spin_lock_irqsave(&i8253_lock, flags); + outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(skip & 0xff, PIT_CH0); /* LSB */ + outb(skip >> 8, PIT_CH0); /* MSB */ + spin_unlock_irqrestore(&i8253_lock, flags); +} /* tsc timer_opts struct */ struct timer_opts timer_pit = { Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/timers/timer_pm.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/timers/timer_pm.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/timers/timer_pm.c 2005-08-14 00:40:55.000000000 +1000 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +129,8 @@ pm_good: return -ENODEV; init_cpu_khz(); + /* 24-bit PM timer at 3.579545 MHz wraps after ~4.68 s: skip at most 4 s */ + set_dyn_tick_max_skip((0xFFFFFF / 3579545) * HZ); return 0; } Index: linux-2.6.13-rc6-ck1/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/arch/i386/kernel/timers/timer_tsc.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/arch/i386/kernel/timers/timer_tsc.c 2005-08-14 00:40:55.000000000 +1000 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -166,10 +167,19 @@ static void delay_tsc(unsigned long loop } while ((now-bclock) < loops); } +/* update the monotonic base value */ +static inline void update_monotonic_base(unsigned long long last_offset) +{ + unsigned long long this_offset; + + this_offset = ((unsigned long long)last_tsc_high << 32) | last_tsc_low; + monotonic_base += cycles_2_ns(this_offset - last_offset); +} + #ifdef CONFIG_HPET_TIMER static void mark_offset_tsc_hpet(void) { - unsigned long long this_offset, last_offset; + unsigned long long last_offset; unsigned long 
offset, temp, hpet_current; write_seqlock(&monotonic_lock); @@ -197,9 +207,7 @@ static void mark_offset_tsc_hpet(void) } hpet_last = hpet_current; - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); + update_monotonic_base(last_offset); write_sequnlock(&monotonic_lock); /* calculate delay_at_last_interrupt */ @@ -346,7 +354,7 @@ static void mark_offset_tsc(void) int count; int countmp; static int count1 = 0; - unsigned long long this_offset, last_offset; + unsigned long long last_offset; static int lost_count = 0; write_seqlock(&monotonic_lock); @@ -417,6 +425,7 @@ static void mark_offset_tsc(void) if (lost >= 2) { jiffies_64 += lost-1; +#ifndef CONFIG_NO_IDLE_HZ /* sanity check to ensure we're not always losing ticks */ if (lost_count++ > 100) { printk(KERN_WARNING "Losing too many ticks!\n"); @@ -432,11 +441,11 @@ static void mark_offset_tsc(void) /* ... but give the TSC a fair chance */ if (lost_count > 25) cpufreq_delayed_get(); +#endif } else lost_count = 0; - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); + + update_monotonic_base(last_offset); write_sequnlock(&monotonic_lock); /* calculate delay_at_last_interrupt */ @@ -537,6 +546,8 @@ static int __init init_tsc(char* overrid cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); + /* Fixme: Make use of 64-bit TSC to recover jiffies */ + set_dyn_tick_max_skip( (0xFFFFFFFF / (cpu_khz * 1000)) * HZ); return 0; } } Index: linux-2.6.13-rc6-ck1/include/asm-i386/dyn-tick.h =================================================================== --- linux-2.6.13-rc6-ck1.orig/include/asm-i386/dyn-tick.h 2005-01-12 16:19:45.000000000 +1100 +++ linux-2.6.13-rc6-ck1/include/asm-i386/dyn-tick.h 2005-08-14 00:40:55.000000000 +1000 @@ -0,0 +1,90 @@ +/* + * 
linux/include/asm-i386/dyn-tick.h + * + * Copyright (C) 2004 Nokia Corporation + * Written by Tony Lindgen and + * Tuukka Tikkanen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_I386_DYN_TICK_H_ +#define _ASM_I386_DYN_TICK_H_ + +#include + +#ifdef CONFIG_NO_IDLE_HZ +extern int dyn_tick_arch_init(void); +extern void disable_pit_timer(void); +extern void enable_pit_timer(void); +extern void reprogram_pit_timer(int jiffies_to_skip); +extern void set_dyn_tick_max_skip(unsigned int max_skip); +extern void setup_dyn_tick_use_apic(unsigned int calibration_result); +extern void dyn_tick_interrupt(int irq, struct pt_regs *regs); +extern void dyn_tick_time_init(struct timer_opts *cur_timer); +extern u32 apic_timer_val; + +#if defined(CONFIG_DYN_TICK_USE_APIC) +#define DYNTICK_APICABLE 1 + +#if (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)) +static inline int cpu_has_local_apic(void) +{ + return (dyn_tick->state & DYN_TICK_USE_APIC); +} + +#else /* (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)) */ +static inline int cpu_has_local_apic(void) +{ + return 0; +} +#endif /* (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)) */ + +#else /* defined(CONFIG_DYN_TICK_USE_APIC) */ +#define DYNTICK_APICABLE 0 +static inline int cpu_has_local_apic(void) +{ + return 0; +} +#endif /* defined(CONFIG_DYN_TICK_USE_APIC) */ + +static inline void reprogram_apic_timer(unsigned int count) +{ +#ifdef CONFIG_X86_LOCAL_APIC + unsigned long flags; + + /* Fixme: Make count more accurate. Otherwise can lead + * to latencies of upto 1 jiffy in servicing timers. 
+ */ + count *= apic_timer_val; + local_irq_save(flags); + apic_write_around(APIC_TMICT, count); + local_irq_restore(flags); +#endif /* CONFIG_X86_LOCAL_APIC */ +} + +#else /* CONFIG_NO_IDLE_HZ */ +static inline void set_dyn_tick_max_skip(unsigned int max_skip) +{ +} + +static inline void reprogram_apic_timer(unsigned int count) +{ +} + +static inline void setup_dyn_tick_use_apic(unsigned int calibration_result) +{ +} + +static inline void dyn_tick_interrupt(int irq, struct pt_regs *regs) +{ +} + +static inline void dyn_tick_time_init(struct timer_opts *cur_timer) +{ +} +#endif /* CONFIG_NO_IDLE_HZ */ + +#endif /* _ASM_I386_DYN_TICK_H_ */ Index: linux-2.6.13-rc6-ck1/include/linux/dyn-tick.h =================================================================== --- linux-2.6.13-rc6-ck1.orig/include/linux/dyn-tick.h 2005-01-12 16:19:45.000000000 +1100 +++ linux-2.6.13-rc6-ck1/include/linux/dyn-tick.h 2005-08-14 00:40:55.000000000 +1000 @@ -0,0 +1,74 @@ +/* + * linux/include/linux/dyn-tick.h + * + * Copyright (C) 2004 Nokia Corporation + * Written by Tony Lindgen and + * Tuukka Tikkanen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _DYN_TICK_TIMER_H +#define _DYN_TICK_TIMER_H + +#include +#include + +#define DYN_TICK_APICABLE (1 << 5) +#define DYN_TICK_TIMER_INT (1 << 4) +#define DYN_TICK_USE_APIC (1 << 3) +#define DYN_TICK_SKIPPING (1 << 2) +#define DYN_TICK_ENABLED (1 << 1) +#define DYN_TICK_SUITABLE (1 << 0) + +#define DYN_TICK_MIN_SKIP 2 + +struct dyn_tick_state { + unsigned int state; /* Current state */ + unsigned int max_skip; /* Max number of ticks to skip */ +}; + +struct dyn_tick_timer { + int (*arch_init) (void); + void (*arch_enable) (void); + void (*arch_disable) (void); + void (*arch_reprogram_timer) (unsigned long); + void (*arch_all_cpus_idle) (int); +}; + +extern struct dyn_tick_state *dyn_tick; +extern spinlock_t dyn_tick_lock; +extern void dyn_tick_register(struct dyn_tick_timer *new_timer); + +#ifdef CONFIG_NO_IDLE_HZ +extern unsigned long dyn_tick_reprogram_timer(void); + +static inline int dyn_tick_enabled(void) +{ + return (dyn_tick->state & DYN_TICK_ENABLED); +} + +#else /* CONFIG_NO_IDLE_HZ */ +static inline int arch_has_safe_halt(void) +{ + return 0; +} + +/* Stub for !CONFIG_NO_IDLE_HZ: reports that no ticks are skipped */ +static inline unsigned long dyn_tick_reprogram_timer(void) +{ + return 0; +} + +static inline int dyn_tick_enabled(void) +{ + return 0; +} +#endif /* CONFIG_NO_IDLE_HZ */ + +/* Pick up arch specific header */ +#include + +#endif /* _DYN_TICK_TIMER_H */ Index: linux-2.6.13-rc6-ck1/include/linux/timer.h =================================================================== --- linux-2.6.13-rc6-ck1.orig/include/linux/timer.h 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/include/linux/timer.h 2005-08-14 00:40:55.000000000 +1000 @@ -87,6 +87,7 @@ static inline void add_timer(struct time extern void init_timers(void); extern void run_local_timers(void); +extern void conditional_run_local_timers(void); extern void it_real_fn(unsigned long); #endif Index: linux-2.6.13-rc6-ck1/kernel/dyn-tick.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/kernel/dyn-tick.c 
2005-01-12 16:19:45.000000000 +1100 +++ linux-2.6.13-rc6-ck1/kernel/dyn-tick.c 2005-08-14 00:40:55.000000000 +1000 @@ -0,0 +1,272 @@ +/* + * linux/kernel/dyn-tick.c + * + * Beginnings of generic dynamic tick timer support + * + * Copyright (C) 2004 Nokia Corporation + * Written by Tony Lindgen and + * Tuukka Tikkanen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "io_ports.h" + +#define DYN_TICK_VERSION "050610-1" +#define DYN_TICK_IS_SET(x) ((dyn_tick->state & (x)) == (x)) + +static struct dyn_tick_state dyn_tick_state; +struct dyn_tick_state *dyn_tick = &dyn_tick_state; +static struct dyn_tick_timer *dyn_tick_cfg; +spinlock_t dyn_tick_lock; + +/* + * Arch independent code needed to reprogram next timer interrupt. + * Gets called from cpu_idle() before entering idle loop. 
+ */ +unsigned long dyn_tick_reprogram_timer(void) +{ + int cpu = smp_processor_id(); + unsigned long delta = 0, flags; + + if (!DYN_TICK_IS_SET(DYN_TICK_ENABLED)) + return 0; + + local_irq_save(flags); + /* Work pending: keep the tick, but leave via out: so irqs are restored */ + if (rcu_pending(cpu) || local_softirq_pending()) + goto out; + + /* Check if we can start skipping ticks */ + write_seqlock(&xtime_lock); + + delta = next_timer_interrupt() - jiffies; + if (delta > dyn_tick->max_skip) + delta = dyn_tick->max_skip; + + if (delta > DYN_TICK_MIN_SKIP) { + int idle_time = 0; + + spin_lock(&dyn_tick_lock); + + dyn_tick_cfg->arch_reprogram_timer(jiffies + delta); + + cpu_set(cpu, nohz_cpu_mask); + if (cpus_equal(nohz_cpu_mask, cpu_online_map)) + /* Fixme: idle_time needs to be computed */ + dyn_tick_cfg->arch_all_cpus_idle(idle_time); + + spin_unlock(&dyn_tick_lock); + + } else + delta = 0; + + write_sequnlock(&xtime_lock); +out: + local_irq_restore(flags); + + return delta; +} + +void __init dyn_tick_register(struct dyn_tick_timer *arch_timer) +{ + dyn_tick_cfg = arch_timer; + printk(KERN_INFO "dyn-tick: Registering dynamic tick timer v%s\n", + DYN_TICK_VERSION); +} + +/* + * --------------------------------------------------------------------------- + * Command line options + * --------------------------------------------------------------------------- + */ +static int __initdata dyntick_autoenable = 1; +static int __initdata dyntick_useapic = 1; + +/* + * dyntick=[disable],[noapic] + */ +static int __init dyntick_setup(char *options) +{ + if (!options) + return 0; + + if (strstr(options, "disable")) + dyntick_autoenable = 0; + + if (strstr(options, "noapic")) + dyntick_useapic = 0; + + return 0; +} + +__setup("dyntick=", dyntick_setup); + +/* + * --------------------------------------------------------------------------- + * Sysfs interface + * --------------------------------------------------------------------------- + */ + +extern struct sys_device device_timer; + +static ssize_t show_dyn_tick_state(struct sys_device *dev, char 
*buf) +{ + return sprintf(buf, + "suitable:\t%i\n" + "enabled:\t%i\n" + "apic suitable:\t%i\n" + "using APIC:\t%i\n", + DYN_TICK_IS_SET(DYN_TICK_SUITABLE), + DYN_TICK_IS_SET(DYN_TICK_ENABLED), + DYN_TICK_IS_SET(DYN_TICK_APICABLE), + DYN_TICK_IS_SET(DYN_TICK_USE_APIC)); +} + +static ssize_t show_dyn_tick_enable(struct sys_device *dev, char *buf) +{ + return sprintf(buf, "enabled:\t%i\n", + DYN_TICK_IS_SET(DYN_TICK_ENABLED)); +} + +static ssize_t set_dyn_tick_enable(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long flags; + unsigned int enable = simple_strtoul(buf, NULL, 2); + + write_seqlock_irqsave(&xtime_lock, flags); + if (enable) { + if (dyn_tick_cfg->arch_enable) + dyn_tick_cfg->arch_enable(); + dyn_tick->state |= DYN_TICK_ENABLED; + } else { + if (dyn_tick_cfg->arch_disable) + dyn_tick_cfg->arch_disable(); + dyn_tick->state &= ~DYN_TICK_ENABLED; + } + write_sequnlock_irqrestore(&xtime_lock, flags); + + return count; +} + +static ssize_t show_dyn_tick_useapic(struct sys_device *dev, char *buf) +{ + return sprintf(buf, "using APIC:\t%i\n", + DYN_TICK_IS_SET(DYN_TICK_USE_APIC)); +} + +static ssize_t set_dyn_tick_useapic(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long flags; + unsigned int enable = simple_strtoul(buf, NULL, 2); + + if (!DYN_TICK_IS_SET(DYN_TICK_APICABLE)) + goto out; + write_seqlock_irqsave(&xtime_lock, flags); + if (enable) + dyn_tick->state |= DYN_TICK_USE_APIC; + else + dyn_tick->state &= ~DYN_TICK_USE_APIC; + write_sequnlock_irqrestore(&xtime_lock, flags); +out: + return count; +} + +static SYSDEV_ATTR(state, 0444, show_dyn_tick_state, NULL); +static SYSDEV_ATTR(enable, 0644, show_dyn_tick_enable, + set_dyn_tick_enable); +static SYSDEV_ATTR(useapic, 0644, show_dyn_tick_useapic, + set_dyn_tick_useapic); + +static struct sysdev_class dyn_tick_sysclass = { + set_kset_name("dyn_tick"), +}; + +static struct sys_device device_dyn_tick = { + .id = 0, + .cls = &dyn_tick_sysclass, +}; + +static int 
init_dyn_tick_sysfs(void) +{ + int error = 0; + if ((error = sysdev_class_register(&dyn_tick_sysclass))) + goto out; + if ((error = sysdev_register(&device_dyn_tick))) + goto out; + if ((error = sysdev_create_file(&device_dyn_tick, &attr_state))) + goto out; + if ((error = sysdev_create_file(&device_dyn_tick, &attr_enable))) + goto out; + error = sysdev_create_file(&device_dyn_tick, &attr_useapic); + +out: + return error; +} + +device_initcall(init_dyn_tick_sysfs); + +/* + * --------------------------------------------------------------------------- + * Init functions + * --------------------------------------------------------------------------- + */ + +static int __init dyn_tick_early_init(void) +{ + dyn_tick->state |= DYN_TICK_TIMER_INT; + return 0; +} + +subsys_initcall(dyn_tick_early_init); + +/* + * We need to initialize dynamic tick after calibrate delay + */ +static int __init dyn_tick_late_init(void) +{ + int ret = 0; + + if (dyn_tick_cfg == NULL || dyn_tick_cfg->arch_init == NULL || + !DYN_TICK_IS_SET(DYN_TICK_SUITABLE)) { + printk(KERN_ERR "dyn-tick: No suitable timer found\n"); + return -ENODEV; + } + + if (DYNTICK_APICABLE) + dyn_tick->state |= DYN_TICK_APICABLE; + if (!dyntick_useapic || !DYN_TICK_IS_SET(DYN_TICK_APICABLE)) + dyn_tick->state &= ~DYN_TICK_USE_APIC; + + if ((ret = dyn_tick_cfg->arch_init())) { + printk(KERN_ERR "dyn-tick: Init failed\n"); + return -ENODEV; + } + + if (!ret && dyntick_autoenable) { + dyn_tick->state |= DYN_TICK_ENABLED; + printk(KERN_INFO "dyn-tick: Timer using dynamic tick\n"); + } else + printk(KERN_INFO "dyn-tick: Timer not enabled during boot\n"); + + return ret; +} + +late_initcall(dyn_tick_late_init); Index: linux-2.6.13-rc6-ck1/kernel/Makefile =================================================================== --- linux-2.6.13-rc6-ck1.orig/kernel/Makefile 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/kernel/Makefile 2005-08-14 00:40:54.000000000 +1000 @@ -30,6 +30,7 @@ obj-$(CONFIG_SYSFS) += 
ksysfs.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_SECCOMP) += seccomp.o +obj-$(CONFIG_NO_IDLE_HZ) += dyn-tick.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is Index: linux-2.6.13-rc6-ck1/kernel/timer.c =================================================================== --- linux-2.6.13-rc6-ck1.orig/kernel/timer.c 2005-08-14 00:39:52.000000000 +1000 +++ linux-2.6.13-rc6-ck1/kernel/timer.c 2005-08-14 00:40:55.000000000 +1000 @@ -924,6 +924,14 @@ void run_local_timers(void) raise_softirq(TIMER_SOFTIRQ); } +void conditional_run_local_timers(void) +{ + tvec_base_t *base = &__get_cpu_var(tvec_bases); + + if (base->timer_jiffies != jiffies) + run_local_timers(); +} + /* * Called by the timer interrupt. xtime_lock must already be taken * by the timer IRQ!