Index: linux-2.6.10-rc1/arch/i386/Kconfig
===================================================================
--- linux-2.6.10-rc1.orig/arch/i386/Kconfig	2004-10-24 23:59:12.346705122 +1000
+++ linux-2.6.10-rc1/arch/i386/Kconfig	2004-10-25 00:11:43.964926467 +1000
@@ -513,6 +513,17 @@ config PREEMPT
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system.  Say N if you are unsure.
 
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT || SMP
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors" if !SMP
 	depends on !(X86_VISWS || X86_VOYAGER)
Index: linux-2.6.10-rc1/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6.10-rc1.orig/arch/i386/kernel/traps.c	2004-10-24 23:51:03.937349025 +1000
+++ linux-2.6.10-rc1/arch/i386/kernel/traps.c	2004-10-25 00:11:43.965926314 +1000
@@ -306,7 +306,7 @@ void die(const char * str, struct pt_reg
 	};
 	static int die_counter;
 
-	if (die.lock_owner != smp_processor_id()) {
+	if (die.lock_owner != _smp_processor_id()) {
 		console_verbose();
 		spin_lock_irq(&die.lock);
 		die.lock_owner = smp_processor_id();
Index: linux-2.6.10-rc1/arch/i386/lib/delay.c
===================================================================
--- linux-2.6.10-rc1.orig/arch/i386/lib/delay.c	2004-08-15 14:08:04.000000000 +1000
+++ linux-2.6.10-rc1/arch/i386/lib/delay.c	2004-10-25 00:11:43.965926314 +1000
@@ -34,7 +34,7 @@ inline void __const_udelay(unsigned long
 	xloops *= 4;
 	__asm__("mull %0"
 		:"=d" (xloops), "=&a" (d0)
-		:"1" (xloops),"0" (current_cpu_data.loops_per_jiffy * (HZ/4)));
+		:"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4)));
         __delay(++xloops);
 }
 
Index: linux-2.6.10-rc1/arch/sh/lib/delay.c
===================================================================
--- linux-2.6.10-rc1.orig/arch/sh/lib/delay.c	2004-08-15 14:08:05.000000000 +1000
+++ linux-2.6.10-rc1/arch/sh/lib/delay.c	2004-10-25 00:11:43.965926314 +1000
@@ -24,7 +24,7 @@ inline void __const_udelay(unsigned long
 	__asm__("dmulu.l	%0, %2\n\t"
 		"sts	mach, %0"
 		: "=r" (xloops)
-		: "0" (xloops), "r" (current_cpu_data.loops_per_jiffy)
+		: "0" (xloops), "r" (cpu_data[_smp_processor_id()].loops_per_jiffy)
 		: "macl", "mach");
 	__delay(xloops * HZ);
 }
Index: linux-2.6.10-rc1/arch/sparc64/lib/delay.c
===================================================================
--- linux-2.6.10-rc1.orig/arch/sparc64/lib/delay.c	2004-10-19 08:57:05.000000000 +1000
+++ linux-2.6.10-rc1/arch/sparc64/lib/delay.c	2004-10-25 00:11:43.966926161 +1000
@@ -31,7 +31,7 @@ void __const_udelay(unsigned long n)
 {
 	n *= 4;
 
-	n *= (cpu_data(smp_processor_id()).udelay_val * (HZ/4));
+	n *= (cpu_data(_smp_processor_id()).udelay_val * (HZ/4));
 	n >>= 32;
 
 	__delay(n + 1);
Index: linux-2.6.10-rc1/arch/x86_64/Kconfig
===================================================================
--- linux-2.6.10-rc1.orig/arch/x86_64/Kconfig	2004-10-24 23:51:04.365282393 +1000
+++ linux-2.6.10-rc1/arch/x86_64/Kconfig	2004-10-25 00:11:43.966926161 +1000
@@ -244,6 +244,17 @@ config PREEMPT
 	  Say Y here if you are feeling brave and building a kernel for a
 	  desktop, embedded or real-time system.  Say N if you are unsure.
 
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT || SMP
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP
Index: linux-2.6.10-rc1/arch/x86_64/lib/delay.c
===================================================================
--- linux-2.6.10-rc1.orig/arch/x86_64/lib/delay.c	2004-03-11 21:28:56.000000000 +1100
+++ linux-2.6.10-rc1/arch/x86_64/lib/delay.c	2004-10-25 00:11:43.967926008 +1000
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)
 
 inline void __const_udelay(unsigned long xloops)
 {
-        __delay(((xloops * current_cpu_data.loops_per_jiffy) >> 32) * HZ);
+	__delay(((xloops * cpu_data[_smp_processor_id()].loops_per_jiffy) >> 32) * HZ);
 }
 
 void __udelay(unsigned long usecs)
Index: linux-2.6.10-rc1/include/asm-i386/smp.h
===================================================================
--- linux-2.6.10-rc1.orig/include/asm-i386/smp.h	2004-10-19 08:57:11.000000000 +1000
+++ linux-2.6.10-rc1/include/asm-i386/smp.h	2004-10-25 00:11:43.967926008 +1000
@@ -50,7 +50,7 @@ extern u8 x86_cpu_to_apicid[];
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define smp_processor_id() (current_thread_info()->cpu)
+#define __smp_processor_id() (current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
 #define cpu_possible_map cpu_callout_map
Index: linux-2.6.10-rc1/include/asm-x86_64/smp.h
===================================================================
--- linux-2.6.10-rc1.orig/include/asm-x86_64/smp.h	2004-10-19 08:57:12.000000000 +1000
+++ linux-2.6.10-rc1/include/asm-x86_64/smp.h	2004-10-25 00:11:43.968925854 +1000
@@ -66,7 +66,7 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_callout_map);
 }
 
-#define smp_processor_id() read_pda(cpunumber)
+#define __smp_processor_id() read_pda(cpunumber)
 
 extern __inline int hard_smp_processor_id(void)
 {
Index: linux-2.6.10-rc1/include/linux/hardirq.h
===================================================================
--- linux-2.6.10-rc1.orig/include/linux/hardirq.h	2004-10-25 00:10:16.653258320 +1000
+++ linux-2.6.10-rc1/include/linux/hardirq.h	2004-10-25 00:11:43.969925701 +1000
@@ -61,12 +61,16 @@
 #define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
 
-#ifdef CONFIG_PREEMPT
+#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
 # define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
+#else
+# define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+#endif
+
+#ifdef CONFIG_PREEMPT
 # define preemptible()	(preempt_count() == 0 && !irqs_disabled())
 # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
 #else
-# define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
 # define preemptible()	0
 # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
 #endif
Index: linux-2.6.10-rc1/include/linux/smp.h
===================================================================
--- linux-2.6.10-rc1.orig/include/linux/smp.h	2004-03-11 21:29:26.000000000 +1100
+++ linux-2.6.10-rc1/include/linux/smp.h	2004-10-25 00:11:43.970925548 +1000
@@ -95,8 +95,10 @@ void smp_prepare_boot_cpu(void);
 /*
  *	These macros fold the SMP functionality into a single CPU system
  */
- 
-#define smp_processor_id()			0
+
+#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT)
+# define smp_processor_id()			0
+#endif
 #define hard_smp_processor_id()			0
 #define smp_threads_ready			1
 #define smp_call_function(func,info,retry,wait)	({ 0; })
@@ -107,6 +109,21 @@ static inline void smp_send_reschedule(i
 
 #endif /* !SMP */
 
+#ifdef __smp_processor_id
+# if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_SMP_PROCESSOR_ID)
+  /*
+   * temporary debugging check: out-of-line smp_processor_id() that
+   * detects callers using it in a potentially unsafe way:
+   */
+   extern unsigned int smp_processor_id(void);
+# else /* no debug check: plain alias */
+#  define smp_processor_id() __smp_processor_id()
+# endif
+# define _smp_processor_id() __smp_processor_id() /* never debug-checked */
+#else /* no __smp_processor_id available */
+# define _smp_processor_id() smp_processor_id()
+#endif
+
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
 #define put_cpu()		preempt_enable()
 #define put_cpu_no_resched()	preempt_enable_no_resched()
Index: linux-2.6.10-rc1/include/linux/smp_lock.h
===================================================================
--- linux-2.6.10-rc1.orig/include/linux/smp_lock.h	2004-03-11 21:29:27.000000000 +1100
+++ linux-2.6.10-rc1/include/linux/smp_lock.h	2004-10-25 00:11:43.970925548 +1000
@@ -7,59 +7,14 @@
 
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 
-extern spinlock_t kernel_flag;
-
-#define kernel_locked()		(current->lock_depth >= 0)
-
-#define get_kernel_lock()	spin_lock(&kernel_flag)
-#define put_kernel_lock()	spin_unlock(&kernel_flag)
-
-/*
- * Release global kernel lock.
- */
-static inline void release_kernel_lock(struct task_struct *task)
-{
-	if (unlikely(task->lock_depth >= 0))
-		put_kernel_lock();
-}
-
-/*
- * Re-acquire the kernel lock
- */
-static inline void reacquire_kernel_lock(struct task_struct *task)
-{
-	if (unlikely(task->lock_depth >= 0))
-		get_kernel_lock();
-}
-
-/*
- * Getting the big kernel lock.
- *
- * This cannot happen asynchronously,
- * so we only need to worry about other
- * CPU's.
- */
-static inline void lock_kernel(void)
-{
-	int depth = current->lock_depth+1;
-	if (likely(!depth))
-		get_kernel_lock();
-	current->lock_depth = depth;
-}
-
-static inline void unlock_kernel(void)
-{
-	BUG_ON(current->lock_depth < 0);
-	if (likely(--current->lock_depth < 0))
-		put_kernel_lock();
-}
+extern int kernel_locked(void);
+extern void lock_kernel(void);
+extern void unlock_kernel(void);
 
 #else
 
 #define lock_kernel()				do { } while(0)
 #define unlock_kernel()				do { } while(0)
-#define release_kernel_lock(task)		do { } while(0)
-#define reacquire_kernel_lock(task)		do { } while(0)
 #define kernel_locked()				1
 
 #endif /* CONFIG_SMP || CONFIG_PREEMPT */
Index: linux-2.6.10-rc1/include/net/route.h
===================================================================
--- linux-2.6.10-rc1.orig/include/net/route.h	2004-10-19 08:57:12.000000000 +1000
+++ linux-2.6.10-rc1/include/net/route.h	2004-10-25 00:11:43.971925395 +1000
@@ -105,7 +105,7 @@ struct rt_cache_stat 
 
 extern struct rt_cache_stat *rt_cache_stat;
 #define RT_CACHE_STAT_INC(field)					  \
-		(per_cpu_ptr(rt_cache_stat, smp_processor_id())->field++)
+		(per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++)
 
 extern struct ip_rt_acct *ip_rt_acct;
 
Index: linux-2.6.10-rc1/include/net/snmp.h
===================================================================
--- linux-2.6.10-rc1.orig/include/net/snmp.h	2004-08-15 14:08:19.000000000 +1000
+++ linux-2.6.10-rc1/include/net/snmp.h	2004-10-25 00:11:43.971925395 +1000
@@ -128,18 +128,18 @@ struct linux_mib {
 #define SNMP_STAT_USRPTR(name)	(name[1])
 
 #define SNMP_INC_STATS_BH(mib, field) 	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset)	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field + (offset)]++)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++)
 #define SNMP_INC_STATS_USER(mib, field) \
-	(per_cpu_ptr(mib[1], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++)
 #define SNMP_DEC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]--)
+	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--)
 #define SNMP_ADD_STATS_BH(mib, field, addend) 	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend)
 #define SNMP_ADD_STATS_USER(mib, field, addend) 	\
-	(per_cpu_ptr(mib[1], smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend)
 
 #endif
Index: linux-2.6.10-rc1/kernel/module.c
===================================================================
--- linux-2.6.10-rc1.orig/kernel/module.c	2004-10-24 23:51:06.656925552 +1000
+++ linux-2.6.10-rc1/kernel/module.c	2004-10-25 00:11:43.973925089 +1000
@@ -395,7 +395,7 @@ static void module_unload_init(struct mo
 	for (i = 0; i < NR_CPUS; i++)
 		local_set(&mod->ref[i].count, 0);
 	/* Hold reference count during initialization. */
-	local_set(&mod->ref[smp_processor_id()].count, 1);
+	local_set(&mod->ref[_smp_processor_id()].count, 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
Index: linux-2.6.10-rc1/kernel/printk.c
===================================================================
--- linux-2.6.10-rc1.orig/kernel/printk.c	2004-10-25 00:10:16.658257559 +1000
+++ linux-2.6.10-rc1/kernel/printk.c	2004-10-25 00:11:43.974924936 +1000
@@ -645,8 +645,9 @@ void release_console_sem(void)
 		_con_start = con_start;
 		_log_end = log_end;
 		con_start = log_end;		/* Flush */
-		spin_unlock_irqrestore(&logbuf_lock, flags);
+		spin_unlock(&logbuf_lock);
 		call_console_drivers(_con_start, _log_end);
+		local_irq_restore(flags);
 	}
 	console_locked = 0;
 	console_may_schedule = 0;
Index: linux-2.6.10-rc1/kernel/sched.c
===================================================================
--- linux-2.6.10-rc1.orig/kernel/sched.c	2004-10-25 00:10:16.660257255 +1000
+++ linux-2.6.10-rc1/kernel/sched.c	2004-10-25 00:11:43.977924476 +1000
@@ -2265,6 +2265,220 @@ static inline int dependent_sleeper(int 
 }
 #endif
 
+#if defined(CONFIG_PREEMPT) && defined(__smp_processor_id) && \
+				defined(CONFIG_DEBUG_SMP_PROCESSOR_ID)
+/*
+ * Debug variant of smp_processor_id(): warn if used in preemptible code.
+ */
+unsigned int smp_processor_id(void)
+{
+	unsigned long preempt_count = preempt_count();
+	int this_cpu = __smp_processor_id();
+	cpumask_t this_mask;
+
+	if (likely(preempt_count))
+		goto out;
+
+	if (irqs_disabled())
+		goto out;
+
+	/*
+	 * Kernel threads bound to a single CPU can safely use
+	 * smp_processor_id():
+	 */
+	this_mask = cpumask_of_cpu(this_cpu);
+
+	if (cpus_equal(current->cpus_allowed, this_mask))
+		goto out;
+
+	/*
+	 * It is valid to assume CPU-locality during early bootup:
+	 */
+	if (system_state != SYSTEM_RUNNING)
+		goto out;
+
+	/*
+	 * Avoid recursion: nested calls see a non-zero preempt count:
+	 */
+	preempt_disable();
+
+	if (!printk_ratelimit())
+		goto out_enable;
+
+	printk(KERN_ERR "using smp_processor_id() in preemptible code: %s/%d\n",
+		current->comm, current->pid);
+	dump_stack();
+
+out_enable:
+	preempt_enable_no_resched();
+out:
+	return this_cpu;
+}
+
+EXPORT_SYMBOL(smp_processor_id);
+
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+
+#ifdef CONFIG_PREEMPT_BKL
+/*
+ * The 'big kernel semaphore'
+ *
+ * This mutex is taken and released recursively by lock_kernel()
+ * and unlock_kernel().  It is transparently dropped and reacquired
+ * over schedule().  It is used to protect legacy code that hasn't
+ * been migrated to a proper locking design yet.
+ *
+ * Note: code locked by this semaphore will only be serialized against
+ * other code using the same locking facility. The code guarantees that
+ * the task remains on the same CPU.
+ *
+ * Don't use in new code.
+ */
+static __cacheline_aligned_in_smp DECLARE_MUTEX(kernel_sem);
+
+int kernel_locked(void)
+{
+	return current->lock_depth >= 0;	/* negative: BKL not held */
+}
+
+EXPORT_SYMBOL(kernel_locked);
+
+/*
+ * Release the global kernel semaphore if @task holds it (lock_depth >= 0):
+ */
+static inline void release_kernel_sem(struct task_struct *task)
+{
+	if (unlikely(task->lock_depth >= 0))
+		up(&kernel_sem);
+}
+
+/*
+ * Re-acquire the kernel semaphore if @task held it (lock_depth >= 0).
+ *
+ * This function is called with preemption off.
+ *
+ * We are executing in schedule() so the code must be extremely careful
+ * about recursion, both due to the down() and due to the enabling of
+ * preemption. schedule() will re-check the preemption flag after
+ * reacquiring the semaphore.
+ */
+static inline void reacquire_kernel_sem(struct task_struct *task)
+{
+	int saved_lock_depth = task->lock_depth;
+
+	if (likely(saved_lock_depth < 0))
+		return;
+
+	task->lock_depth = -1;	/* a nested schedule() must not up() */
+	preempt_enable_no_resched();
+
+	down(&kernel_sem);
+
+	preempt_disable();
+	task->lock_depth = saved_lock_depth;	/* held again */
+}
+
+/*
+ * Take the big kernel semaphore; nested calls only bump lock_depth.
+ */
+void lock_kernel(void)
+{
+	struct task_struct *task = current;
+	int depth = task->lock_depth + 1;
+
+	if (likely(!depth))
+		/*
+		 * No recursion worries - we set up lock_depth _after_
+		 */
+		down(&kernel_sem);
+
+	task->lock_depth = depth;
+}
+
+EXPORT_SYMBOL(lock_kernel);
+
+void unlock_kernel(void)
+{
+	struct task_struct *task = current;
+
+	BUG_ON(task->lock_depth < 0);	/* unbalanced unlock */
+
+	if (likely(--task->lock_depth < 0))	/* outermost unlock */
+		up(&kernel_sem);
+}
+
+EXPORT_SYMBOL(unlock_kernel);
+
+#else
+
+static spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
+
+int kernel_locked(void)
+{
+	return current->lock_depth >= 0;	/* negative: BKL not held */
+}
+
+EXPORT_SYMBOL(kernel_locked);
+
+#define get_kernel_lock()	spin_lock(&kernel_flag)
+#define put_kernel_lock()	spin_unlock(&kernel_flag)
+
+/*
+ * Release the global kernel spinlock if @task holds it (lock_depth >= 0).
+ */
+static inline void release_kernel_sem(struct task_struct *task)
+{
+	if (unlikely(task->lock_depth >= 0))
+		put_kernel_lock();
+}
+
+/*
+ * Re-acquire the kernel spinlock if @task held it (lock_depth >= 0).
+ */
+static inline void reacquire_kernel_sem(struct task_struct *task)
+{
+	if (unlikely(task->lock_depth >= 0))
+		get_kernel_lock();
+}
+
+/*
+ * Take the big kernel lock; nested calls only bump lock_depth.
+ *
+ * This cannot happen asynchronously,
+ * so we only need to worry about other
+ * CPUs.
+ */
+void lock_kernel(void)
+{
+	int depth = current->lock_depth+1;
+	if (likely(!depth))
+		get_kernel_lock();
+	current->lock_depth = depth;
+}
+
+EXPORT_SYMBOL(lock_kernel);
+
+void unlock_kernel(void)
+{
+	BUG_ON(current->lock_depth < 0);	/* unbalanced unlock */
+	if (likely(--current->lock_depth < 0))	/* outermost unlock */
+		put_kernel_lock();
+}
+
+EXPORT_SYMBOL(unlock_kernel);
+
+#endif
+
+#else
+
+static inline void release_kernel_sem(struct task_struct *task) { }
+static inline void reacquire_kernel_sem(struct task_struct *task) { }
+
+#endif
+
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -2307,7 +2521,7 @@ need_resched:
 		dump_stack();
 	}
 
-	release_kernel_lock(prev);
+	release_kernel_sem(prev);
 	schedstat_inc(rq, sched_cnt);
 	now = sched_clock();
 
@@ -2402,7 +2616,7 @@ switch_tasks:
 	} else
 		spin_unlock_irq(&rq->lock);
 
-	reacquire_kernel_lock(current);
+	reacquire_kernel_sem(current);
 	preempt_enable_no_resched();
 	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
 		goto need_resched;
@@ -2419,6 +2633,12 @@ EXPORT_SYMBOL(schedule);
 asmlinkage void __sched preempt_schedule(void)
 {
 	struct thread_info *ti = current_thread_info();
+#ifdef CONFIG_PREEMPT_BKL
+	struct task_struct *task = current;
+	int saved_lock_depth;
+#endif
+
+
 
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -2428,9 +2648,21 @@ asmlinkage void __sched preempt_schedule
 		return;
 
 need_resched:
-	ti->preempt_count = PREEMPT_ACTIVE;
+	preempt_count() += PREEMPT_ACTIVE;
+	/*
+	 * We keep the big kernel semaphore locked, but we
+	 * clear ->lock_depth so that schedule() doesn't
+	 * auto-release the semaphore:
+	 */
+#ifdef CONFIG_PREEMPT_BKL
+	saved_lock_depth = task->lock_depth;
+	task->lock_depth = -1;
+#endif
 	schedule();
-	ti->preempt_count = 0;
+#ifdef CONFIG_PREEMPT_BKL
+	task->lock_depth = saved_lock_depth;
+#endif
+	preempt_count() -= PREEMPT_ACTIVE;
 
 	/* we could miss a preemption opportunity between schedule and now */
 	barrier();
@@ -3152,6 +3384,8 @@ asmlinkage long sys_sched_yield(void)
 
 static inline void __cond_resched(void)
 {
+	if (preempt_count() & PREEMPT_ACTIVE)
+		return;
 	do {
 		preempt_count() += PREEMPT_ACTIVE;
 		schedule();
@@ -3239,7 +3473,7 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = this_rq();
+	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
 
 	atomic_inc(&rq->nr_iowait);
 	schedule();
@@ -3250,7 +3484,7 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = this_rq();
+	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
 	long ret;
 
 	atomic_inc(&rq->nr_iowait);
@@ -3460,7 +3694,7 @@ void __devinit init_idle(task_t *idle, i
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-#ifdef CONFIG_PREEMPT
+#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
 	idle->thread_info->preempt_count = (idle->lock_depth >= 0);
 #else
 	idle->thread_info->preempt_count = 0;
@@ -3862,21 +4096,6 @@ int __init migration_init(void)
 }
 #endif
 
-/*
- * The 'big kernel lock'
- *
- * This spinlock is taken and released recursively by lock_kernel()
- * and unlock_kernel().  It is transparently dropped and reaquired
- * over schedule().  It is used to protect legacy code that hasn't
- * been migrated to a proper locking design yet.
- *
- * Don't use in new code.
- *
- * Note: spinlock debugging needs this even on !CONFIG_SMP.
- */
-spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-EXPORT_SYMBOL(kernel_flag);
-
 #ifdef CONFIG_SMP
 /*
  * Attach the domain 'sd' to 'cpu' as its base domain.  Callers must
Index: linux-2.6.10-rc1/kernel/stop_machine.c
===================================================================
--- linux-2.6.10-rc1.orig/kernel/stop_machine.c	2004-05-23 12:54:58.000000000 +1000
+++ linux-2.6.10-rc1/kernel/stop_machine.c	2004-10-25 00:11:43.979924170 +1000
@@ -90,7 +90,7 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
-		if (i == smp_processor_id())
+		if (i == _smp_processor_id())
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
@@ -172,7 +172,7 @@ struct task_struct *__stop_machine_run(i
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
-		cpu = smp_processor_id();
+		cpu = _smp_processor_id();
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
 	if (!IS_ERR(p)) {
Index: linux-2.6.10-rc1/lib/Kconfig.debug
===================================================================
--- linux-2.6.10-rc1.orig/lib/Kconfig.debug	2004-10-24 23:51:06.682921504 +1000
+++ linux-2.6.10-rc1/lib/Kconfig.debug	2004-10-25 00:11:43.979924170 +1000
@@ -64,6 +64,15 @@ config DEBUG_SPINLOCK_SLEEP
 	  If you say Y here, various routines which may sleep will become very
 	  noisy if they are called with a spinlock held.
 
+config DEBUG_SMP_PROCESSOR_ID
+	bool "Preempt-unsafe smp_processor_id() checking"
+	depends on PREEMPT && X86
+	default y
+	help
+	  If you say Y here then the kernel will use a debug variant of the
+	  commonly used smp_processor_id() function and will print warnings
+	  if kernel code uses it in a preemption-unsafe way.
+
 config DEBUG_HIGHMEM
 	bool "Highmem debugging"
 	depends on DEBUG_KERNEL && HIGHMEM && (X86 || PPC32 || MIPS || SPARC32)

