Index: linux-2.6.10-rc2-ck1/arch/i386/Kconfig
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/i386/Kconfig	2004-11-15 21:34:23.000000000 +1100
+++ linux-2.6.10-rc2-ck1/arch/i386/Kconfig	2004-11-15 21:49:18.000000000 +1100
@@ -513,6 +513,17 @@ config PREEMPT
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system.  Say N if you are unsure.
 
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT || SMP
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors" if !SMP
 	depends on !(X86_VISWS || X86_VOYAGER)
Index: linux-2.6.10-rc2-ck1/arch/i386/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/i386/kernel/cpu/mtrr/generic.c	2004-08-15 14:08:04.000000000 +1000
+++ linux-2.6.10-rc2-ck1/arch/i386/kernel/cpu/mtrr/generic.c	2004-11-15 21:49:18.000000000 +1100
@@ -240,11 +240,14 @@ static void prepare_set(void)
 	/*  Note that this is not ideal, since the cache is only flushed/disabled
 	   for this CPU while the MTRRs are changed, but changing this requires
 	   more invasive changes to the way the kernel boots  */
-	spin_lock(&set_atomicity_lock);
+	/*
+	 * Since we are disabling the cache dont allow any interrupts - they
+	 * would run extremely slow and would only increase the pain:
+	 */
+	spin_lock_irq(&set_atomicity_lock);
 
 	/*  Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
 	cr0 = read_cr0() | 0x40000000;	/* set CD flag */
-	wbinvd();
 	write_cr0(cr0);
 	wbinvd();
 
@@ -266,8 +269,7 @@ static void prepare_set(void)
 
 static void post_set(void)
 {
-	/*  Flush caches and TLBs  */
-	wbinvd();
+	/*  Flush TLBs (no need to flush caches - they are disabled)  */
 	__flush_tlb();
 
 	/* Intel (P6) standard MTRRs */
@@ -279,7 +281,7 @@ static void post_set(void)
 	/*  Restore value of CR4  */
 	if ( cpu_has_pge )
 		write_cr4(cr4);
-	spin_unlock(&set_atomicity_lock);
+	spin_unlock_irq(&set_atomicity_lock);
 }
 
 static void generic_set_all(void)
Index: linux-2.6.10-rc2-ck1/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/i386/kernel/entry.S	2004-11-15 19:37:26.000000000 +1100
+++ linux-2.6.10-rc2-ck1/arch/i386/kernel/entry.S	2004-11-15 21:49:18.000000000 +1100
@@ -184,9 +184,8 @@ need_resched:
 	jz restore_all
 	testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
 	jz restore_all
-	movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp)
 	sti
-	call schedule
+	call preempt_schedule
 	cli
 	movl $0,TI_preempt_count(%ebp)
 	jmp need_resched
Index: linux-2.6.10-rc2-ck1/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/i386/kernel/traps.c	2004-11-15 19:37:26.000000000 +1100
+++ linux-2.6.10-rc2-ck1/arch/i386/kernel/traps.c	2004-11-15 21:49:18.000000000 +1100
@@ -306,7 +306,7 @@ void die(const char * str, struct pt_reg
 	};
 	static int die_counter;
 
-	if (die.lock_owner != smp_processor_id()) {
+	if (die.lock_owner != _smp_processor_id()) {
 		console_verbose();
 		spin_lock_irq(&die.lock);
 		die.lock_owner = smp_processor_id();
Index: linux-2.6.10-rc2-ck1/arch/i386/lib/delay.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/i386/lib/delay.c	2004-08-15 14:08:04.000000000 +1000
+++ linux-2.6.10-rc2-ck1/arch/i386/lib/delay.c	2004-11-15 21:49:18.000000000 +1100
@@ -34,7 +34,7 @@ inline void __const_udelay(unsigned long
 	xloops *= 4;
 	__asm__("mull %0"
 		:"=d" (xloops), "=&a" (d0)
-		:"1" (xloops),"0" (current_cpu_data.loops_per_jiffy * (HZ/4)));
+		:"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4)));
         __delay(++xloops);
 }
 
Index: linux-2.6.10-rc2-ck1/arch/i386/oprofile/nmi_int.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/i386/oprofile/nmi_int.c	2004-05-23 12:54:46.000000000 +1000
+++ linux-2.6.10-rc2-ck1/arch/i386/oprofile/nmi_int.c	2004-11-15 21:49:18.000000000 +1100
@@ -311,7 +311,7 @@ struct oprofile_operations nmi_ops = {
 
 static int __init p4_init(void)
 {
-	__u8 cpu_model = current_cpu_data.x86_model;
+	__u8 cpu_model = boot_cpu_data.x86_model;
 
 	if (cpu_model > 3)
 		return 0;
@@ -342,7 +342,7 @@ static int __init p4_init(void)
 
 static int __init ppro_init(void)
 {
-	__u8 cpu_model = current_cpu_data.x86_model;
+	__u8 cpu_model = boot_cpu_data.x86_model;
 
 	if (cpu_model > 0xd)
 		return 0;
@@ -366,9 +366,9 @@ static int using_nmi;
 
 int __init nmi_init(struct oprofile_operations ** ops)
 {
-	__u8 vendor = current_cpu_data.x86_vendor;
-	__u8 family = current_cpu_data.x86;
- 
+	__u8 vendor = boot_cpu_data.x86_vendor;
+	__u8 family = boot_cpu_data.x86;
+
 	if (!cpu_has_apic)
 		return -ENODEV;
  
Index: linux-2.6.10-rc2-ck1/arch/sh/lib/delay.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/sh/lib/delay.c	2004-08-15 14:08:05.000000000 +1000
+++ linux-2.6.10-rc2-ck1/arch/sh/lib/delay.c	2004-11-15 21:49:18.000000000 +1100
@@ -24,7 +24,7 @@ inline void __const_udelay(unsigned long
 	__asm__("dmulu.l	%0, %2\n\t"
 		"sts	mach, %0"
 		: "=r" (xloops)
-		: "0" (xloops), "r" (current_cpu_data.loops_per_jiffy)
+		: "0" (xloops), "r" (cpu_data[_smp_processor_id()].loops_per_jiffy)
 		: "macl", "mach");
 	__delay(xloops * HZ);
 }
Index: linux-2.6.10-rc2-ck1/arch/sparc64/lib/delay.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/sparc64/lib/delay.c	2004-10-19 08:57:05.000000000 +1000
+++ linux-2.6.10-rc2-ck1/arch/sparc64/lib/delay.c	2004-11-15 21:49:18.000000000 +1100
@@ -31,7 +31,7 @@ void __const_udelay(unsigned long n)
 {
 	n *= 4;
 
-	n *= (cpu_data(smp_processor_id()).udelay_val * (HZ/4));
+	n *= (cpu_data(_smp_processor_id()).udelay_val * (HZ/4));
 	n >>= 32;
 
 	__delay(n + 1);
Index: linux-2.6.10-rc2-ck1/arch/x86_64/Kconfig
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/x86_64/Kconfig	2004-11-15 19:37:26.000000000 +1100
+++ linux-2.6.10-rc2-ck1/arch/x86_64/Kconfig	2004-11-15 21:49:18.000000000 +1100
@@ -244,6 +244,17 @@ config PREEMPT
 	  Say Y here if you are feeling brave and building a kernel for a
 	  desktop, embedded or real-time system.  Say N if you are unsure.
 
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT || SMP
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP
Index: linux-2.6.10-rc2-ck1/arch/x86_64/lib/delay.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/arch/x86_64/lib/delay.c	2004-03-11 21:28:56.000000000 +1100
+++ linux-2.6.10-rc2-ck1/arch/x86_64/lib/delay.c	2004-11-15 21:49:18.000000000 +1100
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)
 
 inline void __const_udelay(unsigned long xloops)
 {
-        __delay(((xloops * current_cpu_data.loops_per_jiffy) >> 32) * HZ);
+	__delay(((xloops * cpu_data[_smp_processor_id()].loops_per_jiffy) >> 32) * HZ);
 }
 
 void __udelay(unsigned long usecs)
Index: linux-2.6.10-rc2-ck1/drivers/video/console/vgacon.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/drivers/video/console/vgacon.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/drivers/video/console/vgacon.c	2004-11-15 21:49:18.000000000 +1100
@@ -49,6 +49,7 @@
 #include <linux/spinlock.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
+#include <linux/smp_lock.h>
 #include <video/vga.h>
 #include <asm/io.h>
 
@@ -763,6 +764,7 @@ static int vgacon_do_font_op(struct vgas
 		charmap += 4 * cmapsz;
 #endif
 
+	unlock_kernel();
 	spin_lock_irq(&vga_lock);
 	/* First, the Sequencer */
 	vga_wseq(state->vgabase, VGA_SEQ_RESET, 0x1);
@@ -848,6 +850,7 @@ static int vgacon_do_font_op(struct vgas
 		vga_wattr(state->vgabase, VGA_AR_ENABLE_DISPLAY, 0);	
 	}
 	spin_unlock_irq(&vga_lock);
+	lock_kernel();
 	return 0;
 }
 
Index: linux-2.6.10-rc2-ck1/fs/dcache.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/dcache.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/dcache.c	2004-11-15 21:49:18.000000000 +1100
@@ -156,7 +156,7 @@ repeat:
 		spin_unlock(&dcache_lock);
 		return;
 	}
-			
+
 	/*
 	 * AV: ->d_delete() is _NOT_ allowed to block now.
 	 */
@@ -388,6 +388,8 @@ static void prune_dcache(int count)
 		struct dentry *dentry;
 		struct list_head *tmp;
 
+		cond_resched_lock(&dcache_lock);
+
 		tmp = dentry_unused.prev;
 		if (tmp == &dentry_unused)
 			break;
@@ -544,6 +546,13 @@ positive:
  * list for prune_dcache(). We descend to the next level
  * whenever the d_subdirs list is non-empty and continue
  * searching.
+ *
+ * It returns zero iff there are no unused children,
+ * otherwise  it returns the number of children moved to
+ * the end of the unused list. This may not be the total
+ * number of unused children, because select_parent can
+ * drop the lock and return early due to latency
+ * constraints.
  */
 static int select_parent(struct dentry * parent)
 {
@@ -573,6 +582,15 @@ resume:
 			dentry_stat.nr_unused++;
 			found++;
 		}
+
+		/*
+		 * We can return to the caller if we have found some (this
+		 * ensures forward progress). We'll be coming back to find
+		 * the rest.
+		 */
+		if (found && need_resched())
+			goto out;
+
 		/*
 		 * Descend a level if the d_subdirs list is non-empty.
 		 */
@@ -597,6 +615,7 @@ this_parent->d_parent->d_name.name, this
 #endif
 		goto resume;
 	}
+out:
 	spin_unlock(&dcache_lock);
 	return found;
 }
Index: linux-2.6.10-rc2-ck1/fs/exec.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/exec.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/exec.c	2004-11-15 21:49:18.000000000 +1100
@@ -185,6 +185,7 @@ static int count(char __user * __user * 
 			argv++;
 			if(++i > max)
 				return -E2BIG;
+			cond_resched();
 		}
 	}
 	return i;
Index: linux-2.6.10-rc2-ck1/fs/ext3/balloc.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/ext3/balloc.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/ext3/balloc.c	2004-11-15 21:49:18.000000000 +1100
@@ -378,6 +378,11 @@ do_more:
 		}
 		jbd_lock_bh_state(bitmap_bh);
 #endif
+		if (need_resched()) {
+			jbd_unlock_bh_state(bitmap_bh);
+			cond_resched();
+			jbd_lock_bh_state(bitmap_bh);
+		}
 		/* @@@ This prevents newly-allocated data from being
 		 * freed and then reallocated within the same
 		 * transaction. 
Index: linux-2.6.10-rc2-ck1/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/ext3/ialloc.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/ext3/ialloc.c	2004-11-15 21:49:18.000000000 +1100
@@ -732,6 +732,7 @@ unsigned long ext3_count_free_inodes (st
 		if (!gdp)
 			continue;
 		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+		cond_resched();
 	}
 	return desc_count;
 #endif
Index: linux-2.6.10-rc2-ck1/fs/ext3/namei.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/ext3/namei.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/ext3/namei.c	2004-11-15 21:49:18.000000000 +1100
@@ -671,6 +671,7 @@ static int dx_make_map (struct ext3_dir_
 			map_tail->hash = h.hash;
 			map_tail->offs = (u32) ((char *) de - base);
 			count++;
+			cond_resched();
 		}
 		/* XXX: do we need to check rec_len == 0 case? -Chris */
 		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
Index: linux-2.6.10-rc2-ck1/fs/ext3/super.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/ext3/super.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/ext3/super.c	2004-11-15 21:49:18.000000000 +1100
@@ -1247,6 +1247,8 @@ static int ext3_fill_super (struct super
 	sbi->s_resuid = EXT3_DEF_RESUID;
 	sbi->s_resgid = EXT3_DEF_RESGID;
 
+	unlock_kernel();
+
 	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
 	if (!blocksize) {
 		printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
@@ -1604,6 +1606,7 @@ static int ext3_fill_super (struct super
 	percpu_counter_mod(&sbi->s_dirs_counter,
 		ext3_count_dirs(sb));
 
+	lock_kernel();
 	return 0;
 
 cantfind_ext3:
@@ -1628,6 +1631,7 @@ failed_mount:
 out_fail:
 	sb->s_fs_info = NULL;
 	kfree(sbi);
+	lock_kernel();
 	return -EINVAL;
 }
 
@@ -2152,9 +2156,11 @@ int ext3_statfs (struct super_block * sb
 		 * block group descriptors.  If the sparse superblocks
 		 * feature is turned on, then not all groups have this.
 		 */
-		for (i = 0; i < ngroups; i++)
+		for (i = 0; i < ngroups; i++) {
 			overhead += ext3_bg_has_super(sb, i) +
 				ext3_bg_num_gdb(sb, i);
+			cond_resched();
+		}
 
 		/*
 		 * Every block group has an inode bitmap, a block
Index: linux-2.6.10-rc2-ck1/fs/fs-writeback.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/fs-writeback.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/fs-writeback.c	2004-11-15 21:49:18.000000000 +1100
@@ -378,6 +378,7 @@ sync_sb_inodes(struct super_block *sb, s
 			list_move(&inode->i_list, &sb->s_dirty);
 		}
 		spin_unlock(&inode_lock);
+		cond_resched();
 		iput(inode);
 		spin_lock(&inode_lock);
 		if (wbc->nr_to_write <= 0)
Index: linux-2.6.10-rc2-ck1/fs/inode.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/inode.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/inode.c	2004-11-15 21:49:18.000000000 +1100
@@ -306,6 +306,14 @@ static int invalidate_list(struct list_h
 		struct list_head * tmp = next;
 		struct inode * inode;
 
+		/*
+		 * We can reschedule here without worrying about the list's
+		 * consistency because the per-sb list of inodes must not
+		 * change during umount anymore, and because iprune_sem keeps
+		 * shrink_icache_memory() away.
+		 */
+		cond_resched_lock(&inode_lock);
+
 		next = next->next;
 		if (tmp == head)
 			break;
Index: linux-2.6.10-rc2-ck1/fs/jbd/checkpoint.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/jbd/checkpoint.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/jbd/checkpoint.c	2004-11-15 21:49:18.000000000 +1100
@@ -333,6 +333,10 @@ int log_do_checkpoint(journal_t *journal
 				break;
 			}
 			retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
+			if (cond_resched_lock(&journal->j_list_lock)) {
+				retry = 1;
+				break;
+			}
 		} while (jh != last_jh && !retry);
 
 		if (batch_count)
@@ -487,6 +491,14 @@ int __journal_clean_checkpoint_list(jour
 				/* Use trylock because of the ranknig */
 				if (jbd_trylock_bh_state(jh2bh(jh)))
 					ret += __try_to_free_cp_buf(jh);
+				/*
+				 * This function only frees up some memory
+				 * if possible so we dont have an obligation
+				 * to finish processing. Bail out if preemption
+				 * requested:
+				 */
+				if (need_resched())
+					goto out;
 			} while (jh != last_jh);
 		}
 	} while (transaction != last_transaction);
Index: linux-2.6.10-rc2-ck1/fs/jbd/commit.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/jbd/commit.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/jbd/commit.c	2004-11-15 21:49:18.000000000 +1100
@@ -262,7 +262,7 @@ write_out_data:
 			__journal_file_buffer(jh, commit_transaction,
 						BJ_Locked);
 			jbd_unlock_bh_state(bh);
-			if (need_resched()) {
+			if (lock_need_resched(&journal->j_list_lock)) {
 				spin_unlock(&journal->j_list_lock);
 				goto write_out_data;
 			}
@@ -288,7 +288,7 @@ write_out_data:
 				jbd_unlock_bh_state(bh);
 				journal_remove_journal_head(bh);
 				put_bh(bh);
-				if (need_resched()) {
+				if (lock_need_resched(&journal->j_list_lock)) {
 					spin_unlock(&journal->j_list_lock);
 					goto write_out_data;
 				}
@@ -333,11 +333,7 @@ write_out_data:
 			jbd_unlock_bh_state(bh);
 		}
 		put_bh(bh);
-		if (need_resched()) {
-			spin_unlock(&journal->j_list_lock);
-			cond_resched();
-			spin_lock(&journal->j_list_lock);
-		}
+		cond_resched_lock(&journal->j_list_lock);
 	}
 	spin_unlock(&journal->j_list_lock);
 
@@ -545,6 +541,8 @@ wait_for_iobuf:
 			wait_on_buffer(bh);
 			goto wait_for_iobuf;
 		}
+		if (cond_resched())
+			goto wait_for_iobuf;
 
 		if (unlikely(!buffer_uptodate(bh)))
 			err = -EIO;
@@ -599,6 +597,8 @@ wait_for_iobuf:
 			wait_on_buffer(bh);
 			goto wait_for_ctlbuf;
 		}
+		if (cond_resched())
+			goto wait_for_ctlbuf;
 
 		if (unlikely(!buffer_uptodate(bh)))
 			err = -EIO;
@@ -695,6 +695,7 @@ skip_commit: /* The journal should be un
 	J_ASSERT(commit_transaction->t_shadow_list == NULL);
 	J_ASSERT(commit_transaction->t_log_list == NULL);
 
+restart_loop:
 	while (commit_transaction->t_forget) {
 		transaction_t *cp_transaction;
 		struct buffer_head *bh;
@@ -768,6 +769,8 @@ skip_commit: /* The journal should be un
 			release_buffer_page(bh);
 		}
 		spin_unlock(&journal->j_list_lock);
+		if (cond_resched())
+			goto restart_loop;
 	}
 
 	/* Done with this transaction! */
Index: linux-2.6.10-rc2-ck1/fs/jbd/recovery.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/jbd/recovery.c	2004-10-19 08:57:11.000000000 +1000
+++ linux-2.6.10-rc2-ck1/fs/jbd/recovery.c	2004-11-15 21:49:18.000000000 +1100
@@ -354,6 +354,8 @@ static int do_one_pass(journal_t *journa
 		struct buffer_head *	obh;
 		struct buffer_head *	nbh;
 
+		cond_resched();		/* We're under lock_kernel() */
+
 		/* If we already know where to stop the log traversal,
 		 * check right now that we haven't gone past the end of
 		 * the log. */
Index: linux-2.6.10-rc2-ck1/fs/select.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/fs/select.c	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/fs/select.c	2004-11-15 21:49:18.000000000 +1100
@@ -240,6 +240,7 @@ int do_select(int n, fd_set_bits *fds, l
 						retval++;
 					}
 				}
+				cond_resched();
 			}
 			if (res_in)
 				*rinp = res_in;
Index: linux-2.6.10-rc2-ck1/include/asm-alpha/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-alpha/spinlock.h	2004-10-19 08:57:11.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-alpha/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -23,6 +23,9 @@ typedef struct {
 	struct task_struct * task;
 	const char *base_file;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -96,6 +99,9 @@ static inline int _raw_spin_trylock(spin
 
 typedef struct {
 	volatile unsigned int write_lock:1, read_counter:31;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } /*__attribute__((aligned(32)))*/ rwlock_t;
 
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
Index: linux-2.6.10-rc2-ck1/include/asm-arm/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-arm/spinlock.h	2004-06-16 17:35:44.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-arm/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -17,6 +17,9 @@
  */
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #define SPIN_LOCK_UNLOCKED	(spinlock_t) { 0 }
@@ -70,6 +73,9 @@ static inline void _raw_spin_unlock(spin
  */
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #define RW_LOCK_UNLOCKED	(rwlock_t) { 0 }
@@ -143,6 +149,8 @@ static inline void _raw_read_unlock(rwlo
 	: "cc", "memory");
 }
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 static inline int _raw_write_trylock(rwlock_t *rw)
 {
 	unsigned long tmp;
Index: linux-2.6.10-rc2-ck1/include/asm-i386/smp.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-i386/smp.h	2004-10-19 08:57:11.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-i386/smp.h	2004-11-15 21:49:18.000000000 +1100
@@ -50,7 +50,7 @@ extern u8 x86_cpu_to_apicid[];
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define smp_processor_id() (current_thread_info()->cpu)
+#define __smp_processor_id() (current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
 #define cpu_possible_map cpu_callout_map
Index: linux-2.6.10-rc2-ck1/include/asm-i386/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-i386/spinlock.h	2004-10-19 08:57:11.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-i386/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -19,6 +19,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned magic;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #define SPINLOCK_MAGIC	0xdead4ead
@@ -166,6 +169,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned magic;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #define RWLOCK_MAGIC	0xdeaf1eed
@@ -212,6 +218,16 @@ static inline void _raw_write_lock(rwloc
 #define _raw_read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
 #define _raw_write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
 
+static inline int _raw_read_trylock(rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+	atomic_dec(count);
+	if (atomic_read(count) >= 0)
+		return 1;
+	atomic_inc(count);
+	return 0;
+}
+
 static inline int _raw_write_trylock(rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
Index: linux-2.6.10-rc2-ck1/include/asm-ia64/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-ia64/spinlock.h	2004-10-19 08:57:11.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-ia64/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -19,6 +19,9 @@
 
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #define SPIN_LOCK_UNLOCKED			(spinlock_t) { 0 }
@@ -116,6 +119,9 @@ do {											\
 typedef struct {
 	volatile unsigned int read_counter	: 31;
 	volatile unsigned int write_lock	:  1;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
 
@@ -190,6 +196,8 @@ do {										\
 
 #endif /* !ASM_SUPPORTED */
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 #define _raw_write_unlock(x)								\
 ({											\
 	smp_mb__before_clear_bit();	/* need barrier before releasing lock... */	\
Index: linux-2.6.10-rc2-ck1/include/asm-mips/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-mips/spinlock.h	2004-06-16 17:35:45.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-mips/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -15,6 +15,9 @@
 
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
@@ -92,6 +95,9 @@ static inline unsigned int _raw_spin_try
 
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
@@ -168,6 +174,8 @@ static inline void _raw_write_unlock(rwl
 	: "memory");
 }
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 static inline int _raw_write_trylock(rwlock_t *rw)
 {
 	unsigned int tmp;
Index: linux-2.6.10-rc2-ck1/include/asm-parisc/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-parisc/spinlock.h	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/asm-parisc/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -142,6 +142,9 @@ do {									\
 typedef struct {
 	spinlock_t lock;
 	volatile int counter;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #define RW_LOCK_UNLOCKED (rwlock_t) { __SPIN_LOCK_UNLOCKED, 0 }
@@ -150,6 +153,8 @@ typedef struct {
 
 #define rwlock_is_locked(lp) ((lp)->counter != 0)
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 /* read_lock, read_unlock are pretty straightforward.  Of course it somehow
  * sucks we end up saving/restoring flags twice for read_lock_irqsave aso. */
 
Index: linux-2.6.10-rc2-ck1/include/asm-parisc/system.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-parisc/system.h	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/asm-parisc/system.h	2004-11-15 21:49:18.000000000 +1100
@@ -176,6 +176,9 @@ typedef struct {
 	void *previous;
 	struct task_struct * task;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #define __lock_aligned __attribute__((__section__(".data.lock_aligned")))
Index: linux-2.6.10-rc2-ck1/include/asm-ppc64/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-ppc64/spinlock.h	2004-10-19 08:57:12.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-ppc64/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -23,10 +23,16 @@
 
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 typedef struct {
 	volatile signed int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #ifdef __KERNEL__
@@ -216,6 +222,8 @@ static void __inline__ _raw_read_unlock(
 	: "cr0", "memory");
 }
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 /*
  * This returns the old value in the lock,
  * so we got the write lock if the return value is 0.
Index: linux-2.6.10-rc2-ck1/include/asm-ppc/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-ppc/spinlock.h	2004-06-16 17:35:45.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-ppc/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -13,6 +13,9 @@ typedef struct {
 	volatile unsigned long owner_pc;
 	volatile unsigned long owner_cpu;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #ifdef __KERNEL__
@@ -83,6 +86,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	volatile unsigned long owner_pc;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -192,5 +198,7 @@ extern int _raw_write_trylock(rwlock_t *
 
 #endif
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 #endif /* __ASM_SPINLOCK_H */
 #endif /* __KERNEL__ */
Index: linux-2.6.10-rc2-ck1/include/asm-s390/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-s390/spinlock.h	2004-06-16 17:35:45.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-s390/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -36,6 +36,9 @@
 
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } __attribute__ ((aligned (4))) spinlock_t;
 
 #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
@@ -105,6 +108,9 @@ extern inline void _raw_spin_unlock(spin
 typedef struct {
 	volatile unsigned long lock;
 	volatile unsigned long owner_pc;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
@@ -211,6 +217,8 @@ typedef struct {
 		       "m" ((rw)->lock) : "2", "3", "cc", "memory" )
 #endif /* __s390x__ */
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 extern inline int _raw_write_trylock(rwlock_t *rw)
 {
 	unsigned long result, reg;
Index: linux-2.6.10-rc2-ck1/include/asm-sh/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-sh/spinlock.h	2004-06-16 17:35:45.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-sh/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -17,6 +17,9 @@
  */
 typedef struct {
 	volatile unsigned long lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #define SPIN_LOCK_UNLOCKED	(spinlock_t) { 0 }
@@ -68,6 +71,9 @@ static inline void _raw_spin_unlock(spin
 typedef struct {
 	spinlock_t lock;
 	atomic_t counter;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #define RW_LOCK_BIAS		0x01000000
@@ -105,6 +111,8 @@ static inline void _raw_write_unlock(rwl
 	_raw_spin_unlock(&rw->lock);
 }
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 static inline int _raw_write_trylock(rwlock_t *rw)
 {
 	if (atomic_sub_and_test(RW_LOCK_BIAS, &rw->counter))
Index: linux-2.6.10-rc2-ck1/include/asm-sparc64/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-sparc64/spinlock.h	2004-10-19 08:57:12.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-sparc64/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -304,6 +304,8 @@ do {	unsigned long flags; \
 
 #endif /* CONFIG_DEBUG_SPINLOCK */
 
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
+
 #endif /* !(__ASSEMBLY__) */
 
 #endif /* !(__SPARC64_SPINLOCK_H) */
Index: linux-2.6.10-rc2-ck1/include/asm-sparc/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-sparc/spinlock.h	2004-06-16 17:35:45.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-sparc/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -16,6 +16,9 @@
 struct _spinlock_debug {
 	unsigned char lock;
 	unsigned long owner_pc;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 };
 typedef struct _spinlock_debug spinlock_t;
 
@@ -36,6 +39,9 @@ struct _rwlock_debug {
 	volatile unsigned int lock;
 	unsigned long owner_pc;
 	unsigned long reader_pc[NR_CPUS];
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 };
 typedef struct _rwlock_debug rwlock_t;
 
@@ -79,8 +85,14 @@ do {	unsigned long flags; \
 
 #else /* !CONFIG_DEBUG_SPINLOCK */
 
-typedef unsigned char spinlock_t;
-#define SPIN_LOCK_UNLOCKED	0
+typedef struct {
+	unsigned char lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
+} spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED	{ 0, }
 
 #define spin_lock_init(lock)   (*((unsigned char *)(lock)) = 0)
 #define spin_is_locked(lock)    (*((volatile unsigned char *)(lock)) != 0)
@@ -137,7 +149,12 @@ extern __inline__ void _raw_spin_unlock(
  * XXX This might create some problems with my dual spinlock
  * XXX scheme, deadlocks etc. -DaveM
  */
-typedef struct { volatile unsigned int lock; } rwlock_t;
+typedef struct {
+	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
+} rwlock_t;
 
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
 
Index: linux-2.6.10-rc2-ck1/include/asm-x86_64/smp.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-x86_64/smp.h	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/asm-x86_64/smp.h	2004-11-15 21:49:18.000000000 +1100
@@ -66,7 +66,7 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_callout_map);
 }
 
-#define smp_processor_id() read_pda(cpunumber)
+#define __smp_processor_id() read_pda(cpunumber)
 
 extern __inline int hard_smp_processor_id(void)
 {
Index: linux-2.6.10-rc2-ck1/include/asm-x86_64/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/asm-x86_64/spinlock.h	2004-10-19 08:57:12.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/asm-x86_64/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -18,6 +18,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned magic;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 
 #define SPINLOCK_MAGIC	0xdead4ead
@@ -139,6 +142,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned magic;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 
 #define RWLOCK_MAGIC	0xdeaf1eed
@@ -185,6 +191,16 @@ static inline void _raw_write_lock(rwloc
 #define _raw_read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
 #define _raw_write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
 
+static inline int _raw_read_trylock(rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+	atomic_dec(count);
+	if (atomic_read(count) >= 0)
+		return 1;
+	atomic_inc(count);
+	return 0;
+}
+
 static inline int _raw_write_trylock(rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
Index: linux-2.6.10-rc2-ck1/include/linux/hardirq.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/linux/hardirq.h	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/linux/hardirq.h	2004-11-15 21:49:18.000000000 +1100
@@ -61,12 +61,16 @@
 #define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
 
-#ifdef CONFIG_PREEMPT
+#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
 # define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
+#else
+# define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+#endif
+
+#ifdef CONFIG_PREEMPT
 # define preemptible()	(preempt_count() == 0 && !irqs_disabled())
 # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
 #else
-# define in_atomic()	(preempt_count() != 0)
 # define preemptible()	0
 # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
 #endif
@@ -78,10 +82,10 @@ extern void synchronize_irq(unsigned int
 #endif
 
 #ifdef CONFIG_GENERIC_HARDIRQS
-#define nmi_enter()		(preempt_count() += HARDIRQ_OFFSET)
-#define nmi_exit()		(preempt_count() -= HARDIRQ_OFFSET)
 
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
+#define irq_enter()		add_preempt_count(HARDIRQ_OFFSET)
+#define nmi_enter()		irq_enter()
+#define nmi_exit()		sub_preempt_count(HARDIRQ_OFFSET)
 extern void irq_exit(void);
 #endif
 
Index: linux-2.6.10-rc2-ck1/include/linux/interrupt.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/linux/interrupt.h	2004-11-15 19:37:29.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/linux/interrupt.h	2004-11-15 21:49:18.000000000 +1100
@@ -70,9 +70,9 @@ extern void enable_irq(unsigned int irq)
 
 /* SoftIRQ primitives.  */
 #define local_bh_disable() \
-		do { preempt_count() += SOFTIRQ_OFFSET; barrier(); } while (0)
+		do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0)
 #define __local_bh_enable() \
-		do { barrier(); preempt_count() -= SOFTIRQ_OFFSET; } while (0)
+		do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0)
 
 extern void local_bh_enable(void);
 
Index: linux-2.6.10-rc2-ck1/include/linux/mmzone.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/linux/mmzone.h	2004-11-15 21:32:23.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/linux/mmzone.h	2004-11-15 21:49:18.000000000 +1100
@@ -371,7 +371,7 @@ int lower_zone_protection_sysctl_handler
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
-#define numa_node_id()		(cpu_to_node(smp_processor_id()))
+#define numa_node_id()		(cpu_to_node(_smp_processor_id()))
 
 #ifndef CONFIG_DISCONTIGMEM
 
Index: linux-2.6.10-rc2-ck1/include/linux/preempt.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/linux/preempt.h	2004-03-11 21:29:26.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/linux/preempt.h	2004-11-15 21:49:18.000000000 +1100
@@ -9,17 +9,18 @@
 #include <linux/config.h>
 #include <linux/linkage.h>
 
-#define preempt_count()	(current_thread_info()->preempt_count)
+#ifdef CONFIG_DEBUG_PREEMPT
+  extern void fastcall add_preempt_count(int val);
+  extern void fastcall sub_preempt_count(int val);
+#else
+# define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
+# define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
+#endif
 
-#define inc_preempt_count() \
-do { \
-	preempt_count()++; \
-} while (0)
+#define inc_preempt_count() add_preempt_count(1)
+#define dec_preempt_count() sub_preempt_count(1)
 
-#define dec_preempt_count() \
-do { \
-	preempt_count()--; \
-} while (0)
+#define preempt_count()	(current_thread_info()->preempt_count)
 
 #ifdef CONFIG_PREEMPT
 
Index: linux-2.6.10-rc2-ck1/include/linux/sched.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/linux/sched.h	2004-11-15 20:37:31.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/linux/sched.h	2004-11-15 21:49:18.000000000 +1100
@@ -1041,29 +1041,36 @@ static inline int need_resched(void)
 	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
 }
 
-extern void __cond_resched(void);
-static inline void cond_resched(void)
-{
-	if (need_resched())
-		__cond_resched();
-}
+/*
+ * cond_resched() and cond_resched_lock(): latency reduction via
+ * explicit rescheduling in places that are safe. The return
+ * value indicates whether a reschedule was done in fact.
+ * cond_resched_lock() will drop the spinlock before scheduling,
+ * cond_resched_softirq() will enable bhs before scheduling.
+ */
+extern int cond_resched(void);
+extern int cond_resched_lock(spinlock_t * lock);
+extern int cond_resched_softirq(void);
+
+/*
+ * Does a critical section need to be broken due to another
+ * task waiting?:
+ */
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+# define need_lockbreak(lock) ((lock)->break_lock)
+#else
+# define need_lockbreak(lock) 0
+#endif
 
 /*
- * cond_resched_lock() - if a reschedule is pending, drop the given lock,
- * call schedule, and on return reacquire the lock.
- *
- * This works OK both with and without CONFIG_PREEMPT.  We do strange low-level
- * operations here to prevent schedule() from being called twice (once via
- * spin_unlock(), once by hand).
+ * Does a critical section need to be broken due to another
+ * task waiting or preemption being signalled:
  */
-static inline void cond_resched_lock(spinlock_t * lock)
+static inline int lock_need_resched(spinlock_t *lock)
 {
-	if (need_resched()) {
-		_raw_spin_unlock(lock);
-		preempt_enable_no_resched();
-		__cond_resched();
-		spin_lock(lock);
-	}
+	if (need_lockbreak(lock) || need_resched())
+		return 1;
+	return 0;
 }
 
 /* Reevaluate whether the task has signals pending delivery.
Index: linux-2.6.10-rc2-ck1/include/linux/smp.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/linux/smp.h	2004-03-11 21:29:26.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/linux/smp.h	2004-11-15 21:49:18.000000000 +1100
@@ -95,8 +95,10 @@ void smp_prepare_boot_cpu(void);
 /*
  *	These macros fold the SMP functionality into a single CPU system
  */
- 
-#define smp_processor_id()			0
+
+#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT)
+# define smp_processor_id()			0
+#endif
 #define hard_smp_processor_id()			0
 #define smp_threads_ready			1
 #define smp_call_function(func,info,retry,wait)	({ 0; })
@@ -107,6 +109,21 @@ static inline void smp_send_reschedule(i
 
 #endif /* !SMP */
 
+#ifdef __smp_processor_id
+# if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+  /*
+   * temporary debugging check detecting places that use
+   * smp_processor_id() in a potentially unsafe way:
+   */
+   extern unsigned int smp_processor_id(void);
+# else
+#  define smp_processor_id() __smp_processor_id()
+# endif
+# define _smp_processor_id() __smp_processor_id()
+#else
+# define _smp_processor_id() smp_processor_id()
+#endif
+
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
 #define put_cpu()		preempt_enable()
 #define put_cpu_no_resched()	preempt_enable_no_resched()
Index: linux-2.6.10-rc2-ck1/include/linux/smp_lock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/linux/smp_lock.h	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/linux/smp_lock.h	2004-11-15 21:49:18.000000000 +1100
@@ -9,15 +9,15 @@
 
 #define kernel_locked()		(current->lock_depth >= 0)
 
-extern int __lockfunc get_kernel_lock(void);
-extern void __lockfunc put_kernel_lock(void);
+extern int __lockfunc __reacquire_kernel_lock(void);
+extern void __lockfunc __release_kernel_lock(void);
 
 /*
  * Release/re-acquire global kernel lock for the scheduler
  */
 #define release_kernel_lock(tsk) do { 		\
 	if (unlikely((tsk)->lock_depth >= 0))	\
-		put_kernel_lock();		\
+		__release_kernel_lock();	\
 } while (0)
 
 /*
@@ -26,16 +26,16 @@ extern void __lockfunc put_kernel_lock(v
  * reacquire_kernel_lock() so that the compiler can see
  * it at compile-time.
  */
-#ifdef CONFIG_SMP
-#define return_value_on_smp return
+#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_BKL)
+# define return_value_on_smp return
 #else
-#define return_value_on_smp
+# define return_value_on_smp
 #endif
 
 static inline int reacquire_kernel_lock(struct task_struct *task)
 {
 	if (unlikely(task->lock_depth >= 0))
-		return_value_on_smp get_kernel_lock();
+		return_value_on_smp __reacquire_kernel_lock();
 	return 0;
 }
 
Index: linux-2.6.10-rc2-ck1/include/linux/spinlock.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/linux/spinlock.h	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/include/linux/spinlock.h	2004-11-15 21:49:18.000000000 +1100
@@ -41,6 +41,7 @@
 #include <asm/spinlock.h>
 
 int __lockfunc _spin_trylock(spinlock_t *lock);
+int __lockfunc _read_trylock(rwlock_t *lock);
 int __lockfunc _write_trylock(rwlock_t *lock);
 
 void __lockfunc _spin_lock(spinlock_t *lock)	__acquires(spinlock_t);
@@ -73,6 +74,7 @@ void __lockfunc _write_unlock_irq(rwlock
 void __lockfunc _write_unlock_bh(rwlock_t *lock)				__releases(rwlock_t);
 
 int __lockfunc _spin_trylock_bh(spinlock_t *lock);
+int __lockfunc generic_raw_read_trylock(rwlock_t *lock);
 int in_lock_functions(unsigned long addr);
 
 #else
@@ -219,11 +221,15 @@ typedef struct {
 #define _raw_read_unlock(lock)	do { (void)(lock); } while(0)
 #define _raw_write_lock(lock)	do { (void)(lock); } while(0)
 #define _raw_write_unlock(lock)	do { (void)(lock); } while(0)
+#define _raw_read_trylock(lock) ({ (void)(lock); (1); })
 #define _raw_write_trylock(lock) ({ (void)(lock); (1); })
 
 #define _spin_trylock(lock)	({preempt_disable(); _raw_spin_trylock(lock) ? \
 				1 : ({preempt_enable(); 0;});})
 
+#define _read_trylock(lock)	({preempt_disable();_raw_read_trylock(lock) ? \
+				1 : ({preempt_enable(); 0;});})
+
 #define _write_trylock(lock)	({preempt_disable(); _raw_write_trylock(lock) ? \
 				1 : ({preempt_enable(); 0;});})
 
@@ -425,16 +431,12 @@ do { \
  * methods are defined as nops in the case they are not required.
  */
 #define spin_trylock(lock)	__cond_lock(_spin_trylock(lock))
+#define read_trylock(lock)	__cond_lock(_read_trylock(lock))
 #define write_trylock(lock)	__cond_lock(_write_trylock(lock))
 
-/* Where's read_trylock? */
-
 #define spin_lock(lock)		_spin_lock(lock)
 #define write_lock(lock)	_write_lock(lock)
 #define read_lock(lock)		_read_lock(lock)
-#define spin_unlock(lock)	_spin_unlock(lock)
-#define write_unlock(lock)	_write_unlock(lock)
-#define read_unlock(lock)	_read_unlock(lock)
 
 #ifdef CONFIG_SMP
 #define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
@@ -454,6 +456,11 @@ do { \
 
 #define write_lock_irq(lock)		_write_lock_irq(lock)
 #define write_lock_bh(lock)		_write_lock_bh(lock)
+
+#define spin_unlock(lock)	_spin_unlock(lock)
+#define write_unlock(lock)	_write_unlock(lock)
+#define read_unlock(lock)	_read_unlock(lock)
+
 #define spin_unlock_irqrestore(lock, flags)	_spin_unlock_irqrestore(lock, flags)
 #define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
 #define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
@@ -476,6 +483,7 @@ extern void _metered_read_lock    (rwloc
 extern void _metered_read_unlock  (rwlock_t *lock);
 extern void _metered_write_lock   (rwlock_t *lock);
 extern void _metered_write_unlock (rwlock_t *lock);
+extern int  _metered_read_trylock (rwlock_t *lock);
 extern int  _metered_write_trylock(rwlock_t *lock);
 #endif
 
@@ -505,8 +513,11 @@ static inline void bit_spin_lock(int bit
 	preempt_disable();
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 	while (test_and_set_bit(bitnum, addr)) {
-		while (test_bit(bitnum, addr))
+		while (test_bit(bitnum, addr)) {
+			preempt_enable();
 			cpu_relax();
+			preempt_disable();
+		}
 	}
 #endif
 	__acquire(bitlock);
Index: linux-2.6.10-rc2-ck1/include/net/route.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/net/route.h	2004-10-19 08:57:12.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/net/route.h	2004-11-15 21:49:18.000000000 +1100
@@ -105,7 +105,7 @@ struct rt_cache_stat 
 
 extern struct rt_cache_stat *rt_cache_stat;
 #define RT_CACHE_STAT_INC(field)					  \
-		(per_cpu_ptr(rt_cache_stat, smp_processor_id())->field++)
+		(per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++)
 
 extern struct ip_rt_acct *ip_rt_acct;
 
Index: linux-2.6.10-rc2-ck1/include/net/snmp.h
===================================================================
--- linux-2.6.10-rc2-ck1.orig/include/net/snmp.h	2004-08-15 14:08:19.000000000 +1000
+++ linux-2.6.10-rc2-ck1/include/net/snmp.h	2004-11-15 21:49:18.000000000 +1100
@@ -128,18 +128,18 @@ struct linux_mib {
 #define SNMP_STAT_USRPTR(name)	(name[1])
 
 #define SNMP_INC_STATS_BH(mib, field) 	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset)	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field + (offset)]++)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++)
 #define SNMP_INC_STATS_USER(mib, field) \
-	(per_cpu_ptr(mib[1], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++)
 #define SNMP_DEC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]--)
+	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--)
 #define SNMP_ADD_STATS_BH(mib, field, addend) 	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend)
 #define SNMP_ADD_STATS_USER(mib, field, addend) 	\
-	(per_cpu_ptr(mib[1], smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend)
 
 #endif
Index: linux-2.6.10-rc2-ck1/init/main.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/init/main.c	2004-11-15 20:19:59.000000000 +1100
+++ linux-2.6.10-rc2-ck1/init/main.c	2004-11-15 21:49:18.000000000 +1100
@@ -445,6 +445,12 @@ static void noinline rest_init(void)
 {
 	kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND);
 	numa_default_policy();
+	/*
+	 * Re-enable preemption but disable interrupts to make sure
+	 * we dont get preempted until we schedule() in cpu_idle().
+	 */
+	local_irq_disable();
+	preempt_enable_no_resched();
 	unlock_kernel();
  	cpu_idle();
 } 
@@ -511,6 +517,11 @@ asmlinkage void __init start_kernel(void
 	 * time - but meanwhile we still have a functioning scheduler.
 	 */
 	sched_init();
+	/*
+	 * Disable preemption - early bootup scheduling is extremely
+	 * fragile until we cpu_idle() for the first time.
+	 */
+	preempt_disable();
 	build_all_zonelists();
 	page_alloc_init();
 	printk("Kernel command line: %s\n", saved_command_line);
Index: linux-2.6.10-rc2-ck1/kernel/cpu.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/cpu.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/kernel/cpu.c	2004-11-15 21:49:18.000000000 +1100
@@ -132,7 +132,8 @@ int cpu_down(unsigned int cpu)
 	__cpu_die(cpu);
 
 	/* Move it here so it can run. */
-	kthread_bind(p, smp_processor_id());
+	kthread_bind(p, get_cpu());
+	put_cpu();
 
 	/* CPU is completely dead: tell everyone.  Too late to complain. */
 	if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)
Index: linux-2.6.10-rc2-ck1/kernel/irq/handle.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/irq/handle.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/kernel/irq/handle.c	2004-11-15 21:49:18.000000000 +1100
@@ -77,7 +77,7 @@ irqreturn_t no_action(int cpl, void *dev
  */
 void irq_exit(void)
 {
-	preempt_count() -= IRQ_EXIT_OFFSET;
+	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		do_softirq();
 	preempt_enable_no_resched();
Index: linux-2.6.10-rc2-ck1/kernel/kthread.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/kthread.c	2004-08-15 14:08:19.000000000 +1000
+++ linux-2.6.10-rc2-ck1/kernel/kthread.c	2004-11-15 21:49:18.000000000 +1100
@@ -14,6 +14,12 @@
 #include <linux/module.h>
 #include <asm/semaphore.h>
 
+/*
+ * We dont want to execute off keventd since it might
+ * hold a semaphore our callers hold too:
+ */
+static struct workqueue_struct *helper_wq;
+
 struct kthread_create_info
 {
 	/* Information passed to kthread() from keventd. */
@@ -126,12 +132,13 @@ struct task_struct *kthread_create(int (
 	init_completion(&create.started);
 	init_completion(&create.done);
 
-	/* If we're being called to start the first workqueue, we
-	 * can't use keventd. */
-	if (!keventd_up())
+	/*
+	 * The workqueue needs to start up first:
+	 */
+	if (!helper_wq)
 		work.func(work.data);
 	else {
-		schedule_work(&work);
+		queue_work(helper_wq, &work);
 		wait_for_completion(&create.done);
 	}
 	if (!IS_ERR(create.result)) {
@@ -183,3 +190,13 @@ int kthread_stop(struct task_struct *k)
 	return ret;
 }
 EXPORT_SYMBOL(kthread_stop);
+
+static __init int helper_init(void)
+{
+	helper_wq = create_singlethread_workqueue("kthread");
+	BUG_ON(!helper_wq);
+
+	return 0;
+}
+core_initcall(helper_init);
+
Index: linux-2.6.10-rc2-ck1/kernel/module.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/module.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/kernel/module.c	2004-11-15 21:49:18.000000000 +1100
@@ -379,7 +379,7 @@ static void module_unload_init(struct mo
 	for (i = 0; i < NR_CPUS; i++)
 		local_set(&mod->ref[i].count, 0);
 	/* Hold reference count during initialization. */
-	local_set(&mod->ref[smp_processor_id()].count, 1);
+	local_set(&mod->ref[_smp_processor_id()].count, 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
Index: linux-2.6.10-rc2-ck1/kernel/printk.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/printk.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/kernel/printk.c	2004-11-15 21:49:18.000000000 +1100
@@ -284,6 +284,7 @@ int do_syslog(int type, char __user * bu
 			error = __put_user(c,buf);
 			buf++;
 			i++;
+			cond_resched();
 			spin_lock_irq(&logbuf_lock);
 		}
 		spin_unlock_irq(&logbuf_lock);
@@ -325,6 +326,7 @@ int do_syslog(int type, char __user * bu
 			c = LOG_BUF(j);
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,&buf[count-1-i]);
+			cond_resched();
 			spin_lock_irq(&logbuf_lock);
 		}
 		spin_unlock_irq(&logbuf_lock);
@@ -340,6 +342,7 @@ int do_syslog(int type, char __user * bu
 					error = -EFAULT;
 					break;
 				}
+				cond_resched();
 			}
 		}
 		break;
@@ -642,8 +645,9 @@ void release_console_sem(void)
 		_con_start = con_start;
 		_log_end = log_end;
 		con_start = log_end;		/* Flush */
-		spin_unlock_irqrestore(&logbuf_lock, flags);
+		spin_unlock(&logbuf_lock);
 		call_console_drivers(_con_start, _log_end);
+		local_irq_restore(flags);
 	}
 	console_locked = 0;
 	console_may_schedule = 0;
Index: linux-2.6.10-rc2-ck1/kernel/sched.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/sched.c	2004-11-15 20:37:31.000000000 +1100
+++ linux-2.6.10-rc2-ck1/kernel/sched.c	2004-11-15 21:49:18.000000000 +1100
@@ -2301,6 +2301,38 @@ static inline int dependent_sleeper(int 
 }
 #endif
 
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+
+void fastcall add_preempt_count(int val)
+{
+	/*
+	 * Underflow?
+	 */
+	BUG_ON(((int)preempt_count() < 0));
+	preempt_count() += val;
+	/*
+	 * Spinlock count overflowing soon?
+	 */
+	BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
+}
+EXPORT_SYMBOL(add_preempt_count);
+
+void fastcall sub_preempt_count(int val)
+{
+	/*
+	 * Underflow?
+	 */
+	BUG_ON(val > preempt_count());
+	/*
+	 * Is the spinlock portion underflowing?
+	 */
+	BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK));
+	preempt_count() -= val;
+}
+EXPORT_SYMBOL(sub_preempt_count);
+
+#endif
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -2459,7 +2491,10 @@ EXPORT_SYMBOL(schedule);
 asmlinkage void __sched preempt_schedule(void)
 {
 	struct thread_info *ti = current_thread_info();
-
+#ifdef CONFIG_PREEMPT_BKL
+	struct task_struct *task = current;
+	int saved_lock_depth;
+#endif
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
 	 * we do not want to preempt the current task.  Just return..
@@ -2468,9 +2503,21 @@ asmlinkage void __sched preempt_schedule
 		return;
 
 need_resched:
-	ti->preempt_count = PREEMPT_ACTIVE;
+	add_preempt_count(PREEMPT_ACTIVE);
+	/*
+	 * We keep the big kernel semaphore locked, but we
+	 * clear ->lock_depth so that schedule() doesnt
+	 * auto-release the semaphore:
+	 */
+#ifdef CONFIG_PREEMPT_BKL
+	saved_lock_depth = task->lock_depth;
+	task->lock_depth = -1;
+#endif
 	schedule();
-	ti->preempt_count = 0;
+#ifdef CONFIG_PREEMPT_BKL
+	task->lock_depth = saved_lock_depth;
+#endif
+	sub_preempt_count(PREEMPT_ACTIVE);
 
 	/* we could miss a preemption opportunity between schedule and now */
 	barrier();
@@ -3197,13 +3244,71 @@ asmlinkage long sys_sched_yield(void)
 	return 0;
 }
 
-void __sched __cond_resched(void)
+static inline void __cond_resched(void)
 {
-	set_current_state(TASK_RUNNING);
-	schedule();
+	do {
+		add_preempt_count(PREEMPT_ACTIVE);
+		schedule();
+		sub_preempt_count(PREEMPT_ACTIVE);
+	} while (need_resched());
+}
+
+int __sched cond_resched(void)
+{
+	if (need_resched()) {
+		__cond_resched();
+		return 1;
+	}
+	return 0;
 }
 
-EXPORT_SYMBOL(__cond_resched);
+EXPORT_SYMBOL(cond_resched);
+
+/*
+ * cond_resched_lock() - if a reschedule is pending, drop the given lock,
+ * call schedule, and on return reacquire the lock.
+ *
+ * This works OK both with and without CONFIG_PREEMPT.  We do strange low-level
+ * operations here to prevent schedule() from being called twice (once via
+ * spin_unlock(), once by hand).
+ */
+int cond_resched_lock(spinlock_t * lock)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
+	if (lock->break_lock) {
+		lock->break_lock = 0;
+		spin_unlock(lock);
+		cpu_relax();
+		spin_lock(lock);
+	}
+#endif
+	if (need_resched()) {
+		_raw_spin_unlock(lock);
+		preempt_enable_no_resched();
+		__cond_resched();
+		spin_lock(lock);
+		return 1;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL(cond_resched_lock);
+
+int __sched cond_resched_softirq(void)
+{
+	BUG_ON(!in_softirq());
+
+	if (need_resched()) {
+		__local_bh_enable();
+		__cond_resched();
+		local_bh_disable();
+		return 1;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL(cond_resched_softirq);
+
 
 /**
  * yield - yield the current processor to other threads.
@@ -3228,7 +3333,7 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = this_rq();
+	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
 
 	atomic_inc(&rq->nr_iowait);
 	schedule();
@@ -3239,7 +3344,7 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = this_rq();
+	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
 	long ret;
 
 	atomic_inc(&rq->nr_iowait);
@@ -3449,7 +3554,7 @@ void __devinit init_idle(task_t *idle, i
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-#ifdef CONFIG_PREEMPT
+#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
 	idle->thread_info->preempt_count = (idle->lock_depth >= 0);
 #else
 	idle->thread_info->preempt_count = 0;
Index: linux-2.6.10-rc2-ck1/kernel/softirq.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/softirq.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/kernel/softirq.c	2004-11-15 21:49:18.000000000 +1100
@@ -142,7 +142,7 @@ void local_bh_enable(void)
 	 * Keep preemption disabled until we are done with
 	 * softirq processing:
  	 */
-	preempt_count() -= SOFTIRQ_OFFSET - 1;
+ 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
 
 	if (unlikely(!in_interrupt() && local_softirq_pending()))
 		do_softirq();
Index: linux-2.6.10-rc2-ck1/kernel/spinlock.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/spinlock.c	2004-10-19 08:57:12.000000000 +1000
+++ linux-2.6.10-rc2-ck1/kernel/spinlock.c	2004-11-15 21:49:18.000000000 +1100
@@ -2,6 +2,8 @@
  * Copyright (2004) Linus Torvalds
  *
  * Author: Zwane Mwaikambo <zwane@fsmlabs.com>
+ *
+ * Copyright (2004) Ingo Molnar
  */
 
 #include <linux/config.h>
@@ -11,6 +13,17 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 
+/*
+ * Generic declaration of the raw read_trylock() function,
+ * architectures are supposed to optimize this:
+ */
+int __lockfunc generic_raw_read_trylock(rwlock_t *lock)
+{
+	_raw_read_lock(lock);
+	return 1;
+}
+EXPORT_SYMBOL(generic_raw_read_trylock);
+
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
@@ -22,86 +35,29 @@ int __lockfunc _spin_trylock(spinlock_t 
 }
 EXPORT_SYMBOL(_spin_trylock);
 
-int __lockfunc _write_trylock(rwlock_t *lock)
+int __lockfunc _read_trylock(rwlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_write_trylock(lock))
+	if (_raw_read_trylock(lock))
 		return 1;
 
 	preempt_enable();
 	return 0;
 }
-EXPORT_SYMBOL(_write_trylock);
-
-#ifdef CONFIG_PREEMPT
-/*
- * This could be a long-held lock.  If another CPU holds it for a long time,
- * and that CPU is not asked to reschedule then *this* CPU will spin on the
- * lock for a long time, even if *this* CPU is asked to reschedule.
- *
- * So what we do here, in the slow (contended) path is to spin on the lock by
- * hand while permitting preemption.
- *
- * Called inside preempt_disable().
- */
-static inline void __preempt_spin_lock(spinlock_t *lock)
-{
-	if (preempt_count() > 1) {
-		_raw_spin_lock(lock);
-		return;
-	}
-
-	do {
-		preempt_enable();
-		while (spin_is_locked(lock))
-			cpu_relax();
-		preempt_disable();
-	} while (!_raw_spin_trylock(lock));
-}
+EXPORT_SYMBOL(_read_trylock);
 
-void __lockfunc _spin_lock(spinlock_t *lock)
+int __lockfunc _write_trylock(rwlock_t *lock)
 {
 	preempt_disable();
-	if (unlikely(!_raw_spin_trylock(lock)))
-		__preempt_spin_lock(lock);
-}
-
-static inline void __preempt_write_lock(rwlock_t *lock)
-{
-	if (preempt_count() > 1) {
-		_raw_write_lock(lock);
-		return;
-	}
-
-	do {
-		preempt_enable();
-		while (rwlock_is_locked(lock))
-			cpu_relax();
-		preempt_disable();
-	} while (!_raw_write_trylock(lock));
-}
+	if (_raw_write_trylock(lock))
+		return 1;
 
-void __lockfunc _write_lock(rwlock_t *lock)
-{
-	preempt_disable();
-	if (unlikely(!_raw_write_trylock(lock)))
-		__preempt_write_lock(lock);
-}
-#else
-void __lockfunc _spin_lock(spinlock_t *lock)
-{
-	preempt_disable();
-	_raw_spin_lock(lock);
+	preempt_enable();
+	return 0;
 }
+EXPORT_SYMBOL(_write_trylock);
 
-void __lockfunc _write_lock(rwlock_t *lock)
-{
-	preempt_disable();
-	_raw_write_lock(lock);
-}
-#endif
-EXPORT_SYMBOL(_spin_lock);
-EXPORT_SYMBOL(_write_lock);
+#ifndef CONFIG_PREEMPT
 
 void __lockfunc _read_lock(rwlock_t *lock)
 {
@@ -110,27 +66,6 @@ void __lockfunc _read_lock(rwlock_t *loc
 }
 EXPORT_SYMBOL(_read_lock);
 
-void __lockfunc _spin_unlock(spinlock_t *lock)
-{
-	_raw_spin_unlock(lock);
-	preempt_enable();
-}
-EXPORT_SYMBOL(_spin_unlock);
-
-void __lockfunc _write_unlock(rwlock_t *lock)
-{
-	_raw_write_unlock(lock);
-	preempt_enable();
-}
-EXPORT_SYMBOL(_write_unlock);
-
-void __lockfunc _read_unlock(rwlock_t *lock)
-{
-	_raw_read_unlock(lock);
-	preempt_enable();
-}
-EXPORT_SYMBOL(_read_unlock);
-
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 {
 	unsigned long flags;
@@ -212,6 +147,130 @@ void __lockfunc _write_lock_bh(rwlock_t 
 }
 EXPORT_SYMBOL(_write_lock_bh);
 
+void __lockfunc _spin_lock(spinlock_t *lock)
+{
+	preempt_disable();
+	_raw_spin_lock(lock);
+}
+
+EXPORT_SYMBOL(_spin_lock);
+
+void __lockfunc _write_lock(rwlock_t *lock)
+{
+	preempt_disable();
+	_raw_write_lock(lock);
+}
+
+EXPORT_SYMBOL(_write_lock);
+
+#else /* CONFIG_PREEMPT: */
+
+/*
+ * This could be a long-held lock. We both prepare to spin for a long
+ * time (making _this_ CPU preemptable if possible), and we also signal
+ * towards that other CPU that it should break the lock ASAP.
+ *
+ * (We do this in a function because inlining it would be excessive.)
+ */
+
+#define BUILD_LOCK_OPS(op, locktype)					\
+void __lockfunc _##op##_lock(locktype *lock)				\
+{									\
+	preempt_disable();						\
+	for (;;) {							\
+		if (likely(_raw_##op##_trylock(lock)))			\
+			break;						\
+		preempt_enable();					\
+		if (!(lock)->break_lock)				\
+			(lock)->break_lock = 1;				\
+		cpu_relax();						\
+		preempt_disable();					\
+	}								\
+}									\
+									\
+EXPORT_SYMBOL(_##op##_lock);						\
+									\
+unsigned long __lockfunc _##op##_lock_irqsave(locktype *lock)		\
+{									\
+	unsigned long flags;						\
+									\
+	preempt_disable();						\
+	for (;;) {							\
+		local_irq_save(flags);					\
+		if (likely(_raw_##op##_trylock(lock)))			\
+			break;						\
+		local_irq_restore(flags);				\
+									\
+		preempt_enable();					\
+		if (!(lock)->break_lock)				\
+			(lock)->break_lock = 1;				\
+		cpu_relax();						\
+		preempt_disable();					\
+	}								\
+	return flags;							\
+}									\
+									\
+EXPORT_SYMBOL(_##op##_lock_irqsave);					\
+									\
+void __lockfunc _##op##_lock_irq(locktype *lock)			\
+{									\
+	_##op##_lock_irqsave(lock);					\
+}									\
+									\
+EXPORT_SYMBOL(_##op##_lock_irq);					\
+									\
+void __lockfunc _##op##_lock_bh(locktype *lock)				\
+{									\
+	unsigned long flags;						\
+									\
+	/*							*/	\
+	/* Careful: we must exclude softirqs too, hence the	*/	\
+	/* irq-disabling. We use the generic preemption-aware	*/	\
+	/* function:						*/	\
+	/**/								\
+	flags = _##op##_lock_irqsave(lock);				\
+	local_bh_disable();						\
+	local_irq_restore(flags);					\
+}									\
+									\
+EXPORT_SYMBOL(_##op##_lock_bh)
+
+/*
+ * Build preemption-friendly versions of the following
+ * lock-spinning functions:
+ *
+ *         _[spin|read|write]_lock()
+ *         _[spin|read|write]_lock_irq()
+ *         _[spin|read|write]_lock_irqsave()
+ *         _[spin|read|write]_lock_bh()
+ */
+BUILD_LOCK_OPS(spin, spinlock_t);
+BUILD_LOCK_OPS(read, rwlock_t);
+BUILD_LOCK_OPS(write, rwlock_t);
+
+#endif /* CONFIG_PREEMPT */
+
+void __lockfunc _spin_unlock(spinlock_t *lock)
+{
+	_raw_spin_unlock(lock);
+	preempt_enable();
+}
+EXPORT_SYMBOL(_spin_unlock);
+
+void __lockfunc _write_unlock(rwlock_t *lock)
+{
+	_raw_write_unlock(lock);
+	preempt_enable();
+}
+EXPORT_SYMBOL(_write_unlock);
+
+void __lockfunc _read_unlock(rwlock_t *lock)
+{
+	_raw_read_unlock(lock);
+	preempt_enable();
+}
+EXPORT_SYMBOL(_read_unlock);
+
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
 	_raw_spin_unlock(lock);
Index: linux-2.6.10-rc2-ck1/kernel/stop_machine.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/stop_machine.c	2004-05-23 12:54:58.000000000 +1000
+++ linux-2.6.10-rc2-ck1/kernel/stop_machine.c	2004-11-15 21:49:18.000000000 +1100
@@ -90,7 +90,7 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
-		if (i == smp_processor_id())
+		if (i == _smp_processor_id())
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
@@ -172,7 +172,7 @@ struct task_struct *__stop_machine_run(i
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
-		cpu = smp_processor_id();
+		cpu = _smp_processor_id();
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
 	if (!IS_ERR(p)) {
Index: linux-2.6.10-rc2-ck1/kernel/timer.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/kernel/timer.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/kernel/timer.c	2004-11-15 21:49:18.000000000 +1100
@@ -463,7 +463,14 @@ repeat:
 			smp_wmb();
 			timer->base = NULL;
 			spin_unlock_irq(&base->lock);
-			fn(data);
+			{
+				u32 preempt_count = preempt_count();
+				fn(data);
+				if (preempt_count != preempt_count()) {
+					printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count());
+					BUG();
+				}
+			}
 			spin_lock_irq(&base->lock);
 			goto repeat;
 		}
Index: linux-2.6.10-rc2-ck1/lib/Kconfig.debug
===================================================================
--- linux-2.6.10-rc2-ck1.orig/lib/Kconfig.debug	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/lib/Kconfig.debug	2004-11-15 22:03:59.977533737 +1100
@@ -48,6 +48,15 @@ config DEBUG_SLAB
 	  allocation as well as poisoning memory on free to catch use of freed
 	  memory. This can make kmalloc/kfree-intensive workloads much slower.
 
+config DEBUG_PREEMPT
+	bool "Debug preemptible kernel"
+	depends on PREEMPT && X86
+	help
+	  If you say Y here then the kernel will use a debug variant of the
+	  commonly used smp_processor_id() function and will print warnings
+	  if kernel code uses it in a preemption-unsafe way. Also, the kernel
+	  will detect preemption count underflows.
+
 config DEBUG_SPINLOCK
 	bool "Spinlock debugging"
 	depends on DEBUG_KERNEL && (ALPHA || ARM || X86 || IA64 || MIPS || PARISC || PPC32 || (SUPERH && !SUPERH64) || SPARC32 || SPARC64 || USERMODE || X86_64)
Index: linux-2.6.10-rc2-ck1/lib/kernel_lock.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/lib/kernel_lock.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/lib/kernel_lock.c	2004-11-15 21:49:18.000000000 +1100
@@ -7,6 +7,141 @@
  */
 #include <linux/smp_lock.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>
+
+#if defined(CONFIG_PREEMPT) && defined(__smp_processor_id) && \
+		defined(CONFIG_DEBUG_PREEMPT)
+
+/*
+ * Debugging check.
+ */
+unsigned int smp_processor_id(void)
+{
+	unsigned long preempt_count = preempt_count();
+	int this_cpu = __smp_processor_id();
+	cpumask_t this_mask;
+
+	if (likely(preempt_count))
+		goto out;
+
+	if (irqs_disabled())
+		goto out;
+
+	/*
+	 * Kernel threads bound to a single CPU can safely use
+	 * smp_processor_id():
+	 */
+	this_mask = cpumask_of_cpu(this_cpu);
+
+	if (cpus_equal(current->cpus_allowed, this_mask))
+		goto out;
+
+	/*
+	 * It is valid to assume CPU-locality during early bootup:
+	 */
+	if (system_state != SYSTEM_RUNNING)
+		goto out;
+
+	/*
+	 * Avoid recursion:
+	 */
+	preempt_disable();
+
+	if (!printk_ratelimit())
+		goto out_enable;
+
+	printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] code: %s/%d\n", preempt_count(), current->comm, current->pid);
+	print_symbol("caller is %s\n", (long)__builtin_return_address(0));
+	dump_stack();
+
+out_enable:
+	preempt_enable_no_resched();
+out:
+	return this_cpu;
+}
+
+EXPORT_SYMBOL(smp_processor_id);
+
+#endif /* PREEMPT && __smp_processor_id && DEBUG_PREEMPT */
+
+#ifdef CONFIG_PREEMPT_BKL
+/*
+ * The 'big kernel semaphore'
+ *
+ * This mutex is taken and released recursively by lock_kernel()
+ * and unlock_kernel().  It is transparently dropped and reaquired
+ * over schedule().  It is used to protect legacy code that hasn't
+ * been migrated to a proper locking design yet.
+ *
+ * Note: code locked by this semaphore will only be serialized against
+ * other code using the same locking facility. The code guarantees that
+ * the task remains on the same CPU.
+ *
+ * Don't use in new code.
+ */
+DECLARE_MUTEX(kernel_sem);
+
+/*
+ * Re-acquire the kernel semaphore.
+ *
+ * This function is called with preemption off.
+ *
+ * We are executing in schedule() so the code must be extremely careful
+ * about recursion, both due to the down() and due to the enabling of
+ * preemption. schedule() will re-check the preemption flag after
+ * reacquiring the semaphore.
+ */
+int __lockfunc __reacquire_kernel_lock(void)
+{
+	struct task_struct *task = current;
+	int saved_lock_depth = task->lock_depth;
+
+	BUG_ON(saved_lock_depth < 0);
+
+	task->lock_depth = -1;
+	preempt_enable_no_resched();
+
+	down(&kernel_sem);
+
+	preempt_disable();
+	task->lock_depth = saved_lock_depth;
+
+	return 0;
+}
+
+void __lockfunc __release_kernel_lock(void)
+{
+	up(&kernel_sem);
+}
+
+/*
+ * Getting the big kernel semaphore.
+ */
+void __lockfunc lock_kernel(void)
+{
+	struct task_struct *task = current;
+	int depth = task->lock_depth + 1;
+
+	if (likely(!depth))
+		/*
+		 * No recursion worries - we set up lock_depth _after_
+		 */
+		down(&kernel_sem);
+
+	task->lock_depth = depth;
+}
+
+void __lockfunc unlock_kernel(void)
+{
+	struct task_struct *task = current;
+
+	BUG_ON(task->lock_depth < 0);
+
+	if (likely(--task->lock_depth < 0))
+		up(&kernel_sem);
+}
+
+#else
 
 /*
  * The 'big kernel lock'
@@ -34,7 +169,7 @@ static spinlock_t kernel_flag __cachelin
  * (This works on UP too - _raw_spin_trylock will never
  * return false in that case)
  */
-int __lockfunc get_kernel_lock(void)
+int __lockfunc __reacquire_kernel_lock(void)
 {
 	while (!_raw_spin_trylock(&kernel_flag)) {
 		if (test_thread_flag(TIF_NEED_RESCHED))
@@ -45,7 +180,7 @@ int __lockfunc get_kernel_lock(void)
 	return 0;
 }
 
-void __lockfunc put_kernel_lock(void)
+void __lockfunc __release_kernel_lock(void)
 {
 	_raw_spin_unlock(&kernel_flag);
 	preempt_enable_no_resched();
@@ -122,5 +257,8 @@ void __lockfunc unlock_kernel(void)
 		__unlock_kernel();
 }
 
+#endif
+
 EXPORT_SYMBOL(lock_kernel);
 EXPORT_SYMBOL(unlock_kernel);
+
Index: linux-2.6.10-rc2-ck1/mm/memory.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/mm/memory.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/mm/memory.c	2004-11-15 21:49:18.000000000 +1100
@@ -490,19 +490,15 @@ static void unmap_page_range(struct mmu_
 	tlb_end_vma(tlb, vma);
 }
 
-/* Dispose of an entire struct mmu_gather per rescheduling point */
-#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
-#define ZAP_BLOCK_SIZE	(FREE_PTE_NR * PAGE_SIZE)
-#endif
-
-/* For UP, 256 pages at a time gives nice low latency */
-#if !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
-#define ZAP_BLOCK_SIZE	(256 * PAGE_SIZE)
-#endif
-
+#ifdef CONFIG_PREEMPT
+/*
+ * It's not an issue to have a small zap block size - TLB flushes
+ * only happen once normally, due to the tlb->need_flush optimization.
+ */
+# define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)
+#else
 /* No preempt: go for improved straight-line efficiency */
-#if !defined(CONFIG_PREEMPT)
-#define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
+# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
 #endif
 
 /**
@@ -577,15 +573,15 @@ int unmap_vmas(struct mmu_gather **tlbp,
 
 			start += block;
 			zap_bytes -= block;
-			if ((long)zap_bytes > 0)
-				continue;
-			if (!atomic && need_resched()) {
+			if (!atomic) {
 				int fullmm = tlb_is_full_mm(*tlbp);
 				tlb_finish_mmu(*tlbp, tlb_start, start);
 				cond_resched_lock(&mm->page_table_lock);
 				*tlbp = tlb_gather_mmu(mm, fullmm);
 				tlb_start_valid = 0;
 			}
+			if ((long)zap_bytes > 0)
+				continue;
 			zap_bytes = ZAP_BLOCK_SIZE;
 		}
 	}
@@ -778,6 +774,8 @@ int get_user_pages(struct task_struct *t
 		do {
 			struct page *map;
 			int lookup_write = write;
+
+			cond_resched_lock(&mm->page_table_lock);
 			while (!(map = follow_page(mm, start, lookup_write))) {
 				/*
 				 * Shortcut for anonymous pages. We don't want
@@ -1519,6 +1517,7 @@ do_no_page(struct mm_struct *mm, struct 
 	}
 	smp_rmb();  /* Prevent CPU from reordering lock-free ->nopage() */
 retry:
+	cond_resched();
 	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
 
 	/* no page was available -- either SIGBUS or OOM */
Index: linux-2.6.10-rc2-ck1/mm/msync.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/mm/msync.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/mm/msync.c	2004-11-15 21:52:06.000000000 +1100
@@ -93,7 +93,7 @@ static inline int filemap_sync_pmd_range
 	return error;
 }
 
-static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
+static int __filemap_sync(struct vm_area_struct * vma, unsigned long address,
 	size_t size, unsigned int flags)
 {
 	pgd_t * dir;
@@ -132,6 +132,31 @@ static int filemap_sync(struct vm_area_s
 	return error;
 }
 
+#ifdef CONFIG_PREEMPT
+static int filemap_sync(struct vm_area_struct *vma, unsigned long address,
+			size_t size, unsigned int flags)
+{
+	const size_t chunk = 64 * 1024;	/* bytes */
+	int error = 0;
+
+	while (size) {
+		size_t sz = min(size, chunk);
+
+		error |= __filemap_sync(vma, address, sz, flags);
+		cond_resched();
+		address += sz;
+		size -= sz;
+	}
+	return error;
+}
+#else
+static int filemap_sync(struct vm_area_struct *vma, unsigned long address,
+			size_t size, unsigned int flags)
+{
+	return __filemap_sync(vma, address, size, flags);
+}
+#endif
+
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *
Index: linux-2.6.10-rc2-ck1/mm/slab.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/mm/slab.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/mm/slab.c	2004-11-15 21:49:18.000000000 +1100
@@ -2805,7 +2805,7 @@ static void cache_reap(void *unused)
 next_unlock:
 		spin_unlock_irq(&searchp->spinlock);
 next:
-		;
+		cond_resched();
 	}
 	check_irq_on();
 	up(&cache_chain_sem);
Index: linux-2.6.10-rc2-ck1/mm/vmscan.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/mm/vmscan.c	2004-11-15 21:32:23.000000000 +1100
+++ linux-2.6.10-rc2-ck1/mm/vmscan.c	2004-11-15 21:49:18.000000000 +1100
@@ -359,6 +359,8 @@ static int shrink_list(struct list_head 
 		int may_enter_fs;
 		int referenced;
 
+		cond_resched();
+
 		page = lru_to_page(page_list);
 		list_del(&page->lru);
 
@@ -711,6 +713,7 @@ refill_inactive_zone(struct zone *zone, 
 		reclaim_mapped = 1;
 
 	while (!list_empty(&l_hold)) {
+		cond_resched();
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
 		if (page_mapped(page)) {
Index: linux-2.6.10-rc2-ck1/net/core/sock.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/net/core/sock.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/net/core/sock.c	2004-11-15 21:49:18.000000000 +1100
@@ -942,6 +942,15 @@ static void __release_sock(struct sock *
 
 			skb->next = NULL;
 			sk->sk_backlog_rcv(sk, skb);
+
+			/*
+			 * We are in process context here with softirqs
+			 * disabled, use cond_resched_softirq() to preempt.
+			 * This is safe to do because we've taken the backlog
+			 * queue private:
+			 */
+			cond_resched_softirq();
+
 			skb = next;
 		} while (skb != NULL);
 
Index: linux-2.6.10-rc2-ck1/net/ipv4/tcp_ipv4.c
===================================================================
--- linux-2.6.10-rc2-ck1.orig/net/ipv4/tcp_ipv4.c	2004-11-15 19:37:30.000000000 +1100
+++ linux-2.6.10-rc2-ck1/net/ipv4/tcp_ipv4.c	2004-11-15 21:49:18.000000000 +1100
@@ -2232,7 +2232,10 @@ static void *established_get_first(struc
 		struct sock *sk;
 		struct hlist_node *node;
 		struct tcp_tw_bucket *tw;
-	       
+
+		/* We can reschedule _before_ having picked the target: */
+		cond_resched_softirq();
+
 		read_lock(&tcp_ehash[st->bucket].lock);
 		sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family) {
@@ -2279,6 +2282,10 @@ get_tw:
 		}
 		read_unlock(&tcp_ehash[st->bucket].lock);
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
+
+		/* We can reschedule between buckets: */
+		cond_resched_softirq();
+
 		if (++st->bucket < tcp_ehash_size) {
 			read_lock(&tcp_ehash[st->bucket].lock);
 			sk = sk_head(&tcp_ehash[st->bucket].chain);
