Index: linux-2.6.9-rc3-bk9/include/linux/mmzone.h
===================================================================
--- linux-2.6.9-rc3-bk9.orig/include/linux/mmzone.h	2004-10-01 23:57:49.000000000 +1000
+++ linux-2.6.9-rc3-bk9/include/linux/mmzone.h	2004-10-10 00:40:09.674663339 +1000
@@ -113,7 +113,7 @@ struct zone {
 	 */
 	spinlock_t		lock;
 	unsigned long		free_pages;
-	unsigned long		pages_min, pages_low, pages_high;
+	unsigned long		pages_min, pages_low, pages_high, pages_unmapped;
 	/*
 	 * protection[] is a pre-calculated number of extra pages that must be
 	 * available in a zone in order for __alloc_pages() to allocate memory
@@ -264,6 +264,7 @@ typedef struct pglist_data {
 	struct pglist_data *pgdat_next;
 	wait_queue_head_t       kswapd_wait;
 	struct task_struct *kswapd;
+	unsigned long mapped_nrpages;
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
Index: linux-2.6.9-rc3-bk9/include/linux/swap.h
===================================================================
--- linux-2.6.9-rc3-bk9.orig/include/linux/swap.h	2004-10-01 23:57:49.000000000 +1000
+++ linux-2.6.9-rc3-bk9/include/linux/swap.h	2004-10-10 00:40:09.674663339 +1000
@@ -174,7 +174,8 @@ extern void swap_setup(void);
 /* linux/mm/vmscan.c */
 extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
-extern int vm_swappiness;
+extern int vm_mapped;
+extern int vm_hardmaplimit;
 
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
Index: linux-2.6.9-rc3-bk9/include/linux/sysctl.h
===================================================================
--- linux-2.6.9-rc3-bk9.orig/include/linux/sysctl.h	2004-10-10 00:38:08.844527828 +1000
+++ linux-2.6.9-rc3-bk9/include/linux/sysctl.h	2004-10-10 00:40:09.675663182 +1000
@@ -158,7 +158,7 @@ enum
 	VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */
 	VM_PAGEBUF=17,		/* struct: Control pagebuf parameters */
 	VM_HUGETLB_PAGES=18,	/* int: Number of available Huge Pages */
-	VM_SWAPPINESS=19,	/* Tendency to steal mapped memory */
+	VM_MAPPED=19,		/* percent mapped min while evicting cache */
 	VM_LOWER_ZONE_PROTECTION=20,/* Amount of protection of lower zones */
 	VM_MIN_FREE_KBYTES=21,	/* Minimum free kilobytes to maintain */
 	VM_MAX_MAP_COUNT=22,	/* int: Maximum number of mmaps/address-space */
@@ -167,6 +167,7 @@ enum
 	VM_HUGETLB_GROUP=25,	/* permitted hugetlb group */
 	VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
 	VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
+	VM_HARDMAPLIMIT=28,	/* Make mapped a hard limit */
 };
 
 
Index: linux-2.6.9-rc3-bk9/kernel/sysctl.c
===================================================================
--- linux-2.6.9-rc3-bk9.orig/kernel/sysctl.c	2004-10-01 23:57:49.000000000 +1000
+++ linux-2.6.9-rc3-bk9/kernel/sysctl.c	2004-10-10 00:40:09.676663026 +1000
@@ -700,16 +700,24 @@ static ctl_table vm_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
-		.ctl_name	= VM_SWAPPINESS,
-		.procname	= "swappiness",
-		.data		= &vm_swappiness,
-		.maxlen		= sizeof(vm_swappiness),
+		.ctl_name	= VM_MAPPED,
+		.procname	= "mapped",
+		.data		= &vm_mapped,
+		.maxlen		= sizeof(vm_mapped),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= VM_HARDMAPLIMIT,
+		.procname	= "hardmaplimit",
+		.data		= &vm_hardmaplimit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	 {
 		.ctl_name	= VM_HUGETLB_PAGES,
Index: linux-2.6.9-rc3-bk9/mm/page_alloc.c
===================================================================
--- linux-2.6.9-rc3-bk9.orig/mm/page_alloc.c	2004-10-01 23:57:49.000000000 +1000
+++ linux-2.6.9-rc3-bk9/mm/page_alloc.c	2004-10-10 00:40:09.677662870 +1000
@@ -700,6 +700,14 @@ rebalance:
 		if (z->free_pages < min)
 			continue;
 
+		if (vm_mapped && wait && 
+			z->free_pages < z->pages_unmapped &&
+			z->free_pages > z->pages_low) {
+				z->zone_pgdat->mapped_nrpages =
+					z->pages_unmapped - z->free_pages;
+				wakeup_kswapd(z);
+		}
+
 		page = buffered_rmqueue(z, order, gfp_mask);
 		if (page)
 			goto got_pg;
@@ -1934,6 +1942,7 @@ static void setup_per_zone_pages_min(voi
 
 		zone->pages_low = zone->pages_min * 2;
 		zone->pages_high = zone->pages_min * 3;
+		zone->pages_unmapped = zone->pages_min * 4;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
Index: linux-2.6.9-rc3-bk9/mm/vmscan.c
===================================================================
--- linux-2.6.9-rc3-bk9.orig/mm/vmscan.c	2004-10-10 00:38:08.875522988 +1000
+++ linux-2.6.9-rc3-bk9/mm/vmscan.c	2004-10-10 00:43:07.964832183 +1000
@@ -116,10 +116,8 @@ struct shrinker {
 #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
 #endif
 
-/*
- * From 0 .. 100.  Higher means more swappy.
- */
-int vm_swappiness = 60;
+int vm_mapped = 66;
+int vm_hardmaplimit = 1;
 static long total_memory;
 
 static LIST_HEAD(shrinker_list);
@@ -345,7 +343,8 @@ static pageout_t pageout(struct page *pa
 /*
  * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
  */
-static int shrink_list(struct list_head *page_list, struct scan_control *sc)
+static int shrink_list(struct list_head *page_list, struct scan_control *sc,
+			int maplimit)
 {
 	LIST_HEAD(ret_pages);
 	struct pagevec freed_pvec;
@@ -373,6 +372,8 @@ static int shrink_list(struct list_head 
 			goto keep_locked;
 
 		sc->nr_scanned++;
+		if (maplimit && page_mapped(page))
+			goto keep_locked;
 		/* Double the slab pressure for mapped and swapcache pages */
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
@@ -540,6 +541,7 @@ static void shrink_cache(struct zone *zo
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
 	int max_scan = sc->nr_to_scan;
+	int maplimit = 0;
 
 	pagevec_init(&pvec, 1);
 
@@ -581,11 +583,12 @@ static void shrink_cache(struct zone *zo
 			goto done;
 
 		max_scan -= nr_scan;
-		if (current_is_kswapd())
+		if (current_is_kswapd()) {
 			mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-		else
+			maplimit = !!zone->zone_pgdat->mapped_nrpages;
+		} else
 			mod_page_state_zone(zone, pgscan_direct, nr_scan);
-		nr_freed = shrink_list(&page_list, sc);
+		nr_freed = shrink_list(&page_list, sc, maplimit);
 		if (current_is_kswapd())
 			mod_page_state(kswapd_steal, nr_freed);
 		mod_page_state_zone(zone, pgsteal, nr_freed);
@@ -640,15 +643,12 @@ refill_inactive_zone(struct zone *zone, 
 	int pgdeactivate = 0;
 	int pgscanned = 0;
 	int nr_pages = sc->nr_to_scan;
+	unsigned int mapped_ratio;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
 	LIST_HEAD(l_inactive);	/* Pages to go onto the inactive_list */
 	LIST_HEAD(l_active);	/* Pages to go onto the active_list */
 	struct page *page;
 	struct pagevec pvec;
-	int reclaim_mapped = 0;
-	long mapped_ratio;
-	long distress;
-	long swap_tendency;
 
 	lru_add_drain();
 	pgmoved = 0;
@@ -678,42 +678,14 @@ refill_inactive_zone(struct zone *zone, 
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
-	/*
-	 * `distress' is a measure of how much trouble we're having reclaiming
-	 * pages.  0 -> no problems.  100 -> great trouble.
-	 */
-	distress = 100 >> zone->prev_priority;
-
-	/*
-	 * The point of this algorithm is to decide when to start reclaiming
-	 * mapped memory instead of just pagecache.  Work out how much memory
-	 * is mapped.
-	 */
 	mapped_ratio = (sc->nr_mapped * 100) / total_memory;
 
-	/*
-	 * Now decide how much we really want to unmap some pages.  The mapped
-	 * ratio is downgraded - just because there's a lot of mapped memory
-	 * doesn't necessarily mean that page reclaim isn't succeeding.
-	 *
-	 * The distress ratio is important - we don't want to start going oom.
-	 *
-	 * A 100% value of vm_swappiness overrides this algorithm altogether.
-	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
-	/*
-	 * Now use this metric to decide whether to start moving mapped memory
-	 * onto the inactive list.
-	 */
-	if (swap_tendency >= 100)
-		reclaim_mapped = 1;
-
 	while (!list_empty(&l_hold)) {
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
 		if (page_mapped(page)) {
-			if (!reclaim_mapped ||
+			if (zone->zone_pgdat->mapped_nrpages ||
+			    (vm_hardmaplimit && mapped_ratio < vm_mapped) ||
 			    (total_swap_pages == 0 && PageAnon(page)) ||
 			    page_referenced(page, 0)) {
 				list_add(&page->lru, &l_active);
@@ -968,12 +940,13 @@ out:
  * the page allocator fallback scheme to ensure that aging of pages is balanced
  * across the zones.
  */
-static int balance_pgdat(pg_data_t *pgdat, int nr_pages)
+static int 
+balance_pgdat(pg_data_t *pgdat, int nr_pages, unsigned long mapped_nrpages)
 {
 	int to_free = nr_pages;
 	int all_zones_ok;
 	int priority;
-	int i;
+	int i, maplimit = 0;
 	int total_scanned, total_reclaimed;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc;
@@ -985,6 +958,23 @@ loop_again:
 	sc.may_writepage = 0;
 	sc.nr_mapped = read_page_state(nr_mapped);
 
+	/*
+	 * Sanity check to ensure we don't have a stale maplimit set
+	 * and are calling balance_pgdat for a different reason.
+	 */
+	if (!nr_pages && mapped_nrpages) {
+		maplimit = 1;
+		nr_pages = mapped_nrpages;
+	}
+	
+	/*
+	 * kswapd does a light balance_pgdat() when there is less than 
+	 * ->pages_unmapped free provided there is less than vm_mapped %
+	 * of that ram mapped.
+	 */
+	if (maplimit && sc.nr_mapped * 100 / total_memory > vm_mapped)
+		return 0;
+
 	inc_page_state(pageoutrun);
 
 	for (i = 0; i < pgdat->nr_zones; i++) {
@@ -999,6 +989,12 @@ loop_again:
 
 		all_zones_ok = 1;
 
+		/*
+		 * Only do low priority scanning if we're here due to
+		 * mapped watermark.
+		 */
+		if (maplimit && priority < DEF_PRIORITY)
+			goto out;
 		if (nr_pages == 0) {
 			/*
 			 * Scan in the highmem->dma direction for the highest
@@ -1014,10 +1010,13 @@ loop_again:
 						priority != DEF_PRIORITY)
 					continue;
 
-				if (zone->free_pages <= zone->pages_high) {
-					end_zone = i;
-					goto scan;
+				if (zone->free_pages <= zone->pages_high ||
+					(maplimit && zone->free_pages <= 
+					zone->pages_unmapped)) {
+						end_zone = i;
+						goto scan;
 				}
+
 			}
 			goto out;
 		} else {
@@ -1158,7 +1157,7 @@ static int kswapd(void *p)
 		schedule();
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0);
+		balance_pgdat(pgdat, 0, pgdat->mapped_nrpages);
 	}
 	return 0;
 }
@@ -1170,8 +1169,10 @@ void wakeup_kswapd(struct zone *zone)
 {
 	if (zone->present_pages == 0)
 		return;
-	if (zone->free_pages > zone->pages_low)
+	if (zone->free_pages > zone->pages_unmapped)
 		return;
+	if (zone->free_pages <= zone->pages_low)
+		zone->zone_pgdat->mapped_nrpages = 0;
 	if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
 		return;
 	wake_up_interruptible(&zone->zone_pgdat->kswapd_wait);
@@ -1194,7 +1195,7 @@ int shrink_all_memory(int nr_pages)
 	current->reclaim_state = &reclaim_state;
 	for_each_pgdat(pgdat) {
 		int freed;
-		freed = balance_pgdat(pgdat, nr_to_free);
+		freed = balance_pgdat(pgdat, nr_to_free, 0);
 		ret += freed;
 		nr_to_free -= freed;
 		if (nr_to_free <= 0)
