Turn the "swappiness" knob into one with well defined semantics. Rename it
"mapped" to correspond directly with the percentage of mapped ram or
"applications" as users think of it. Currently the swappiness algorithm can
easily lead to swapping situations on simple file copies due to the distress
algorithm which too easily overrides the swappiness value. Add a
"hardmaplimit" tunable, on by default, which only allows the vm to override
the "mapped" tunable when distress is at its greatest to prevent false
out-of-memory situations.

Signed-off-by: Con Kolivas <kernel@kolivas.org>

Index: linux-2.6.14-ck2/include/linux/swap.h
===================================================================
--- linux-2.6.14-ck2.orig/include/linux/swap.h	2005-11-02 20:37:36.000000000 +1100
+++ linux-2.6.14-ck2/include/linux/swap.h	2005-11-02 20:39:02.000000000 +1100
@@ -174,7 +174,8 @@ extern void swap_setup(void);
 extern int try_to_free_pages(struct zone **, unsigned int);
 extern int zone_reclaim(struct zone *, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
-extern int vm_swappiness;
+extern int vm_mapped;
+extern int vm_hardmaplimit;
 
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
Index: linux-2.6.14-ck2/include/linux/sysctl.h
===================================================================
--- linux-2.6.14-ck2.orig/include/linux/sysctl.h	2005-11-02 20:38:15.000000000 +1100
+++ linux-2.6.14-ck2/include/linux/sysctl.h	2005-11-02 20:39:24.000000000 +1100
@@ -173,7 +173,7 @@ enum
 	VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */
 	VM_PAGEBUF=17,		/* struct: Control pagebuf parameters */
 	VM_HUGETLB_PAGES=18,	/* int: Number of available Huge Pages */
-	VM_SWAPPINESS=19,	/* Tendency to steal mapped memory */
+	VM_MAPPED=19,		/* percent mapped min while evicting cache */
 	VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */
 	VM_MIN_FREE_KBYTES=21,	/* Minimum free kilobytes to maintain */
 	VM_MAX_MAP_COUNT=22,	/* int: Maximum number of mmaps/address-space */
@@ -185,6 +185,7 @@ enum
 	VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
 	VM_SWAP_PREFETCH=29,	/* int: amount to swap prefetch */
 	VM_READAHEAD_RATIO=30, /* percent of read-ahead size to thrashing-threshold */
+	VM_HARDMAPLIMIT=31,	/* Make mapped a hard limit */
 };
 
 
Index: linux-2.6.14-ck2/kernel/sysctl.c
===================================================================
--- linux-2.6.14-ck2.orig/kernel/sysctl.c	2005-11-02 20:37:36.000000000 +1100
+++ linux-2.6.14-ck2/kernel/sysctl.c	2005-11-02 20:39:02.000000000 +1100
@@ -774,16 +774,24 @@ static ctl_table vm_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
-		.ctl_name	= VM_SWAPPINESS,
-		.procname	= "swappiness",
-		.data		= &vm_swappiness,
-		.maxlen		= sizeof(vm_swappiness),
+		.ctl_name	= VM_MAPPED,
+		.procname	= "mapped",
+		.data		= &vm_mapped,
+		.maxlen		= sizeof(vm_mapped),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= VM_HARDMAPLIMIT,
+		.procname	= "hardmaplimit",
+		.data		= &vm_hardmaplimit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	 {
 		.ctl_name	= VM_HUGETLB_PAGES,
Index: linux-2.6.14-ck2/mm/vmscan.c
===================================================================
--- linux-2.6.14-ck2.orig/mm/vmscan.c	2005-11-02 20:37:36.000000000 +1100
+++ linux-2.6.14-ck2/mm/vmscan.c	2005-11-02 20:39:02.000000000 +1100
@@ -126,9 +126,10 @@ struct shrinker {
 #endif
 
 /*
- * From 0 .. 100.  Higher means more swappy.
+ * From 0 .. 100.  Lower means more swappy.
  */
-int vm_swappiness = 60;
+int vm_mapped = 66;
+int vm_hardmaplimit = 1;
 static long total_memory;
 
 static LIST_HEAD(shrinker_list);
@@ -753,11 +754,14 @@ refill_inactive_zone(struct zone *zone, 
 	 * ratio is downgraded - just because there's a lot of mapped memory
 	 * doesn't necessarily mean that page reclaim isn't succeeding.
 	 *
-	 * The distress ratio is important - we don't want to start going oom.
+	 * This distress value is ignored if we apply a hardmaplimit except
+	 * in extreme distress.
 	 *
-	 * A 100% value of vm_swappiness overrides this algorithm altogether.
+	 * A 0% value of vm_mapped overrides this algorithm altogether.
 	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+	swap_tendency = mapped_ratio * 100 / (vm_mapped + 1);
+	if (!vm_hardmaplimit || distress == 100)
+		swap_tendency += distress;
 
 	/*
 	 * Now use this metric to decide whether to start moving mapped memory
@@ -771,9 +775,9 @@ refill_inactive_zone(struct zone *zone, 
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
 		if (page_mapped(page)) {
-			if (!reclaim_mapped ||
-			    (total_swap_pages == 0 && PageAnon(page)) ||
-			    page_referenced(page, 0, sc->priority <= 0)) {
+			if ((total_swap_pages == 0 && PageAnon(page)) ||
+			    page_referenced(page, 0, sc->priority <= 0) ||
+			    !reclaim_mapped) {
 				list_add(&page->lru, &l_active);
 				continue;
 			}

