When kswapd is awoken due to reclaim by a running task, set the priority of
kswapd to that of the calling task thus making memory reclaim cpu activity
affected by nice level.

Signed-off-by: Con Kolivas <kernel@kolivas.org>

 include/linux/mmzone.h |    2 +-
 mm/page_alloc.c        |    2 +-
 mm/vmscan.c            |   36 ++++++++++++++++++++++++++++++++++--
 3 files changed, 36 insertions(+), 4 deletions(-)

Index: linux-2.6.22-rc4-ck1/include/linux/mmzone.h
===================================================================
--- linux-2.6.22-rc4-ck1.orig/include/linux/mmzone.h	2007-06-10 21:59:55.000000000 +1000
+++ linux-2.6.22-rc4-ck1/include/linux/mmzone.h	2007-06-10 21:59:56.000000000 +1000
@@ -468,7 +468,7 @@ typedef struct pglist_data {
 void get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free);
 void build_all_zonelists(void);
-void wakeup_kswapd(struct zone *zone, int order);
+void wakeup_kswapd(struct zone *zone, int order, struct task_struct *p);
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		int classzone_idx, int alloc_flags);
 enum memmap_context {
Index: linux-2.6.22-rc4-ck1/mm/page_alloc.c
===================================================================
--- linux-2.6.22-rc4-ck1.orig/mm/page_alloc.c	2007-06-10 21:59:55.000000000 +1000
+++ linux-2.6.22-rc4-ck1/mm/page_alloc.c	2007-06-10 21:59:56.000000000 +1000
@@ -1250,7 +1250,7 @@ restart:
 		goto nopage;
 
 	for (z = zonelist->zones; *z; z++)
-		wakeup_kswapd(*z, order);
+		wakeup_kswapd(*z, order, p);
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
Index: linux-2.6.22-rc4-ck1/mm/vmscan.c
===================================================================
--- linux-2.6.22-rc4-ck1.orig/mm/vmscan.c	2007-06-10 21:59:55.000000000 +1000
+++ linux-2.6.22-rc4-ck1/mm/vmscan.c	2007-06-10 21:59:56.000000000 +1000
@@ -956,6 +956,34 @@ static unsigned long shrink_zone(int pri
 }
 
 /*
+ * Helper functions to adjust nice level of kswapd, based on the priority of
+ * the task (p) that called it. If it is already higher priority we do not
+ * demote its nice level since it is still working on behalf of a higher
+ * priority task. With kernel threads we leave it at nice 0.
+ *
+ * We don't ever run kswapd real time, so if a real time task calls kswapd we
+ * set it to highest SCHED_NORMAL priority.
+ */
+static int effective_sc_prio(struct task_struct *p)
+{
+	if (likely(p->mm)) {
+		if (rt_task(p))
+			return -20;
+		return task_nice(p);
+	}
+	return 0;
+}
+
+static void set_kswapd_nice(struct task_struct *kswapd, struct task_struct *p,
+			    int active)
+{
+	long nice = effective_sc_prio(p);
+
+	if (task_nice(kswapd) > nice || !active)
+		set_user_nice(kswapd, nice);
+}
+
+/*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
@@ -1346,6 +1374,7 @@ static int kswapd(void *p)
 			if (!freezing(current))
 				schedule();
 
+			set_user_nice(tsk, 0);
 			order = pgdat->kswapd_max_order;
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
@@ -1363,9 +1392,10 @@ static int kswapd(void *p)
 /*
  * A zone is low on free memory, so wake its kswapd task to service it.
  */
-void wakeup_kswapd(struct zone *zone, int order)
+void wakeup_kswapd(struct zone *zone, int order, struct task_struct *p)
 {
 	pg_data_t *pgdat;
+	int active;
 
 	if (!populated_zone(zone))
 		return;
@@ -1377,7 +1407,9 @@ void wakeup_kswapd(struct zone *zone, in
 		pgdat->kswapd_max_order = order;
 	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 		return;
-	if (!waitqueue_active(&pgdat->kswapd_wait))
+	active = waitqueue_active(&pgdat->kswapd_wait);
+	set_kswapd_nice(pgdat->kswapd, p, active);
+	if (!active)
 		return;
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
