Set the initial "priority" of memory reclaim scanning according to the CPU scheduling priority, thus determining how aggressively reclaim initially progresses according to nice level. Signed-off-by: Con Kolivas fs/buffer.c | 2 +- include/linux/swap.h | 3 ++- mm/page_alloc.c | 2 +- mm/vmscan.c | 37 ++++++++++++++++++++++++------------- 4 files changed, 28 insertions(+), 16 deletions(-) Index: linux-2.6.22-rc3-ck1/fs/buffer.c =================================================================== --- linux-2.6.22-rc3-ck1.orig/fs/buffer.c 2007-05-26 20:34:42.000000000 +1000 +++ linux-2.6.22-rc3-ck1/fs/buffer.c 2007-05-26 20:38:36.000000000 +1000 @@ -356,7 +356,7 @@ static void free_more_memory(void) for_each_online_pgdat(pgdat) { zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; if (*zones) - try_to_free_pages(zones, GFP_NOFS); + try_to_free_pages(zones, GFP_NOFS, NULL); } } Index: linux-2.6.22-rc3-ck1/include/linux/swap.h =================================================================== --- linux-2.6.22-rc3-ck1.orig/include/linux/swap.h 2007-05-26 20:36:38.000000000 +1000 +++ linux-2.6.22-rc3-ck1/include/linux/swap.h 2007-05-26 20:38:36.000000000 +1000 @@ -189,7 +189,8 @@ extern int rotate_reclaimable_page(struc extern void swap_setup(void); /* linux/mm/vmscan.c */ -extern unsigned long try_to_free_pages(struct zone **, gfp_t); +extern unsigned long try_to_free_pages(struct zone **, gfp_t, + struct task_struct *p); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_mapped; extern int vm_hardmaplimit; Index: linux-2.6.22-rc3-ck1/mm/page_alloc.c =================================================================== --- linux-2.6.22-rc3-ck1.orig/mm/page_alloc.c 2007-05-26 20:38:36.000000000 +1000 +++ linux-2.6.22-rc3-ck1/mm/page_alloc.c 2007-05-26 20:38:36.000000000 +1000 @@ -1314,7 +1314,7 @@ nofail_alloc: reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - did_some_progress = try_to_free_pages(zonelist->zones, 
gfp_mask); + did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask, p); p->reclaim_state = NULL; p->flags &= ~PF_MEMALLOC; Index: linux-2.6.22-rc3-ck1/mm/vmscan.c =================================================================== --- linux-2.6.22-rc3-ck1.orig/mm/vmscan.c 2007-05-26 20:38:36.000000000 +1000 +++ linux-2.6.22-rc3-ck1/mm/vmscan.c 2007-05-26 20:38:36.000000000 +1000 @@ -988,6 +988,11 @@ static void set_kswapd_nice(struct task_ set_user_nice(kswapd, nice); } +static int sc_priority(struct task_struct *p) +{ + return (DEF_PRIORITY + (DEF_PRIORITY * effective_sc_prio(p) / 40)); +} + /* * This is the direct reclaim path, for page-allocating processes. We only * try to reclaim pages from zones which will satisfy the caller's allocation @@ -1045,7 +1050,8 @@ static unsigned long shrink_zones(int pr * holds filesystem locks which prevent writeout this might not work, and the * allocation attempt will fail. */ -unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) +unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask, + struct task_struct *p) { int priority; int ret = 0; @@ -1053,7 +1059,7 @@ unsigned long try_to_free_pages(struct z unsigned long nr_reclaimed = 0; struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long lru_pages = 0; - int i; + int i, scan_priority = DEF_PRIORITY; struct scan_control sc = { .gfp_mask = gfp_mask, .may_writepage = !laptop_mode, @@ -1062,6 +1068,9 @@ unsigned long try_to_free_pages(struct z .mapped = vm_mapped, }; + if (p) + scan_priority = sc_priority(p); + delay_swap_prefetch(); count_vm_event(ALLOCSTALL); @@ -1076,7 +1085,7 @@ unsigned long try_to_free_pages(struct z + zone_page_state(zone, NR_INACTIVE); } - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + for (priority = scan_priority; priority >= 0; priority--) { sc.nr_scanned = 0; if (!priority) disable_swap_token(); @@ -1106,7 +1115,7 @@ unsigned long try_to_free_pages(struct z } /* Take a nap, wait 
for some writeback to complete */ - if (sc.nr_scanned && priority < DEF_PRIORITY - 2) + if (sc.nr_scanned && priority < scan_priority - 2) congestion_wait(WRITE, HZ/10); } /* top priority shrink_caches still had more to do? don't OOM, then */ @@ -1156,9 +1165,9 @@ out: */ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) { - int all_zones_ok; + int all_zones_ok = 0; int priority; - int i; + int i, scan_priority; unsigned long total_scanned; unsigned long nr_reclaimed; struct reclaim_state *reclaim_state = current->reclaim_state; @@ -1174,6 +1183,8 @@ static unsigned long balance_pgdat(pg_da */ int temp_priority[MAX_NR_ZONES]; + scan_priority = sc_priority(pgdat->kswapd); + loop_again: total_scanned = 0; nr_reclaimed = 0; @@ -1181,9 +1192,9 @@ loop_again: count_vm_event(PAGEOUTRUN); for (i = 0; i < pgdat->nr_zones; i++) - temp_priority[i] = DEF_PRIORITY; + temp_priority[i] = scan_priority; - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + for (priority = scan_priority; priority >= 0; priority--) { int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long lru_pages = 0; @@ -1204,7 +1215,7 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && priority != scan_priority) continue; /* @@ -1213,7 +1224,7 @@ loop_again: * pages_high. */ watermark = zone->pages_high + (zone->pages_high * - priority / DEF_PRIORITY); + priority / scan_priority); if (!zone_watermark_ok(zone, order, watermark, 0, 0)) { end_zone = i; break; @@ -1246,11 +1257,11 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && priority != scan_priority) continue; watermark = zone->pages_high + (zone->pages_high * - priority / DEF_PRIORITY); + priority / scan_priority); if (!zone_watermark_ok(zone, order, watermark, end_zone, 0)) @@ -1285,7 +1296,7 @@ loop_again: * OK, kswapd is getting into trouble. 
Take a nap, then take * another pass across the zones. */ - if (total_scanned && priority < DEF_PRIORITY - 2) + if (total_scanned && priority < scan_priority - 2) congestion_wait(WRITE, HZ/10); /*