Index: linux-2.6.13-ck8/Documentation/sysctl/vm.txt =================================================================== --- linux-2.6.13-ck8.orig/Documentation/sysctl/vm.txt 2005-10-11 09:33:51.000000000 +1000 +++ linux-2.6.13-ck8/Documentation/sysctl/vm.txt 2005-10-11 09:33:51.000000000 +1000 @@ -113,4 +113,4 @@ swap prefetching is compiled in. The val except when laptop_mode is enabled and then it is ten times larger. Setting it to 0 disables prefetching entirely. -The default value is 2. +The default value is dependent on RAM size. Index: linux-2.6.13-ck8/mm/swap_prefetch.c =================================================================== --- linux-2.6.13-ck8.orig/mm/swap_prefetch.c 2005-10-11 09:33:51.000000000 +1000 +++ linux-2.6.13-ck8/mm/swap_prefetch.c 2005-10-11 09:33:51.000000000 +1000 @@ -22,18 +22,10 @@ /* Time between attempting prefetching when vm is idle */ #define PREFETCH_INTERVAL (HZ) -/* sysctl - if/how much to prefetch at a time */ -int swap_prefetch = 2; +/* sysctl - how many SWAP_CLUSTER_MAX pages to prefetch at a time */ +int swap_prefetch = 1; -/* - * How many pages to prefetch at a time. We prefetch SWAP_CLUSTER_MAX * - * swap_prefetch per PREFETCH_INTERVAL, but prefetch ten times as much at a - * time in laptop_mode to minimise the time we keep the disk spinning. 
- */ -#define PREFETCH_PAGES() (SWAP_CLUSTER_MAX * swap_prefetch * \ - (1 + 9 * laptop_mode)) - -struct swapped_root_t { +struct swapped_root { unsigned long busy; /* vm busy */ spinlock_t lock; /* protects all data */ struct list_head list; /* MRU list of swapped pages */ @@ -43,12 +35,12 @@ struct swapped_root_t { kmem_cache_t *cache; }; -struct swapped_entry_t { +struct swapped_entry { swp_entry_t swp_entry; struct list_head swapped_list; }; -static struct swapped_root_t swapped = { +static struct swapped_root swapped = { .busy = 0, .list = LIST_HEAD_INIT(swapped.list), .swap_tree = RADIX_TREE_INIT(GFP_ATOMIC), @@ -68,17 +60,22 @@ static unsigned long temp_free = 0; */ void __init prepare_prefetch(void) { - long total_memory = nr_free_pagecache_pages(); + long mem = nr_free_pagecache_pages(); swapped.cache = kmem_cache_create("swapped_entry", - sizeof(struct swapped_entry_t), 0, 0, NULL, NULL); + sizeof(struct swapped_entry), 0, 0, NULL, NULL); if (unlikely(!swapped.cache)) panic("prepare_prefetch(): cannot create swapped_entry SLAB cache"); /* Set max number of entries to size of physical ram */ - swapped.maxcount = total_memory; + swapped.maxcount = mem; /* Set maximum amount of mapped pages to prefetch to 2/3 ram */ - mapped_limit = total_memory / 3 * 2; + mapped_limit = mem / 3 * 2; + + /* Set initial swap_prefetch value according to memory size */ + mem /= SWAP_CLUSTER_MAX * 1000; + while ((mem >>= 1)) + swap_prefetch++; spin_lock_init(&swapped.lock); } @@ -110,7 +107,7 @@ void delay_prefetch(void) */ void add_to_swapped_list(unsigned long index) { - struct swapped_entry_t *entry; + struct swapped_entry *entry; int error; if (unlikely(!spin_trylock(&swapped.lock))) @@ -118,7 +115,7 @@ void add_to_swapped_list(unsigned long i if (swapped.count >= swapped.maxcount) { entry = list_entry(swapped.list.next, - struct swapped_entry_t, swapped_list); + struct swapped_entry, swapped_list); radix_tree_delete(&swapped.swap_tree, entry->swp_entry.val); 
list_del(&entry->swapped_list); swapped.count--; @@ -160,7 +157,7 @@ out: */ void remove_from_swapped_list(unsigned long index) { - struct swapped_entry_t *entry; + struct swapped_entry *entry; unsigned long flags; if (unlikely(!spin_trylock_irqsave(&swapped.lock, flags))) @@ -220,18 +217,23 @@ out: return page; } +enum trickle_return { + SUCCESS, + FAILED, + DELAY, +}; + /* * This tries to read a swp_entry_t into swap cache for swap prefetching. - * Returns 1 on success, 0 on failure, -1 on failure and we should delay - * further prefetching. + * If it returns DELAY we should delay further prefetching. */ -static int trickle_swap_cache_async(swp_entry_t entry) +static enum trickle_return trickle_swap_cache_async(swp_entry_t entry) { + enum trickle_return ret = FAILED; struct page *page = NULL; - int ret = 0; if (unlikely(!read_trylock(&swapper_space.tree_lock))) { - ret = -1; + ret = DELAY; goto out; } /* Entry may already exist */ @@ -245,7 +247,7 @@ static int trickle_swap_cache_async(swp_ /* Get a new page to read from swap */ page = prefetch_get_page(); if (unlikely(!page)) { - ret = -1; + ret = DELAY; goto out; } @@ -255,11 +257,11 @@ static int trickle_swap_cache_async(swp_ lru_cache_add(page); if (unlikely(swap_readpage(NULL, page))) { - ret = -1; + ret = DELAY; goto out_release; } - ret = 1; + ret = SUCCESS; out_release: page_cache_release(page); out: @@ -267,6 +269,16 @@ out: } /* + * How many pages to prefetch at a time. We prefetch SWAP_CLUSTER_MAX * + * swap_prefetch per PREFETCH_INTERVAL, but prefetch ten times as much at a + * time in laptop_mode to minimise the time we keep the disk spinning. + */ +static inline unsigned long prefetch_pages(void) +{ + return (SWAP_CLUSTER_MAX * swap_prefetch * (1 + 9 * laptop_mode)); +} + +/* * We want to be absolutely certain it's ok to start prefetching. 
*/ static int prefetch_suitable(void) @@ -302,7 +314,7 @@ static int prefetch_suitable(void) * (eg during file reads) */ if (last_free) { - if (temp_free + SWAP_CLUSTER_MAX + PREFETCH_PAGES() < + if (temp_free + SWAP_CLUSTER_MAX + prefetch_pages() < last_free) { last_free = temp_free; goto out; @@ -347,17 +359,16 @@ out: * trickle_swap is the main function that initiates the swap prefetching. It * first checks to see if the busy flag is set, and does not prefetch if it * is, as the flag implied we are low on memory or swapping in currently. - * Otherwise it runs till PREFETCH_PAGES() are prefetched. - * This function returns 1 if it succeeds in a cycle of prefetching, 0 if it - * is interrupted or -1 if there is nothing left to prefetch. + * Otherwise it runs till prefetch_pages() are prefetched. */ -static int trickle_swap(void) +static enum trickle_return trickle_swap(void) { - int ret = 0, pages = 0; - struct swapped_entry_t *entry; + enum trickle_return ret = DELAY; + struct swapped_entry *entry; + int pages = 0; - while (pages < PREFETCH_PAGES()) { - int got_page; + while (pages < prefetch_pages()) { + enum trickle_return got_page; if (!prefetch_suitable()) goto out; @@ -366,19 +377,25 @@ static int trickle_swap(void) goto out; if (list_empty(&swapped.list)) { spin_unlock(&swapped.lock); - ret = -1; + ret = FAILED; goto out; } entry = list_entry(swapped.list.next, - struct swapped_entry_t, swapped_list); + struct swapped_entry, swapped_list); spin_unlock(&swapped.lock); got_page = trickle_swap_cache_async(entry->swp_entry); - if (unlikely(got_page == -1)) + switch (got_page) { + case FAILED: + break; + case SUCCESS: + pages++; + break; + case DELAY: goto out; - pages += got_page; + } } - ret = 1; + ret = SUCCESS; out: if (pages) @@ -396,22 +413,27 @@ static int kprefetchd(void *data) sys_ioprio_set(IOPRIO_WHO_PROCESS, 0, IOPRIO_CLASS_IDLE); for ( ; ; ) { - int prefetched; + enum trickle_return prefetched; try_to_freeze(); prepare_to_wait(&kprefetchd_wait, 
&wait, TASK_INTERRUPTIBLE); schedule(); finish_wait(&kprefetchd_wait, &wait); - /* If trickle_swap() returns -1 the timer is not reset */ + /* FAILED implies no entries left - the timer is not reset */ prefetched = trickle_swap(); - if (prefetched == 1) { + switch (prefetched) { + case SUCCESS: last_free = temp_free; reset_prefetch_timer(); - } else { + break; + case DELAY: last_free = 0; - if (!prefetched) - delay_prefetch_timer(); + delay_prefetch_timer(); + break; + case FAILED: + last_free = 0; + break; } } return 0;