Index: linux-2.6.13-ck6/include/linux/fs.h =================================================================== --- linux-2.6.13-ck6.orig/include/linux/fs.h 2005-09-23 21:06:55.000000000 +1000 +++ linux-2.6.13-ck6/include/linux/fs.h 2005-09-30 01:03:42.000000000 +1000 @@ -340,8 +340,6 @@ struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ rwlock_t tree_lock; /* and rwlock protecting it */ - struct radix_tree_root swap_tree; /* radix tree of swapped pages */ - struct list_head swapped_pages; /* list of swapped pages */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ Index: linux-2.6.13-ck6/mm/swap_prefetch.c =================================================================== --- linux-2.6.13-ck6.orig/mm/swap_prefetch.c 2005-09-30 01:03:17.000000000 +1000 +++ linux-2.6.13-ck6/mm/swap_prefetch.c 2005-09-30 01:03:42.000000000 +1000 @@ -23,6 +23,8 @@ struct swapped_root_t { unsigned long busy; spinlock_t lock; struct list_head list; + struct radix_tree_root swap_tree; + rwlock_t *rwlock; unsigned int count; unsigned int maxcount; kmem_cache_t *cache; @@ -33,9 +35,12 @@ struct swapped_entry_t { struct list_head swapped_list; }; -static struct swapped_root_t swapped_root = { - .list = LIST_HEAD_INIT(swapped_root.list), - .count = 0, +static struct swapped_root_t swapped = { + .busy = 0, + .list = LIST_HEAD_INIT(swapped.list), + .swap_tree = RADIX_TREE_INIT(GFP_ATOMIC), + .rwlock = &swapper_space.tree_lock, + .count = 0, }; static struct timer_list prefetch_timer; @@ -47,23 +52,22 @@ static unsigned long mapped_limit; /* * Create kmem cache for swapped entries */ -void prepare_prefetch(void) +void __init prepare_prefetch(void) { long total_memory = nr_free_pagecache_pages(); long se_size = sizeof(struct swapped_entry_t); - swapped_root.cache = kmem_cache_create("swapped_entry", se_size, + swapped.cache = kmem_cache_create("swapped_entry", se_size, 0, 0, NULL, NULL); - if (unlikely(!swapped_root.cache)) + if (unlikely(!swapped.cache)) panic("prepare_prefetch(): cannot create swapped_entry SLAB cache"); /* Set max count of swapped entries to 5% ram */ - swapped_root.maxcount = (total_memory / 20) * (PAGE_SIZE / se_size); + swapped.maxcount = (total_memory / 20) * (PAGE_SIZE / se_size); /* Set maximum amount of mapped pages to prefetch to 2/3 ram */ mapped_limit = total_memory / 3 * 2; - spin_lock_init(&swapped_root.lock); - swapped_root.busy = 0; + spin_lock_init(&swapped.lock); } static inline void delay_prefetch_timer(void) @@ -82,7 +86,7 @@ static inline void reset_prefetch_timer( */ void delay_prefetch(void) { - __set_bit(0, &swapped_root.busy); + __set_bit(0, &swapped.busy); } /* @@ -94,21 +98,19 @@ void delay_prefetch(void) void add_to_swapped_list(unsigned long index) { struct swapped_entry_t *entry; - struct address_space *mapping = &swapper_space; - unsigned long flags; int error; - if (unlikely(!spin_trylock_irqsave(&swapped_root.lock, flags))) + if (unlikely(!spin_trylock(&swapped.lock))) goto out; - if (swapped_root.count >= swapped_root.maxcount) { - entry = list_entry(swapped_root.list.next, + if (swapped.count >= swapped.maxcount) { + entry = list_entry(swapped.list.next, struct swapped_entry_t, swapped_list); - radix_tree_delete(&mapping->swap_tree, entry->swp_entry.val); + radix_tree_delete(&swapped.swap_tree, entry->swp_entry.val); list_del(&entry->swapped_list); - swapped_root.count--; + swapped.count--; } else { - entry = kmem_cache_alloc(swapped_root.cache, GFP_ATOMIC); + entry = kmem_cache_alloc(swapped.cache, GFP_ATOMIC); if (unlikely(!entry)) /* bad, can't allocate more mem */ goto out_locked; @@ -118,23 +120,23 @@ void add_to_swapped_list(unsigned long i error = radix_tree_preload(GFP_ATOMIC); if (likely(!error)) { - error = radix_tree_insert(&mapping->swap_tree, index, entry); + error = radix_tree_insert(&swapped.swap_tree, index, entry); if (likely(!error)) { /* * If this is the first entry the timer needs to be * (re)started */ - if (list_empty(&swapped_root.list)) + if (list_empty(&swapped.list)) delay_prefetch_timer(); - list_add(&entry->swapped_list, &swapped_root.list); - swapped_root.count++; + list_add(&entry->swapped_list, &swapped.list); + swapped.count++; } radix_tree_preload_end(); } else - kmem_cache_free(swapped_root.cache, entry); + kmem_cache_free(swapped.cache, entry); out_locked: - spin_unlock_irqrestore(&swapped_root.lock, flags); + spin_unlock(&swapped.lock); out: return; } @@ -145,19 +147,18 @@ out: */ void remove_from_swapped_list(unsigned long index) { - struct address_space *mapping = &swapper_space; struct swapped_entry_t *entry; unsigned long flags; - if (unlikely(!spin_trylock_irqsave(&swapped_root.lock, flags))) + if (unlikely(!spin_trylock_irqsave(&swapped.lock, flags))) return; - entry = radix_tree_delete(&mapping->swap_tree, index); - if (entry) { + entry = radix_tree_delete(&swapped.swap_tree, index); + if (likely(entry)) { list_del_init(&entry->swapped_list); - swapped_root.count--; - kmem_cache_free(swapped_root.cache, entry); + swapped.count--; + kmem_cache_free(swapped.cache, entry); } - spin_unlock_irqrestore(&swapped_root.lock, flags); + spin_unlock_irqrestore(&swapped.lock, flags); } /* @@ -165,7 +166,7 @@ void remove_from_swapped_list(unsigned l * then directly allocate the ram. We don't want prefetch to use * __alloc_pages and go calling on reclaim. */ -static struct page * prefetch_get_page(void) +static struct page *prefetch_get_page(void) { struct zone *zone = NULL, *z; struct page *page = NULL; @@ -187,10 +188,6 @@ static struct page * prefetch_get_page(v if (zone_idx(z) == ZONE_DMA) continue; - /* Reasonably stressed zone, bypass it */ - if (z->prev_priority < DEF_PRIORITY / 2) - continue; - /* Select the zone with the most free ram */ if (free > most_free) { most_free = free; @@ -207,7 +204,7 @@ static struct page * prefetch_get_page(v zonelist = NODE_DATA(numa_node_id())->node_zonelists + (GFP_HIGHUSER & GFP_ZONEMASK); -; + zone_statistics(zonelist, zone); } out: @@ -216,21 +213,21 @@ out: /* * This tries to read a swp_entry_t into swap cache for swap prefetching. + * Returns 1 on success, 0 on failure, -1 on failure and we should delay + * further prefetching. */ static int trickle_swap_cache_async(swp_entry_t entry) { struct page *page = NULL; - struct address_space *mapping = &swapper_space; - unsigned long flags; + int ret = 0; /* Entry may already exist */ - local_irq_save(flags); - if (unlikely(!read_trylock(&mapping->tree_lock))) { - local_irq_restore(flags); - goto out_delay; + if (unlikely(!read_trylock(swapped.rwlock))) { + ret = -1; + goto out; } - page = radix_tree_lookup(&mapping->page_tree, entry.val); - read_unlock_irqrestore(&mapping->tree_lock, flags); + page = radix_tree_lookup(&swapper_space.page_tree, entry.val); + read_unlock(swapped.rwlock); if (page) { remove_from_swapped_list(entry.val); goto out; @@ -238,8 +235,10 @@ static int trickle_swap_cache_async(swp_ /* Get a new page to read from swap */ page = prefetch_get_page(); - if (unlikely(!page)) - goto out_delay; + if (unlikely(!page)) { + ret = -1; + goto out; + } if (add_to_swap_cache(page, entry)) { /* Failed to add to swap cache */ @@ -247,13 +246,14 @@ static int trickle_swap_cache_async(swp_ goto out; } - lru_cache_add_active(page); - swap_readpage(NULL, page); - return 1; -out_delay: - return -1; + lru_cache_add(page); + if (unlikely(swap_readpage(NULL, page))) { + ret = -1; + goto out; + } + ret = 1; out: - return 0; + return ret; } /* @@ -264,10 +264,11 @@ static int prefetch_suitable(void) struct page_state ps; unsigned long pending_writes, limit; struct zone *z; + int ret = 0; /* Purposefully racy and might return false positive which is ok */ - if (__test_and_clear_bit(0, &swapped_root.busy)) - goto out_delay; + if (__test_and_clear_bit(0, &swapped.busy)) + goto out; /* * Have some hysteresis between where page reclaiming and prefetching @@ -277,41 +278,40 @@ static int prefetch_suitable(void) if (z->present_pages == 0) continue; if (z->pages_high * 3 > z->free_pages) - goto out_delay; + goto out; } get_page_state(&ps); /* We shouldn't prefetch when we are doing writeback */ if (ps.nr_writeback) - goto out_delay; + goto out; /* Delay prefetching if we have significant amounts of dirty data */ pending_writes = ps.nr_dirty + ps.nr_unstable; if (pending_writes > SWAP_CLUSTER_MAX) - goto out_delay; + goto out; /* >2/3 of the ram is mapped, we need some free for pagecache */ limit = ps.nr_mapped + ps.nr_slab + pending_writes; if (limit > mapped_limit) - goto out_delay; + goto out; /* * Add swapcache to limit as well, but check this last since it needs * locking */ if (unlikely(!read_trylock(&swapper_space.tree_lock))) - goto out_delay; + goto out; limit += total_swapcache_pages; read_unlock(&swapper_space.tree_lock); if (limit > mapped_limit) - goto out_delay; + goto out; /* Survived all that? Hooray we can prefetch! */ - return 1; - -out_delay: - return 0; + ret = 1; +out: + return ret; } /* @@ -333,28 +333,25 @@ static int trickle_swap(void) if (!prefetch_suitable()) goto out; /* Lock is held? We must be busy elsewhere */ - if (unlikely(!spin_trylock(&swapped_root.lock))) + if (unlikely(!spin_trylock(&swapped.lock))) goto out; - if (list_empty(&swapped_root.list)) { + if (list_empty(&swapped.list)) { ret = -1; goto out_unlock; } - entry = list_entry(swapped_root.list.next, + entry = list_entry(swapped.list.next, struct swapped_entry_t, swapped_list); - spin_unlock(&swapped_root.lock); + spin_unlock(&swapped.lock); got_page = trickle_swap_cache_async(entry->swp_entry); - if (unlikely(got_page == -1)) { - ret = -1; - goto out_unlock; - } + if (unlikely(got_page == -1)) + goto out; pages += got_page; } - ret = 1; - goto out; + return 1; out_unlock: - spin_unlock(&swapped_root.lock); + spin_unlock(&swapped.lock); out: return ret; } Index: linux-2.6.13-ck6/mm/swap_state.c =================================================================== --- linux-2.6.13-ck6.orig/mm/swap_state.c 2005-09-23 21:06:55.000000000 +1000 +++ linux-2.6.13-ck6/mm/swap_state.c 2005-09-30 01:03:42.000000000 +1000 @@ -36,8 +36,6 @@ static struct backing_dev_info swap_back struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), .tree_lock = RW_LOCK_UNLOCKED, - .swap_tree = RADIX_TREE_INIT(GFP_ATOMIC), - .swapped_pages = LIST_HEAD_INIT(swapper_space.swapped_pages), .a_ops = &swap_aops, .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), .backing_dev_info = &swap_backing_dev_info,