[lc-checkins] CVS: linux/mm Makefile,1.4,1.5 memory.c,1.12,1.13 mmap.c,1.3,1.4 oom_kill.c,1.4,1.5 pa
From: Rodrigo S. de C. <rc...@us...> - 2001-12-12 20:45:51
Update of /cvsroot/linuxcompressed/linux/mm In directory usw-pr-cvs1:/tmp/cvs-serv17791/mm Modified Files: Makefile memory.c mmap.c oom_kill.c page_alloc.c shmem.c swap_state.c swapfile.c vmscan.c Removed Files: filemap.c Log Message: - 0.20pre2 version updated from 2.4.10 to 2.4.16. - Code was rewritten in swapfile.c to work with the new swap file functions (swap_free, swap_duplicate, swap_info_get, swap_info_put, etc). Index: Makefile =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/Makefile,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -r1.4 -r1.5 *** Makefile 2001/08/17 18:35:11 1.4 --- Makefile 2001/12/12 20:45:46 1.5 *************** *** 10,14 **** O_TARGET := mm.o ! export-objs := shmem.o obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ --- 10,14 ---- O_TARGET := mm.o ! export-objs := shmem.o filemap.o obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ Index: memory.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/memory.c,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -r1.12 -r1.13 *** memory.c 2001/09/29 20:30:14 1.12 --- memory.c 2001/12/12 20:45:46 1.13 *************** *** 82,94 **** if ((!VALID_PAGE(page)) || PageReserved(page)) return; ! /* ! * free_page() used to be able to clear swap cache ! * entries. We may now have to do it manually. ! */ ! if (page->mapping) { ! if (pte_dirty(pte)) ! set_page_dirty(page); ! } ! free_page_and_swap_cache(page); } --- 82,87 ---- if ((!VALID_PAGE(page)) || PageReserved(page)) return; ! if (pte_dirty(pte)) ! set_page_dirty(page); free_page_and_swap_cache(page); } *************** *** 188,192 **** unsigned long address = vma->vm_start; unsigned long end = vma->vm_end; ! unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; src_pgd = pgd_offset(src, address)-1; --- 181,185 ---- unsigned long address = vma->vm_start; unsigned long end = vma->vm_end; ! unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE; src_pgd = pgd_offset(src, address)-1; *************** *** 326,330 **** continue; if (pte_present(pte)) { ! freed ++; /* This will eventually call __free_pte on the pte. */ tlb_remove_page(tlb, ptep, address + offset); --- 319,325 ---- continue; if (pte_present(pte)) { ! struct page *page = pte_page(pte); ! if (VALID_PAGE(page) && !PageReserved(page)) ! freed ++; /* This will eventually call __free_pte on the pte. */ tlb_remove_page(tlb, ptep, address + offset); *************** *** 333,337 **** remove_pte_vswap(ptep); #endif ! swap_free(pte_to_swp_entry(pte)); pte_clear(ptep); } --- 328,332 ---- remove_pte_vswap(ptep); #endif ! free_swap_and_cache(pte_to_swp_entry(pte)); pte_clear(ptep); } *************** *** 535,539 **** if (map) { flush_dcache_page(map); ! atomic_inc(&map->count); } else printk (KERN_INFO "Mapped page missing [%d]\n", i); --- 530,534 ---- if (map) { flush_dcache_page(map); ! page_cache_get(map); } else printk (KERN_INFO "Mapped page missing [%d]\n", i); *************** *** 603,607 **** if (iobuf->locked) UnlockPage(map); ! __free_page(map); } } --- 598,602 ---- if (iobuf->locked) UnlockPage(map); ! page_cache_release(map); } } *************** *** 924,928 **** * and potentially makes it more efficient. * ! * We hold the mm semaphore and the page_table_lock on entry and exit. 
*/ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, --- 919,924 ---- * and potentially makes it more efficient. * ! * We hold the mm semaphore and the page_table_lock on entry and exit ! * with the page_table_lock released. */ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, *************** *** 934,972 **** if (!VALID_PAGE(old_page)) goto bad_wp_page; ! ! /* ! * We can avoid the copy if: ! * - we're the only user (count == 1) ! * - the only other user is the swap cache, ! * and the only swap cache user is itself, ! * in which case we can just continue to ! * use the same swap cache (it will be ! * marked dirty). ! */ ! switch (page_count(old_page)) { ! int can_reuse; ! case 3: ! if (!old_page->buffers) ! break; ! /* FallThrough */ ! case 2: ! if (!PageSwapCache(old_page)) ! break; ! if (TryLockPage(old_page)) ! break; ! /* Recheck swapcachedness once the page is locked */ ! can_reuse = exclusive_swap_page(old_page); ! if (can_reuse) ! delete_from_swap_cache(old_page); ! UnlockPage(old_page); ! if (!can_reuse) ! break; ! /* FallThrough */ ! case 1: ! if (PageReserved(old_page)) ! break; ! flush_cache_page(vma, address); ! establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); ! return 1; /* Minor fault */ } --- 930,943 ---- if (!VALID_PAGE(old_page)) goto bad_wp_page; ! ! if (!TryLockPage(old_page)) { ! int reuse = can_share_swap_page(old_page); ! unlock_page(old_page); ! if (reuse) { ! flush_cache_page(vma, address); ! establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); ! spin_unlock(&mm->page_table_lock); ! return 1; /* Minor fault */ ! } } *************** *** 981,985 **** goto no_mem; copy_cow_page(old_page,new_page,address); - page_cache_release(old_page); /* --- 952,955 ---- *************** *** 991,1007 **** ++mm->rss; break_cow(vma, new_page, address, page_table); /* Free the old page.. */ new_page = old_page; } page_cache_release(new_page); return 1; /* Minor fault */ bad_wp_page: printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); return -1; no_mem: page_cache_release(old_page); - spin_lock(&mm->page_table_lock); return -1; } --- 961,980 ---- ++mm->rss; break_cow(vma, new_page, address, page_table); + lru_cache_add(new_page); /* Free the old page.. */ new_page = old_page; } + spin_unlock(&mm->page_table_lock); page_cache_release(new_page); + page_cache_release(old_page); return 1; /* Minor fault */ bad_wp_page: + spin_unlock(&mm->page_table_lock); printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); return -1; no_mem: page_cache_release(old_page); return -1; } *************** *** 1108,1115 **** num = valid_swaphandles(entry, &offset); for (i = 0; i < num; offset++, i++) { - /* Don't block on I/O for read-ahead */ - if (atomic_read(&nr_async_pages) >= - pager_daemon.swap_cluster << page_cluster) - break; /* Ok, do the async read-ahead now */ #ifdef CONFIG_COMP_CACHE --- 1081,1084 ---- *************** *** 1128,1132 **** /* ! * We hold the mm semaphore and the page_table_lock on entry and exit. */ static int do_swap_page(struct mm_struct * mm, --- 1097,1102 ---- /* ! * We hold the mm semaphore and the page_table_lock on entry and ! * should release the pagetable lock on exit.. 
*/ static int do_swap_page(struct mm_struct * mm, *************** *** 1151,1167 **** #else page = lookup_swap_cache(entry); #endif if (!page) { - lock_kernel(); swapin_readahead(entry); page = read_swap_cache_async(entry); - unlock_kernel(); if (!page) { - spin_lock(&mm->page_table_lock); /* * Back out if somebody else faulted in this pte while * we released the page table lock. */ ! return pte_same(*page_table, orig_pte) ? -1 : 1; } --- 1121,1143 ---- #else page = lookup_swap_cache(entry); + + if ((entry.val & 0x2) == 1) { + printk("entry %08lx\n", entry.val); + BUG(); + } #endif if (!page) { swapin_readahead(entry); page = read_swap_cache_async(entry); if (!page) { /* * Back out if somebody else faulted in this pte while * we released the page table lock. */ ! int retval; ! spin_lock(&mm->page_table_lock); ! retval = pte_same(*page_table, orig_pte) ? -1 : 1; ! spin_unlock(&mm->page_table_lock); ! return retval; } *************** *** 1170,1178 **** } - /* - * Freeze the "shared"ness of the page, ie page_count + swap_count. - * Must lock page before transferring our swap count to already - * obtained page count. - */ lock_page(page); --- 1146,1149 ---- *************** *** 1183,1187 **** spin_lock(&mm->page_table_lock); if (!pte_same(*page_table, orig_pte)) { ! UnlockPage(page); page_cache_release(page); #ifdef CONFIG_COMP_CACHE --- 1154,1159 ---- spin_lock(&mm->page_table_lock); if (!pte_same(*page_table, orig_pte)) { ! spin_unlock(&mm->page_table_lock); ! unlock_page(page); page_cache_release(page); #ifdef CONFIG_COMP_CACHE *************** *** 1220,1241 **** #endif - /* The page isn't present yet, go ahead with the fault. */ - mm->rss++; - pte = mk_pte(page, vma->vm_page_prot); - #ifdef CONFIG_COMP_CACHE remove_pte_vswap(page_table); unlock_vswap(entry); #endif swap_free(entry); ! mark_page_accessed(page); ! if (exclusive_swap_page(page)) { ! if (vma->vm_flags & VM_WRITE) ! pte = pte_mkwrite(pte); ! pte = pte_mkdirty(pte); ! delete_from_swap_cache(page); ! } ! UnlockPage(page); flush_page_to_ram(page); flush_icache_page(vma, page); --- 1192,1212 ---- #endif #ifdef CONFIG_COMP_CACHE remove_pte_vswap(page_table); unlock_vswap(entry); #endif + + /* The page isn't present yet, go ahead with the fault. */ + swap_free(entry); ! if (vm_swap_full()) ! 
remove_exclusive_swap_page(page); + mm->rss++; + pte = mk_pte(page, vma->vm_page_prot); + if (write_access && can_share_swap_page(page)) + pte = pte_mkdirty(pte_mkwrite(pte)); + unlock_page(page); + flush_page_to_ram(page); flush_icache_page(vma, page); *************** *** 1244,1247 **** --- 1215,1219 ---- /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); + spin_unlock(&mm->page_table_lock); return ret; } *************** *** 1274,1277 **** --- 1246,1250 ---- if (!pte_none(*page_table)) { page_cache_release(page); + spin_unlock(&mm->page_table_lock); return 1; } *************** *** 1279,1282 **** --- 1252,1256 ---- flush_page_to_ram(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + lru_cache_add(page); } *************** *** 1285,1292 **** /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); return 1; /* Minor fault */ no_mem: - spin_lock(&mm->page_table_lock); return -1; } --- 1259,1266 ---- /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); + spin_unlock(&mm->page_table_lock); return 1; /* Minor fault */ no_mem: return -1; } *************** *** 1302,1306 **** * * This is called with the MM semaphore held and the page table ! * spinlock held. */ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, --- 1276,1280 ---- * * This is called with the MM semaphore held and the page table ! * spinlock held. Exit with the spinlock released. */ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, *************** *** 1314,1329 **** spin_unlock(&mm->page_table_lock); ! /* ! * The third argument is "no_share", which tells the low-level code ! * to copy, not share the page even if sharing is possible. It's ! * essentially an early COW detection. ! */ ! new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access); - spin_lock(&mm->page_table_lock); if (new_page == NULL) /* no page was available -- SIGBUS */ return 0; if (new_page == NOPAGE_OOM) return -1; /* * This silly early PAGE_DIRTY setting removes a race --- 1288,1312 ---- spin_unlock(&mm->page_table_lock); ! new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); if (new_page == NULL) /* no page was available -- SIGBUS */ return 0; if (new_page == NOPAGE_OOM) return -1; + + /* + * Should we do an early C-O-W break? + */ + if (write_access && !(vma->vm_flags & VM_SHARED)) { + struct page * page = alloc_page(GFP_HIGHUSER); + if (!page) + return -1; + copy_highpage(page, new_page); + page_cache_release(new_page); + lru_cache_add(page); + new_page = page; + } + + spin_lock(&mm->page_table_lock); /* * This silly early PAGE_DIRTY setting removes a race *************** *** 1342,1354 **** flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); ! if (write_access) { entry = pte_mkwrite(pte_mkdirty(entry)); - } else if (page_count(new_page) > 1 && - !(vma->vm_flags & VM_SHARED)) - entry = pte_wrprotect(entry); set_pte(page_table, entry); } else { /* One of our sibling threads was faster, back out. */ page_cache_release(new_page); return 1; } --- 1325,1335 ---- flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); ! if (write_access) entry = pte_mkwrite(pte_mkdirty(entry)); set_pte(page_table, entry); } else { /* One of our sibling threads was faster, back out. 
*/ page_cache_release(new_page); + spin_unlock(&mm->page_table_lock); return 1; } *************** *** 1356,1359 **** --- 1337,1341 ---- /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); + spin_unlock(&mm->page_table_lock); return 2; /* Major fault */ } *************** *** 1376,1379 **** --- 1358,1364 ---- * so we don't need to worry about a page being suddenly been added into * our VM. + * + * We enter with the pagetable spinlock held, we are supposed to + * release it when done. */ static inline int handle_pte_fault(struct mm_struct *mm, *************** *** 1403,1406 **** --- 1388,1392 ---- entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); + spin_unlock(&mm->page_table_lock); return 1; } *************** *** 1412,1416 **** unsigned long address, int write_access) { - int ret = -1; pgd_t *pgd; pmd_t *pmd; --- 1398,1401 ---- *************** *** 1429,1436 **** pte_t * pte = pte_alloc(mm, pmd, address); if (pte) ! ret = handle_pte_fault(mm, vma, address, write_access, pte); } spin_unlock(&mm->page_table_lock); ! return ret; } --- 1414,1421 ---- pte_t * pte = pte_alloc(mm, pmd, address); if (pte) ! return handle_pte_fault(mm, vma, address, write_access, pte); } spin_unlock(&mm->page_table_lock); ! return -1; } Index: mmap.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/mmap.c,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -r1.3 -r1.4 *** mmap.c 2001/09/29 20:30:14 1.3 --- mmap.c 2001/12/12 20:45:46 1.4 *************** *** 72,77 **** return 1; ! free = atomic_read(&buffermem_pages); ! free += atomic_read(&page_cache_size); free += nr_free_pages(); free += nr_swap_pages; --- 72,77 ---- return 1; ! /* The page cache contains buffer pages these days.. */ ! free = atomic_read(&page_cache_size); free += nr_free_pages(); free += nr_swap_pages; *************** *** 81,85 **** * and in the swapper space. At the same time, this compensates * for the swap-space over-allocation (ie "nr_swap_pages" being ! * too small. */ free += swapper_space.nrpages; --- 81,85 ---- * and in the swapper space. At the same time, this compensates * for the swap-space over-allocation (ie "nr_swap_pages" being ! * too small. */ free += swapper_space.nrpages; Index: oom_kill.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/oom_kill.c,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -r1.4 -r1.5 *** oom_kill.c 2001/09/29 20:30:14 1.4 --- oom_kill.c 2001/12/12 20:45:46 1.5 *************** *** 21,28 **** #include <linux/swapctl.h> #include <linux/timex.h> - #ifdef CONFIG_COMP_CACHE #include <linux/comp_cache.h> #endif /* #define DEBUG */ --- 21,28 ---- #include <linux/swapctl.h> #include <linux/timex.h> #ifdef CONFIG_COMP_CACHE #include <linux/comp_cache.h> #endif + /* #define DEBUG */ *************** *** 154,158 **** */ p->counter = 5 * HZ; ! p->flags |= PF_MEMALLOC; /* This process has hardware access, be more careful. */ --- 154,158 ---- */ p->counter = 5 * HZ; ! p->flags |= PF_MEMALLOC | PF_MEMDIE; /* This process has hardware access, be more careful. */ *************** *** 172,176 **** * don't have to be perfect here, we just have to be good. */ ! void oom_kill(void) { struct task_struct *p = select_bad_process(), *q; --- 172,176 ---- * don't have to be perfect here, we just have to be good. */ ! 
static void oom_kill(void) { struct task_struct *p = select_bad_process(), *q; *************** *** 195,197 **** --- 195,248 ---- schedule(); return; + } + + /** + * out_of_memory - is the system out of memory? + */ + void out_of_memory(void) + { + static unsigned long first, last, count; + unsigned long now, since; + + /* + * Enough swap space left? Not OOM. + */ + if (nr_swap_pages > 0) + return; + + now = jiffies; + since = now - last; + last = now; + + /* + * If it's been a long time since last failure, + * we're not oom. + */ + last = now; + if (since > 5*HZ) + goto reset; + + /* + * If we haven't tried for at least one second, + * we're not really oom. + */ + since = now - first; + if (since < HZ) + return; + + /* + * If we have gotten only a few failures, + * we're not really oom. + */ + if (++count < 10) + return; + + /* + * Ok, really out of memory. Kill something. + */ + oom_kill(); + + reset: + first = now; + count = 0; } Index: page_alloc.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/page_alloc.c,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -r1.11 -r1.12 *** page_alloc.c 2001/10/01 22:43:59 1.11 --- page_alloc.c 2001/12/12 20:45:46 1.12 *************** *** 28,32 **** static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; ! static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 32, 128, 128, }; static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, }; static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, }; --- 28,32 ---- static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; ! static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, }; static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, }; static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, }; *************** *** 85,94 **** if (PageLocked(page)) BUG(); ! if (PageDecrAfter(page)) BUG(); if (PageActive(page)) BUG(); - if (PageInactive(page)) - BUG(); #ifdef CONFIG_COMP_SWAP page->flags &= ~(1<<PG_comp_swap_cache); --- 85,92 ---- if (PageLocked(page)) BUG(); ! if (PageLRU(page)) BUG(); if (PageActive(page)) BUG(); #ifdef CONFIG_COMP_SWAP page->flags &= ~(1<<PG_comp_swap_cache); *************** *** 147,158 **** local_freelist: ! /* ! * This is a little subtle: if the allocation order ! * wanted is major than zero we'd better take all the pages ! * local since we must deal with fragmentation too and we ! * can't rely on the nr_local_pages information. ! */ ! if (current->nr_local_pages && !current->allocation_order) goto back_local_freelist; list_add(&page->list, ¤t->local_pages); --- 145,152 ---- local_freelist: ! if (current->nr_local_pages) goto back_local_freelist; + if (in_interrupt()) + goto back_local_freelist; list_add(&page->list, ¤t->local_pages); *************** *** 216,221 **** set_page_count(page, 1); if (BAD_RANGE(zone,page)) BUG(); ! DEBUG_LRU_PAGE(page); return page; } --- 210,218 ---- set_page_count(page, 1); if (BAD_RANGE(zone,page)) + BUG(); + if (PageLRU(page)) BUG(); ! if (PageActive(page)) ! BUG(); return page; } *************** *** 282,291 **** if (PageLocked(page)) BUG(); ! if (PageDecrAfter(page)) BUG(); if (PageActive(page)) BUG(); - if (PageInactive(page)) - BUG(); if (PageDirty(page)) BUG(); --- 279,286 ---- if (PageLocked(page)) BUG(); ! 
if (PageLRU(page)) BUG(); if (PageActive(page)) BUG(); if (PageDirty(page)) BUG(); *************** *** 312,321 **** } - static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order) - { - long free = zone->free_pages - (1UL << order); - return free >= 0 ? free : 0; - } - /* * This is the 'heart' of the zoned buddy allocator: --- 307,310 ---- *************** *** 323,326 **** --- 312,316 ---- struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist) { + unsigned long min; zone_t **zone, * classzone; struct page * page; *************** *** 329,332 **** --- 319,323 ---- zone = zonelist->zones; classzone = *zone; + min = 1UL << order; for (;;) { zone_t *z = *(zone++); *************** *** 334,338 **** break; ! if (zone_free_pages(z, order) > z->pages_low) { page = rmqueue(z, order); if (page) --- 325,330 ---- break; ! min += z->pages_low; ! if (z->free_pages > min) { page = rmqueue(z, order); if (page) *************** *** 347,360 **** zone = zonelist->zones; for (;;) { ! unsigned long min; zone_t *z = *(zone++); if (!z) break; ! min = z->pages_min; if (!(gfp_mask & __GFP_WAIT)) ! min >>= 2; ! if (zone_free_pages(z, order) > min) { page = rmqueue(z, order); if (page) --- 339,354 ---- zone = zonelist->zones; + min = 1UL << order; for (;;) { ! unsigned long local_min; zone_t *z = *(zone++); if (!z) break; ! local_min = z->pages_min; if (!(gfp_mask & __GFP_WAIT)) ! local_min >>= 2; ! min += local_min; ! if (z->free_pages > min) { page = rmqueue(z, order); if (page) *************** *** 365,369 **** /* here we're in the low on memory slow path */ ! if (current->flags & PF_MEMALLOC) { zone = zonelist->zones; for (;;) { --- 359,364 ---- /* here we're in the low on memory slow path */ ! rebalance: ! if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) { zone = zonelist->zones; for (;;) { *************** *** 378,383 **** return NULL; } - rebalance: page = balance_classzone(classzone, gfp_mask, order, &freed); if (page) --- 373,381 ---- return NULL; } + + /* Atomic allocations - we can't balance anything */ + if (!(gfp_mask & __GFP_WAIT)) + return NULL; page = balance_classzone(classzone, gfp_mask, order, &freed); if (page) *************** *** 385,422 **** zone = zonelist->zones; ! if (likely(freed)) { ! for (;;) { ! zone_t *z = *(zone++); ! if (!z) ! break; ! ! if (zone_free_pages(z, order) > z->pages_min) { ! page = rmqueue(z, order); ! if (page) ! return page; ! } ! } ! goto rebalance; ! } else { ! /* ! * Check that no other task is been killed meanwhile, ! * in such a case we can succeed the allocation. ! */ ! for (;;) { ! zone_t *z = *(zone++); ! if (!z) ! break; ! if (zone_free_pages(z, order) > z->pages_high) { ! page = rmqueue(z, order); ! if (page) ! return page; ! } } } ! printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i) from %p\n", ! order, gfp_mask, !!(current->flags & PF_MEMALLOC), __builtin_return_address(0)); ! return NULL; } --- 383,409 ---- zone = zonelist->zones; ! min = 1UL << order; ! for (;;) { ! zone_t *z = *(zone++); ! if (!z) ! break; ! min += z->pages_min; ! if (z->free_pages > min) { ! page = rmqueue(z, order); ! if (page) ! return page; } } ! /* Don't let big-order allocations loop */ ! if (order > 3) ! return NULL; ! ! /* Yield for kswapd, and try again */ ! current->policy |= SCHED_YIELD; ! __set_current_state(TASK_RUNNING); ! schedule(); ! 
goto rebalance; } *************** *** 447,450 **** --- 434,446 ---- } + void page_cache_release(struct page *page) + { + if (!PageReserved(page) && put_page_testzero(page)) { + if (PageLRU(page)) + lru_cache_del(page); + __free_pages_ok(page, 0); + } + } + void __free_pages(struct page *page, unsigned int order) { *************** *** 484,501 **** pg_data_t *pgdat = pgdat_list; unsigned int sum = 0; - zonelist_t *zonelist; - zone_t **zonep, *zone; do { ! zonelist = pgdat->node_zonelists + __GFP_HIGHMEM; ! zonep = zonelist->zones; ! ! for (zone = *zonep++; zone; zone = *zonep++) ! sum += zone->free_pages; pgdat = pgdat->node_next; } while (pgdat); ! return sum + nr_active_pages + nr_inactive_pages; } --- 480,500 ---- pg_data_t *pgdat = pgdat_list; unsigned int sum = 0; do { ! zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK); ! zone_t **zonep = zonelist->zones; ! zone_t *zone; ! ! for (zone = *zonep++; zone; zone = *zonep++) { ! unsigned long size = zone->size; ! unsigned long high = zone->pages_high; ! if (size > high) ! sum += size - high; ! } pgdat = pgdat->node_next; } while (pgdat); ! return sum; } *************** *** 514,517 **** --- 513,518 ---- #endif + #define K(x) ((x) << (PAGE_SHIFT-10)) + /* * Show free area list (used inside shift_scroll-lock stuff) *************** *** 523,530 **** unsigned int order; unsigned type; printk("Free pages: %6dkB (%6dkB HighMem)\n", ! nr_free_pages() << (PAGE_SHIFT-10), ! nr_free_highpages() << (PAGE_SHIFT-10)); printk("( Active: %d, inactive: %d, free: %d )\n", --- 524,547 ---- unsigned int order; unsigned type; + pg_data_t *tmpdat = pgdat; printk("Free pages: %6dkB (%6dkB HighMem)\n", ! K(nr_free_pages()), ! K(nr_free_highpages())); ! ! while (tmpdat) { ! zone_t *zone; ! for (zone = tmpdat->node_zones; ! zone < tmpdat->node_zones + MAX_NR_ZONES; zone++) ! printk("Zone:%s freepages:%6lukB min:%6lukB low:%6lukB " ! "high:%6lukB\n", ! zone->name, ! K(zone->free_pages), ! K(zone->pages_min), ! K(zone->pages_low), ! K(zone->pages_high)); ! ! tmpdat = tmpdat->node_next; ! } printk("( Active: %d, inactive: %d, free: %d )\n", *************** *** 552,561 **** } total += nr * (1 << order); ! printk("%lu*%lukB ", nr, ! (PAGE_SIZE>>10) << order); } spin_unlock_irqrestore(&zone->lock, flags); } ! printk("= %lukB)\n", total * (PAGE_SIZE>>10)); } --- 569,577 ---- } total += nr * (1 << order); ! printk("%lu*%lukB ", nr, K(1UL) << order); } spin_unlock_irqrestore(&zone->lock, flags); } ! printk("= %lukB)\n", K(total)); } Index: shmem.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/shmem.c,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -r1.10 -r1.11 *** shmem.c 2001/09/29 20:30:14 1.10 --- shmem.c 2001/12/12 20:45:46 1.11 *************** *** 27,42 **** #include <linux/string.h> #include <linux/locks.h> ! #include <asm/smplock.h> #include <asm/uaccess.h> - #ifdef CONFIG_COMP_CACHE #include <linux/comp_cache.h> #endif /* This magic number is used in glibc for posix shared memory */ [...1438 lines suppressed...] - spin_unlock (&shmem_ilock); - } - - /* * shmem_file_setup - get an unlinked file living in shmem fs --- 1448,1451 ---- *************** *** 1338,1342 **** return ERR_PTR(-EINVAL); ! if (!vm_enough_memory((size) >> PAGE_SHIFT)) return ERR_PTR(-ENOMEM); --- 1467,1471 ---- return ERR_PTR(-EINVAL); ! 
if (!vm_enough_memory((size) >> PAGE_CACHE_SHIFT)) return ERR_PTR(-ENOMEM); Index: swap_state.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/swap_state.c,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -r1.15 -r1.16 *** swap_state.c 2001/09/29 20:30:14 1.15 --- swap_state.c 2001/12/12 20:45:46 1.16 *************** *** 17,21 **** #include <asm/pgtable.h> - #ifdef CONFIG_COMP_CACHE #include <linux/comp_cache.h> --- 17,20 ---- *************** *** 29,34 **** static int swap_writepage(struct page *page) { ! if (exclusive_swap_page(page)) { ! delete_from_swap_cache(page); UnlockPage(page); return 0; --- 28,32 ---- static int swap_writepage(struct page *page) { ! if (remove_exclusive_swap_page(page)) { UnlockPage(page); return 0; *************** *** 56,92 **** #ifdef SWAP_CACHE_INFO ! unsigned long swap_cache_add_total; ! unsigned long swap_cache_del_total; ! unsigned long swap_cache_find_total; ! unsigned long swap_cache_find_success; void show_swap_cache_info(void) { ! printk("Swap cache: add %ld, delete %ld, find %ld/%ld\n", ! swap_cache_add_total, ! swap_cache_del_total, ! swap_cache_find_success, swap_cache_find_total); } #endif ! void add_to_swap_cache(struct page *page, swp_entry_t entry) { - unsigned long flags; - - #ifdef SWAP_CACHE_INFO - swap_cache_add_total++; - #endif - if (!PageLocked(page)) - BUG(); if (page->mapping) BUG(); ! /* clear PG_dirty so a subsequent set_page_dirty takes effect */ ! flags = page->flags & ~(1 << PG_error | 1 << PG_dirty | 1 << PG_arch_1 | 1 << PG_referenced); ! page->flags = flags | (1 << PG_uptodate); #ifdef CONFIG_COMP_CACHE add_swap_cache_page_vswap(page, entry); #endif ! add_to_page_cache_locked(page, &swapper_space, entry.val); } --- 54,103 ---- #ifdef SWAP_CACHE_INFO ! #define INC_CACHE_INFO(x) (swap_cache_info.x++) ! ! static struct { ! unsigned long add_total; ! unsigned long del_total; ! unsigned long find_success; ! unsigned long find_total; ! unsigned long noent_race; ! unsigned long exist_race; ! } swap_cache_info; void show_swap_cache_info(void) { ! printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n", ! swap_cache_info.add_total, swap_cache_info.del_total, ! swap_cache_info.find_success, swap_cache_info.find_total, ! swap_cache_info.noent_race, swap_cache_info.exist_race); } + #else + #define INC_CACHE_INFO(x) do { } while (0) #endif ! int add_to_swap_cache(struct page *page, swp_entry_t entry) { if (page->mapping) BUG(); ! if (!swap_duplicate(entry)) { ! INC_CACHE_INFO(noent_race); ! return -ENOENT; ! } ! if (add_to_page_cache_unique(page, &swapper_space, entry.val, ! page_hash(&swapper_space, entry.val)) != 0) { ! swap_free(entry); ! INC_CACHE_INFO(exist_race); ! return -EEXIST; ! } #ifdef CONFIG_COMP_CACHE add_swap_cache_page_vswap(page, entry); #endif ! if (!PageLocked(page)) ! BUG(); ! if (!PageSwapCache(page)) ! BUG(); ! INC_CACHE_INFO(add_total); ! return 0; } *************** *** 97,108 **** void __delete_from_swap_cache(struct page *page) { - #ifdef SWAP_CACHE_INFO - swap_cache_del_total++; - #endif if (!PageLocked(page)) BUG(); if (!PageSwapCache(page)) BUG(); - ClearPageDirty(page); #ifdef CONFIG_COMP_CACHE --- 108,115 ---- *************** *** 110,113 **** --- 117,121 ---- #endif __remove_inode_page(page); + INC_CACHE_INFO(del_total); } *************** *** 125,130 **** BUG(); ! if (block_flushpage(page, 0)) ! lru_cache_del(page); entry.val = page->index; --- 133,137 ---- BUG(); ! 
block_flushpage(page, 0); entry.val = page->index; *************** *** 154,159 **** */ if (PageSwapCache(page) && !TryLockPage(page)) { ! if (exclusive_swap_page(page)) ! delete_from_swap_cache(page); UnlockPage(page); } --- 161,165 ---- */ if (PageSwapCache(page) && !TryLockPage(page)) { ! remove_exclusive_swap_page(page); UnlockPage(page); } *************** *** 171,177 **** struct page *found; - #ifdef SWAP_CACHE_INFO - swap_cache_find_total++; - #endif found = find_get_page(&swapper_space, entry.val); /* --- 177,180 ---- *************** *** 181,188 **** * that, but no need to change: we _have_ got the right page. */ ! #ifdef SWAP_CACHE_INFO if (found) ! swap_cache_find_success++; ! #endif return found; } --- 184,190 ---- * that, but no need to change: we _have_ got the right page. */ ! INC_CACHE_INFO(find_total); if (found) ! INC_CACHE_INFO(find_success); return found; } *************** *** 196,266 **** struct page * read_swap_cache_async(swp_entry_t entry) { ! struct page *found_page, *new_page; ! struct page **hash; ! ! /* ! * Look for the page in the swap cache. Since we normally call ! * this only after lookup_swap_cache() failed, re-calling that ! * would confuse the statistics: use __find_get_page() directly. ! */ ! hash = page_hash(&swapper_space, entry.val); ! #ifdef CONFIG_COMP_CACHE ! found_page = lookup_comp_cache(entry); ! #else ! found_page = __find_get_page(&swapper_space, entry.val, hash); ! #endif ! ! if (found_page) ! goto out; ! ! new_page = alloc_page(GFP_HIGHUSER); ! if (!new_page) ! goto out; /* Out of memory */ ! if (TryLockPage(new_page)) ! BUG(); ! /* ! * Check the swap cache again, in case we stalled above. ! * swap_list_lock is guarding against races between this check ! * and where the new page is added to the swap cache below. ! * It is also guarding against race where try_to_swap_out ! * allocates entry with get_swap_page then adds to cache. ! */ #ifdef CONFIG_COMP_CACHE ! found_page = lookup_comp_cache(entry); #else ! swap_list_lock(); ! found_page = __find_get_page(&swapper_space, entry.val, hash); #endif ! ! if (found_page) ! goto out_free_page; ! ! /* ! * Make sure the swap entry is still in use. It could have gone ! * since caller dropped page_table_lock, while allocating page above, ! * or while allocating page in prior call via swapin_readahead. ! */ ! if (!swap_duplicate(entry)) /* Account for the swap cache */ ! goto out_free_page; ! /* ! * Add it to the swap cache and read its contents. ! */ ! add_to_swap_cache(new_page, entry); ! swap_list_unlock(); ! ! rw_swap_page(READ, new_page); #ifdef CONFIG_COMP_SWAP ! if (swap_compressed(entry)) ! PageSetCompSwapCache(new_page); ! #endif ! return new_page; ! out_free_page: ! swap_list_unlock(); ! UnlockPage(new_page); ! page_cache_release(new_page); ! out: return found_page; } --- 198,253 ---- struct page * read_swap_cache_async(swp_entry_t entry) { ! struct page *found_page, *new_page = NULL; ! int err; ! do { ! /* ! * First check the swap cache. Since this is normally ! * called after lookup_swap_cache() failed, re-calling ! * that would confuse statistics: use find_get_page() ! * directly. ! */ #ifdef CONFIG_COMP_CACHE ! found_page = lookup_comp_cache(entry); #else ! found_page = find_get_page(&swapper_space, entry.val); #endif ! if (found_page) ! break; ! /* ! * Get a new page to read into from swap. ! */ ! if (!new_page) { ! new_page = alloc_page(GFP_HIGHUSER); ! if (!new_page) ! break; /* Out of memory */ ! } ! ! /* ! * Associate the page with swap entry in the swap cache. ! 
* May fail (-ENOENT) if swap entry has been freed since ! * our caller observed it. May fail (-EEXIST) if there ! * is already a page associated with this entry in the ! * swap cache: added by a racing read_swap_cache_async, ! * or by try_to_swap_out (or shmem_writepage) re-using ! * the just freed swap entry for an existing page. ! */ ! err = add_to_swap_cache(new_page, entry); ! if (!err) { ! /* ! * Initiate read into locked page and return. ! */ ! rw_swap_page(READ, new_page); #ifdef CONFIG_COMP_SWAP ! if (swap_compressed(entry)) ! PageSetCompSwapCache(new_page); ! #endif ! return new_page; ! } ! } while (err != -ENOENT); ! if (new_page) ! page_cache_release(new_page); return found_page; } Index: swapfile.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/swapfile.c,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -r1.18 -r1.19 *** swapfile.c 2001/09/29 20:30:14 1.18 --- swapfile.c 2001/12/12 20:45:46 1.19 *************** *** 18,26 **** #include <asm/pgtable.h> - #ifdef CONFIG_COMP_CACHE - #include <linux/comp_cache.h> - #endif --- 18,23 ---- *************** *** 48,52 **** unsigned long offset, type; ! if (virtual_swap_address(entry)) BUG(); --- 45,49 ---- unsigned long offset, type; ! if (vswap_address(entry)) BUG(); *************** *** 63,67 **** goto bad_offset; if (!p->swap_map[offset]) ! goto bad_unused; if (!compressed) { --- 60,64 ---- goto bad_offset; if (!p->swap_map[offset]) ! goto bad_unused; if (!compressed) { *************** *** 94,107 **** return; ! bad_entry: printk("Null entry in swap_compressed\n"); goto out; ! bad_file: printk("Bad swap file entry (scse) %08lx\n", entry.val); goto out; ! bad_offset: printk("Bad swap offset entry %08lx\n", entry.val); goto out; ! bad_unused: printk("Unused swap offset entry in swap_compressed %08lx\n", entry.val); goto out; --- 91,104 ---- return; ! bad_entry: printk("Null entry in swap_compressed\n"); goto out; ! bad_file: printk("Bad swap file entry (scse) %08lx\n", entry.val); goto out; ! bad_offset: printk("Bad swap offset entry %08lx\n", entry.val); goto out; ! bad_unused: printk("Unused swap offset entry in swap_compressed %08lx\n", entry.val); goto out; *************** *** 115,119 **** int retval = -1; ! if (virtual_swap_address(entry)) BUG(); --- 112,116 ---- int retval = -1; ! if (vswap_address(entry)) BUG(); *************** *** 143,162 **** if (retval == -1) BUG(); ! out: return retval; ! ! bad_entry: printk("Null entry in swap_compressed\n"); goto out; ! bad_file: printk("Bad swap file entry (swap_algorithm) %08lx\n", entry.val); goto out; ! bad_offset: printk("Bad swap offset entry %08lx\n", entry.val); goto out; ! bad_unused: printk("Unused swap offset entry in swap_compressed %08lx\n", entry.val); goto out; ! bad_compressed: printk("Swap offset entry not compressed %08lx\n", entry.val); goto out; --- 140,159 ---- if (retval == -1) BUG(); ! out: return retval; ! ! bad_entry: printk("Null entry in swap_compressed\n"); goto out; ! bad_file: printk("Bad swap file entry (swap_algorithm) %08lx\n", entry.val); goto out; ! bad_offset: printk("Bad swap offset entry %08lx\n", entry.val); goto out; ! bad_unused: printk("Unused swap offset entry in swap_compressed %08lx\n", entry.val); goto out; ! bad_compressed: printk("Swap offset entry not compressed %08lx\n", entry.val); goto out; *************** *** 170,174 **** int retval = 0; ! if (virtual_swap_address(entry)) BUG(); --- 167,171 ---- int retval = 0; ! 
if (vswap_address(entry)) BUG(); *************** *** 186,206 **** goto bad_unused; retval = EntryCompressed(p, offset); ! out: return retval; ! bad_entry: printk("Null entry in swap_compressed\n"); goto out; ! bad_file: printk("Bad swap file entry (swap_compressed) %08lx\n", entry.val); goto out; ! bad_offset: printk("Bad swap offset entry %08lx\n", entry.val); goto out; ! bad_unused: printk("Unused swap offset entry in swap_compressed %08lx\n", entry.val); goto out; } - #endif --- 183,202 ---- goto bad_unused; retval = EntryCompressed(p, offset); ! out: return retval; ! bad_entry: printk("Null entry in swap_compressed\n"); goto out; ! bad_file: printk("Bad swap file entry (swap_compressed) %08lx\n", entry.val); goto out; ! bad_offset: printk("Bad swap offset entry %08lx\n", entry.val); goto out; ! bad_unused: printk("Unused swap offset entry in swap_compressed %08lx\n", entry.val); goto out; } #endif *************** *** 258,263 **** si->highest_bit = 0; } ! /* Initial count 1 for user reference + 1 for swap cache */ ! si->swap_map[offset] = 2; nr_swap_pages--; si->cluster_next = offset+1; --- 254,258 ---- si->highest_bit = 0; } ! si->swap_map[offset] = 1; nr_swap_pages--; si->cluster_next = offset+1; *************** *** 269,277 **** } - /* - * Callers of get_swap_page must hold swap_list_lock across the call, - * and across the following add_to_swap_cache, to guard against races - * with read_swap_cache_async. - */ swp_entry_t get_swap_page(void) { --- 264,267 ---- *************** *** 282,285 **** --- 272,276 ---- entry.val = 0; /* Out of memory */ + swap_list_lock(); type = swap_list.next; if (type < 0) *************** *** 317,328 **** } out: return entry; } ! /* ! * Caller has made sure that the swapdevice corresponding to entry ! * is still around or has not been recycled. ! */ ! void swap_free(swp_entry_t entry) { struct swap_info_struct * p; --- 308,316 ---- } out: + swap_list_unlock(); return entry; } ! static struct swap_info_struct * swap_info_get(swp_entry_t entry) { struct swap_info_struct * p; *************** *** 331,342 **** if (!entry.val) goto out; - type = SWP_TYPE(entry); #ifdef CONFIG_COMP_CACHE ! /* virtual swap address */ ! if (type == COMP_CACHE_SWP_TYPE) { ! comp_cache_swp_free(entry); ! return; ! } #endif if (type >= nr_swapfiles) --- 319,326 ---- if (!entry.val) goto out; type = SWP_TYPE(entry); #ifdef CONFIG_COMP_CACHE ! if (vswap_address(entry)) ! return &swap_info[type]; #endif if (type >= nr_swapfiles) *************** *** 354,368 **** swap_list.next = type; swap_device_lock(p); ! if (p->swap_map[offset] < SWAP_MAP_MAX) { #ifdef CONFIG_COMP_CACHE ! if (!--(p->swap_map[offset])) { #ifdef CONFIG_COMP_SWAP p->swap_comp[offset] = 0; #endif /* let's keep the swap_map[offset] used for * the case the comp_cache_release() calls * swap_dup() */ p->swap_map[offset]++; ! if (!comp_cache_release(entry)) { if (offset < p->lowest_bit) p->lowest_bit = offset; --- 338,391 ---- swap_list.next = type; swap_device_lock(p); ! return p; ! ! bad_free: ! printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val); ! goto out; ! bad_offset: ! printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val); ! goto out; ! bad_device: ! printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val); ! goto out; ! bad_nofile: ! printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val); ! out: ! return NULL; ! } ! ! static void swap_info_put(struct swap_info_struct * p) ! { #ifdef CONFIG_COMP_CACHE ! if (vswap_info_struct(p)) ! return; ! #endif ! swap_device_unlock(p); ! 
swap_list_unlock(); ! } ! ! static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) ! { ! int count; ! ! #ifdef CONFIG_COMP_CACHE ! if (vswap_info_struct(p)) ! return comp_cache_swp_free(SWP_ENTRY(COMP_CACHE_SWP_TYPE, offset)); ! #endif ! count = p->swap_map[offset]; ! ! if (count < SWAP_MAP_MAX) { ! count--; ! p->swap_map[offset] = count; ! if (!count) { #ifdef CONFIG_COMP_SWAP p->swap_comp[offset] = 0; #endif + #ifdef CONFIG_COMP_CACHE /* let's keep the swap_map[offset] used for * the case the comp_cache_release() calls * swap_dup() */ p->swap_map[offset]++; ! if (!comp_cache_release(SWP_ENTRY(p - swap_info, offset))) { if (offset < p->lowest_bit) p->lowest_bit = offset; *************** *** 373,377 **** p->swap_map[offset]--; #else - if (!--(p->swap_map[offset])) { if (offset < p->lowest_bit) p->lowest_bit = offset; --- 396,399 ---- *************** *** 382,405 **** } } ! swap_device_unlock(p); ! swap_list_unlock(); ! out: ! return; ! bad_nofile: ! printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val); ! goto out; ! bad_device: ! printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val); ! goto out; ! bad_offset: ! printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val); ! goto out; ! bad_free: ! printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val); ! goto out; } /* * The swap entry has been read in advance, and we return 1 to indicate * that the page has been used or is no longer needed. --- 404,566 ---- } } ! return count; ! } ! /* ! * Caller has made sure that the swapdevice corresponding to entry ! * is still around or has not been recycled. ! */ ! void swap_free(swp_entry_t entry) ! { ! struct swap_info_struct * p; ! ! p = swap_info_get(entry); ! if (p) { ! swap_entry_free(p, SWP_OFFSET(entry)); ! swap_info_put(p); ! } ! } ! ! /* ! * Check if we're the only user of a swap page, ! * when the page is locked. ! */ ! static int exclusive_swap_page(struct page *page) ! { ! int retval = 0; ! struct swap_info_struct * p; ! swp_entry_t entry; ! ! entry.val = page->index; ! p = swap_info_get(entry); ! if (p) { ! /* Is the only swap cache user the cache itself? */ ! #ifdef CONFIG_COMP_CACHE ! if ((vswap_address(entry) && comp_cache_swp_count(entry) == 1) || ! (!vswap_address(entry) && p->swap_map[SWP_OFFSET(entry)] == 1)) { ! #else ! if (p->swap_map[SWP_OFFSET(entry)] == 1) { ! #endif ! /* Recheck the page count with the pagecache lock held.. */ ! spin_lock(&pagecache_lock); ! if (page_count(page) - !!page->buffers == 2) ! retval = 1; ! spin_unlock(&pagecache_lock); ! } ! swap_info_put(p); ! } ! return retval; } /* + * We can use this swap cache entry directly + * if there are no other references to it. + * + * Here "exclusive_swap_page()" does the real + * work, but we opportunistically check whether + * we need to get all the locks first.. + */ + int can_share_swap_page(struct page *page) + { + int retval = 0; + + if (!PageLocked(page)) + BUG(); + switch (page_count(page)) { + case 3: + if (!page->buffers) + break; + /* Fallthrough */ + case 2: + if (!PageSwapCache(page)) + break; + retval = exclusive_swap_page(page); + break; + case 1: + if (PageReserved(page)) + break; + retval = 1; + } + return retval; + } + + /* + * Work out if there are any other processes sharing this + * swap cache page. Free it if you can. Return success. 
+ */ + int remove_exclusive_swap_page(struct page *page) + { + int retval; + struct swap_info_struct * p; + swp_entry_t entry; + + if (!PageLocked(page)) + BUG(); + if (!PageSwapCache(page)) + return 0; + if (page_count(page) - !!page->buffers != 2) /* 2: us + cache */ + return 0; + + entry.val = page->index; + p = swap_info_get(entry); + if (!p) + return 0; + + /* Is the only swap cache user the cache itself? */ + retval = 0; + #ifdef CONFIG_COMP_CACHE + if ((vswap_address(entry) && comp_cache_swp_count(entry) == 1) || + (!vswap_address(entry) && p->swap_map[SWP_OFFSET(entry)] == 1)) { + #else + if (p->swap_map[SWP_OFFSET(entry)] == 1) { + #endif + /* Recheck the page count with the pagecache lock held.. */ + spin_lock(&pagecache_lock); + if (page_count(page) - !!page->buffers == 2) { + __delete_from_swap_cache(page); + SetPageDirty(page); + retval = 1; + } + spin_unlock(&pagecache_lock); + } + swap_info_put(p); + + if (retval) { + block_flushpage(page, 0); + swap_free(entry); + page_cache_release(page); + } + + return retval; + } + + /* + * Free the swap entry like above, but also try to + * free the page cache entry if it is the last user. + */ + void free_swap_and_cache(swp_entry_t entry) + { + struct swap_info_struct * p; + struct page *page = NULL; + + p = swap_info_get(entry); + if (p) { + if (swap_entry_free(p, SWP_OFFSET(entry)) == 1) + page = find_trylock_page(&swapper_space, entry.val); + swap_info_put(p); + } + if (page) { + page_cache_get(page); + /* Only cache user (+us), or swap space full? Free it! */ + if (page_count(page) == 2 || vm_swap_full()) { + delete_from_swap_cache(page); + SetPageDirty(page); + } + UnlockPage(page); + page_cache_release(page); + } + } + + /* * The swap entry has been read in advance, and we return 1 to indicate * that the page has been used or is no longer needed. *************** *** 410,414 **** * what to do if a write is requested later. */ ! /* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, pte_t *dir, swp_entry_t entry, struct page* page) --- 571,575 ---- * what to do if a write is requested later. */ ! /* mmlist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, pte_t *dir, swp_entry_t entry, struct page* page) *************** *** 422,431 **** * address (entry), remap the pte to the page that has been * just swapped in */ ! if (virtual_swap_address(pte_to_swp_entry(pte))) { unsigned long offset = SWP_OFFSET(pte_to_swp_entry(pte)); ! if (real_swap_address(offset) != entry.val) return; ! /* free the virtual swap entry */ swap_free(pte_to_swp_entry(pte)); --- 583,592 ---- * address (entry), remap the pte to the page that has been * just swapped in */ ! if (vswap_address(pte_to_swp_entry(pte))) { unsigned long offset = SWP_OFFSET(pte_to_swp_entry(pte)); ! if (real_swap_address(offset) != entry.val) return; ! /* free the virtual swap entry */ swap_free(pte_to_swp_entry(pte)); *************** *** 447,451 **** } ! /* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long size, unsigned long offset, --- 608,612 ---- } ! /* mmlist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long size, unsigned long offset, *************** *** 475,479 **** } ! 
/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long size, --- 636,640 ---- } ! /* mmlist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long size, *************** *** 506,510 **** } ! /* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */ static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, swp_entry_t entry, struct page* page) --- 667,671 ---- } ! /* mmlist_lock and vma->vm_mm->page_table_lock are held */ static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, swp_entry_t entry, struct page* page) *************** *** 646,653 **** /* * Don't hold on to start_mm if it looks like exiting. - * Can mmput ever block? if so, then we cannot risk - * it between deleting the page from the swap cache, - * and completing the search through mms (and cannot - * use it to avoid the long hold on mmlist_lock there). */ if (atomic_read(&start_mm->mm_users) == 1) { --- 807,810 ---- *************** *** 658,698 **** /* ! * Wait for and lock page. Remove it from swap cache ! * so try_to_swap_out won't bump swap count. Mark dirty ! * so try_to_swap_out will preserve it without us having ! * to mark any present ptes as dirty: so we can skip ! * searching processes once swap count has all gone. */ lock_page(page); - #ifdef CONFIG_COMP_SWAP - if (PageCompSwapCache(page)) { - struct page *tmp_page = alloc_page(GFP_ATOMIC); - swp_entry_t real_entry; - - real_entry.val = page->index; - - if (!tmp_page) - panic("do_swap_page(): couldn't alloc temp page\n"); - - if (TryLockPage(tmp_page)) - BUG(); - - /* copy - page -> tmp_page */ - copy_page(page_address(tmp_page), page_address(page)); - - /* decompress - tmp_page -> page */ - decompress(swap_algorithm(real_entry), page_address(tmp_page), page_address(page)); - - UnlockPage(tmp_page); - page_cache_release(tmp_page); - - PageClearCompSwapCache(page); - } - #endif - if (PageSwapCache(page)) - delete_from_swap_cache(page); - SetPageDirty(page); - UnlockPage(page); - flush_page_to_ram(page); /* --- 815,827 ---- /* ! * Wait for and lock page. When do_swap_page races with ! * try_to_unuse, do_swap_page can handle the fault much ! * faster than try_to_unuse can locate the entry. This ! * apparently redundant "wait_on_page" lets try_to_unuse ! * defer to do_swap_page in such a case - in some tests, ! * do_swap_page and try_to_unuse repeatedly compete. */ + wait_on_page(page); lock_page(page); /* *************** *** 702,706 **** */ swcount = *swap_map; ! if (swcount) { if (start_mm == &init_mm) shmem_unuse(entry, page); --- 831,836 ---- */ swcount = *swap_map; ! if (swcount > 1) { ! flush_page_to_ram(page); if (start_mm == &init_mm) shmem_unuse(entry, page); *************** *** 708,712 **** unuse_process(start_mm, entry, page); } ! if (*swap_map) { int set_start_mm = (*swap_map >= swcount); struct list_head *p = &start_mm->mmlist; --- 838,842 ---- unuse_process(start_mm, entry, page); } ! if (*swap_map > 1) { int set_start_mm = (*swap_map >= swcount); struct list_head *p = &start_mm->mmlist; *************** *** 715,719 **** spin_lock(&mmlist_lock); ! while (*swap_map && (p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); swcount = *swap_map; --- 845,850 ---- spin_lock(&mmlist_lock); ! while (*swap_map > 1 && ! 
(p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); swcount = *swap_map; *************** *** 733,737 **** start_mm = new_start_mm; } - page_cache_release(page); /* --- 864,867 ---- *************** *** 752,756 **** swap_device_lock(si); nr_swap_pages++; ! *swap_map = 0; swap_device_unlock(si); swap_list_unlock(); --- 882,886 ---- swap_device_lock(si); nr_swap_pages++; ! *swap_map = 1; swap_device_unlock(si); swap_list_unlock(); *************** *** 759,762 **** --- 889,950 ---- /* + * If a reference remains (rare), we would like to leave + * the page in the swap cache; but try_to_swap_out could + * then re-duplicate the entry once we drop page lock, + * so we might loop indefinitely; also, that page could + * not be swapped out to other storage meanwhile. So: + * delete from cache even if there's another reference, + * after ensuring that the data has been saved to disk - + * since if the reference remains (rarer), it will be + * read from disk into another page. Splitting into two + * pages would be incorrect if swap supported "shared + * private" pages, but they are handled by tmpfs files. + * Note shmem_unuse already deleted its from swap cache. + */ + swcount = *swap_map; + if ((swcount > 0) != PageSwapCache(page)) + BUG(); + if ((swcount > 1) && PageDirty(page)) { + rw_swap_page(WRITE, page); + lock_page(page); + } + #ifdef CONFIG_COMP_SWAP + if (PageCompSwapCache(page)) { + struct page *tmp_page = alloc_page(GFP_ATOMIC); + swp_entry_t real_entry; + + real_entry.val = page->index; + + if (!tmp_page) + panic("do_swap_page(): couldn't alloc temp page\n"); + + if (TryLockPage(tmp_page)) + BUG(); + + /* copy - page -> tmp_page */ + copy_page(page_address(tmp_page), page_address(page)); + + /* decompress - tmp_page -> page */ + decompress(swap_algorithm(real_entry), page_address(tmp_page), page_address(page)); + + UnlockPage(tmp_page); + page_cache_release(tmp_page); + + PageClearCompSwapCache(page); + } + #endif + if (PageSwapCache(page)) + delete_from_swap_cache(page); + + /* + * So we could skip searching mms once swap count went + * to 1, we did not mark any present ptes as dirty: must + * mark page dirty so try_to_swap_out will preserve it. + */ + SetPageDirty(page); + UnlockPage(page); + page_cache_release(page); + + /* * Make sure that we aren't completely killing * interactive performance. 
Interruptible check on *************** *** 765,772 **** if (current->need_resched) schedule(); - else { - unlock_kernel(); - lock_kernel(); - } } --- 953,956 ---- *************** *** 824,828 **** --- 1008,1014 ---- p->flags = SWP_USED; swap_list_unlock(); + unlock_kernel(); err = try_to_unuse(type); + lock_kernel(); if (err) { /* re-insert swap space back into swap_list */ *************** *** 856,859 **** --- 1042,1048 ---- swap_map = p->swap_map; p->swap_map = NULL; + #ifdef CONFIG_COMP_SWAP + p->swap_comp = NULL; + #endif p->flags = 0; swap_device_unlock(p); *************** *** 962,968 **** p->swap_device = 0; p->swap_map = NULL; - #ifdef CONFIG_COMP_SWAP - p->swap_comp = NULL; - #endif p->lowest_bit = 0; p->highest_bit = 0; --- 1151,1154 ---- *************** *** 1065,1072 **** goto bad_swap; } - #ifdef CONFIG_COMP_SWAP p->swap_comp = vmalloc(maxpages * sizeof(short)); - if (!p->swap_comp) { vfree(p->swap_map); --- 1251,1256 ---- *************** *** 1076,1080 **** memset(p->swap_comp, 0, maxpages * sizeof(short)); #endif - for (i = 1 ; i < maxpages ; i++) { if (test_bit(i,(char *) swap_header)) --- 1260,1263 ---- *************** *** 1111,1115 **** goto bad_swap; } - #ifdef CONFIG_COMP_SWAP if (!(p->swap_comp = vmalloc (maxpages * sizeof(short)))) { --- 1294,1297 ---- *************** *** 1120,1123 **** --- 1302,1306 ---- memset(p->swap_comp, 0, maxpages * sizeof(short)); #endif + error = 0; memset(p->swap_map, 0, maxpages * sizeof(short)); *************** *** 1240,1245 **** #ifdef CONFIG_COMP_CACHE ! /* virtual swap address */ ! if (virtual_swap_address(entry)) { comp_cache_swp_duplicate(entry); return 1; --- 1423,1427 ---- #ifdef CONFIG_COMP_CACHE ! if (vswap_address(entry)) { comp_cache_swp_duplicate(entry); return 1; *************** *** 1287,1298 **** if (!entry.val) goto bad_entry; - type = SWP_TYPE(entry); #ifdef CONFIG_COMP_CACHE ! /* virtual swap address */ ! if (type == COMP_CACHE_SWP_TYPE) { retval = comp_cache_swp_count(entry); goto out; } ! #endif if (type >= nr_swapfiles) goto bad_file; --- 1469,1479 ---- if (!entry.val) goto bad_entry; #ifdef CONFIG_COMP_CACHE ! if (vswap_address(entry)) { retval = comp_cache_swp_count(entry); goto out; } ! #endif ! type = SWP_TYPE(entry); if (type >= nr_swapfiles) goto bad_file; Index: vmscan.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/vmscan.c,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -r1.19 -r1.20 *** vmscan.c 2001/09/29 20:30:14 1.19 --- vmscan.c 2001/12/12 20:45:46 1.20 *************** *** 8,12 **** * Removed kswapd_ctl limits, and swap out as many pages as needed * to bring the system back to f... [truncated message content] |
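The swapfile.c rewrite called out in the log message centers on pairing swap_info_get()/swap_info_put() around a new swap_entry_free() helper, so that swap_free() and free_swap_and_cache() go through one common locking and reference-count path. The sketch below condenses that pattern from the (flattened) diff above; entry validation, the error labels, the SWAP_MAP_MAX clamp and the CONFIG_COMP_CACHE virtual-swap branches are elided, so treat it as an approximation of the 2.4.16-era helpers rather than the exact committed code.

    /* Look up the swap device for an entry and take its locks.
     * Real code validates the entry first and bails out via
     * bad_nofile/bad_device/bad_offset/bad_free labels. */
    static struct swap_info_struct * swap_info_get(swp_entry_t entry)
    {
            struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];

            swap_list_lock();               /* lock order: swap list, then device */
            swap_device_lock(p);
            return p;
    }

    static void swap_info_put(struct swap_info_struct * p)
    {
            swap_device_unlock(p);
            swap_list_unlock();
    }

    /* Drop one reference on a swap_map slot; returns the new count
     * so callers like free_swap_and_cache() can tell when only the
     * swap cache reference remains. */
    static int swap_entry_free(struct swap_info_struct * p, unsigned long offset)
    {
            int count = p->swap_map[offset];

            if (count < SWAP_MAP_MAX) {
                    p->swap_map[offset] = --count;
                    if (!count) {
                            if (offset < p->lowest_bit)
                                    p->lowest_bit = offset;
                            if (offset > p->highest_bit)
                                    p->highest_bit = offset;
                            nr_swap_pages++;
                    }
            }
            return count;
    }

    void swap_free(swp_entry_t entry)
    {
            struct swap_info_struct * p;

            p = swap_info_get(entry);
            if (p) {
                    swap_entry_free(p, SWP_OFFSET(entry));
                    swap_info_put(p);
            }
    }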