[lc-checkins] CVS: linux/mm Makefile,1.5,1.6 filemap.c,1.42,1.43 memory.c,1.36,1.37 mmap.c,1.8,1.9 o
Update of /cvsroot/linuxcompressed/linux/mm In directory sc8-pr-cvs1:/tmp/cvs-serv25395/mm Modified Files: Makefile filemap.c memory.c mmap.c oom_kill.c page_alloc.c page_io.c shmem.c swap_state.c swapfile.c vmscan.c Log Message: o Port code to 2.4.20 Bug fix (?) o Changes checks in vswap.c to avoid oopses. It will BUG() instead. Some of the checks were done after the value had been accessed. Note o Virtual swap addresses are temporarily disabled, due to debugging sessions related to the use of swap files instead of swap partitions. Index: Makefile =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/Makefile,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -r1.5 -r1.6 *** Makefile 12 Dec 2001 20:45:46 -0000 1.5 --- Makefile 19 May 2003 01:38:47 -0000 1.6 *************** *** 10,14 **** O_TARGET := mm.o ! export-objs := shmem.o filemap.o obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ --- 10,14 ---- O_TARGET := mm.o ! export-objs := shmem.o filemap.o memory.o page_alloc.o obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ Index: filemap.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/filemap.c,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -r1.42 -r1.43 *** filemap.c 29 Nov 2002 21:23:02 -0000 1.42 --- filemap.c 19 May 2003 01:38:47 -0000 1.43 *************** *** 24,28 **** #include <linux/mm.h> #include <linux/iobuf.h> - #include <linux/compiler.h> #include <linux/comp_cache.h> --- 24,27 ---- *************** *** 55,59 **** ! spinlock_t pagecache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; /* * NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock --- 54,58 ---- ! spinlock_cacheline_t pagecache_lock_cacheline = {SPIN_LOCK_UNLOCKED}; /* * NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock *************** *** 65,69 **** * pagecache_lock */ ! spinlock_t pagemap_lru_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; #define CLUSTER_PAGES (1 << page_cluster) --- 64,68 ---- * pagecache_lock */ ! spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED}; #define CLUSTER_PAGES (1 << page_cluster) *************** *** 122,126 **** void __remove_inode_page(struct page *page) { ! if (PageDirty(page)) BUG(); remove_page_from_inode_queue(page); remove_page_from_hash_queue(page); --- 121,126 ---- void __remove_inode_page(struct page *page) { ! if (PageDirty(page) && !PageSwapCache(page)) ! BUG(); remove_page_from_inode_queue(page); remove_page_from_hash_queue(page); *************** *** 156,164 **** if (mapping) { spin_lock(&pagecache_lock); ! list_del(&page->list); ! list_add(&page->list, &mapping->dirty_pages); spin_unlock(&pagecache_lock); ! if (mapping->host) mark_inode_dirty_pages(mapping->host); #ifdef CONFIG_COMP_CACHE --- 156,167 ---- if (mapping) { spin_lock(&pagecache_lock); ! mapping = page->mapping; ! if (mapping) { /* may have been truncated */ ! list_del(&page->list); ! list_add(&page->list, &mapping->dirty_pages); ! } spin_unlock(&pagecache_lock); ! if (mapping && mapping->host) mark_inode_dirty_pages(mapping->host); #ifdef CONFIG_COMP_CACHE *************** *** 582,586 **** while (!list_empty(&mapping->dirty_pages)) { ! struct page *page = list_entry(mapping->dirty_pages.next, struct page, list); list_del(&page->list); --- 585,589 ---- while (!list_empty(&mapping->dirty_pages)) { ! 
struct page *page = list_entry(mapping->dirty_pages.prev, struct page, list); list_del(&page->list); *************** *** 816,819 **** --- 819,882 ---- } + /* + * Knuth recommends primes in approximately golden ratio to the maximum + * integer representable by a machine word for multiplicative hashing. + * Chuck Lever verified the effectiveness of this technique: + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * + * These primes are chosen to be bit-sparse, that is operations on + * them can use shifts and additions instead of multiplications for + * machines where multiplications are slow. + */ + #if BITS_PER_LONG == 32 + /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ + #define GOLDEN_RATIO_PRIME 0x9e370001UL + #elif BITS_PER_LONG == 64 + /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ + #define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL + #else + #error Define GOLDEN_RATIO_PRIME for your wordsize. + #endif + + /* + * In order to wait for pages to become available there must be + * waitqueues associated with pages. By using a hash table of + * waitqueues where the bucket discipline is to maintain all + * waiters on the same queue and wake all when any of the pages + * become available, and for the woken contexts to check to be + * sure the appropriate page became available, this saves space + * at a cost of "thundering herd" phenomena during rare hash + * collisions. + */ + static inline wait_queue_head_t *page_waitqueue(struct page *page) + { + const zone_t *zone = page_zone(page); + wait_queue_head_t *wait = zone->wait_table; + unsigned long hash = (unsigned long)page; + + #if BITS_PER_LONG == 64 + /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ + unsigned long n = hash; + n <<= 18; + hash -= n; + n <<= 33; + hash -= n; + n <<= 3; + hash += n; + n <<= 3; + hash -= n; + n <<= 4; + hash += n; + n <<= 2; + hash += n; + #else + /* On some cpus multiply is faster, on others gcc will do shifts */ + hash *= GOLDEN_RATIO_PRIME; + #endif + hash >>= zone->wait_table_shift; + + return &wait[hash]; + } + /* * Wait for a page to get unlocked. *************** *** 822,832 **** * ie with increased "page->count" so that the page won't * go away during the wait.. */ void ___wait_on_page(struct page *page) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); ! add_wait_queue(&page->wait, &wait); do { set_task_state(tsk, TASK_UNINTERRUPTIBLE); --- 885,911 ---- * ie with increased "page->count" so that the page won't * go away during the wait.. + * + * The waiting strategy is to get on a waitqueue determined + * by hashing. Waiters will then collide, and the newly woken + * task must then determine whether it was woken for the page + * it really wanted, and go back to sleep on the waitqueue if + * that wasn't it. With the waitqueue semantics, it never leaves + * the waitqueue unless it calls, so the loop moves forward one + * iteration every time there is + * (1) a collision + * and + * (2) one of the colliding pages is woken + * + * This is the thundering herd problem, but it is expected to + * be very rare due to the few pages that are actually being + * waited on at any given time and the quality of the hash function. */ void ___wait_on_page(struct page *page) { + wait_queue_head_t *waitqueue = page_waitqueue(page); struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); ! add_wait_queue(waitqueue, &wait); do { set_task_state(tsk, TASK_UNINTERRUPTIBLE); *************** *** 836,852 **** schedule(); } while (PageLocked(page)); ! 
tsk->state = TASK_RUNNING; ! remove_wait_queue(&page->wait, &wait); } void unlock_page(struct page *page) { ! clear_bit(PG_launder, &(page)->flags); smp_mb__before_clear_bit(); if (!test_and_clear_bit(PG_locked, &(page)->flags)) BUG(); smp_mb__after_clear_bit(); ! if (waitqueue_active(&(page)->wait)) ! wake_up(&(page)->wait); } --- 915,946 ---- schedule(); } while (PageLocked(page)); ! __set_task_state(tsk, TASK_RUNNING); ! remove_wait_queue(waitqueue, &wait); } + /* + * unlock_page() is the other half of the story just above + * __wait_on_page(). Here a couple of quick checks are done + * and a couple of flags are set on the page, and then all + * of the waiters for all of the pages in the appropriate + * wait queue are woken. + */ void unlock_page(struct page *page) { ! wait_queue_head_t *waitqueue = page_waitqueue(page); ! ClearPageLaunder(page); smp_mb__before_clear_bit(); if (!test_and_clear_bit(PG_locked, &(page)->flags)) BUG(); smp_mb__after_clear_bit(); ! ! /* ! * Although the default semantics of wake_up() are ! * to wake all, here the specific function is used ! * to make it even more explicit that a number of ! * pages are being waited on here. ! */ ! if (waitqueue_active(waitqueue)) ! wake_up_all(waitqueue); } *************** *** 857,864 **** static void __lock_page(struct page *page) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); ! add_wait_queue_exclusive(&page->wait, &wait); for (;;) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); --- 951,959 ---- static void __lock_page(struct page *page) { + wait_queue_head_t *waitqueue = page_waitqueue(page); struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); ! add_wait_queue_exclusive(waitqueue, &wait); for (;;) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); *************** *** 870,877 **** break; } ! tsk->state = TASK_RUNNING; ! remove_wait_queue(&page->wait, &wait); } - /* --- 965,971 ---- break; } ! __set_task_state(tsk, TASK_RUNNING); ! remove_wait_queue(waitqueue, &wait); } /* *************** *** 1091,1103 **** /* - * Returns locked page at given index in given cache, creating it if needed. - */ - struct page *grab_cache_page(struct address_space *mapping, unsigned long index) - { - return find_or_create_page(mapping, index, mapping->gfp_mask); - } - - - /* * Same as grab_cache_page, but do not wait if the page is unavailable. * This is intended for speculative data generators, where the data can --- 1185,1188 ---- *************** *** 1381,1388 **** * Mark a page as having seen activity. * ! * If it was already so marked, move it ! * to the active queue and drop the referenced ! * bit. Otherwise, just mark it for future ! * action.. */ void mark_page_accessed(struct page *page) --- 1466,1471 ---- * Mark a page as having seen activity. * ! * If it was already so marked, move it to the active queue and drop ! * the referenced bit. Otherwise, just mark it for future action.. */ void mark_page_accessed(struct page *page) *************** *** 1391,1399 **** activate_page(page); ClearPageReferenced(page); ! return; ! } ! ! /* Mark the page referenced, AFTER checking for previous usage.. */ ! SetPageReferenced(page); } --- 1474,1479 ---- activate_page(page); ClearPageReferenced(page); ! } else ! 
SetPageReferenced(page); } *************** *** 1634,1637 **** --- 1714,1718 ---- struct address_space * mapping = filp->f_dentry->d_inode->i_mapping; struct inode * inode = mapping->host; + loff_t size = inode->i_size; new_iobuf = 0; *************** *** 1659,1662 **** --- 1740,1746 ---- goto out_free; + if ((rw == READ) && (offset + count > size)) + count = size - offset; + /* * Flush to disk exclusively the _data_, metadata must remain *************** *** 1689,1692 **** --- 1773,1777 ---- count -= retval; buf += retval; + /* warning: weird semantics here, we're reporting a read behind the end of the file */ progress += retval; } *************** *** 1778,1783 **** size = inode->i_size; if (pos < size) { - if (pos + count > size) - count = size - pos; retval = generic_file_direct_IO(READ, filp, buf, count, pos); if (retval > 0) --- 1863,1866 ---- *************** *** 2307,2310 **** --- 2390,2396 ---- struct file * file = vma->vm_file; + if ( (flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED) ) + return -EBUSY; + if (file && (vma->vm_flags & VM_SHARED)) { ret = filemap_sync(vma, start, end-start, flags); *************** *** 2348,2351 **** --- 2434,2440 ---- if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) goto out; + if ((flags & MS_ASYNC) && (flags & MS_SYNC)) + goto out; + error = 0; if (end == start) *************** *** 2353,2357 **** /* * If the interval [start,end) covers some unmapped address ranges, ! * just ignore them, but return -EFAULT at the end. */ vma = find_vma(current->mm, start); --- 2442,2446 ---- /* * If the interval [start,end) covers some unmapped address ranges, ! * just ignore them, but return -ENOMEM at the end. */ vma = find_vma(current->mm, start); *************** *** 2359,2368 **** for (;;) { /* Still start < end. */ ! error = -EFAULT; if (!vma) goto out; /* Here start < vma->vm_end. */ if (start < vma->vm_start) { ! unmapped_error = -EFAULT; start = vma->vm_start; } --- 2448,2457 ---- for (;;) { /* Still start < end. */ ! error = -ENOMEM; if (!vma) goto out; /* Here start < vma->vm_end. */ if (start < vma->vm_start) { ! unmapped_error = -ENOMEM; start = vma->vm_start; } *************** *** 2512,2516 **** /* This caps the number of vma's this process can own */ ! if (vma->vm_mm->map_count > MAX_MAP_COUNT) return -ENOMEM; --- 2601,2605 ---- /* This caps the number of vma's this process can own */ ! if (vma->vm_mm->map_count > max_map_count) return -ENOMEM; *************** *** 3077,3081 **** err = -EFBIG; ! if (limit != RLIM_INFINITY) { if (pos >= limit) { send_sig(SIGXFSZ, current, 0); --- 3166,3170 ---- err = -EFBIG; ! 
if (!S_ISBLK(inode->i_mode) && limit != RLIM_INFINITY) { if (pos >= limit) { send_sig(SIGXFSZ, current, 0); Index: memory.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/memory.c,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -r1.36 -r1.37 *** memory.c 10 Sep 2002 16:43:12 -0000 1.36 --- memory.c 19 May 2003 01:38:48 -0000 1.37 *************** *** 45,48 **** --- 45,49 ---- #include <linux/highmem.h> #include <linux/pagemap.h> + #include <linux/module.h> #include <linux/comp_cache.h> *************** *** 53,56 **** --- 54,58 ---- unsigned long max_mapnr; unsigned long num_physpages; + unsigned long num_mappedpages; void * high_memory; struct page *highmem_start_page; *************** *** 529,532 **** --- 531,536 ---- } + EXPORT_SYMBOL(get_user_pages); + /* * Force in an entire range of pages from the current process's user VA, *************** *** 587,590 **** --- 591,596 ---- * size of the kiobuf, so we have to stop marking pages dirty once the * requested byte count has been reached. + * + * Must be called from process context - set_page_dirty() takes VFS locks. */ *************** *** 604,608 **** if (!PageReserved(page)) ! SetPageDirty(page); remaining -= (PAGE_SIZE - offset); --- 610,614 ---- if (!PageReserved(page)) ! set_page_dirty(page); remaining -= (PAGE_SIZE - offset); *************** *** 1500,1502 **** --- 1506,1529 ---- len, write, 0, NULL, NULL); return ret == len ? 0 : -1; + } + + struct page * vmalloc_to_page(void * vmalloc_addr) + { + unsigned long addr = (unsigned long) vmalloc_addr; + struct page *page = NULL; + pmd_t *pmd; + pte_t *pte; + pgd_t *pgd; + + pgd = pgd_offset_k(addr); + if (!pgd_none(*pgd)) { + pmd = pmd_offset(pgd, addr); + if (!pmd_none(*pmd)) { + pte = pte_offset(pmd, addr); + if (pte_present(*pte)) { + page = pte_page(*pte); + } + } + } + return page; } Index: mmap.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/mmap.c,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -r1.8 -r1.9 *** mmap.c 28 Apr 2002 20:51:34 -0000 1.8 --- mmap.c 19 May 2003 01:38:48 -0000 1.9 *************** *** 47,50 **** --- 47,51 ---- int sysctl_overcommit_memory; + int max_map_count = DEFAULT_MAX_MAP_COUNT; /* Check that a process has enough memory to allocate a *************** *** 420,424 **** /* Too many mappings? */ ! if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; --- 421,425 ---- /* Too many mappings? */ ! if (mm->map_count > max_map_count) return -ENOMEM; *************** *** 485,489 **** /* Clear old maps */ - error = -ENOMEM; munmap_back: vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); --- 486,489 ---- *************** *** 555,559 **** * f_op->mmap method. -DaveM */ ! addr = vma->vm_start; vma_link(mm, vma, prev, rb_link, rb_parent); --- 555,582 ---- * f_op->mmap method. -DaveM */ ! if (addr != vma->vm_start) { ! /* ! * It is a bit too late to pretend changing the virtual ! * area of the mapping, we just corrupted userspace ! * in the do_munmap, so FIXME (not in 2.4 to avoid breaking ! * the driver API). ! */ ! struct vm_area_struct * stale_vma; ! /* Since addr changed, we rely on the mmap op to prevent ! * collisions with existing vmas and just use find_vma_prepare ! * to update the tree pointers. ! */ ! addr = vma->vm_start; ! stale_vma = find_vma_prepare(mm, addr, &prev, ! &rb_link, &rb_parent); ! /* ! * Make sure the lowlevel driver did its job right. ! */ ! 
if (unlikely(stale_vma && stale_vma->vm_start < vma->vm_end)) { ! printk(KERN_ERR "buggy mmap operation: [<%p>]\n", ! file ? file->f_op->mmap : NULL); ! BUG(); ! } ! } vma_link(mm, vma, prev, rb_link, rb_parent); *************** *** 926,930 **** /* If we'll make "hole", check the vm areas limit */ if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len) ! && mm->map_count >= MAX_MAP_COUNT) return -ENOMEM; --- 949,953 ---- /* If we'll make "hole", check the vm areas limit */ if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len) ! && mm->map_count >= max_map_count) return -ENOMEM; *************** *** 1047,1051 **** return -ENOMEM; ! if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; --- 1070,1074 ---- return -ENOMEM; ! if (mm->map_count > max_map_count) return -ENOMEM; *************** *** 1053,1060 **** return -ENOMEM; ! flags = calc_vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, ! MAP_FIXED|MAP_PRIVATE) | mm->def_flags; ! ! flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; /* Can we just expand an old anonymous mapping? */ --- 1076,1080 ---- return -ENOMEM; ! flags = VM_DATA_DEFAULT_FLAGS | mm->def_flags; /* Can we just expand an old anonymous mapping? */ *************** *** 1140,1144 **** mpnt = next; } - flush_tlb_mm(mm); /* This is just debugging */ --- 1160,1163 ---- *************** *** 1147,1150 **** --- 1166,1171 ---- clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD); + + flush_tlb_mm(mm); } Index: oom_kill.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/oom_kill.c,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -r1.8 -r1.9 *** oom_kill.c 14 Jan 2002 12:05:08 -0000 1.8 --- oom_kill.c 19 May 2003 01:38:48 -0000 1.9 *************** *** 112,117 **** /* * Simple selection loop. We chose the process with the highest ! * number of 'points'. We need the locks to make sure that the ! * list of task structs doesn't change while we look the other way. * * (not docbooked, we don't want this one cluttering up the manual) --- 112,116 ---- /* * Simple selection loop. We chose the process with the highest ! * number of 'points'. We expect the caller will lock the tasklist. * * (not docbooked, we don't want this one cluttering up the manual) *************** *** 123,127 **** struct task_struct *chosen = NULL; - read_lock(&tasklist_lock); for_each_task(p) { if (p->pid) { --- 122,125 ---- *************** *** 133,137 **** } } - read_unlock(&tasklist_lock); return chosen; } --- 131,134 ---- *************** *** 172,176 **** static void oom_kill(void) { ! struct task_struct *p = select_bad_process(), *q; /* Found nothing?!?! Either we hang forever, or we panic. */ --- 169,176 ---- static void oom_kill(void) { ! struct task_struct *p, *q; ! ! read_lock(&tasklist_lock); ! p = select_bad_process(); /* Found nothing?!?! Either we hang forever, or we panic. */ *************** *** 179,185 **** /* kill all processes that share the ->mm (i.e. all threads) */ - read_lock(&tasklist_lock); for_each_task(q) { ! if(q->mm == p->mm) oom_kill_task(q); } read_unlock(&tasklist_lock); --- 179,185 ---- /* kill all processes that share the ->mm (i.e. all threads) */ for_each_task(q) { ! if (q->mm == p->mm) ! oom_kill_task(q); } read_unlock(&tasklist_lock); *************** *** 190,195 **** * for more memory. */ ! current->policy |= SCHED_YIELD; ! schedule(); return; } --- 190,194 ---- * for more memory. */ ! yield(); return; } *************** *** 200,204 **** void out_of_memory(void) { ! 
static unsigned long first, last, count; unsigned long now, since; --- 199,203 ---- void out_of_memory(void) { ! static unsigned long first, last, count, lastkill; unsigned long now, since; *************** *** 243,248 **** --- 242,257 ---- /* + * If we just killed a process, wait a while + * to give that task a chance to exit. This + * avoids killing multiple processes needlessly. + */ + since = now - lastkill; + if (since < HZ*5) + return; + + /* * Ok, really out of memory. Kill something. */ + lastkill = now; oom_kill(); Index: page_alloc.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/page_alloc.c,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -r1.26 -r1.27 *** page_alloc.c 29 Nov 2002 21:23:02 -0000 1.26 --- page_alloc.c 19 May 2003 01:38:48 -0000 1.27 *************** *** 2,5 **** --- 2,8 ---- * linux/mm/page_alloc.c * + * Manages the free list, the system allocates free pages here. + * Note that kmalloc() lives in slab.c + * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie *************** *** 18,22 **** #include <linux/bootmem.h> #include <linux/slab.h> ! #include <linux/compiler.h> #include <linux/comp_cache.h> --- 21,25 ---- #include <linux/bootmem.h> #include <linux/slab.h> ! #include <linux/module.h> #include <linux/comp_cache.h> *************** *** 24,31 **** int nr_active_pages; int nr_inactive_pages; ! struct list_head inactive_list; ! struct list_head active_list; pg_data_t *pgdat_list; static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; #ifdef CONFIG_COMP_CACHE --- 27,43 ---- int nr_active_pages; int nr_inactive_pages; ! LIST_HEAD(inactive_list); ! LIST_HEAD(active_list); pg_data_t *pgdat_list; + /* + * + * The zone_table array is used to look up the address of the + * struct zone corresponding to a given zone number (ZONE_DMA, + * ZONE_NORMAL, or ZONE_HIGHMEM). + */ + zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES]; + EXPORT_SYMBOL(zone_table); + static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; #ifdef CONFIG_COMP_CACHE *************** *** 40,71 **** /* - * Free_page() adds the page to the free lists. This is optimized for - * fast normal cases (no error jumps taken normally). - * - * The way to optimize jumps for gcc-2.2.2 is to: - * - select the "normal" case and put it inside the if () { XXX } - * - no else-statements if you can avoid them - * - * With the above two rules, you get a straight-line execution path - * for the normal case, giving better asm-code. - */ - - #define memlist_init(x) INIT_LIST_HEAD(x) - #define memlist_add_head list_add - #define memlist_add_tail list_add_tail - #define memlist_del list_del - #define memlist_entry list_entry - #define memlist_next(x) ((x)->next) - #define memlist_prev(x) ((x)->prev) - - /* * Temporary debugging check. */ ! #define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->zone_start_mapnr) || (((x)-mem_map) >= (zone)->zone_start_mapnr+(zone)->size)) /* ! * Buddy system. Hairy. You really aren't expected to understand this * ! * Hint: -mask = 1+~mask */ --- 52,87 ---- /* * Temporary debugging check. */ ! #define BAD_RANGE(zone, page) \ ! ( \ ! (((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size)) \ ! || (((page) - mem_map) < (zone)->zone_start_mapnr) \ ! || ((zone) != page_zone(page)) \ ! ) /* ! * Freeing function for a buddy system allocator. ! * Contrary to prior comments, this is *NOT* hairy, and there ! 
* is no reason for anyone not to understand it. * ! * The concept of a buddy system is to maintain direct-mapped tables ! * (containing bit values) for memory blocks of various "orders". ! * The bottom level table contains the map for the smallest allocatable ! * units of memory (here, pages), and each level above it describes ! * pairs of units from the levels below, hence, "buddies". ! * At a high level, all that happens here is marking the table entry ! * at the bottom level available, and propagating the changes upward ! * as necessary, plus some accounting needed to play nicely with other ! * parts of the VM system. ! * At each level, we keep one bit for each pair of blocks, which ! * is set to 1 iff only one of the pair is allocated. So when we ! * are allocating or freeing one, we can derive the state of the ! * other. That is, if we allocate a small block, and both were ! * free, the remainder of the region must be split into blocks. ! * If a block is freed, and its buddy is also free, then this ! * triggers coalescing into a block of larger size. ! * ! * -- wli */ *************** *** 78,86 **** zone_t *zone; ! /* Yes, think what happens when other parts of the kernel take * a reference to a page in order to pin it for io. -ben */ ! if (PageLRU(page)) lru_cache_del(page); if (page->buffers) --- 94,106 ---- zone_t *zone; ! /* ! * Yes, think what happens when other parts of the kernel take * a reference to a page in order to pin it for io. -ben */ ! if (PageLRU(page)) { ! if (unlikely(in_interrupt())) ! BUG(); lru_cache_del(page); + } if (page->buffers) *************** *** 90,99 **** if (!VALID_PAGE(page)) BUG(); - if (PageSwapCache(page)) - BUG(); if (PageLocked(page)) BUG(); - if (PageLRU(page)) - BUG(); if (PageActive(page)) BUG(); --- 110,115 ---- *************** *** 104,108 **** back_local_freelist: ! zone = page->zone; mask = (~0UL) << order; --- 120,124 ---- back_local_freelist: ! zone = page_zone(page); mask = (~0UL) << order; *************** *** 131,134 **** --- 147,152 ---- /* * Move the buddy up one level. + * This code is taking advantage of the identity: + * -mask = 1+~mask */ buddy1 = base + (page_idx ^ -mask); *************** *** 139,143 **** BUG(); ! memlist_del(&buddy1->list); mask <<= 1; area++; --- 157,161 ---- BUG(); ! list_del(&buddy1->list); mask <<= 1; area++; *************** *** 145,149 **** page_idx &= mask; } ! memlist_add_head(&(base + page_idx)->list, &area->free_list); spin_unlock_irqrestore(&zone->lock, flags); --- 163,167 ---- page_idx &= mask; } ! list_add(&(base + page_idx)->list, &area->free_list); spin_unlock_irqrestore(&zone->lock, flags); *************** *** 175,179 **** high--; size >>= 1; ! memlist_add_head(&(page)->list, &(area)->free_list); MARK_USED(index, high, area); index += size; --- 193,197 ---- high--; size >>= 1; ! list_add(&(page)->list, &(area)->free_list); MARK_USED(index, high, area); index += size; *************** *** 197,209 **** do { head = &area->free_list; ! curr = memlist_next(head); if (curr != head) { unsigned int index; ! page = memlist_entry(curr, struct page, list); if (BAD_RANGE(zone,page)) BUG(); ! memlist_del(curr); index = page - zone->zone_mem_map; if (curr_order != MAX_ORDER-1) --- 215,227 ---- do { head = &area->free_list; ! curr = head->next; if (curr != head) { unsigned int index; ! page = list_entry(curr, struct page, list); if (BAD_RANGE(zone,page)) BUG(); ! 
list_del(curr); index = page - zone->zone_mem_map; if (curr_order != MAX_ORDER-1) *************** *** 253,257 **** current->flags |= PF_MEMALLOC | PF_FREE_PAGES; ! __freed = try_to_free_pages(classzone, gfp_mask, order); current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES); --- 271,275 ---- current->flags |= PF_MEMALLOC | PF_FREE_PAGES; ! __freed = try_to_free_pages_zone(classzone, gfp_mask); current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES); *************** *** 269,273 **** do { tmp = list_entry(entry, struct page, list); ! if (tmp->index == order && memclass(tmp->zone, classzone)) { list_del(entry); current->nr_local_pages--; --- 287,291 ---- do { tmp = list_entry(entry, struct page, list); ! if (tmp->index == order && memclass(page_zone(tmp), classzone)) { list_del(entry); current->nr_local_pages--; *************** *** 281,286 **** if (!VALID_PAGE(page)) BUG(); - if (PageSwapCache(page)) - BUG(); if (PageLocked(page)) BUG(); --- 299,302 ---- *************** *** 325,328 **** --- 341,346 ---- zone = zonelist->zones; classzone = *zone; + if (classzone == NULL) + return NULL; min = 1UL << order; for (;;) { *************** *** 408,414 **** /* Yield for kswapd, and try again */ ! current->policy |= SCHED_YIELD; ! __set_current_state(TASK_RUNNING); ! schedule(); goto rebalance; } --- 426,430 ---- /* Yield for kswapd, and try again */ ! yield(); goto rebalance; } *************** *** 457,470 **** unsigned int nr_free_pages (void) { ! unsigned int sum; zone_t *zone; - pg_data_t *pgdat = pgdat_list; ! sum = 0; ! while (pgdat) { ! for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++) ! sum += zone->free_pages; ! pgdat = pgdat->node_next; ! } return sum; } --- 473,482 ---- unsigned int nr_free_pages (void) { ! unsigned int sum = 0; zone_t *zone; ! for_each_zone(zone) ! sum += zone->free_pages; ! return sum; } *************** *** 475,482 **** unsigned int nr_free_buffer_pages (void) { ! pg_data_t *pgdat = pgdat_list; unsigned int sum = 0; ! do { zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK); zone_t **zonep = zonelist->zones; --- 487,494 ---- unsigned int nr_free_buffer_pages (void) { ! pg_data_t *pgdat; unsigned int sum = 0; ! for_each_pgdat(pgdat) { zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK); zone_t **zonep = zonelist->zones; *************** *** 489,495 **** sum += size - high; } ! ! pgdat = pgdat->node_next; ! } while (pgdat); return sum; --- 501,505 ---- sum += size - high; } ! } return sum; *************** *** 499,509 **** unsigned int nr_free_highpages (void) { ! pg_data_t *pgdat = pgdat_list; unsigned int pages = 0; ! while (pgdat) { pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; ! pgdat = pgdat->node_next; ! } return pages; } --- 509,518 ---- unsigned int nr_free_highpages (void) { ! pg_data_t *pgdat; unsigned int pages = 0; ! for_each_pgdat(pgdat) pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; ! return pages; } *************** *** 560,565 **** nr = 0; for (;;) { ! curr = memlist_next(curr); ! if (curr == head) break; nr++; --- 569,573 ---- nr = 0; for (;;) { ! if ((curr = curr->next) == head) break; nr++; *************** *** 631,634 **** --- 639,684 ---- } + /* + * Helper functions to size the waitqueue hash table. + * Essentially these want to choose hash table sizes sufficiently + * large so that collisions trying to wait on pages are rare. + * But in fact, the number of active page waitqueues on typical + * systems is ridiculously low, less than 200. 
So this is even + * conservative, even though it seems large. + * + * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to + * waitqueues, i.e. the size of the waitq table given the number of pages. + */ + #define PAGES_PER_WAITQUEUE 256 + + static inline unsigned long wait_table_size(unsigned long pages) + { + unsigned long size = 1; + + pages /= PAGES_PER_WAITQUEUE; + + while (size < pages) + size <<= 1; + + /* + * Once we have dozens or even hundreds of threads sleeping + * on IO we've got bigger problems than wait queue collision. + * Limit the size of the wait table to a reasonable size. + */ + size = min(size, 4096UL); + + return size; + } + + /* + * This is an integer logarithm so that shifts can be used later + * to extract the more random high bits from the multiplicative + * hash function before the remainder is taken. + */ + static inline unsigned long wait_table_bits(unsigned long size) + { + return ffz(~size); + } + #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) *************** *** 682,686 **** unsigned long *zholes_size, struct page *lmem_map) { - struct page *p; unsigned long i, j; unsigned long map_size; --- 732,735 ---- *************** *** 703,709 **** printk("On node %d totalpages: %lu\n", nid, realtotalpages); - INIT_LIST_HEAD(&active_list); - INIT_LIST_HEAD(&inactive_list); - /* * Some architectures (with lots of mem and discontinous memory --- 752,755 ---- *************** *** 725,740 **** pgdat->nr_zones = 0; - /* - * Initially all pages are reserved - free ones are freed - * up by free_all_bootmem() once the early boot process is - * done. - */ - for (p = lmem_map; p < lmem_map + totalpages; p++) { - set_page_count(p, 0); - SetPageReserved(p); - init_waitqueue_head(&p->wait); - memlist_init(&p->list); - } - offset = lmem_map - mem_map; for (j = 0; j < MAX_NR_ZONES; j++) { --- 771,774 ---- *************** *** 743,746 **** --- 777,781 ---- unsigned long size, realsize; + zone_table[nid * MAX_NR_ZONES + j] = zone; realsize = size = zones_size[j]; if (zholes_size) *************** *** 757,760 **** --- 792,809 ---- continue; + /* + * The per-page waitqueue mechanism uses hashed waitqueues + * per zone. + */ + zone->wait_table_size = wait_table_size(size); + zone->wait_table_shift = + BITS_PER_LONG - wait_table_bits(zone->wait_table_size); + zone->wait_table = (wait_queue_head_t *) + alloc_bootmem_node(pgdat, zone->wait_table_size + * sizeof(wait_queue_head_t)); + + for(i = 0; i < zone->wait_table_size; ++i) + init_waitqueue_head(zone->wait_table + i); + pgdat->nr_zones = j+1; *************** *** 775,783 **** printk("BUG: wrong zone alignment, it will crash\n"); for (i = 0; i < size; i++) { struct page *page = mem_map + offset + i; ! page->zone = zone; if (j != ZONE_HIGHMEM) ! page->virtual = __va(zone_start_paddr); zone_start_paddr += PAGE_SIZE; } --- 824,840 ---- printk("BUG: wrong zone alignment, it will crash\n"); + /* + * Initially all pages are reserved - free ones are freed + * up by free_all_bootmem() once the early boot process is + * done. Non-atomic initialization, single-pass. + */ for (i = 0; i < size; i++) { struct page *page = mem_map + offset + i; ! set_page_zone(page, nid * MAX_NR_ZONES + j); ! set_page_count(page, 0); ! SetPageReserved(page); ! INIT_LIST_HEAD(&page->list); if (j != ZONE_HIGHMEM) ! set_page_address(page, __va(zone_start_paddr)); zone_start_paddr += PAGE_SIZE; } *************** *** 787,791 **** unsigned long bitmap_size; ! 
memlist_init(&zone->free_area[i].free_list); if (i == MAX_ORDER-1) { zone->free_area[i].map = NULL; --- 844,848 ---- unsigned long bitmap_size; ! INIT_LIST_HEAD(&zone->free_area[i].free_list); if (i == MAX_ORDER-1) { zone->free_area[i].map = NULL; Index: page_io.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/page_io.c,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -r1.6 -r1.7 *** page_io.c 10 Sep 2002 16:43:15 -0000 1.6 --- page_io.c 19 May 2003 01:38:49 -0000 1.7 *************** *** 73,81 **** /* block_size == PAGE_SIZE/zones_used */ brw_page(rw, page, dev, zones, block_size); - - /* Note! For consistency we do all of the logic, - * decrementing the page count, and unlocking the page in the - * swap lock map - in the IO completion handler. - */ return 1; } --- 73,76 ---- *************** *** 100,105 **** if (!PageSwapCache(page)) PAGE_BUG(page); - if (page->mapping != &swapper_space) - PAGE_BUG(page); if (!rw_swap_page_base(rw, entry, page)) UnlockPage(page); --- 95,98 ---- *************** *** 117,129 **** if (!PageLocked(page)) PAGE_BUG(page); - if (PageSwapCache(page)) - PAGE_BUG(page); if (page->mapping) PAGE_BUG(page); /* needs sync_page to wait I/O completation */ page->mapping = &swapper_space; ! if (!rw_swap_page_base(rw, entry, page)) ! UnlockPage(page); ! wait_on_page(page); page->mapping = NULL; } --- 110,122 ---- if (!PageLocked(page)) PAGE_BUG(page); if (page->mapping) PAGE_BUG(page); /* needs sync_page to wait I/O completation */ page->mapping = &swapper_space; ! if (rw_swap_page_base(rw, entry, page)) ! lock_page(page); ! if (!block_flushpage(page, 0)) ! PAGE_BUG(page); page->mapping = NULL; + UnlockPage(page); } Index: shmem.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/shmem.c,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -r1.22 -r1.23 *** shmem.c 10 Sep 2002 16:43:16 -0000 1.22 --- shmem.c 19 May 2003 01:38:49 -0000 1.23 *************** *** 36,39 **** --- 36,47 ---- #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) + #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) + + #define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * (ENTRIES_PER_PAGE/2) * (ENTRIES_PER_PAGE+1)) + #define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) + #define VM_ACCT(size) (((size) + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT) + + /* Pretend that each entry is of this size in directory's i_size */ + #define BOGO_DIRENT_SIZE 20 #define SHMEM_SB(sb) (&sb->u.shmem_sb) *************** *** 43,47 **** static struct file_operations shmem_file_operations; static struct inode_operations shmem_inode_operations; - static struct file_operations shmem_dir_operations; static struct inode_operations shmem_dir_inode_operations; static struct vm_operations_struct shmem_vm_ops; --- 51,54 ---- *************** *** 51,55 **** atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */ ! #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) /* --- 58,62 ---- atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */ ! 
static struct page *shmem_getpage_locked(struct shmem_inode_info *, struct inode *, unsigned long); /* *************** *** 128,134 **** * +-> 52-55 */ - - #define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * ENTRIES_PER_PAGE/2*(ENTRIES_PER_PAGE+1)) - static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index, unsigned long page) { --- 135,138 ---- *************** *** 183,187 **** swp_entry_t * res; ! if (index >= SHMEM_MAX_BLOCKS) return ERR_PTR(-EFBIG); --- 187,191 ---- swp_entry_t * res; ! if (index >= SHMEM_MAX_INDEX) return ERR_PTR(-EFBIG); *************** *** 315,318 **** --- 319,323 ---- { unsigned long index; + unsigned long partial; unsigned long freed = 0; struct shmem_inode_info * info = SHMEM_I(inode); *************** *** 322,325 **** --- 327,352 ---- spin_lock (&info->lock); index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + partial = inode->i_size & ~PAGE_CACHE_MASK; + + if (partial) { + swp_entry_t *entry = shmem_swp_entry(info, index-1, 0); + struct page *page; + /* + * This check is racy: it's faintly possible that page + * was assigned to swap during truncate_inode_pages, + * and now assigned to file; but better than nothing. + */ + if (!IS_ERR(entry) && entry->val) { + spin_unlock(&info->lock); + page = shmem_getpage_locked(info, inode, index-1); + if (!IS_ERR(page)) { + memclear_highpage_flush(page, partial, + PAGE_CACHE_SIZE - partial); + UnlockPage(page); + page_cache_release(page); + } + spin_lock(&info->lock); + } + } while (index < info->next_index) *************** *** 336,344 **** struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); ! inode->i_size = 0; ! if (inode->i_op->truncate == shmem_truncate){ spin_lock (&shmem_ilock); list_del (&SHMEM_I(inode)->list); spin_unlock (&shmem_ilock); shmem_truncate (inode); } --- 363,371 ---- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); ! if (inode->i_op->truncate == shmem_truncate) { spin_lock (&shmem_ilock); list_del (&SHMEM_I(inode)->list); spin_unlock (&shmem_ilock); + inode->i_size = 0; shmem_truncate (inode); } *************** *** 349,374 **** } ! static int shmem_clear_swp (swp_entry_t entry, swp_entry_t *ptr, int size) { swp_entry_t *test; ! for (test = ptr; test < ptr + size; test++) { ! if (test->val == entry.val) { ! swap_free (entry); ! *test = (swp_entry_t) {0}; return test - ptr; - } } return -1; } ! static int shmem_unuse_inode (struct shmem_inode_info *info, swp_entry_t entry, struct page *page) { swp_entry_t *ptr; unsigned long idx; int offset; ! idx = 0; spin_lock (&info->lock); ! offset = shmem_clear_swp (entry, info->i_direct, SHMEM_NR_DIRECT); if (offset >= 0) goto found; --- 376,403 ---- } ! static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *ptr, swp_entry_t *eptr) ! { swp_entry_t *test; ! for (test = ptr; test < eptr; test++) { ! if (test->val == entry.val) return test - ptr; } return -1; } ! static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) { swp_entry_t *ptr; unsigned long idx; int offset; ! idx = 0; + ptr = info->i_direct; spin_lock (&info->lock); ! offset = info->next_index; ! if (offset > SHMEM_NR_DIRECT) ! offset = SHMEM_NR_DIRECT; ! offset = shmem_find_swp(entry, ptr, ptr + offset); if (offset >= 0) goto found; *************** *** 379,383 **** if (IS_ERR(ptr)) continue; ! offset = shmem_clear_swp (entry, ptr, ENTRIES_PER_PAGE); if (offset >= 0) goto found; --- 408,415 ---- if (IS_ERR(ptr)) continue; ! offset = info->next_index - idx; ! if (offset > ENTRIES_PER_PAGE) ! 
offset = ENTRIES_PER_PAGE; ! offset = shmem_find_swp(entry, ptr, ptr + offset); if (offset >= 0) goto found; *************** *** 387,391 **** found: if (PageCompressed(page)) ! decompress_swap_cache_page(page); delete_from_swap_cache(page); add_to_page_cache(page, info->inode->i_mapping, offset + idx); --- 419,425 ---- found: if (PageCompressed(page)) ! decompress_swap_cache_page(page); ! swap_free(entry); ! ptr[offset] = (swp_entry_t) {0}; delete_from_swap_cache(page); add_to_page_cache(page, info->inode->i_mapping, offset + idx); *************** *** 398,402 **** /* ! * unuse_shmem() search for an eventually swapped out shmem page. */ void shmem_unuse(swp_entry_t entry, struct page *page) --- 432,436 ---- /* ! * shmem_unuse() search for an eventually swapped out shmem page. */ void shmem_unuse(swp_entry_t entry, struct page *page) *************** *** 409,414 **** info = list_entry(p, struct shmem_inode_info, list); ! if (shmem_unuse_inode(info, entry, page)) break; } spin_unlock (&shmem_ilock); --- 443,452 ---- info = list_entry(p, struct shmem_inode_info, list); ! if (info->swapped && shmem_unuse_inode(info, entry, page)) { ! /* move head to start search for next from here */ ! list_del(&shmem_inodes); ! list_add_tail(&shmem_inodes, p); break; + } } spin_unlock (&shmem_ilock); *************** *** 531,535 **** /* Look it up and read it in.. */ ! page = find_get_page(&swapper_space, entry->val); if (!page) { swp_entry_t swap = *entry; --- 569,573 ---- /* Look it up and read it in.. */ ! page = lookup_swap_cache(*entry); if (!page) { swp_entry_t swap = *entry; *************** *** 588,591 **** --- 626,630 ---- return ERR_PTR(-ENOMEM); clear_highpage(page); + flush_dcache_page(page); inode->i_blocks += BLOCKS_PER_PAGE; add_to_page_cache (page, mapping, idx); *************** *** 707,717 **** inode->i_fop = &shmem_file_operations; spin_lock (&shmem_ilock); ! list_add (&SHMEM_I(inode)->list, &shmem_inodes); spin_unlock (&shmem_ilock); break; case S_IFDIR: inode->i_nlink++; inode->i_op = &shmem_dir_inode_operations; ! inode->i_fop = &shmem_dir_operations; break; case S_IFLNK: --- 746,758 ---- inode->i_fop = &shmem_file_operations; spin_lock (&shmem_ilock); ! list_add_tail(&info->list, &shmem_inodes); spin_unlock (&shmem_ilock); break; case S_IFDIR: inode->i_nlink++; + /* Some things misbehave if size == 0 on a directory */ + inode->i_size = 2 * BOGO_DIRENT_SIZE; inode->i_op = &shmem_dir_inode_operations; ! inode->i_fop = &dcache_dir_ops; break; case S_IFLNK: *************** *** 884,888 **** status = -EFAULT; ClearPageUptodate(page); - kunmap(page); goto unlock; } --- 925,928 ---- *************** *** 979,983 **** buf->f_ffree = sbinfo->free_inodes; spin_unlock (&sbinfo->stat_lock); ! buf->f_namelen = 255; return 0; } --- 1019,1023 ---- buf->f_ffree = sbinfo->free_inodes; spin_unlock (&sbinfo->stat_lock); ! 
buf->f_namelen = NAME_MAX; return 0; } *************** *** 1001,1006 **** int error = -ENOSPC; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; if (inode) { d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ --- 1041,1047 ---- int error = -ENOSPC; if (inode) { + dir->i_size += BOGO_DIRENT_SIZE; + dir->i_ctime = dir->i_mtime = CURRENT_TIME; d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ *************** *** 1035,1038 **** --- 1076,1080 ---- return -EPERM; + dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inode->i_nlink++; *************** *** 1079,1082 **** --- 1121,1126 ---- { struct inode *inode = dentry->d_inode; + + dir->i_size -= BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inode->i_nlink--; *************** *** 1102,1123 **** static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry) { ! int error = -ENOTEMPTY; ! if (shmem_empty(new_dentry)) { ! struct inode *inode = new_dentry->d_inode; ! if (inode) { ! inode->i_ctime = CURRENT_TIME; ! inode->i_nlink--; ! dput(new_dentry); ! } ! error = 0; ! old_dentry->d_inode->i_ctime = old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; } ! return error; } static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname) { - int error; int len; struct inode *inode; --- 1146,1174 ---- static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry) { ! struct inode *inode = old_dentry->d_inode; ! int they_are_dirs = S_ISDIR(inode->i_mode); ! if (!shmem_empty(new_dentry)) ! return -ENOTEMPTY; ! ! if (new_dentry->d_inode) { ! (void) shmem_unlink(new_dir, new_dentry); ! if (they_are_dirs) ! old_dir->i_nlink--; ! } else if (they_are_dirs) { ! old_dir->i_nlink--; ! new_dir->i_nlink++; } ! ! old_dir->i_size -= BOGO_DIRENT_SIZE; ! new_dir->i_size += BOGO_DIRENT_SIZE; ! old_dir->i_ctime = old_dir->i_mtime = ! new_dir->i_ctime = new_dir->i_mtime = ! inode->i_ctime = CURRENT_TIME; ! return 0; } static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname) { int len; struct inode *inode; *************** *** 1126,1138 **** struct shmem_inode_info * info; - error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0); - if (error) - return error; - len = strlen(symname) + 1; if (len > PAGE_CACHE_SIZE) return -ENAMETOOLONG; ! ! inode = dentry->d_inode; info = SHMEM_I(inode); inode->i_size = len-1; --- 1177,1188 ---- struct shmem_inode_info * info; len = strlen(symname) + 1; if (len > PAGE_CACHE_SIZE) return -ENAMETOOLONG; ! ! inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); ! if (!inode) ! return -ENOSPC; ! 
info = SHMEM_I(inode); inode->i_size = len-1; *************** *** 1142,1154 **** inode->i_op = &shmem_symlink_inline_operations; } else { - spin_lock (&shmem_ilock); - list_add (&info->list, &shmem_inodes); - spin_unlock (&shmem_ilock); down(&info->sem); page = shmem_getpage_locked(info, inode, 0); if (IS_ERR(page)) { up(&info->sem); return PTR_ERR(page); } kaddr = kmap(page); memcpy(kaddr, symname, len); --- 1192,1206 ---- inode->i_op = &shmem_symlink_inline_operations; } else { down(&info->sem); page = shmem_getpage_locked(info, inode, 0); if (IS_ERR(page)) { up(&info->sem); + iput(inode); return PTR_ERR(page); } + inode->i_op = &shmem_symlink_inode_operations; + spin_lock (&shmem_ilock); + list_add_tail(&info->list, &shmem_inodes); + spin_unlock (&shmem_ilock); kaddr = kmap(page); memcpy(kaddr, symname, len); *************** *** 1158,1164 **** page_cache_release(page); up(&info->sem); - inode->i_op = &shmem_symlink_inode_operations; } dir->i_ctime = dir->i_mtime = CURRENT_TIME; return 0; } --- 1210,1218 ---- page_cache_release(page); up(&info->sem); } + dir->i_size += BOGO_DIRENT_SIZE; dir->i_ctime = dir->i_mtime = CURRENT_TIME; + d_instantiate(dentry, inode); + dget(dentry); return 0; } *************** *** 1321,1325 **** sbinfo->max_inodes = inodes; sbinfo->free_inodes = inodes; ! sb->s_maxbytes = (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; --- 1375,1379 ---- sbinfo->max_inodes = inodes; sbinfo->free_inodes = inodes; ! sb->s_maxbytes = SHMEM_MAX_BYTES; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; *************** *** 1360,1371 **** }; - static struct file_operations shmem_dir_operations = { - read: generic_read_dir, - readdir: dcache_readdir, - #ifdef CONFIG_TMPFS - fsync: shmem_sync_file, - #endif - }; - static struct inode_operations shmem_dir_inode_operations = { #ifdef CONFIG_TMPFS --- 1414,1417 ---- *************** *** 1463,1470 **** int vm_enough_memory(long pages); ! if (size > (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT) return ERR_PTR(-EINVAL); ! if (!vm_enough_memory((size) >> PAGE_CACHE_SHIFT)) return ERR_PTR(-ENOMEM); --- 1509,1516 ---- int vm_enough_memory(long pages); ! if (size > SHMEM_MAX_BYTES) return ERR_PTR(-EINVAL); ! if (!vm_enough_memory(VM_ACCT(size))) return ERR_PTR(-ENOMEM); *************** *** 1488,1498 **** d_instantiate(dentry, inode); ! dentry->d_inode->i_size = size; ! shmem_truncate(inode); file->f_vfsmnt = mntget(shm_mnt); file->f_dentry = dentry; file->f_op = &shmem_file_operations; file->f_mode = FMODE_WRITE | FMODE_READ; - inode->i_nlink = 0; /* It is unlinked */ return(file); --- 1534,1543 ---- d_instantiate(dentry, inode); ! inode->i_size = size; ! inode->i_nlink = 0; /* It is unlinked */ file->f_vfsmnt = mntget(shm_mnt); file->f_dentry = dentry; file->f_op = &shmem_file_operations; file->f_mode = FMODE_WRITE | FMODE_READ; return(file); *************** *** 1503,1506 **** --- 1548,1552 ---- return ERR_PTR(error); } + /* * shmem_zero_setup - setup a shared anonymous mapping Index: swap_state.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/swap_state.c,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -r1.42 -r1.43 *** swap_state.c 6 Dec 2002 19:29:21 -0000 1.42 --- swap_state.c 19 May 2003 01:38:49 -0000 1.43 *************** *** 127,131 **** BUG(); ! block_flushpage(page, 0); entry.val = page->index; --- 127,132 ---- BUG(); ! 
if (unlikely(!block_flushpage(page, 0))) ! BUG(); /* an anonymous page cannot have page->buffers set */ entry.val = page->index; Index: swapfile.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/swapfile.c,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -r1.38 -r1.39 *** swapfile.c 6 Dec 2002 19:29:21 -0000 1.38 --- swapfile.c 19 May 2003 01:38:49 -0000 1.39 *************** *** 15,19 **** #include <linux/pagemap.h> #include <linux/shm.h> - #include <linux/compiler.h> #include <linux/comp_cache.h> --- 15,18 ---- *************** *** 944,956 **** * Note shmem_unuse already deleted its from swap cache. */ ! swcount = swap_map_count(*swap_map); ! if ((swcount > 0) != PageSwapCache(page)) ! BUG(); ! if ((swcount > 1) && PageDirty(page)) { rw_swap_page(WRITE, page); lock_page(page); } ! if (PageCompressed(page)) ! decompress_swap_cache_page(page); if (PageSwapCache(page)) delete_from_swap_cache(page); --- 943,952 ---- * Note shmem_unuse already deleted its from swap cache. */ ! if ((swap_map_count(*swap_map) > 1) && PageDirty(page) && PageSwapCache(page)) { rw_swap_page(WRITE, page); lock_page(page); } ! if (PageCompressed(page)) ! decompress_swap_cache_page(page); if (PageSwapCache(page)) delete_from_swap_cache(page); Index: vmscan.c =================================================================== RCS file: /cvsroot/linuxcompressed/linux/mm/vmscan.c,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -r1.44 -r1.45 *** vmscan.c 22 Nov 2002 16:01:36 -0000 1.44 --- vmscan.c 19 May 2003 01:38:50 -0000 1.45 *************** *** 2,5 **** --- 2,8 ---- * linux/mm/vmscan.c * + * The pageout daemon, decides which pages to evict (swap out) and + * does the actual work of freeing them. + * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * *************** *** 21,25 **** #include <linux/highmem.h> #include <linux/file.h> - #include <linux/compiler.h> #include <linux/comp_cache.h> --- 24,27 ---- *************** *** 60,64 **** /* Don't bother replenishing zones not under pressure.. */ ! if (!memclass(page->zone, classzone)) return 0; --- 62,66 ---- /* Don't bother replenishing zones not under pressure.. */ ! if (!memclass(page_zone(page), classzone)) return 0; *************** *** 241,246 **** end = vma->vm_end; ! if (address >= end) ! BUG(); do { count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone); --- 243,247 ---- end = vma->vm_end; ! BUG_ON(address >= end); do { count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone); *************** *** 361,368 **** page = list_entry(entry, struct page, lru); ! if (unlikely(!PageLRU(page))) ! BUG(); ! if (unlikely(PageActive(page))) ! BUG(); list_del(entry); --- 362,367 ---- page = list_entry(entry, struct page, lru); ! BUG_ON(!PageLRU(page)); ! BUG_ON(PageActive(page)); list_del(entry); *************** *** 376,380 **** continue; ! if (!memclass(page->zone, classzone)) continue; --- 375,379 ---- continue; ! if (!memclass(page_zone(page), classzone)) continue; *************** *** 643,647 **** } ! int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order) { int priority = DEF_PRIORITY; --- 642,646 ---- } ! 
int try_to_free_pages_zone(zone_t *classzone, unsigned int gfp_mask) { int priority = DEF_PRIORITY; *************** *** 663,666 **** --- 662,684 ---- } + int try_to_free_pages(unsigned int gfp_mask) + { + pg_data_t *pgdat; + zonelist_t *zonelist; + unsigned long pf_free_pages; + int error = 0; + + pf_free_pages = current->flags & PF_FREE_PAGES; + current->flags &= ~PF_FREE_PAGES; + + for_each_pgdat(pgdat) { + zonelist = pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK); + error |= try_to_free_pages_zone(zonelist->zones[0], gfp_mask); + } + + current->flags |= pf_free_pages; + return error; + } + DECLARE_WAIT_QUEUE_HEAD(kswapd_wait); *************** *** 689,693 **** if (!zone->need_balance) continue; ! if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) { zone->need_balance = 0; __set_current_state(TASK_INTERRUPTIBLE); --- 707,711 ---- if (!zone->need_balance) continue; ! if (!try_to_free_pages_zone(zone, GFP_KSWAPD)) { zone->need_balance = 0; __set_current_state(TASK_INTERRUPTIBLE); *************** *** 711,718 **** do { need_more_balance = 0; ! pgdat = pgdat_list; ! do need_more_balance |= kswapd_balance_pgdat(pgdat); - while ((pgdat = pgdat->node_next)); } while (need_more_balance); } --- 729,735 ---- do { need_more_balance = 0; ! ! for_each_pgdat(pgdat) need_more_balance |= kswapd_balance_pgdat(pgdat); } while (need_more_balance); } *************** *** 737,746 **** pg_data_t * pgdat; ! pgdat = pgdat_list; ! do { ! if (kswapd_can_sleep_pgdat(pgdat)) ! continue; ! return 0; ! } while ((pgdat = pgdat->node_next)); return 1; --- 754,761 ---- pg_data_t * pgdat; ! for_each_pgdat(pgdat) { ! if (!kswapd_can_sleep_pgdat(pgdat)) ! return 0; ! } return 1; |
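
For anyone reading the filemap.c and page_alloc.c hunks above, here is a minimal standalone sketch of the hashed page-waitqueue lookup this port pulls in from 2.4.20. It mirrors wait_table_size(), wait_table_bits() and the 32-bit multiplicative hash used by page_waitqueue(); it is an illustration only, not kernel code, and the main() harness, the example zone size and the example page address are made up.

#include <stdio.h>
#include <stdint.h>

/* Constants taken from the hunks above (32-bit case). */
#define GOLDEN_RATIO_PRIME  0x9e370001UL
#define PAGES_PER_WAITQUEUE 256

/* Mirror of wait_table_size(): smallest power of two >= pages/256, capped at 4096. */
static unsigned long wait_table_size(unsigned long pages)
{
	unsigned long size = 1;

	pages /= PAGES_PER_WAITQUEUE;
	while (size < pages)
		size <<= 1;
	if (size > 4096UL)
		size = 4096UL;
	return size;
}

/* Mirror of wait_table_bits(): log2 of the (power-of-two) table size. */
static unsigned long wait_table_bits(unsigned long size)
{
	unsigned long bits = 0;

	while ((size >>= 1) != 0)
		bits++;
	return bits;
}

/*
 * Hash a page address into a bucket index the way page_waitqueue() does on
 * a 32-bit box: multiply by the bit-sparse golden-ratio prime and keep the
 * top wait_table_bits() bits of the product.
 */
static unsigned long page_waitqueue_index(uint32_t page_addr, unsigned long table_size)
{
	unsigned long shift = 32 - wait_table_bits(table_size);
	uint32_t hash = page_addr * (uint32_t)GOLDEN_RATIO_PRIME;

	return hash >> shift;
}

int main(void)
{
	unsigned long zone_pages = 131072;	/* hypothetical 512 MB zone of 4 KB pages */
	unsigned long size = wait_table_size(zone_pages);
	uint32_t addr = 0xc1000020;		/* hypothetical struct page address */

	printf("wait table: %lu buckets for %lu pages\n", size, zone_pages);
	printf("page at 0x%lx hashes to bucket %lu\n",
	       (unsigned long)addr, page_waitqueue_index(addr, size));
	return 0;
}

The point of the bit-sparse prime is that the multiply can be replaced by shifts and adds on CPUs where multiplication is slow, while the right shift by wait_table_shift keeps the well-mixed high bits of the product as the bucket index, so colliding waiters (the "thundering herd" case described in the filemap.c comment) stay rare.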