[lc-checkins] CVS: linux/mm Makefile,1.5,1.6 filemap.c,1.42,1.43 memory.c,1.36,1.37 mmap.c,1.8,1.9 o
Update of /cvsroot/linuxcompressed/linux/mm
In directory sc8-pr-cvs1:/tmp/cvs-serv25395/mm
Modified Files:
Makefile filemap.c memory.c mmap.c oom_kill.c page_alloc.c
page_io.c shmem.c swap_state.c swapfile.c vmscan.c
Log Message:
o Port code to 2.4.20
Bug fix (?)
o Change checks in vswap.c to avoid oopses: the code now BUG()s
instead. Some of the checks were previously performed only after the
value had already been accessed (a sketch of the corrected pattern
follows below).
Note
o Virtual swap addresses are temporarily disabled, due to ongoing
debugging sessions related to the use of swap files instead of swap
partitions.
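
A minimal sketch of the kind of fix the "Bug fix (?)" item describes.
The names vswap_table, vswap_count and vswap_entry are hypothetical
stand-ins; the real identifiers live in the project's vswap.c, which is
not part of this diff:

#include <linux/kernel.h>       /* BUG() */

/* Hypothetical stand-ins for the real vswap.c data structures. */
extern struct vswap_entry **vswap_table;
extern unsigned long vswap_count;

struct vswap_entry *vswap_lookup(unsigned long index)
{
        /*
         * Old order: the table slot was read (and sometimes dereferenced)
         * first and only range-checked afterwards, so a bad index could
         * oops before the check ever ran.  New order: validate first and
         * BUG() loudly, which at least leaves a usable backtrace.
         */
        if (index >= vswap_count)
                BUG();
        return vswap_table[index];
}
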
Index: Makefile
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/Makefile,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** Makefile 12 Dec 2001 20:45:46 -0000 1.5
--- Makefile 19 May 2003 01:38:47 -0000 1.6
***************
*** 10,14 ****
O_TARGET := mm.o
! export-objs := shmem.o filemap.o
obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
--- 10,14 ----
O_TARGET := mm.o
! export-objs := shmem.o filemap.o memory.o page_alloc.o
obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
Index: filemap.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/filemap.c,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -r1.42 -r1.43
*** filemap.c 29 Nov 2002 21:23:02 -0000 1.42
--- filemap.c 19 May 2003 01:38:47 -0000 1.43
***************
*** 24,28 ****
#include <linux/mm.h>
#include <linux/iobuf.h>
- #include <linux/compiler.h>
#include <linux/comp_cache.h>
--- 24,27 ----
***************
*** 55,59 ****
! spinlock_t pagecache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
/*
* NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock
--- 54,58 ----
! spinlock_cacheline_t pagecache_lock_cacheline = {SPIN_LOCK_UNLOCKED};
/*
* NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock
***************
*** 65,69 ****
* pagecache_lock
*/
! spinlock_t pagemap_lru_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
#define CLUSTER_PAGES (1 << page_cluster)
--- 64,68 ----
* pagecache_lock
*/
! spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED};
#define CLUSTER_PAGES (1 << page_cluster)
***************
*** 122,126 ****
void __remove_inode_page(struct page *page)
{
! if (PageDirty(page)) BUG();
remove_page_from_inode_queue(page);
remove_page_from_hash_queue(page);
--- 121,126 ----
void __remove_inode_page(struct page *page)
{
! if (PageDirty(page) && !PageSwapCache(page))
! BUG();
remove_page_from_inode_queue(page);
remove_page_from_hash_queue(page);
***************
*** 156,164 ****
if (mapping) {
spin_lock(&pagecache_lock);
! list_del(&page->list);
! list_add(&page->list, &mapping->dirty_pages);
spin_unlock(&pagecache_lock);
! if (mapping->host)
mark_inode_dirty_pages(mapping->host);
#ifdef CONFIG_COMP_CACHE
--- 156,167 ----
if (mapping) {
spin_lock(&pagecache_lock);
! mapping = page->mapping;
! if (mapping) { /* may have been truncated */
! list_del(&page->list);
! list_add(&page->list, &mapping->dirty_pages);
! }
spin_unlock(&pagecache_lock);
! if (mapping && mapping->host)
mark_inode_dirty_pages(mapping->host);
#ifdef CONFIG_COMP_CACHE
***************
*** 582,586 ****
while (!list_empty(&mapping->dirty_pages)) {
! struct page *page = list_entry(mapping->dirty_pages.next, struct page, list);
list_del(&page->list);
--- 585,589 ----
while (!list_empty(&mapping->dirty_pages)) {
! struct page *page = list_entry(mapping->dirty_pages.prev, struct page, list);
list_del(&page->list);
***************
*** 816,819 ****
--- 819,882 ----
}
+ /*
+ * Knuth recommends primes in approximately golden ratio to the maximum
+ * integer representable by a machine word for multiplicative hashing.
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
+ *
+ * These primes are chosen to be bit-sparse, that is operations on
+ * them can use shifts and additions instead of multiplications for
+ * machines where multiplications are slow.
+ */
+ #if BITS_PER_LONG == 32
+ /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+ #define GOLDEN_RATIO_PRIME 0x9e370001UL
+ #elif BITS_PER_LONG == 64
+ /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
+ #define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL
+ #else
+ #error Define GOLDEN_RATIO_PRIME for your wordsize.
+ #endif
+
+ /*
+ * In order to wait for pages to become available there must be
+ * waitqueues associated with pages. By using a hash table of
+ * waitqueues where the bucket discipline is to maintain all
+ * waiters on the same queue and wake all when any of the pages
+ * become available, and for the woken contexts to check to be
+ * sure the appropriate page became available, this saves space
+ * at a cost of "thundering herd" phenomena during rare hash
+ * collisions.
+ */
+ static inline wait_queue_head_t *page_waitqueue(struct page *page)
+ {
+ const zone_t *zone = page_zone(page);
+ wait_queue_head_t *wait = zone->wait_table;
+ unsigned long hash = (unsigned long)page;
+
+ #if BITS_PER_LONG == 64
+ /* Sigh, gcc can't optimise this alone like it does for 32 bits. */
+ unsigned long n = hash;
+ n <<= 18;
+ hash -= n;
+ n <<= 33;
+ hash -= n;
+ n <<= 3;
+ hash += n;
+ n <<= 3;
+ hash -= n;
+ n <<= 4;
+ hash += n;
+ n <<= 2;
+ hash += n;
+ #else
+ /* On some cpus multiply is faster, on others gcc will do shifts */
+ hash *= GOLDEN_RATIO_PRIME;
+ #endif
+ hash >>= zone->wait_table_shift;
+
+ return &wait[hash];
+ }
+
/*
* Wait for a page to get unlocked.
***************
*** 822,832 ****
* ie with increased "page->count" so that the page won't
* go away during the wait..
*/
void ___wait_on_page(struct page *page)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
! add_wait_queue(&page->wait, &wait);
do {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
--- 885,911 ----
* ie with increased "page->count" so that the page won't
* go away during the wait..
+ *
+ * The waiting strategy is to get on a waitqueue determined
+ * by hashing. Waiters will then collide, and the newly woken
+ * task must then determine whether it was woken for the page
+ * it really wanted, and go back to sleep on the waitqueue if
+ * that wasn't it. With the waitqueue semantics, it never leaves
+ * the waitqueue unless it calls, so the loop moves forward one
+ * iteration every time there is
+ * (1) a collision
+ * and
+ * (2) one of the colliding pages is woken
+ *
+ * This is the thundering herd problem, but it is expected to
+ * be very rare due to the few pages that are actually being
+ * waited on at any given time and the quality of the hash function.
*/
void ___wait_on_page(struct page *page)
{
+ wait_queue_head_t *waitqueue = page_waitqueue(page);
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
! add_wait_queue(waitqueue, &wait);
do {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
***************
*** 836,852 ****
schedule();
} while (PageLocked(page));
! tsk->state = TASK_RUNNING;
! remove_wait_queue(&page->wait, &wait);
}
void unlock_page(struct page *page)
{
! clear_bit(PG_launder, &(page)->flags);
smp_mb__before_clear_bit();
if (!test_and_clear_bit(PG_locked, &(page)->flags))
BUG();
smp_mb__after_clear_bit();
! if (waitqueue_active(&(page)->wait))
! wake_up(&(page)->wait);
}
--- 915,946 ----
schedule();
} while (PageLocked(page));
! __set_task_state(tsk, TASK_RUNNING);
! remove_wait_queue(waitqueue, &wait);
}
+ /*
+ * unlock_page() is the other half of the story just above
+ * __wait_on_page(). Here a couple of quick checks are done
+ * and a couple of flags are set on the page, and then all
+ * of the waiters for all of the pages in the appropriate
+ * wait queue are woken.
+ */
void unlock_page(struct page *page)
{
! wait_queue_head_t *waitqueue = page_waitqueue(page);
! ClearPageLaunder(page);
smp_mb__before_clear_bit();
if (!test_and_clear_bit(PG_locked, &(page)->flags))
BUG();
smp_mb__after_clear_bit();
!
! /*
! * Although the default semantics of wake_up() are
! * to wake all, here the specific function is used
! * to make it even more explicit that a number of
! * pages are being waited on here.
! */
! if (waitqueue_active(waitqueue))
! wake_up_all(waitqueue);
}
***************
*** 857,864 ****
static void __lock_page(struct page *page)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
! add_wait_queue_exclusive(&page->wait, &wait);
for (;;) {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
--- 951,959 ----
static void __lock_page(struct page *page)
{
+ wait_queue_head_t *waitqueue = page_waitqueue(page);
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
! add_wait_queue_exclusive(waitqueue, &wait);
for (;;) {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
***************
*** 870,877 ****
break;
}
! tsk->state = TASK_RUNNING;
! remove_wait_queue(&page->wait, &wait);
}
-
/*
--- 965,971 ----
break;
}
! __set_task_state(tsk, TASK_RUNNING);
! remove_wait_queue(waitqueue, &wait);
}
/*
***************
*** 1091,1103 ****
/*
- * Returns locked page at given index in given cache, creating it if needed.
- */
- struct page *grab_cache_page(struct address_space *mapping, unsigned long index)
- {
- return find_or_create_page(mapping, index, mapping->gfp_mask);
- }
-
-
- /*
* Same as grab_cache_page, but do not wait if the page is unavailable.
* This is intended for speculative data generators, where the data can
--- 1185,1188 ----
***************
*** 1381,1388 ****
* Mark a page as having seen activity.
*
! * If it was already so marked, move it
! * to the active queue and drop the referenced
! * bit. Otherwise, just mark it for future
! * action..
*/
void mark_page_accessed(struct page *page)
--- 1466,1471 ----
* Mark a page as having seen activity.
*
! * If it was already so marked, move it to the active queue and drop
! * the referenced bit. Otherwise, just mark it for future action..
*/
void mark_page_accessed(struct page *page)
***************
*** 1391,1399 ****
activate_page(page);
ClearPageReferenced(page);
! return;
! }
!
! /* Mark the page referenced, AFTER checking for previous usage.. */
! SetPageReferenced(page);
}
--- 1474,1479 ----
activate_page(page);
ClearPageReferenced(page);
! } else
! SetPageReferenced(page);
}
***************
*** 1634,1637 ****
--- 1714,1718 ----
struct address_space * mapping = filp->f_dentry->d_inode->i_mapping;
struct inode * inode = mapping->host;
+ loff_t size = inode->i_size;
new_iobuf = 0;
***************
*** 1659,1662 ****
--- 1740,1746 ----
goto out_free;
+ if ((rw == READ) && (offset + count > size))
+ count = size - offset;
+
/*
* Flush to disk exclusively the _data_, metadata must remain
***************
*** 1689,1692 ****
--- 1773,1777 ----
count -= retval;
buf += retval;
+ /* warning: weird semantics here, we're reporting a read behind the end of the file */
progress += retval;
}
***************
*** 1778,1783 ****
size = inode->i_size;
if (pos < size) {
- if (pos + count > size)
- count = size - pos;
retval = generic_file_direct_IO(READ, filp, buf, count, pos);
if (retval > 0)
--- 1863,1866 ----
***************
*** 2307,2310 ****
--- 2390,2396 ----
struct file * file = vma->vm_file;
+ if ( (flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED) )
+ return -EBUSY;
+
if (file && (vma->vm_flags & VM_SHARED)) {
ret = filemap_sync(vma, start, end-start, flags);
***************
*** 2348,2351 ****
--- 2434,2440 ----
if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
goto out;
+ if ((flags & MS_ASYNC) && (flags & MS_SYNC))
+ goto out;
+
error = 0;
if (end == start)
***************
*** 2353,2357 ****
/*
* If the interval [start,end) covers some unmapped address ranges,
! * just ignore them, but return -EFAULT at the end.
*/
vma = find_vma(current->mm, start);
--- 2442,2446 ----
/*
* If the interval [start,end) covers some unmapped address ranges,
! * just ignore them, but return -ENOMEM at the end.
*/
vma = find_vma(current->mm, start);
***************
*** 2359,2368 ****
for (;;) {
/* Still start < end. */
! error = -EFAULT;
if (!vma)
goto out;
/* Here start < vma->vm_end. */
if (start < vma->vm_start) {
! unmapped_error = -EFAULT;
start = vma->vm_start;
}
--- 2448,2457 ----
for (;;) {
/* Still start < end. */
! error = -ENOMEM;
if (!vma)
goto out;
/* Here start < vma->vm_end. */
if (start < vma->vm_start) {
! unmapped_error = -ENOMEM;
start = vma->vm_start;
}
***************
*** 2512,2516 ****
/* This caps the number of vma's this process can own */
! if (vma->vm_mm->map_count > MAX_MAP_COUNT)
return -ENOMEM;
--- 2601,2605 ----
/* This caps the number of vma's this process can own */
! if (vma->vm_mm->map_count > max_map_count)
return -ENOMEM;
***************
*** 3077,3081 ****
err = -EFBIG;
! if (limit != RLIM_INFINITY) {
if (pos >= limit) {
send_sig(SIGXFSZ, current, 0);
--- 3166,3170 ----
err = -EFBIG;
! if (!S_ISBLK(inode->i_mode) && limit != RLIM_INFINITY) {
if (pos >= limit) {
send_sig(SIGXFSZ, current, 0);
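
Most of the filemap.c churn above is the stock 2.4.20 hashed page
waitqueue: the per-page wait queue is gone, and ___wait_on_page(),
unlock_page() and __lock_page() now share a small per-zone hash table
indexed by page_waitqueue(). The bucket choice is a plain multiplicative
hash by GOLDEN_RATIO_PRIME that keeps the high bits. Below is a
stand-alone user-space sketch of the 32-bit calculation, assuming a
256-entry table (the real size comes from wait_table_size() in
page_alloc.c); it is an illustration, not the kernel code itself:

#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_PRIME 0x9e370001U   /* 32-bit value from the hunk above */
#define WAIT_TABLE_SIZE    256U          /* assumed example size (power of two) */
#define WAIT_TABLE_SHIFT   (32 - 8)      /* 32 - log2(WAIT_TABLE_SIZE) */

static uint32_t wait_bucket(uint32_t page_addr)
{
        uint32_t hash = page_addr * GOLDEN_RATIO_PRIME; /* multiplicative hash */
        return hash >> WAIT_TABLE_SHIFT;                /* keep the mixed high bits */
}

int main(void)
{
        /* Adjacent struct pages (assumed 64 bytes apart here) spread across
         * buckets instead of clustering, which is what lets one small
         * per-zone table replace a waitqueue embedded in every page. */
        for (uint32_t p = 0xc1000000U; p < 0xc1000200U; p += 64)
                printf("page %#lx -> bucket %lu of %u\n",
                       (unsigned long)p, (unsigned long)wait_bucket(p),
                       WAIT_TABLE_SIZE);
        return 0;
}
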
Index: memory.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/memory.c,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -r1.36 -r1.37
*** memory.c 10 Sep 2002 16:43:12 -0000 1.36
--- memory.c 19 May 2003 01:38:48 -0000 1.37
***************
*** 45,48 ****
--- 45,49 ----
#include <linux/highmem.h>
#include <linux/pagemap.h>
+ #include <linux/module.h>
#include <linux/comp_cache.h>
***************
*** 53,56 ****
--- 54,58 ----
unsigned long max_mapnr;
unsigned long num_physpages;
+ unsigned long num_mappedpages;
void * high_memory;
struct page *highmem_start_page;
***************
*** 529,532 ****
--- 531,536 ----
}
+ EXPORT_SYMBOL(get_user_pages);
+
/*
* Force in an entire range of pages from the current process's user VA,
***************
*** 587,590 ****
--- 591,596 ----
* size of the kiobuf, so we have to stop marking pages dirty once the
* requested byte count has been reached.
+ *
+ * Must be called from process context - set_page_dirty() takes VFS locks.
*/
***************
*** 604,608 ****
if (!PageReserved(page))
! SetPageDirty(page);
remaining -= (PAGE_SIZE - offset);
--- 610,614 ----
if (!PageReserved(page))
! set_page_dirty(page);
remaining -= (PAGE_SIZE - offset);
***************
*** 1500,1502 ****
--- 1506,1529 ----
len, write, 0, NULL, NULL);
return ret == len ? 0 : -1;
+ }
+
+ struct page * vmalloc_to_page(void * vmalloc_addr)
+ {
+ unsigned long addr = (unsigned long) vmalloc_addr;
+ struct page *page = NULL;
+ pmd_t *pmd;
+ pte_t *pte;
+ pgd_t *pgd;
+
+ pgd = pgd_offset_k(addr);
+ if (!pgd_none(*pgd)) {
+ pmd = pmd_offset(pgd, addr);
+ if (!pmd_none(*pmd)) {
+ pte = pte_offset(pmd, addr);
+ if (pte_present(*pte)) {
+ page = pte_page(*pte);
+ }
+ }
+ }
+ return page;
}
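
memory.c picks up num_mappedpages, an EXPORT_SYMBOL for
get_user_pages(), the set_page_dirty() fix, and a new vmalloc_to_page()
that walks pgd/pmd/pte to find the struct page behind a vmalloc address.
A hedged sketch of the classic consumer of such a helper follows: a
driver nopage handler that maps a vmalloc'ed buffer to user space.
my_dev_buffer and my_dev_nopage are made-up names, and none of this is
part of the commit:

#include <linux/mm.h>
#include <linux/vmalloc.h>

extern void *my_dev_buffer;     /* assumed: allocated with vmalloc() elsewhere */

static struct page *my_dev_nopage(struct vm_area_struct *vma,
                                  unsigned long address, int unused)
{
        unsigned long offset = address - vma->vm_start
                               + (vma->vm_pgoff << PAGE_SHIFT);
        struct page *page = vmalloc_to_page((char *)my_dev_buffer + offset);

        if (!page)
                return NOPAGE_SIGBUS;   /* hole or out-of-range address */

        get_page(page);                 /* hold a reference while mapped */
        return page;
}
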
Index: mmap.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/mmap.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -r1.8 -r1.9
*** mmap.c 28 Apr 2002 20:51:34 -0000 1.8
--- mmap.c 19 May 2003 01:38:48 -0000 1.9
***************
*** 47,50 ****
--- 47,51 ----
int sysctl_overcommit_memory;
+ int max_map_count = DEFAULT_MAX_MAP_COUNT;
/* Check that a process has enough memory to allocate a
***************
*** 420,424 ****
/* Too many mappings? */
! if (mm->map_count > MAX_MAP_COUNT)
return -ENOMEM;
--- 421,425 ----
/* Too many mappings? */
! if (mm->map_count > max_map_count)
return -ENOMEM;
***************
*** 485,489 ****
/* Clear old maps */
- error = -ENOMEM;
munmap_back:
vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
--- 486,489 ----
***************
*** 555,559 ****
* f_op->mmap method. -DaveM
*/
! addr = vma->vm_start;
vma_link(mm, vma, prev, rb_link, rb_parent);
--- 555,582 ----
* f_op->mmap method. -DaveM
*/
! if (addr != vma->vm_start) {
! /*
! * It is a bit too late to pretend changing the virtual
! * area of the mapping, we just corrupted userspace
! * in the do_munmap, so FIXME (not in 2.4 to avoid breaking
! * the driver API).
! */
! struct vm_area_struct * stale_vma;
! /* Since addr changed, we rely on the mmap op to prevent
! * collisions with existing vmas and just use find_vma_prepare
! * to update the tree pointers.
! */
! addr = vma->vm_start;
! stale_vma = find_vma_prepare(mm, addr, &prev,
! &rb_link, &rb_parent);
! /*
! * Make sure the lowlevel driver did its job right.
! */
! if (unlikely(stale_vma && stale_vma->vm_start < vma->vm_end)) {
! printk(KERN_ERR "buggy mmap operation: [<%p>]\n",
! file ? file->f_op->mmap : NULL);
! BUG();
! }
! }
vma_link(mm, vma, prev, rb_link, rb_parent);
***************
*** 926,930 ****
/* If we'll make "hole", check the vm areas limit */
if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
! && mm->map_count >= MAX_MAP_COUNT)
return -ENOMEM;
--- 949,953 ----
/* If we'll make "hole", check the vm areas limit */
if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
! && mm->map_count >= max_map_count)
return -ENOMEM;
***************
*** 1047,1051 ****
return -ENOMEM;
! if (mm->map_count > MAX_MAP_COUNT)
return -ENOMEM;
--- 1070,1074 ----
return -ENOMEM;
! if (mm->map_count > max_map_count)
return -ENOMEM;
***************
*** 1053,1060 ****
return -ENOMEM;
! flags = calc_vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC,
! MAP_FIXED|MAP_PRIVATE) | mm->def_flags;
!
! flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
/* Can we just expand an old anonymous mapping? */
--- 1076,1080 ----
return -ENOMEM;
! flags = VM_DATA_DEFAULT_FLAGS | mm->def_flags;
/* Can we just expand an old anonymous mapping? */
***************
*** 1140,1144 ****
mpnt = next;
}
- flush_tlb_mm(mm);
/* This is just debugging */
--- 1160,1163 ----
***************
*** 1147,1150 ****
--- 1166,1171 ----
clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
+
+ flush_tlb_mm(mm);
}
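
mmap.c swaps the compile-time MAX_MAP_COUNT cap for the run-time
max_map_count variable (default DEFAULT_MAX_MAP_COUNT), hardens
do_mmap_pgoff against drivers that move vma->vm_start in their ->mmap
op, and lets do_brk use VM_DATA_DEFAULT_FLAGS. The user-space sketch
below (not part of the commit) shows the per-process mapping cap from
the outside by creating unmergeable single-page mappings until mmap()
fails; depending on the kernel, the failure may also come from address
space or overcommit limits rather than the map count:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
        unsigned long n = 0;

        for (;;) {
                /* Alternate protections so adjacent vmas cannot merge. */
                int prot = (n & 1) ? PROT_READ : PROT_READ | PROT_WRITE;
                void *p = mmap(NULL, 4096, prot,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

                if (p == MAP_FAILED) {
                        printf("mmap #%lu failed: %s\n", n + 1, strerror(errno));
                        break;
                }
                n++;
        }
        printf("created %lu mappings before hitting a limit\n", n);
        return 0;
}
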
Index: oom_kill.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/oom_kill.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -r1.8 -r1.9
*** oom_kill.c 14 Jan 2002 12:05:08 -0000 1.8
--- oom_kill.c 19 May 2003 01:38:48 -0000 1.9
***************
*** 112,117 ****
/*
* Simple selection loop. We chose the process with the highest
! * number of 'points'. We need the locks to make sure that the
! * list of task structs doesn't change while we look the other way.
*
* (not docbooked, we don't want this one cluttering up the manual)
--- 112,116 ----
/*
* Simple selection loop. We chose the process with the highest
! * number of 'points'. We expect the caller will lock the tasklist.
*
* (not docbooked, we don't want this one cluttering up the manual)
***************
*** 123,127 ****
struct task_struct *chosen = NULL;
- read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pid) {
--- 122,125 ----
***************
*** 133,137 ****
}
}
- read_unlock(&tasklist_lock);
return chosen;
}
--- 131,134 ----
***************
*** 172,176 ****
static void oom_kill(void)
{
! struct task_struct *p = select_bad_process(), *q;
/* Found nothing?!?! Either we hang forever, or we panic. */
--- 169,176 ----
static void oom_kill(void)
{
! struct task_struct *p, *q;
!
! read_lock(&tasklist_lock);
! p = select_bad_process();
/* Found nothing?!?! Either we hang forever, or we panic. */
***************
*** 179,185 ****
/* kill all processes that share the ->mm (i.e. all threads) */
- read_lock(&tasklist_lock);
for_each_task(q) {
! if(q->mm == p->mm) oom_kill_task(q);
}
read_unlock(&tasklist_lock);
--- 179,185 ----
/* kill all processes that share the ->mm (i.e. all threads) */
for_each_task(q) {
! if (q->mm == p->mm)
! oom_kill_task(q);
}
read_unlock(&tasklist_lock);
***************
*** 190,195 ****
* for more memory.
*/
! current->policy |= SCHED_YIELD;
! schedule();
return;
}
--- 190,194 ----
* for more memory.
*/
! yield();
return;
}
***************
*** 200,204 ****
void out_of_memory(void)
{
! static unsigned long first, last, count;
unsigned long now, since;
--- 199,203 ----
void out_of_memory(void)
{
! static unsigned long first, last, count, lastkill;
unsigned long now, since;
***************
*** 243,248 ****
--- 242,257 ----
/*
+ * If we just killed a process, wait a while
+ * to give that task a chance to exit. This
+ * avoids killing multiple processes needlessly.
+ */
+ since = now - lastkill;
+ if (since < HZ*5)
+ return;
+
+ /*
* Ok, really out of memory. Kill something.
*/
+ lastkill = now;
oom_kill();
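
Besides moving the tasklist_lock out to oom_kill() so the victim cannot
disappear between selection and the kill loop, the hunk above adds a
simple time damper: after a kill, out_of_memory() refuses to pick
another victim for five seconds so the dying task has a chance to
actually release its memory. The same pattern in miniature, as a
user-space sketch with an assumed 5-second window (illustration only):

#include <stdio.h>
#include <time.h>

static int may_kill_again(void)
{
        static time_t lastkill;         /* 0 until the first kill */
        time_t now = time(NULL);

        if (lastkill && now - lastkill < 5)
                return 0;               /* last victim may still be exiting */
        lastkill = now;
        return 1;
}

int main(void)
{
        /* First attempt proceeds, the next ones back off for a while. */
        for (int i = 0; i < 3; i++)
                printf("attempt %d: %s\n", i,
                       may_kill_again() ? "kill" : "back off");
        return 0;
}
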
Index: page_alloc.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/page_alloc.c,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -r1.26 -r1.27
*** page_alloc.c 29 Nov 2002 21:23:02 -0000 1.26
--- page_alloc.c 19 May 2003 01:38:48 -0000 1.27
***************
*** 2,5 ****
--- 2,8 ----
* linux/mm/page_alloc.c
*
+ * Manages the free list, the system allocates free pages here.
+ * Note that kmalloc() lives in slab.c
+ *
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
* Swap reorganised 29.12.95, Stephen Tweedie
***************
*** 18,22 ****
#include <linux/bootmem.h>
#include <linux/slab.h>
! #include <linux/compiler.h>
#include <linux/comp_cache.h>
--- 21,25 ----
#include <linux/bootmem.h>
#include <linux/slab.h>
! #include <linux/module.h>
#include <linux/comp_cache.h>
***************
*** 24,31 ****
int nr_active_pages;
int nr_inactive_pages;
! struct list_head inactive_list;
! struct list_head active_list;
pg_data_t *pgdat_list;
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
#ifdef CONFIG_COMP_CACHE
--- 27,43 ----
int nr_active_pages;
int nr_inactive_pages;
! LIST_HEAD(inactive_list);
! LIST_HEAD(active_list);
pg_data_t *pgdat_list;
+ /*
+ *
+ * The zone_table array is used to look up the address of the
+ * struct zone corresponding to a given zone number (ZONE_DMA,
+ * ZONE_NORMAL, or ZONE_HIGHMEM).
+ */
+ zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES];
+ EXPORT_SYMBOL(zone_table);
+
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
#ifdef CONFIG_COMP_CACHE
***************
*** 40,71 ****
/*
- * Free_page() adds the page to the free lists. This is optimized for
- * fast normal cases (no error jumps taken normally).
- *
- * The way to optimize jumps for gcc-2.2.2 is to:
- * - select the "normal" case and put it inside the if () { XXX }
- * - no else-statements if you can avoid them
- *
- * With the above two rules, you get a straight-line execution path
- * for the normal case, giving better asm-code.
- */
-
- #define memlist_init(x) INIT_LIST_HEAD(x)
- #define memlist_add_head list_add
- #define memlist_add_tail list_add_tail
- #define memlist_del list_del
- #define memlist_entry list_entry
- #define memlist_next(x) ((x)->next)
- #define memlist_prev(x) ((x)->prev)
-
- /*
* Temporary debugging check.
*/
! #define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->zone_start_mapnr) || (((x)-mem_map) >= (zone)->zone_start_mapnr+(zone)->size))
/*
! * Buddy system. Hairy. You really aren't expected to understand this
*
! * Hint: -mask = 1+~mask
*/
--- 52,87 ----
/*
* Temporary debugging check.
*/
! #define BAD_RANGE(zone, page) \
! ( \
! (((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size)) \
! || (((page) - mem_map) < (zone)->zone_start_mapnr) \
! || ((zone) != page_zone(page)) \
! )
/*
! * Freeing function for a buddy system allocator.
! * Contrary to prior comments, this is *NOT* hairy, and there
! * is no reason for anyone not to understand it.
*
! * The concept of a buddy system is to maintain direct-mapped tables
! * (containing bit values) for memory blocks of various "orders".
! * The bottom level table contains the map for the smallest allocatable
! * units of memory (here, pages), and each level above it describes
! * pairs of units from the levels below, hence, "buddies".
! * At a high level, all that happens here is marking the table entry
! * at the bottom level available, and propagating the changes upward
! * as necessary, plus some accounting needed to play nicely with other
! * parts of the VM system.
! * At each level, we keep one bit for each pair of blocks, which
! * is set to 1 iff only one of the pair is allocated. So when we
! * are allocating or freeing one, we can derive the state of the
! * other. That is, if we allocate a small block, and both were
! * free, the remainder of the region must be split into blocks.
! * If a block is freed, and its buddy is also free, then this
! * triggers coalescing into a block of larger size.
! *
! * -- wli
*/
***************
*** 78,86 ****
zone_t *zone;
! /* Yes, think what happens when other parts of the kernel take
* a reference to a page in order to pin it for io. -ben
*/
! if (PageLRU(page))
lru_cache_del(page);
if (page->buffers)
--- 94,106 ----
zone_t *zone;
! /*
! * Yes, think what happens when other parts of the kernel take
* a reference to a page in order to pin it for io. -ben
*/
! if (PageLRU(page)) {
! if (unlikely(in_interrupt()))
! BUG();
lru_cache_del(page);
+ }
if (page->buffers)
***************
*** 90,99 ****
if (!VALID_PAGE(page))
BUG();
- if (PageSwapCache(page))
- BUG();
if (PageLocked(page))
BUG();
- if (PageLRU(page))
- BUG();
if (PageActive(page))
BUG();
--- 110,115 ----
***************
*** 104,108 ****
back_local_freelist:
! zone = page->zone;
mask = (~0UL) << order;
--- 120,124 ----
back_local_freelist:
! zone = page_zone(page);
mask = (~0UL) << order;
***************
*** 131,134 ****
--- 147,152 ----
/*
* Move the buddy up one level.
+ * This code is taking advantage of the identity:
+ * -mask = 1+~mask
*/
buddy1 = base + (page_idx ^ -mask);
***************
*** 139,143 ****
BUG();
! memlist_del(&buddy1->list);
mask <<= 1;
area++;
--- 157,161 ----
BUG();
! list_del(&buddy1->list);
mask <<= 1;
area++;
***************
*** 145,149 ****
page_idx &= mask;
}
! memlist_add_head(&(base + page_idx)->list, &area->free_list);
spin_unlock_irqrestore(&zone->lock, flags);
--- 163,167 ----
page_idx &= mask;
}
! list_add(&(base + page_idx)->list, &area->free_list);
spin_unlock_irqrestore(&zone->lock, flags);
***************
*** 175,179 ****
high--;
size >>= 1;
! memlist_add_head(&(page)->list, &(area)->free_list);
MARK_USED(index, high, area);
index += size;
--- 193,197 ----
high--;
size >>= 1;
! list_add(&(page)->list, &(area)->free_list);
MARK_USED(index, high, area);
index += size;
***************
*** 197,209 ****
do {
head = &area->free_list;
! curr = memlist_next(head);
if (curr != head) {
unsigned int index;
! page = memlist_entry(curr, struct page, list);
if (BAD_RANGE(zone,page))
BUG();
! memlist_del(curr);
index = page - zone->zone_mem_map;
if (curr_order != MAX_ORDER-1)
--- 215,227 ----
do {
head = &area->free_list;
! curr = head->next;
if (curr != head) {
unsigned int index;
! page = list_entry(curr, struct page, list);
if (BAD_RANGE(zone,page))
BUG();
! list_del(curr);
index = page - zone->zone_mem_map;
if (curr_order != MAX_ORDER-1)
***************
*** 253,257 ****
current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
! __freed = try_to_free_pages(classzone, gfp_mask, order);
current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);
--- 271,275 ----
current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
! __freed = try_to_free_pages_zone(classzone, gfp_mask);
current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);
***************
*** 269,273 ****
do {
tmp = list_entry(entry, struct page, list);
! if (tmp->index == order && memclass(tmp->zone, classzone)) {
list_del(entry);
current->nr_local_pages--;
--- 287,291 ----
do {
tmp = list_entry(entry, struct page, list);
! if (tmp->index == order && memclass(page_zone(tmp), classzone)) {
list_del(entry);
current->nr_local_pages--;
***************
*** 281,286 ****
if (!VALID_PAGE(page))
BUG();
- if (PageSwapCache(page))
- BUG();
if (PageLocked(page))
BUG();
--- 299,302 ----
***************
*** 325,328 ****
--- 341,346 ----
zone = zonelist->zones;
classzone = *zone;
+ if (classzone == NULL)
+ return NULL;
min = 1UL << order;
for (;;) {
***************
*** 408,414 ****
/* Yield for kswapd, and try again */
! current->policy |= SCHED_YIELD;
! __set_current_state(TASK_RUNNING);
! schedule();
goto rebalance;
}
--- 426,430 ----
/* Yield for kswapd, and try again */
! yield();
goto rebalance;
}
***************
*** 457,470 ****
unsigned int nr_free_pages (void)
{
! unsigned int sum;
zone_t *zone;
- pg_data_t *pgdat = pgdat_list;
! sum = 0;
! while (pgdat) {
! for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
! sum += zone->free_pages;
! pgdat = pgdat->node_next;
! }
return sum;
}
--- 473,482 ----
unsigned int nr_free_pages (void)
{
! unsigned int sum = 0;
zone_t *zone;
! for_each_zone(zone)
! sum += zone->free_pages;
!
return sum;
}
***************
*** 475,482 ****
unsigned int nr_free_buffer_pages (void)
{
! pg_data_t *pgdat = pgdat_list;
unsigned int sum = 0;
! do {
zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
zone_t **zonep = zonelist->zones;
--- 487,494 ----
unsigned int nr_free_buffer_pages (void)
{
! pg_data_t *pgdat;
unsigned int sum = 0;
! for_each_pgdat(pgdat) {
zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
zone_t **zonep = zonelist->zones;
***************
*** 489,495 ****
sum += size - high;
}
!
! pgdat = pgdat->node_next;
! } while (pgdat);
return sum;
--- 501,505 ----
sum += size - high;
}
! }
return sum;
***************
*** 499,509 ****
unsigned int nr_free_highpages (void)
{
! pg_data_t *pgdat = pgdat_list;
unsigned int pages = 0;
! while (pgdat) {
pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
! pgdat = pgdat->node_next;
! }
return pages;
}
--- 509,518 ----
unsigned int nr_free_highpages (void)
{
! pg_data_t *pgdat;
unsigned int pages = 0;
! for_each_pgdat(pgdat)
pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
!
return pages;
}
***************
*** 560,565 ****
nr = 0;
for (;;) {
! curr = memlist_next(curr);
! if (curr == head)
break;
nr++;
--- 569,573 ----
nr = 0;
for (;;) {
! if ((curr = curr->next) == head)
break;
nr++;
***************
*** 631,634 ****
--- 639,684 ----
}
+ /*
+ * Helper functions to size the waitqueue hash table.
+ * Essentially these want to choose hash table sizes sufficiently
+ * large so that collisions trying to wait on pages are rare.
+ * But in fact, the number of active page waitqueues on typical
+ * systems is ridiculously low, less than 200. So this is even
+ * conservative, even though it seems large.
+ *
+ * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
+ * waitqueues, i.e. the size of the waitq table given the number of pages.
+ */
+ #define PAGES_PER_WAITQUEUE 256
+
+ static inline unsigned long wait_table_size(unsigned long pages)
+ {
+ unsigned long size = 1;
+
+ pages /= PAGES_PER_WAITQUEUE;
+
+ while (size < pages)
+ size <<= 1;
+
+ /*
+ * Once we have dozens or even hundreds of threads sleeping
+ * on IO we've got bigger problems than wait queue collision.
+ * Limit the size of the wait table to a reasonable size.
+ */
+ size = min(size, 4096UL);
+
+ return size;
+ }
+
+ /*
+ * This is an integer logarithm so that shifts can be used later
+ * to extract the more random high bits from the multiplicative
+ * hash function before the remainder is taken.
+ */
+ static inline unsigned long wait_table_bits(unsigned long size)
+ {
+ return ffz(~size);
+ }
+
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
***************
*** 682,686 ****
unsigned long *zholes_size, struct page *lmem_map)
{
- struct page *p;
unsigned long i, j;
unsigned long map_size;
--- 732,735 ----
***************
*** 703,709 ****
printk("On node %d totalpages: %lu\n", nid, realtotalpages);
- INIT_LIST_HEAD(&active_list);
- INIT_LIST_HEAD(&inactive_list);
-
/*
* Some architectures (with lots of mem and discontinous memory
--- 752,755 ----
***************
*** 725,740 ****
pgdat->nr_zones = 0;
- /*
- * Initially all pages are reserved - free ones are freed
- * up by free_all_bootmem() once the early boot process is
- * done.
- */
- for (p = lmem_map; p < lmem_map + totalpages; p++) {
- set_page_count(p, 0);
- SetPageReserved(p);
- init_waitqueue_head(&p->wait);
- memlist_init(&p->list);
- }
-
offset = lmem_map - mem_map;
for (j = 0; j < MAX_NR_ZONES; j++) {
--- 771,774 ----
***************
*** 743,746 ****
--- 777,781 ----
unsigned long size, realsize;
+ zone_table[nid * MAX_NR_ZONES + j] = zone;
realsize = size = zones_size[j];
if (zholes_size)
***************
*** 757,760 ****
--- 792,809 ----
continue;
+ /*
+ * The per-page waitqueue mechanism uses hashed waitqueues
+ * per zone.
+ */
+ zone->wait_table_size = wait_table_size(size);
+ zone->wait_table_shift =
+ BITS_PER_LONG - wait_table_bits(zone->wait_table_size);
+ zone->wait_table = (wait_queue_head_t *)
+ alloc_bootmem_node(pgdat, zone->wait_table_size
+ * sizeof(wait_queue_head_t));
+
+ for(i = 0; i < zone->wait_table_size; ++i)
+ init_waitqueue_head(zone->wait_table + i);
+
pgdat->nr_zones = j+1;
***************
*** 775,783 ****
printk("BUG: wrong zone alignment, it will crash\n");
for (i = 0; i < size; i++) {
struct page *page = mem_map + offset + i;
! page->zone = zone;
if (j != ZONE_HIGHMEM)
! page->virtual = __va(zone_start_paddr);
zone_start_paddr += PAGE_SIZE;
}
--- 824,840 ----
printk("BUG: wrong zone alignment, it will crash\n");
+ /*
+ * Initially all pages are reserved - free ones are freed
+ * up by free_all_bootmem() once the early boot process is
+ * done. Non-atomic initialization, single-pass.
+ */
for (i = 0; i < size; i++) {
struct page *page = mem_map + offset + i;
! set_page_zone(page, nid * MAX_NR_ZONES + j);
! set_page_count(page, 0);
! SetPageReserved(page);
! INIT_LIST_HEAD(&page->list);
if (j != ZONE_HIGHMEM)
! set_page_address(page, __va(zone_start_paddr));
zone_start_paddr += PAGE_SIZE;
}
***************
*** 787,791 ****
unsigned long bitmap_size;
! memlist_init(&zone->free_area[i].free_list);
if (i == MAX_ORDER-1) {
zone->free_area[i].map = NULL;
--- 844,848 ----
unsigned long bitmap_size;
! INIT_LIST_HEAD(&zone->free_area[i].free_list);
if (i == MAX_ORDER-1) {
zone->free_area[i].map = NULL;
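
page_alloc.c now owns the other half of the hashed-waitqueue scheme:
each zone gets a wait_table sized by wait_table_size() (one queue per
256 pages, rounded up to a power of two, capped at 4096) and a
wait_table_shift derived from wait_table_bits(). The user-space
rendition below just works the arithmetic for a few zone sizes,
assuming BITS_PER_LONG of 32, to show the resulting table sizes and
the shift page_waitqueue() ends up using:

#include <stdio.h>

#define PAGES_PER_WAITQUEUE 256
#define BITS_PER_LONG_EX    32          /* example value */

static unsigned long wait_table_size(unsigned long pages)
{
        unsigned long size = 1;

        pages /= PAGES_PER_WAITQUEUE;
        while (size < pages)
                size <<= 1;
        return size < 4096UL ? size : 4096UL;   /* same cap as the kernel code */
}

static unsigned long wait_table_bits(unsigned long size)
{
        /* ffz(~size) == log2(size) for the power-of-two sizes above */
        unsigned long bits = 0;

        while ((1UL << bits) < size)
                bits++;
        return bits;
}

int main(void)
{
        /* 16 MB, 128 MB, 896 MB and 16 GB worth of 4 KiB pages */
        unsigned long zones[] = { 4096, 32768, 229376, 4194304 };

        for (int i = 0; i < 4; i++) {
                unsigned long size = wait_table_size(zones[i]);
                printf("%8lu pages -> %4lu waitqueues, shift %lu\n",
                       zones[i], size,
                       BITS_PER_LONG_EX - wait_table_bits(size));
        }
        return 0;
}
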
Index: page_io.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/page_io.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -r1.6 -r1.7
*** page_io.c 10 Sep 2002 16:43:15 -0000 1.6
--- page_io.c 19 May 2003 01:38:49 -0000 1.7
***************
*** 73,81 ****
/* block_size == PAGE_SIZE/zones_used */
brw_page(rw, page, dev, zones, block_size);
-
- /* Note! For consistency we do all of the logic,
- * decrementing the page count, and unlocking the page in the
- * swap lock map - in the IO completion handler.
- */
return 1;
}
--- 73,76 ----
***************
*** 100,105 ****
if (!PageSwapCache(page))
PAGE_BUG(page);
- if (page->mapping != &swapper_space)
- PAGE_BUG(page);
if (!rw_swap_page_base(rw, entry, page))
UnlockPage(page);
--- 95,98 ----
***************
*** 117,129 ****
if (!PageLocked(page))
PAGE_BUG(page);
- if (PageSwapCache(page))
- PAGE_BUG(page);
if (page->mapping)
PAGE_BUG(page);
/* needs sync_page to wait I/O completation */
page->mapping = &swapper_space;
! if (!rw_swap_page_base(rw, entry, page))
! UnlockPage(page);
! wait_on_page(page);
page->mapping = NULL;
}
--- 110,122 ----
if (!PageLocked(page))
PAGE_BUG(page);
if (page->mapping)
PAGE_BUG(page);
/* needs sync_page to wait I/O completation */
page->mapping = &swapper_space;
! if (rw_swap_page_base(rw, entry, page))
! lock_page(page);
! if (!block_flushpage(page, 0))
! PAGE_BUG(page);
page->mapping = NULL;
+ UnlockPage(page);
}
Index: shmem.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/shmem.c,v
retrieving revision 1.22
retrieving revision 1.23
diff -C2 -r1.22 -r1.23
*** shmem.c 10 Sep 2002 16:43:16 -0000 1.22
--- shmem.c 19 May 2003 01:38:49 -0000 1.23
***************
*** 36,39 ****
--- 36,47 ----
#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
+ #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
+
+ #define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * (ENTRIES_PER_PAGE/2) * (ENTRIES_PER_PAGE+1))
+ #define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
+ #define VM_ACCT(size) (((size) + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT)
+
+ /* Pretend that each entry is of this size in directory's i_size */
+ #define BOGO_DIRENT_SIZE 20
#define SHMEM_SB(sb) (&sb->u.shmem_sb)
***************
*** 43,47 ****
static struct file_operations shmem_file_operations;
static struct inode_operations shmem_inode_operations;
- static struct file_operations shmem_dir_operations;
static struct inode_operations shmem_dir_inode_operations;
static struct vm_operations_struct shmem_vm_ops;
--- 51,54 ----
***************
*** 51,55 ****
atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */
! #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
/*
--- 58,62 ----
atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */
! static struct page *shmem_getpage_locked(struct shmem_inode_info *, struct inode *, unsigned long);
/*
***************
*** 128,134 ****
* +-> 52-55
*/
-
- #define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * ENTRIES_PER_PAGE/2*(ENTRIES_PER_PAGE+1))
-
static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index, unsigned long page)
{
--- 135,138 ----
***************
*** 183,187 ****
swp_entry_t * res;
! if (index >= SHMEM_MAX_BLOCKS)
return ERR_PTR(-EFBIG);
--- 187,191 ----
swp_entry_t * res;
! if (index >= SHMEM_MAX_INDEX)
return ERR_PTR(-EFBIG);
***************
*** 315,318 ****
--- 319,323 ----
{
unsigned long index;
+ unsigned long partial;
unsigned long freed = 0;
struct shmem_inode_info * info = SHMEM_I(inode);
***************
*** 322,325 ****
--- 327,352 ----
spin_lock (&info->lock);
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ partial = inode->i_size & ~PAGE_CACHE_MASK;
+
+ if (partial) {
+ swp_entry_t *entry = shmem_swp_entry(info, index-1, 0);
+ struct page *page;
+ /*
+ * This check is racy: it's faintly possible that page
+ * was assigned to swap during truncate_inode_pages,
+ * and now assigned to file; but better than nothing.
+ */
+ if (!IS_ERR(entry) && entry->val) {
+ spin_unlock(&info->lock);
+ page = shmem_getpage_locked(info, inode, index-1);
+ if (!IS_ERR(page)) {
+ memclear_highpage_flush(page, partial,
+ PAGE_CACHE_SIZE - partial);
+ UnlockPage(page);
+ page_cache_release(page);
+ }
+ spin_lock(&info->lock);
+ }
+ }
while (index < info->next_index)
***************
*** 336,344 ****
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
! inode->i_size = 0;
! if (inode->i_op->truncate == shmem_truncate){
spin_lock (&shmem_ilock);
list_del (&SHMEM_I(inode)->list);
spin_unlock (&shmem_ilock);
shmem_truncate (inode);
}
--- 363,371 ----
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
! if (inode->i_op->truncate == shmem_truncate) {
spin_lock (&shmem_ilock);
list_del (&SHMEM_I(inode)->list);
spin_unlock (&shmem_ilock);
+ inode->i_size = 0;
shmem_truncate (inode);
}
***************
*** 349,374 ****
}
! static int shmem_clear_swp (swp_entry_t entry, swp_entry_t *ptr, int size) {
swp_entry_t *test;
! for (test = ptr; test < ptr + size; test++) {
! if (test->val == entry.val) {
! swap_free (entry);
! *test = (swp_entry_t) {0};
return test - ptr;
- }
}
return -1;
}
! static int shmem_unuse_inode (struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
swp_entry_t *ptr;
unsigned long idx;
int offset;
!
idx = 0;
spin_lock (&info->lock);
! offset = shmem_clear_swp (entry, info->i_direct, SHMEM_NR_DIRECT);
if (offset >= 0)
goto found;
--- 376,403 ----
}
! static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *ptr, swp_entry_t *eptr)
! {
swp_entry_t *test;
! for (test = ptr; test < eptr; test++) {
! if (test->val == entry.val)
return test - ptr;
}
return -1;
}
! static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
swp_entry_t *ptr;
unsigned long idx;
int offset;
!
idx = 0;
+ ptr = info->i_direct;
spin_lock (&info->lock);
! offset = info->next_index;
! if (offset > SHMEM_NR_DIRECT)
! offset = SHMEM_NR_DIRECT;
! offset = shmem_find_swp(entry, ptr, ptr + offset);
if (offset >= 0)
goto found;
***************
*** 379,383 ****
if (IS_ERR(ptr))
continue;
! offset = shmem_clear_swp (entry, ptr, ENTRIES_PER_PAGE);
if (offset >= 0)
goto found;
--- 408,415 ----
if (IS_ERR(ptr))
continue;
! offset = info->next_index - idx;
! if (offset > ENTRIES_PER_PAGE)
! offset = ENTRIES_PER_PAGE;
! offset = shmem_find_swp(entry, ptr, ptr + offset);
if (offset >= 0)
goto found;
***************
*** 387,391 ****
found:
if (PageCompressed(page))
! decompress_swap_cache_page(page);
delete_from_swap_cache(page);
add_to_page_cache(page, info->inode->i_mapping, offset + idx);
--- 419,425 ----
found:
if (PageCompressed(page))
! decompress_swap_cache_page(page);
! swap_free(entry);
! ptr[offset] = (swp_entry_t) {0};
delete_from_swap_cache(page);
add_to_page_cache(page, info->inode->i_mapping, offset + idx);
***************
*** 398,402 ****
/*
! * unuse_shmem() search for an eventually swapped out shmem page.
*/
void shmem_unuse(swp_entry_t entry, struct page *page)
--- 432,436 ----
/*
! * shmem_unuse() search for an eventually swapped out shmem page.
*/
void shmem_unuse(swp_entry_t entry, struct page *page)
***************
*** 409,414 ****
info = list_entry(p, struct shmem_inode_info, list);
! if (shmem_unuse_inode(info, entry, page))
break;
}
spin_unlock (&shmem_ilock);
--- 443,452 ----
info = list_entry(p, struct shmem_inode_info, list);
! if (info->swapped && shmem_unuse_inode(info, entry, page)) {
! /* move head to start search for next from here */
! list_del(&shmem_inodes);
! list_add_tail(&shmem_inodes, p);
break;
+ }
}
spin_unlock (&shmem_ilock);
***************
*** 531,535 ****
/* Look it up and read it in.. */
! page = find_get_page(&swapper_space, entry->val);
if (!page) {
swp_entry_t swap = *entry;
--- 569,573 ----
/* Look it up and read it in.. */
! page = lookup_swap_cache(*entry);
if (!page) {
swp_entry_t swap = *entry;
***************
*** 588,591 ****
--- 626,630 ----
return ERR_PTR(-ENOMEM);
clear_highpage(page);
+ flush_dcache_page(page);
inode->i_blocks += BLOCKS_PER_PAGE;
add_to_page_cache (page, mapping, idx);
***************
*** 707,717 ****
inode->i_fop = &shmem_file_operations;
spin_lock (&shmem_ilock);
! list_add (&SHMEM_I(inode)->list, &shmem_inodes);
spin_unlock (&shmem_ilock);
break;
case S_IFDIR:
inode->i_nlink++;
inode->i_op = &shmem_dir_inode_operations;
! inode->i_fop = &shmem_dir_operations;
break;
case S_IFLNK:
--- 746,758 ----
inode->i_fop = &shmem_file_operations;
spin_lock (&shmem_ilock);
! list_add_tail(&info->list, &shmem_inodes);
spin_unlock (&shmem_ilock);
break;
case S_IFDIR:
inode->i_nlink++;
+ /* Some things misbehave if size == 0 on a directory */
+ inode->i_size = 2 * BOGO_DIRENT_SIZE;
inode->i_op = &shmem_dir_inode_operations;
! inode->i_fop = &dcache_dir_ops;
break;
case S_IFLNK:
***************
*** 884,888 ****
status = -EFAULT;
ClearPageUptodate(page);
- kunmap(page);
goto unlock;
}
--- 925,928 ----
***************
*** 979,983 ****
buf->f_ffree = sbinfo->free_inodes;
spin_unlock (&sbinfo->stat_lock);
! buf->f_namelen = 255;
return 0;
}
--- 1019,1023 ----
buf->f_ffree = sbinfo->free_inodes;
spin_unlock (&sbinfo->stat_lock);
! buf->f_namelen = NAME_MAX;
return 0;
}
***************
*** 1001,1006 ****
int error = -ENOSPC;
- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode) {
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
--- 1041,1047 ----
int error = -ENOSPC;
if (inode) {
+ dir->i_size += BOGO_DIRENT_SIZE;
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
***************
*** 1035,1038 ****
--- 1076,1080 ----
return -EPERM;
+ dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inode->i_nlink++;
***************
*** 1079,1082 ****
--- 1121,1126 ----
{
struct inode *inode = dentry->d_inode;
+
+ dir->i_size -= BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inode->i_nlink--;
***************
*** 1102,1123 ****
static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry)
{
! int error = -ENOTEMPTY;
! if (shmem_empty(new_dentry)) {
! struct inode *inode = new_dentry->d_inode;
! if (inode) {
! inode->i_ctime = CURRENT_TIME;
! inode->i_nlink--;
! dput(new_dentry);
! }
! error = 0;
! old_dentry->d_inode->i_ctime = old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
}
! return error;
}
static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
{
- int error;
int len;
struct inode *inode;
--- 1146,1174 ----
static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry)
{
! struct inode *inode = old_dentry->d_inode;
! int they_are_dirs = S_ISDIR(inode->i_mode);
! if (!shmem_empty(new_dentry))
! return -ENOTEMPTY;
!
! if (new_dentry->d_inode) {
! (void) shmem_unlink(new_dir, new_dentry);
! if (they_are_dirs)
! old_dir->i_nlink--;
! } else if (they_are_dirs) {
! old_dir->i_nlink--;
! new_dir->i_nlink++;
}
!
! old_dir->i_size -= BOGO_DIRENT_SIZE;
! new_dir->i_size += BOGO_DIRENT_SIZE;
! old_dir->i_ctime = old_dir->i_mtime =
! new_dir->i_ctime = new_dir->i_mtime =
! inode->i_ctime = CURRENT_TIME;
! return 0;
}
static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
{
int len;
struct inode *inode;
***************
*** 1126,1138 ****
struct shmem_inode_info * info;
- error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
- if (error)
- return error;
-
len = strlen(symname) + 1;
if (len > PAGE_CACHE_SIZE)
return -ENAMETOOLONG;
!
! inode = dentry->d_inode;
info = SHMEM_I(inode);
inode->i_size = len-1;
--- 1177,1188 ----
struct shmem_inode_info * info;
len = strlen(symname) + 1;
if (len > PAGE_CACHE_SIZE)
return -ENAMETOOLONG;
!
! inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
! if (!inode)
! return -ENOSPC;
!
info = SHMEM_I(inode);
inode->i_size = len-1;
***************
*** 1142,1154 ****
inode->i_op = &shmem_symlink_inline_operations;
} else {
- spin_lock (&shmem_ilock);
- list_add (&info->list, &shmem_inodes);
- spin_unlock (&shmem_ilock);
down(&info->sem);
page = shmem_getpage_locked(info, inode, 0);
if (IS_ERR(page)) {
up(&info->sem);
return PTR_ERR(page);
}
kaddr = kmap(page);
memcpy(kaddr, symname, len);
--- 1192,1206 ----
inode->i_op = &shmem_symlink_inline_operations;
} else {
down(&info->sem);
page = shmem_getpage_locked(info, inode, 0);
if (IS_ERR(page)) {
up(&info->sem);
+ iput(inode);
return PTR_ERR(page);
}
+ inode->i_op = &shmem_symlink_inode_operations;
+ spin_lock (&shmem_ilock);
+ list_add_tail(&info->list, &shmem_inodes);
+ spin_unlock (&shmem_ilock);
kaddr = kmap(page);
memcpy(kaddr, symname, len);
***************
*** 1158,1164 ****
page_cache_release(page);
up(&info->sem);
- inode->i_op = &shmem_symlink_inode_operations;
}
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
return 0;
}
--- 1210,1218 ----
page_cache_release(page);
up(&info->sem);
}
+ dir->i_size += BOGO_DIRENT_SIZE;
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ d_instantiate(dentry, inode);
+ dget(dentry);
return 0;
}
***************
*** 1321,1325 ****
sbinfo->max_inodes = inodes;
sbinfo->free_inodes = inodes;
! sb->s_maxbytes = (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
--- 1375,1379 ----
sbinfo->max_inodes = inodes;
sbinfo->free_inodes = inodes;
! sb->s_maxbytes = SHMEM_MAX_BYTES;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
***************
*** 1360,1371 ****
};
- static struct file_operations shmem_dir_operations = {
- read: generic_read_dir,
- readdir: dcache_readdir,
- #ifdef CONFIG_TMPFS
- fsync: shmem_sync_file,
- #endif
- };
-
static struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
--- 1414,1417 ----
***************
*** 1463,1470 ****
int vm_enough_memory(long pages);
! if (size > (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT)
return ERR_PTR(-EINVAL);
! if (!vm_enough_memory((size) >> PAGE_CACHE_SHIFT))
return ERR_PTR(-ENOMEM);
--- 1509,1516 ----
int vm_enough_memory(long pages);
! if (size > SHMEM_MAX_BYTES)
return ERR_PTR(-EINVAL);
! if (!vm_enough_memory(VM_ACCT(size)))
return ERR_PTR(-ENOMEM);
***************
*** 1488,1498 ****
d_instantiate(dentry, inode);
! dentry->d_inode->i_size = size;
! shmem_truncate(inode);
file->f_vfsmnt = mntget(shm_mnt);
file->f_dentry = dentry;
file->f_op = &shmem_file_operations;
file->f_mode = FMODE_WRITE | FMODE_READ;
- inode->i_nlink = 0; /* It is unlinked */
return(file);
--- 1534,1543 ----
d_instantiate(dentry, inode);
! inode->i_size = size;
! inode->i_nlink = 0; /* It is unlinked */
file->f_vfsmnt = mntget(shm_mnt);
file->f_dentry = dentry;
file->f_op = &shmem_file_operations;
file->f_mode = FMODE_WRITE | FMODE_READ;
return(file);
***************
*** 1503,1506 ****
--- 1548,1552 ----
return ERR_PTR(error);
}
+
/*
* shmem_zero_setup - setup a shared anonymous mapping
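
The shmem.c update introduces SHMEM_MAX_INDEX/SHMEM_MAX_BYTES
(replacing SHMEM_MAX_BLOCKS), VM_ACCT() for accounting, and
BOGO_DIRENT_SIZE so directories report plausible sizes. Working the new
limit through with assumed values: 4 KiB PAGE_CACHE_SIZE, 4-byte
unsigned long, and SHMEM_NR_DIRECT taken as 16 (an assumption, since
that constant is defined outside this diff):

#include <stdio.h>

int main(void)
{
        unsigned long long page_cache_size = 4096;                      /* assumed */
        unsigned long long entries_per_page = page_cache_size / 4;      /* 1024 */
        unsigned long long shmem_nr_direct = 16;                        /* assumed */

        unsigned long long max_index = shmem_nr_direct +
                entries_per_page * (entries_per_page / 2) * (entries_per_page + 1);
        unsigned long long max_bytes = max_index << 12;                 /* PAGE_CACHE_SHIFT = 12 */

        printf("SHMEM_MAX_INDEX = %llu pages\n", max_index);
        printf("SHMEM_MAX_BYTES = %llu bytes (~%llu GiB)\n",
               max_bytes, max_bytes >> 30);
        return 0;
}
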
Index: swap_state.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/swap_state.c,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -r1.42 -r1.43
*** swap_state.c 6 Dec 2002 19:29:21 -0000 1.42
--- swap_state.c 19 May 2003 01:38:49 -0000 1.43
***************
*** 127,131 ****
BUG();
! block_flushpage(page, 0);
entry.val = page->index;
--- 127,132 ----
BUG();
! if (unlikely(!block_flushpage(page, 0)))
! BUG(); /* an anonymous page cannot have page->buffers set */
entry.val = page->index;
Index: swapfile.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/swapfile.c,v
retrieving revision 1.38
retrieving revision 1.39
diff -C2 -r1.38 -r1.39
*** swapfile.c 6 Dec 2002 19:29:21 -0000 1.38
--- swapfile.c 19 May 2003 01:38:49 -0000 1.39
***************
*** 15,19 ****
#include <linux/pagemap.h>
#include <linux/shm.h>
- #include <linux/compiler.h>
#include <linux/comp_cache.h>
--- 15,18 ----
***************
*** 944,956 ****
* Note shmem_unuse already deleted its from swap cache.
*/
! swcount = swap_map_count(*swap_map);
! if ((swcount > 0) != PageSwapCache(page))
! BUG();
! if ((swcount > 1) && PageDirty(page)) {
rw_swap_page(WRITE, page);
lock_page(page);
}
! if (PageCompressed(page))
! decompress_swap_cache_page(page);
if (PageSwapCache(page))
delete_from_swap_cache(page);
--- 943,952 ----
* Note shmem_unuse already deleted its from swap cache.
*/
! if ((swap_map_count(*swap_map) > 1) && PageDirty(page) && PageSwapCache(page)) {
rw_swap_page(WRITE, page);
lock_page(page);
}
! if (PageCompressed(page))
! decompress_swap_cache_page(page);
if (PageSwapCache(page))
delete_from_swap_cache(page);
Index: vmscan.c
===================================================================
RCS file: /cvsroot/linuxcompressed/linux/mm/vmscan.c,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -r1.44 -r1.45
*** vmscan.c 22 Nov 2002 16:01:36 -0000 1.44
--- vmscan.c 19 May 2003 01:38:50 -0000 1.45
***************
*** 2,5 ****
--- 2,8 ----
* linux/mm/vmscan.c
*
+ * The pageout daemon, decides which pages to evict (swap out) and
+ * does the actual work of freeing them.
+ *
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
***************
*** 21,25 ****
#include <linux/highmem.h>
#include <linux/file.h>
- #include <linux/compiler.h>
#include <linux/comp_cache.h>
--- 24,27 ----
***************
*** 60,64 ****
/* Don't bother replenishing zones not under pressure.. */
! if (!memclass(page->zone, classzone))
return 0;
--- 62,66 ----
/* Don't bother replenishing zones not under pressure.. */
! if (!memclass(page_zone(page), classzone))
return 0;
***************
*** 241,246 ****
end = vma->vm_end;
! if (address >= end)
! BUG();
do {
count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
--- 243,247 ----
end = vma->vm_end;
! BUG_ON(address >= end);
do {
count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
***************
*** 361,368 ****
page = list_entry(entry, struct page, lru);
! if (unlikely(!PageLRU(page)))
! BUG();
! if (unlikely(PageActive(page)))
! BUG();
list_del(entry);
--- 362,367 ----
page = list_entry(entry, struct page, lru);
! BUG_ON(!PageLRU(page));
! BUG_ON(PageActive(page));
list_del(entry);
***************
*** 376,380 ****
continue;
! if (!memclass(page->zone, classzone))
continue;
--- 375,379 ----
continue;
! if (!memclass(page_zone(page), classzone))
continue;
***************
*** 643,647 ****
}
! int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order)
{
int priority = DEF_PRIORITY;
--- 642,646 ----
}
! int try_to_free_pages_zone(zone_t *classzone, unsigned int gfp_mask)
{
int priority = DEF_PRIORITY;
***************
*** 663,666 ****
--- 662,684 ----
}
+ int try_to_free_pages(unsigned int gfp_mask)
+ {
+ pg_data_t *pgdat;
+ zonelist_t *zonelist;
+ unsigned long pf_free_pages;
+ int error = 0;
+
+ pf_free_pages = current->flags & PF_FREE_PAGES;
+ current->flags &= ~PF_FREE_PAGES;
+
+ for_each_pgdat(pgdat) {
+ zonelist = pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK);
+ error |= try_to_free_pages_zone(zonelist->zones[0], gfp_mask);
+ }
+
+ current->flags |= pf_free_pages;
+ return error;
+ }
+
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
***************
*** 689,693 ****
if (!zone->need_balance)
continue;
! if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
zone->need_balance = 0;
__set_current_state(TASK_INTERRUPTIBLE);
--- 707,711 ----
if (!zone->need_balance)
continue;
! if (!try_to_free_pages_zone(zone, GFP_KSWAPD)) {
zone->need_balance = 0;
__set_current_state(TASK_INTERRUPTIBLE);
***************
*** 711,718 ****
do {
need_more_balance = 0;
! pgdat = pgdat_list;
! do
need_more_balance |= kswapd_balance_pgdat(pgdat);
- while ((pgdat = pgdat->node_next));
} while (need_more_balance);
}
--- 729,735 ----
do {
need_more_balance = 0;
!
! for_each_pgdat(pgdat)
need_more_balance |= kswapd_balance_pgdat(pgdat);
} while (need_more_balance);
}
***************
*** 737,746 ****
pg_data_t * pgdat;
! pgdat = pgdat_list;
! do {
! if (kswapd_can_sleep_pgdat(pgdat))
! continue;
! return 0;
! } while ((pgdat = pgdat->node_next));
return 1;
--- 754,761 ----
pg_data_t * pgdat;
! for_each_pgdat(pgdat) {
! if (!kswapd_can_sleep_pgdat(pgdat))
! return 0;
! }
return 1;
|