Update of /cvsroot/linux-vax/kernel-2.4/fs
In directory usw-pr-cvs1:/tmp/cvs-serv27691
Modified Files:
dcache.c devices.c dquot.c exec.c fcntl.c file_table.c
filesystems.c inode.c iobuf.c
Log Message:
sync 2.4.15 commit 11
Index: dcache.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/dcache.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- dcache.c 25 Feb 2001 23:14:46 -0000 1.1.1.2
+++ dcache.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/cache.h>
+#include <linux/module.h>
#include <asm/uaccess.h>
@@ -138,10 +139,6 @@
goto kill_it;
list_add(&dentry->d_lru, &dentry_unused);
dentry_stat.nr_unused++;
- /*
- * Update the timestamp
- */
- dentry->d_reftime = jiffies;
spin_unlock(&dcache_lock);
return;
@@ -223,8 +220,7 @@
atomic_inc(&dentry->d_count);
if (atomic_read(&dentry->d_count) == 1) {
dentry_stat.nr_unused--;
- list_del(&dentry->d_lru);
- INIT_LIST_HEAD(&dentry->d_lru); /* make "list_empty()" work */
+ list_del_init(&dentry->d_lru);
}
return dentry;
}
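
The list_del() + INIT_LIST_HEAD() pairs that this commit keeps collapsing
into list_del_init() unlink a node and re-point it at itself, so a later
list_empty() test on that node stays well defined. A minimal userspace
model of the idiom (the types and helpers below are simplified stand-ins,
not the kernel's):

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
    static int list_empty(const struct list_head *h) { return h->next == h; }

    static void list_del_init(struct list_head *e)
    {
        e->prev->next = e->next;   /* unlink from the list...           */
        e->next->prev = e->prev;
        INIT_LIST_HEAD(e);         /* ...and leave the node self-linked */
    }

    int main(void)
    {
        struct list_head head, node;

        INIT_LIST_HEAD(&head);
        node.next = head.next;     /* open-coded list_add(&node, &head) */
        node.prev = &head;
        head.next->prev = &node;
        head.next = &node;

        list_del_init(&node);
        printf("empty after del: %d\n", list_empty(&node));  /* prints 1 */
        return 0;
    }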
@@ -337,10 +333,10 @@
dentry = list_entry(tmp, struct dentry, d_lru);
/* If the dentry was recently referenced, don't free it. */
- if (dentry->d_flags & DCACHE_REFERENCED) {
- dentry->d_flags &= ~DCACHE_REFERENCED;
+ if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
+ dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
list_add(&dentry->d_lru, &dentry_unused);
- goto next;
+ continue;
}
dentry_stat.nr_unused--;
@@ -349,7 +345,6 @@
BUG();
prune_one_dentry(dentry);
- next:
if (!--count)
break;
}
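
The hunk above turns prune_dcache()'s scan into a straight second-chance
pass: a dentry that was referenced since the last scan loses its
DCACHE_REFERENCED bit and goes back on the LRU, and only cold entries are
pruned; the old "goto next" label disappears in favour of continue. A toy
model of that policy (names simplified, nothing kernel-specific):

    #include <stdio.h>

    #define REFERENCED 0x1

    struct entry { unsigned flags; const char *name; };

    static void prune(struct entry *lru[], int n, int count)
    {
        int i;

        for (i = 0; i < n && count; i++) {
            if (lru[i]->flags & REFERENCED) {
                lru[i]->flags &= ~REFERENCED;  /* second chance   */
                continue;                      /* was "goto next" */
            }
            printf("pruning %s\n", lru[i]->name);
            count--;
        }
    }

    int main(void)
    {
        struct entry hot  = { REFERENCED, "hot"  };
        struct entry cold = { 0,          "cold" };
        struct entry *lru[] = { &hot, &cold };

        prune(lru, 2, 8);   /* frees "cold"; "hot" survives one pass */
        return 0;
    }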
@@ -413,8 +408,7 @@
if (atomic_read(&dentry->d_count))
continue;
dentry_stat.nr_unused--;
- list_del(tmp);
- INIT_LIST_HEAD(tmp);
+ list_del_init(tmp);
prune_one_dentry(dentry);
goto repeat;
}
@@ -553,7 +547,7 @@
* ...
* 6 - base-level: try to shrink a bit.
*/
-void shrink_dcache_memory(int priority, unsigned int gfp_mask)
+int shrink_dcache_memory(int priority, unsigned int gfp_mask)
{
int count = 0;
@@ -568,14 +562,14 @@
* We should make sure we don't hold the superblock lock over
* block allocations, but for now:
*/
- if (!(gfp_mask & __GFP_IO))
- return;
+ if (!(gfp_mask & __GFP_FS))
+ return 0;
- if (priority)
- count = dentry_stat.nr_unused / priority;
+ count = dentry_stat.nr_unused / priority;
prune_dcache(count);
kmem_cache_shrink(dentry_cache);
+ return 0;
}
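
shrink_dcache_memory() now returns int and refuses to run when the
allocation context lacks __GFP_FS, since a caller that cannot re-enter
filesystem code must not end up pruning dentries; the zero check on
priority also goes away because callers are now expected to pass a
non-zero priority. Roughly, the new contract (constants and counters
below are made-up stand-ins, not the kernel's):

    #define GFP_FS_MODEL 0x100      /* stand-in for __GFP_FS */

    static int nr_unused = 1024;    /* stand-in for dentry_stat.nr_unused */

    static void prune(int count) { nr_unused -= count; }

    int shrink_cache_memory(int priority, unsigned int gfp_mask)
    {
        if (!(gfp_mask & GFP_FS_MODEL))  /* caller may not re-enter the FS */
            return 0;
        prune(nr_unused / priority);     /* priority is never zero here */
        return 0;
    }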
#define NAME_ALLOC_LEN(len) ((len+16) & ~15)
@@ -612,6 +606,7 @@
str[name->len] = 0;
atomic_set(&dentry->d_count, 1);
+ dentry->d_vfs_flags = 0;
dentry->d_flags = 0;
dentry->d_inode = NULL;
dentry->d_parent = NULL;
@@ -621,7 +616,7 @@
dentry->d_name.hash = name->hash;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
- INIT_LIST_HEAD(&dentry->d_vfsmnt);
+ dentry->d_mounted = 0;
INIT_LIST_HEAD(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
@@ -656,6 +651,7 @@
void d_instantiate(struct dentry *entry, struct inode * inode)
{
+ if (!list_empty(&entry->d_alias)) BUG();
spin_lock(&dcache_lock);
if (inode)
list_add(&entry->d_alias, &inode->i_dentry);
@@ -734,7 +730,7 @@
continue;
}
__dget_locked(dentry);
- dentry->d_flags |= DCACHE_REFERENCED;
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
spin_unlock(&dcache_lock);
return dentry;
}
@@ -744,58 +740,48 @@
/**
* d_validate - verify dentry provided from insecure source
- * @dentry: The dentry alleged to be valid
- * @dparent: The parent dentry
+ * @dentry: The dentry alleged to be valid child of @dparent
+ * @dparent: The parent dentry (known to be valid)
* @hash: Hash of the dentry
* @len: Length of the name
*
* An insecure source has sent us a dentry, here we verify it and dget() it.
* This is used by ncpfs in its readdir implementation.
* Zero is returned if the dentry is invalid.
- *
- * NOTE: This function does _not_ dereference the pointers before we have
- * validated them. We can test the pointer values, but we
- * must not actually use them until we have found a valid
- * copy of the pointer in kernel space..
*/
-int d_validate(struct dentry *dentry, struct dentry *dparent,
- unsigned int hash, unsigned int len)
+int d_validate(struct dentry *dentry, struct dentry *dparent)
{
+ unsigned long dent_addr = (unsigned long) dentry;
+ unsigned long min_addr = PAGE_OFFSET;
+ unsigned long align_mask = 0x0F;
struct list_head *base, *lhp;
- int valid = 1;
- spin_lock(&dcache_lock);
- if (dentry != dparent) {
- base = d_hash(dparent, hash);
- lhp = base;
- while ((lhp = lhp->next) != base) {
- if (dentry == list_entry(lhp, struct dentry, d_hash)) {
- __dget_locked(dentry);
- goto out;
- }
- }
- } else {
- /*
- * Special case: local mount points don't live in
- * the hashes, so we search the super blocks.
- */
- struct super_block *sb = sb_entry(super_blocks.next);
+ if (dent_addr < min_addr)
+ goto out;
+ if (dent_addr > (unsigned long)high_memory - sizeof(struct dentry))
+ goto out;
+ if (dent_addr & align_mask)
+ goto out;
+ if ((!kern_addr_valid(dent_addr)) || (!kern_addr_valid(dent_addr -1 +
+ sizeof(struct dentry))))
+ goto out;
- for (; sb != sb_entry(&super_blocks);
- sb = sb_entry(sb->s_list.next)) {
- if (!sb->s_dev)
- continue;
- if (sb->s_root == dentry) {
- __dget_locked(dentry);
- goto out;
- }
+ if (dentry->d_parent != dparent)
+ goto out;
+
+ spin_lock(&dcache_lock);
+ lhp = base = d_hash(dparent, dentry->d_name.hash);
+ while ((lhp = lhp->next) != base) {
+ if (dentry == list_entry(lhp, struct dentry, d_hash)) {
+ __dget_locked(dentry);
+ spin_unlock(&dcache_lock);
+ return 1;
}
}
- valid = 0;
-out:
spin_unlock(&dcache_lock);
- return valid;
+out:
+ return 0;
}
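
The rewritten d_validate() drops the hash/len arguments and, before ever
dereferencing the untrusted pointer, checks that it lies in the valid
kernel window, that a whole struct dentry fits below high_memory, that it
is 16-byte aligned, and that kern_addr_valid() accepts both ends. A
standalone sketch of that validation order, with illustrative bounds
rather than the kernel's:

    #include <stdint.h>

    struct object { int field; };

    /* Return non-zero only when p could plausibly point at a struct
     * object inside [lo, hi): in range, fully contained, 16-byte
     * aligned. Only then is it safe to dereference. */
    int pointer_plausible(const void *p, uintptr_t lo, uintptr_t hi)
    {
        uintptr_t a = (uintptr_t)p;

        if (a < lo)                          /* below the window        */
            return 0;
        if (a > hi - sizeof(struct object))  /* object would overrun it */
            return 0;
        if (a & 0x0F)                        /* misaligned              */
            return 0;
        return 1;
    }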
/*
@@ -848,6 +834,7 @@
void d_rehash(struct dentry * entry)
{
struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+ if (!list_empty(&entry->d_hash)) BUG();
spin_lock(&dcache_lock);
list_add(&entry->d_hash, list);
spin_unlock(&dcache_lock);
@@ -922,8 +909,7 @@
list_add(&dentry->d_hash, &target->d_hash);
/* Unhash the target: dput() will then get rid of it */
- list_del(&target->d_hash);
- INIT_LIST_HEAD(&target->d_hash);
+ list_del_init(&target->d_hash);
list_del(&dentry->d_child);
list_del(&target->d_child);
@@ -1239,6 +1225,18 @@
} while (i);
}
+static void init_buffer_head(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+ SLAB_CTOR_CONSTRUCTOR)
+ {
+ struct buffer_head * bh = (struct buffer_head *) foo;
+
+ memset(bh, 0, sizeof(*bh));
+ init_waitqueue_head(&bh->b_wait);
+ }
+}
+
/* SLAB cache for __getname() consumers */
kmem_cache_t *names_cachep;
@@ -1250,12 +1248,16 @@
/* SLAB cache for buffer_head structures */
kmem_cache_t *bh_cachep;
+EXPORT_SYMBOL(bh_cachep);
+
+extern void bdev_cache_init(void);
+extern void cdev_cache_init(void);
void __init vfs_caches_init(unsigned long mempages)
{
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
+ SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
if(!bh_cachep)
panic("Cannot create buffer head SLAB cache");
@@ -1280,4 +1282,8 @@
#endif
dcache_init(mempages);
+ inode_init(mempages);
+ mnt_init(mempages);
+ bdev_cache_init();
+ cdev_cache_init();
}
Index: devices.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/devices.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- devices.c 14 Jan 2001 16:25:21 -0000 1.1.1.1
+++ devices.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -203,10 +203,10 @@
if (S_ISCHR(mode)) {
inode->i_fop = &def_chr_fops;
inode->i_rdev = to_kdev_t(rdev);
+ inode->i_cdev = cdget(rdev);
} else if (S_ISBLK(mode)) {
inode->i_fop = &def_blk_fops;
inode->i_rdev = to_kdev_t(rdev);
- inode->i_bdev = bdget(rdev);
} else if (S_ISFIFO(mode))
inode->i_fop = &def_fifo_fops;
else if (S_ISSOCK(mode))
Index: dquot.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/dquot.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- dquot.c 25 Feb 2001 23:14:46 -0000 1.1.1.2
+++ dquot.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -26,7 +26,7 @@
* dquot_incr_...() to calling functions.
* invalidate_dquots() now writes modified dquots.
* Serialized quota_off() and quota_on() for mount point.
- * Fixed a few bugs in grow_dquots.
+ * Fixed a few bugs in grow_dquots().
* Fixed deadlock in write_dquot() - we no longer account quotas on
* quota files
* remove_dquot_ref() moved to inode.c - it now traverses through inodes
@@ -34,13 +34,24 @@
* Added check for bogus uid and fixed check for group in quotactl.
* Jan Kara, <ja...@su...>, sponsored by SuSE CR, 10-11/99
[...1541 lines suppressed...]
+ ret = quota_on(sb, type, (char *) addr);
goto out;
case Q_QUOTAOFF:
ret = quota_off(sb, type);
@@ -1597,12 +1468,12 @@
goto out;
}
- flags |= QUOTA_SYSCALL;
-
- ret = -ESRCH;
+ ret = -NODEV;
if (sb && sb_has_quota_enabled(sb, type))
ret = set_dqblk(sb, id, type, flags, (struct dqblk *) addr);
out:
+ if (sb)
+ drop_super(sb);
unlock_kernel();
return ret;
}
Index: exec.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/exec.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- exec.c 25 Feb 2001 23:14:45 -0000 1.1.1.2
+++ exec.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -34,6 +34,7 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
+#include <linux/personality.h>
#define __NO_VERSION__
#include <linux/module.h>
@@ -45,6 +46,8 @@
#include <linux/kmod.h>
#endif
+int core_uses_pid;
+
static struct linux_binfmt *formats;
static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
@@ -159,11 +162,9 @@
if (argv != NULL) {
for (;;) {
char * p;
- int error;
- error = get_user(p,argv);
- if (error)
- return error;
+ if (get_user(p, argv))
+ return -EFAULT;
if (!p)
break;
argv++;
@@ -186,7 +187,7 @@
int len;
unsigned long pos;
- if (get_user(str, argv+argc) || !str || !(len = strnlen_user(str, bprm->p)))
+ if (get_user(str, argv+argc) || !(len = strnlen_user(str, bprm->p)))
return -EFAULT;
if (bprm->p < len)
return -E2BIG;
@@ -252,6 +253,8 @@
/*
* This routine is used to map in a page into an address space: needed by
* execve() for the initial stack and environment pages.
+ *
+ * tsk->mmap_sem is held for writing.
*/
void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
{
@@ -260,29 +263,32 @@
pte_t * pte;
if (page_count(page) != 1)
- printk("mem_map disagrees with %p at %08lx\n", page, address);
+ printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
pgd = pgd_offset(tsk->mm, address);
- pmd = pmd_alloc(pgd, address);
- if (!pmd) {
- __free_page(page);
- force_sig(SIGKILL, tsk);
- return;
- }
- pte = pte_alloc(pmd, address);
- if (!pte) {
- __free_page(page);
- force_sig(SIGKILL, tsk);
- return;
- }
- if (!pte_none(*pte)) {
- pte_ERROR(*pte);
- __free_page(page);
- return;
- }
+
+ spin_lock(&tsk->mm->page_table_lock);
+ pmd = pmd_alloc(tsk->mm, pgd, address);
+ if (!pmd)
+ goto out;
+ pte = pte_alloc(tsk->mm, pmd, address);
+ if (!pte)
+ goto out;
+ if (!pte_none(*pte))
+ goto out;
+ lru_cache_add(page);
flush_dcache_page(page);
flush_page_to_ram(page);
set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
-/* no need for flush_tlb */
+ tsk->mm->rss++;
+ spin_unlock(&tsk->mm->page_table_lock);
+
+ /* no need for flush_tlb */
+ return;
+out:
+ spin_unlock(&tsk->mm->page_table_lock);
+ __free_page(page);
+ force_sig(SIGKILL, tsk);
+ return;
}
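
put_dirty_page() now takes mm->page_table_lock around the pmd/pte walk
and funnels every failure through a single out: label that drops the
lock, frees the page, and kills the task. A compilable model of that
single-exit shape, with a pthread mutex standing in for the page table
lock and the flags standing in for pmd_alloc()/pte_alloc() results:

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t page_table_lock = PTHREAD_MUTEX_INITIALIZER;

    int map_page(void *page, int pmd_ok, int pte_ok)
    {
        pthread_mutex_lock(&page_table_lock);
        if (!pmd_ok)                /* pmd_alloc() failed */
            goto out;
        if (!pte_ok)                /* pte_alloc() failed */
            goto out;
        /* ... set_pte() and rss accounting would go here ... */
        pthread_mutex_unlock(&page_table_lock);
        return 0;
    out:
        pthread_mutex_unlock(&page_table_lock);
        free(page);                 /* __free_page() analogue  */
        return -1;                  /* caller delivers SIGKILL */
    }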
int setup_arg_pages(struct linux_binprm *bprm)
@@ -302,7 +308,7 @@
if (!mpnt)
return -ENOMEM;
- down(&current->mm->mmap_sem);
+ down_write(&current->mm->mmap_sem);
{
mpnt->vm_mm = current->mm;
mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
@@ -321,12 +327,11 @@
struct page *page = bprm->page[i];
if (page) {
bprm->page[i] = NULL;
- current->mm->rss++;
put_dirty_page(current,page,stack_base);
}
stack_base += PAGE_SIZE;
}
- up(&current->mm->mmap_sem);
+ up_write(&current->mm->mmap_sem);
return 0;
}
@@ -344,8 +349,11 @@
if (!err) {
inode = nd.dentry->d_inode;
file = ERR_PTR(-EACCES);
- if (!IS_NOEXEC(inode) && S_ISREG(inode->i_mode)) {
+ if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
+ S_ISREG(inode->i_mode)) {
int err = permission(inode, MAY_EXEC);
+ if (!err && !(inode->i_mode & 0111))
+ err = -EACCES;
file = ERR_PTR(err);
if (!err) {
file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
@@ -545,7 +553,7 @@
current->sas_ss_sp = current->sas_ss_size = 0;
if (current->euid == current->uid && current->egid == current->gid)
- current->dumpable = 1;
+ current->mm->dumpable = 1;
name = bprm->filename;
for (i=0; (ch = *(name++)) != '\0';) {
if (ch == '/')
@@ -562,7 +570,7 @@
if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
permission(bprm->file->f_dentry->d_inode,MAY_READ))
- current->dumpable = 0;
+ current->mm->dumpable = 0;
/* An exec changes our domain. We are no longer part of the thread
group */
@@ -577,9 +585,10 @@
mmap_failed:
flush_failed:
spin_lock_irq(&current->sigmask_lock);
- if (current->sig != oldsig)
+ if (current->sig != oldsig) {
kfree(current->sig);
- current->sig = oldsig;
+ current->sig = oldsig;
+ }
spin_unlock_irq(&current->sigmask_lock);
return retval;
}
@@ -590,7 +599,7 @@
*/
static inline int must_not_trace_exec(struct task_struct * p)
{
- return (p->ptrace & PT_PTRACED) && !cap_raised(p->p_pptr->cap_effective, CAP_SYS_PTRACE);
+ return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP);
}
/*
@@ -603,7 +612,10 @@
struct inode * inode = bprm->file->f_dentry->d_inode;
mode = inode->i_mode;
- /* Huh? We had already checked for MAY_EXEC, WTF do we check this? */
+ /*
+ * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
+ * vfs_permission lets a non-executable through
+ */
if (!(mode & 0111)) /* with at least _one_ execute bit set */
return -EACCES;
if (bprm->file->f_op == NULL)
@@ -612,7 +624,7 @@
bprm->e_uid = current->euid;
bprm->e_gid = current->egid;
- if(!IS_NOSUID(inode)) {
+ if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
/* Set-uid? */
if (mode & S_ISUID)
bprm->e_uid = inode->i_uid;
@@ -680,7 +692,7 @@
if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
!cap_issubset(new_permitted, current->cap_permitted)) {
- current->dumpable = 0;
+ current->mm->dumpable = 0;
lock_kernel();
if (must_not_trace_exec(current)
@@ -759,7 +771,6 @@
if (!bprm->loader && eh->fh.f_magic == 0x183 &&
(eh->fh.f_flags & 0x3000) == 0x3000)
{
- char * dynloader[] = { "/sbin/loader" };
struct file * file;
unsigned long loader;
@@ -769,10 +780,14 @@
loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
- file = open_exec(dynloader[0]);
+ file = open_exec("/sbin/loader");
retval = PTR_ERR(file);
if (IS_ERR(file))
return retval;
+
+ /* Remember if the application is TASO. */
+ bprm->sh_bang = eh->ah.entry < 0x100000000;
+
bprm->file = file;
bprm->loader = loader;
retval = prepare_binprm(bprm);
@@ -783,6 +798,9 @@
}
}
#endif
+ /* kernel module loader fixup */
+ /* so we don't try to run modprobe in kernel space. */
+ set_fs(USER_DS);
for (try=0; try<2; try++) {
read_lock(&binfmt_lock);
for (fmt = formats ; fmt ; fmt = fmt->next) {
@@ -918,26 +936,25 @@
int do_coredump(long signr, struct pt_regs * regs)
{
struct linux_binfmt * binfmt;
- char corename[6+sizeof(current->comm)];
+ char corename[6+sizeof(current->comm)+10];
struct file * file;
struct inode * inode;
+ int retval = 0;
lock_kernel();
binfmt = current->binfmt;
if (!binfmt || !binfmt->core_dump)
goto fail;
- if (!current->dumpable || atomic_read(&current->mm->mm_users) != 1)
+ if (!current->mm->dumpable)
goto fail;
- current->dumpable = 0;
+ current->mm->dumpable = 0;
if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
goto fail;
memcpy(corename,"core.", 5);
-#if 0
- memcpy(corename+5,current->comm,sizeof(current->comm));
-#else
corename[4] = '\0';
-#endif
+ if (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)
+ sprintf(&corename[4], ".%d", current->pid);
file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
if (IS_ERR(file))
goto fail;
@@ -955,15 +972,14 @@
goto close_fail;
if (do_truncate(file->f_dentry, 0) != 0)
goto close_fail;
- if (!binfmt->core_dump(signr, regs, file))
- goto close_fail;
- unlock_kernel();
- filp_close(file, NULL);
- return 1;
+
+ down_read(&current->mm->mmap_sem);
+ retval = binfmt->core_dump(signr, regs, file);
+ up_read(&current->mm->mmap_sem);
close_fail:
filp_close(file, NULL);
fail:
unlock_kernel();
- return 0;
+ return retval;
}
Index: fcntl.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/fcntl.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- fcntl.c 14 Jan 2001 16:25:03 -0000 1.1.1.1
+++ fcntl.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -10,6 +10,7 @@
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
+#include <linux/iobuf.h>
#include <asm/poll.h>
#include <asm/siginfo.h>
@@ -194,7 +195,7 @@
return ret;
}
-#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC)
+#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT)
static int setfl(int fd, struct file * filp, unsigned long arg)
{
@@ -217,6 +218,25 @@
}
}
+ if (arg & O_DIRECT) {
+ /*
+ * alloc_kiovec() can sleep and we are only serialized by
+ * the big kernel lock here, so abuse the i_sem to serialize
+ * this case too. We of course wouldn't need to go deep down
+ * to the inode layer, we could stay at the file layer, but
+ * we don't want to pay for the memory of a semaphore in each
+ * file structure too and we use the inode semaphore that we just
+ * pay for anyways.
+ */
+ error = 0;
+ down(&inode->i_sem);
+ if (!filp->f_iobuf)
+ error = alloc_kiovec(1, &filp->f_iobuf);
+ up(&inode->i_sem);
+ if (error < 0)
+ return error;
+ }
+
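
The new O_DIRECT branch allocates the per-file kiovec lazily, the first
time F_SETFL asks for it, and serializes the check-then-allocate step on
the inode semaphore so two racing fcntl() callers cannot both allocate.
A small analogue of the pattern (the struct and names are invented for
illustration):

    #include <pthread.h>
    #include <stdlib.h>

    struct file_model {
        pthread_mutex_t i_sem;   /* stands in for inode->i_sem  */
        void *iobuf;             /* stands in for filp->f_iobuf */
    };

    int enable_direct(struct file_model *f)
    {
        int error = 0;

        pthread_mutex_lock(&f->i_sem);
        if (!f->iobuf) {             /* allocate only once per file */
            f->iobuf = malloc(4096); /* alloc_kiovec() analogue     */
            if (!f->iobuf)
                error = -1;
        }
        pthread_mutex_unlock(&f->i_sem);
        return error;
    }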
/* required for strict SunOS emulation */
if (O_NONBLOCK != O_NDELAY)
if (arg & O_NDELAY)
@@ -338,7 +358,6 @@
if (!filp)
goto out;
- lock_kernel();
switch (cmd) {
case F_GETLK64:
err = fcntl_getlk64(fd, (struct flock64 *) arg);
@@ -353,7 +372,6 @@
err = do_fcntl(fd, cmd, arg, filp);
break;
}
- unlock_kernel();
fput(filp);
out:
return err;
Index: file_table.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/file_table.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- file_table.c 14 Jan 2001 16:24:51 -0000 1.1.1.1
+++ file_table.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
+#include <linux/iobuf.h>
/* sysctl tunables... */
struct files_stat_struct files_stat = {0, 0, NR_FILE};
@@ -66,10 +67,10 @@
goto new_one;
}
/* Big problems... */
- printk("VFS: filp allocation failed\n");
+ printk(KERN_WARNING "VFS: filp allocation failed\n");
} else if (files_stat.max_files > old_max) {
- printk("VFS: file-max limit %d reached\n", files_stat.max_files);
+ printk(KERN_INFO "VFS: file-max limit %d reached\n", files_stat.max_files);
old_max = files_stat.max_files;
}
file_list_unlock();
@@ -104,21 +105,24 @@
if (atomic_dec_and_test(&file->f_count)) {
locks_remove_flock(file);
+
+ if (file->f_iobuf)
+ free_kiovec(1, &file->f_iobuf);
+
if (file->f_op && file->f_op->release)
file->f_op->release(inode, file);
fops_put(file->f_op);
- file->f_dentry = NULL;
- file->f_vfsmnt = NULL;
if (file->f_mode & FMODE_WRITE)
put_write_access(inode);
- dput(dentry);
- if (mnt)
- mntput(mnt);
file_list_lock();
+ file->f_dentry = NULL;
+ file->f_vfsmnt = NULL;
list_del(&file->f_list);
list_add(&file->f_list, &free_list);
files_stat.nr_free_files++;
file_list_unlock();
+ dput(dentry);
+ mntput(mnt);
}
}
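
fput() now clears f_dentry and f_vfsmnt while file_list_lock is held,
but defers the dput()/mntput() calls, which can block, until after the
lock is dropped. The same ordering in miniature (pthread mutex standing
in for file_list_lock, empty helpers for the blocking puts):

    #include <pthread.h>

    struct file_model { void *dentry, *mnt; };

    static pthread_mutex_t file_list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void dput(void *d)   { (void)d; /* may block */ }
    static void mntput(void *m) { (void)m; /* may block */ }

    void last_fput(struct file_model *f)
    {
        void *dentry = f->dentry, *mnt = f->mnt;

        pthread_mutex_lock(&file_list_lock);
        f->dentry = NULL;            /* detach under the lock */
        f->mnt = NULL;
        pthread_mutex_unlock(&file_list_lock);

        dput(dentry);                /* blocking work after unlock */
        mntput(mnt);
    }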
@@ -158,14 +162,6 @@
file_list_unlock();
}
-void file_moveto(struct file *new, struct file *old)
-{
- file_list_lock();
- list_del(&new->f_list);
- list_add(&new->f_list, &old->f_list);
- file_list_unlock();
-}
-
int fs_may_remount_ro(struct super_block *sb)
{
struct list_head *p;
@@ -174,12 +170,7 @@
file_list_lock();
for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
struct file *file = list_entry(p, struct file, f_list);
- struct inode *inode;
-
- if (!file->f_dentry)
- continue;
-
- inode = file->f_dentry->d_inode;
+ struct inode *inode = file->f_dentry->d_inode;
/* File with pending delete? */
if (inode->i_nlink == 0)
Index: filesystems.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/filesystems.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- filesystems.c 14 Jan 2001 16:25:21 -0000 1.1.1.1
+++ filesystems.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -7,36 +7,11 @@
*/
#include <linux/config.h>
-#include <linux/fs.h>
-
-#include <linux/devfs_fs_kernel.h>
-#include <linux/nfs_fs.h>
-#include <linux/auto_fs.h>
-#include <linux/devpts_fs.h>
-#include <linux/major.h>
-#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/kmod.h>
-#include <linux/init.h>
-#include <linux/module.h>
#include <linux/nfsd/interface.h>
-
-#ifdef CONFIG_DEVPTS_FS
-extern int init_devpts_fs(void);
-#endif
-
-void __init filesystem_setup(void)
-{
- init_devfs_fs(); /* Header file may make this empty */
-
-#ifdef CONFIG_NFS_FS
- init_nfs_fs();
-#endif
-
-#ifdef CONFIG_DEVPTS_FS
- init_devpts_fs();
-#endif
-}
#if defined(CONFIG_NFSD_MODULE)
struct nfsd_linkage *nfsd_linkage = NULL;
Index: inode.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/inode.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- inode.c 25 Feb 2001 23:14:45 -0000 1.1.1.2
+++ inode.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -13,6 +13,10 @@
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/cache.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>
+#include <linux/prefetch.h>
+#include <linux/locks.h>
/*
* New inode.c implementation.
@@ -62,7 +66,7 @@
* NOTE! You also have to own the lock if you change
* the i_state of an inode while it is in use..
*/
-spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
/*
* Statistics gathering..
@@ -75,7 +79,7 @@
((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
static void destroy_inode(struct inode *inode)
{
- if (!list_empty(&inode->i_dirty_buffers))
+ if (inode_has_buffers(inode))
BUG();
kmem_cache_free(inode_cachep, (inode));
}
@@ -101,6 +105,8 @@
INIT_LIST_HEAD(&inode->i_data.locked_pages);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_dirty_buffers);
+ INIT_LIST_HEAD(&inode->i_dirty_data_buffers);
+ INIT_LIST_HEAD(&inode->i_devices);
sema_init(&inode->i_sem, 1);
sema_init(&inode->i_zombie, 1);
spin_lock_init(&inode->i_data.i_shared_lock);
@@ -123,36 +129,38 @@
/**
* __mark_inode_dirty - internal function
* @inode: inode to mark
- *
- * Mark an inode as dirty. Callers should use mark_inode_dirty.
+ * @flags: what kind of dirty (e.g. I_DIRTY_SYNC)
+ * Mark an inode as dirty. Callers should use mark_inode_dirty or
+ * mark_inode_dirty_sync.
*/
void __mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block * sb = inode->i_sb;
- if (sb) {
- /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */
- if (flags & (I_DIRTY | I_DIRTY_SYNC)) {
- if (sb->s_op && sb->s_op->dirty_inode)
- sb->s_op->dirty_inode(inode);
- }
+ if (!sb)
+ return;
- /* avoid the locking if we can */
- if ((inode->i_state & flags) == flags)
- return;
+ /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */
+ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+ if (sb->s_op && sb->s_op->dirty_inode)
+ sb->s_op->dirty_inode(inode);
+ }
- spin_lock(&inode_lock);
- if ((inode->i_state & flags) != flags) {
- inode->i_state |= flags;
- /* Only add valid (ie hashed) inodes to the dirty list */
- if (!list_empty(&inode->i_hash)) {
- list_del(&inode->i_list);
- list_add(&inode->i_list, &sb->s_dirty);
- }
+ /* avoid the locking if we can */
+ if ((inode->i_state & flags) == flags)
+ return;
+
+ spin_lock(&inode_lock);
+ if ((inode->i_state & flags) != flags) {
+ inode->i_state |= flags;
+ /* Only add valid (ie hashed) inodes to the dirty list */
+ if (!(inode->i_state & I_LOCK) && !list_empty(&inode->i_hash)) {
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &sb->s_dirty);
}
- spin_unlock(&inode_lock);
}
+ spin_unlock(&inode_lock);
}
static void __wait_on_inode(struct inode * inode)
@@ -179,7 +187,7 @@
static inline void write_inode(struct inode *inode, int sync)
{
- if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
+ if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
inode->i_sb->s_op->write_inode(inode, sync);
}
@@ -190,13 +198,53 @@
return;
}
atomic_inc(&inode->i_count);
- if (!(inode->i_state & I_DIRTY)) {
+ if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
list_del(&inode->i_list);
list_add(&inode->i_list, &inode_in_use);
}
inodes_stat.nr_unused--;
}
+static inline void __sync_one(struct inode *inode, int sync)
+{
+ unsigned dirty;
+
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &inode->i_sb->s_locked_inodes);
+
+ if (inode->i_state & I_LOCK)
+ BUG();
+
+ /* Set I_LOCK, reset I_DIRTY */
+ dirty = inode->i_state & I_DIRTY;
+ inode->i_state |= I_LOCK;
+ inode->i_state &= ~I_DIRTY;
+ spin_unlock(&inode_lock);
+
+ filemap_fdatasync(inode->i_mapping);
+
+ /* Don't write the inode if only I_DIRTY_PAGES was set */
+ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
+ write_inode(inode, sync);
+
+ filemap_fdatawait(inode->i_mapping);
+
+ spin_lock(&inode_lock);
+ inode->i_state &= ~I_LOCK;
+ if (!(inode->i_state & I_FREEING)) {
+ struct list_head *to;
+ if (inode->i_state & I_DIRTY)
+ to = &inode->i_sb->s_dirty;
+ else if (atomic_read(&inode->i_count))
+ to = &inode_in_use;
+ else
+ to = &inode_unused;
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, to);
+ }
+ wake_up(&inode->i_wait);
+}
+
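
__sync_one() above factors out the writeback protocol: while inode_lock
is held, move the inode to s_locked_inodes, set I_LOCK and latch then
clear the dirty bits; drop the lock for the actual I/O; retake it to
requeue the inode and wake waiters. The lock/flag dance in a userspace
model (flag values and names are illustrative only):

    #include <pthread.h>

    #define I_LOCK_M  0x1    /* stand-in flag values */
    #define I_DIRTY_M 0x6

    struct inode_model { unsigned state; };

    static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;

    static void do_writeout(struct inode_model *i) { (void)i; /* blocking I/O */ }

    void sync_one_model(struct inode_model *inode)
    {
        unsigned dirty;

        pthread_mutex_lock(&inode_lock);
        dirty = inode->state & I_DIRTY_M;   /* latch what to write      */
        inode->state |= I_LOCK_M;           /* claim the inode...       */
        inode->state &= ~I_DIRTY_M;         /* ...and clear dirty bits  */
        pthread_mutex_unlock(&inode_lock);  /* never hold it across I/O */

        if (dirty)
            do_writeout(inode);

        pthread_mutex_lock(&inode_lock);
        inode->state &= ~I_LOCK_M;          /* requeue + wake_up() here */
        pthread_mutex_unlock(&inode_lock);
    }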
static inline void sync_one(struct inode *inode, int sync)
{
if (inode->i_state & I_LOCK) {
@@ -206,38 +254,117 @@
iput(inode);
spin_lock(&inode_lock);
} else {
- unsigned dirty;
+ __sync_one(inode, sync);
+ }
+}
- list_del(&inode->i_list);
- list_add(&inode->i_list, atomic_read(&inode->i_count)
- ? &inode_in_use
- : &inode_unused);
- /* Set I_LOCK, reset I_DIRTY */
- dirty = inode->i_state & I_DIRTY;
- inode->i_state |= I_LOCK;
- inode->i_state &= ~I_DIRTY;
+static inline void sync_list(struct list_head *head)
+{
+ struct list_head * tmp;
+
+ while ((tmp = head->prev) != head)
+ __sync_one(list_entry(tmp, struct inode, i_list), 0);
+}
+
+static inline void wait_on_locked(struct list_head *head)
+{
+ struct list_head * tmp;
+ while ((tmp = head->prev) != head) {
+ struct inode *inode = list_entry(tmp, struct inode, i_list);
+ __iget(inode);
spin_unlock(&inode_lock);
+ __wait_on_inode(inode);
+ iput(inode);
+ spin_lock(&inode_lock);
+ }
+}
- filemap_fdatasync(inode->i_mapping);
+static inline int try_to_sync_unused_list(struct list_head *head, int nr_inodes)
+{
+ struct list_head *tmp = head;
+ struct inode *inode;
- /* Don't write the inode if only I_DIRTY_PAGES was set */
- if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
- write_inode(inode, sync);
+ while (nr_inodes && (tmp = tmp->prev) != head) {
+ inode = list_entry(tmp, struct inode, i_list);
- filemap_fdatawait(inode->i_mapping);
+ if (!atomic_read(&inode->i_count)) {
+ __sync_one(inode, 0);
+ nr_inodes--;
- spin_lock(&inode_lock);
- inode->i_state &= ~I_LOCK;
- wake_up(&inode->i_wait);
+ /*
+ * __sync_one moved the inode to another list,
+ * so we have to start looking from the list head.
+ */
+ tmp = head;
+ }
+ }
+
+ return nr_inodes;
+}
+
+void sync_inodes_sb(struct super_block *sb)
+{
+ spin_lock(&inode_lock);
+ while (!list_empty(&sb->s_dirty)||!list_empty(&sb->s_locked_inodes)) {
+ sync_list(&sb->s_dirty);
+ wait_on_locked(&sb->s_locked_inodes);
}
+ spin_unlock(&inode_lock);
}
-static inline void sync_list(struct list_head *head)
+/*
+ * Note:
+ * We don't need to grab a reference to superblock here. If it has non-empty
+ * ->s_dirty it hasn't been killed yet and kill_super() won't proceed
+ * past sync_inodes_sb() until both ->s_dirty and ->s_locked_inodes are
+ * empty. Since __sync_one() regains inode_lock before it finally moves
+ * inode from superblock lists we are OK.
+ */
+
+void sync_unlocked_inodes(void)
{
- struct list_head * tmp;
+ struct super_block * sb;
+ spin_lock(&inode_lock);
+ spin_lock(&sb_lock);
+ sb = sb_entry(super_blocks.next);
+ for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
+ if (!list_empty(&sb->s_dirty)) {
+ spin_unlock(&sb_lock);
+ sync_list(&sb->s_dirty);
+ spin_lock(&sb_lock);
+ }
+ }
+ spin_unlock(&sb_lock);
+ spin_unlock(&inode_lock);
+}
- while ((tmp = head->prev) != head)
- sync_one(list_entry(tmp, struct inode, i_list), 0);
+/*
+ * Find a superblock with inodes that need to be synced
+ */
+
+static struct super_block *get_super_to_sync(void)
+{
+ struct list_head *p;
+restart:
+ spin_lock(&inode_lock);
+ spin_lock(&sb_lock);
+ list_for_each(p, &super_blocks) {
+ struct super_block *s = list_entry(p,struct super_block,s_list);
+ if (list_empty(&s->s_dirty) && list_empty(&s->s_locked_inodes))
+ continue;
+ s->s_count++;
+ spin_unlock(&sb_lock);
+ spin_unlock(&inode_lock);
+ down_read(&s->s_umount);
+ if (!s->s_root) {
+ drop_super(s);
+ goto restart;
+ }
+ return s;
+ }
+ spin_unlock(&sb_lock);
+ spin_unlock(&inode_lock);
+ return NULL;
}
/**
@@ -247,42 +374,48 @@
* sync_inodes goes through the super block's dirty list,
* writes them out, and puts them back on the normal list.
*/
-
+
void sync_inodes(kdev_t dev)
{
- struct super_block * sb = sb_entry(super_blocks.next);
+ struct super_block * s;
/*
* Search the super_blocks array for the device(s) to sync.
*/
- spin_lock(&inode_lock);
- for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
- if (!sb->s_dev)
- continue;
- if (dev && sb->s_dev != dev)
- continue;
-
- sync_list(&sb->s_dirty);
-
- if (dev)
- break;
+ if (dev) {
+ if ((s = get_super(dev)) != NULL) {
+ sync_inodes_sb(s);
+ drop_super(s);
+ }
+ } else {
+ while ((s = get_super_to_sync()) != NULL) {
+ sync_inodes_sb(s);
+ drop_super(s);
+ }
}
- spin_unlock(&inode_lock);
}
-/*
- * Called with the spinlock already held..
- */
-static void sync_all_inodes(void)
+static void try_to_sync_unused_inodes(void * arg)
{
- struct super_block * sb = sb_entry(super_blocks.next);
- for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
- if (!sb->s_dev)
+ struct super_block * sb;
+ int nr_inodes = inodes_stat.nr_unused;
+
+ spin_lock(&inode_lock);
+ spin_lock(&sb_lock);
+ sb = sb_entry(super_blocks.next);
+ for (; nr_inodes && sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
+ if (list_empty(&sb->s_dirty))
continue;
- sync_list(&sb->s_dirty);
+ spin_unlock(&sb_lock);
+ nr_inodes = try_to_sync_unused_list(&sb->s_dirty, nr_inodes);
+ spin_lock(&sb_lock);
}
+ spin_unlock(&sb_lock);
+ spin_unlock(&inode_lock);
}
+static struct tq_struct unused_inodes_flush_task;
+
/**
* write_inode_now - write an inode to disk
* @inode: inode to write to disk
@@ -301,9 +434,11 @@
while (inode->i_state & I_DIRTY)
sync_one(inode, sync);
spin_unlock(&inode_lock);
+ if (sync)
+ wait_on_inode(inode);
}
else
- printk("write_inode_now: no super block\n");
+ printk(KERN_ERR "write_inode_now: no super block\n");
}
/**
@@ -315,9 +450,9 @@
* O_SYNC flag set, to flush dirty writes to disk.
*/
-int generic_osync_inode(struct inode *inode, int datasync)
+int generic_osync_inode(struct inode *inode, int what)
{
- int err;
+ int err = 0, err2 = 0, need_write_inode_now = 0;
/*
* WARNING
@@ -340,23 +475,24 @@
* every O_SYNC write, not just the synchronous I/Os. --sct
*/
-#ifdef WRITERS_QUEUE_IO
- err = osync_inode_buffers(inode);
-#else
- err = fsync_inode_buffers(inode);
-#endif
+ if (what & OSYNC_METADATA)
+ err = fsync_inode_buffers(inode);
+ if (what & OSYNC_DATA)
+ err2 = fsync_inode_data_buffers(inode);
+ if (!err)
+ err = err2;
spin_lock(&inode_lock);
- if (!(inode->i_state & I_DIRTY))
- goto out;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- goto out;
+ if ((inode->i_state & I_DIRTY) &&
+ ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
+ need_write_inode_now = 1;
spin_unlock(&inode_lock);
- write_inode_now(inode, 1);
- return err;
- out:
- spin_unlock(&inode_lock);
+ if (need_write_inode_now)
+ write_inode_now(inode, 1);
+ else
+ wait_on_inode(inode);
+
return err;
}
@@ -371,8 +507,7 @@
void clear_inode(struct inode *inode)
{
- if (!list_empty(&inode->i_dirty_buffers))
- invalidate_inode_buffers(inode);
+ invalidate_inode_buffers(inode);
if (inode->i_data.nrpages)
BUG();
@@ -381,13 +516,14 @@
if (inode->i_state & I_CLEAR)
BUG();
wait_on_inode(inode);
- if (IS_QUOTAINIT(inode))
- DQUOT_DROP(inode);
+ DQUOT_DROP(inode);
if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
inode->i_sb->s_op->clear_inode(inode);
- if (inode->i_bdev) {
- bdput(inode->i_bdev);
- inode->i_bdev = NULL;
+ if (inode->i_bdev)
+ bd_forget(inode);
+ else if (inode->i_cdev) {
+ cdput(inode->i_cdev);
+ inode->i_cdev = NULL;
}
inode->i_state = I_CLEAR;
}
@@ -435,8 +571,7 @@
continue;
invalidate_inode_buffers(inode);
if (!atomic_read(&inode->i_count)) {
- list_del(&inode->i_hash);
- INIT_LIST_HEAD(&inode->i_hash);
+ list_del_init(&inode->i_hash);
list_del(&inode->i_list);
list_add(&inode->i_list, dispose);
inode->i_state |= I_FREEING;
@@ -476,12 +611,39 @@
busy = invalidate_list(&inode_in_use, sb, &throw_away);
busy |= invalidate_list(&inode_unused, sb, &throw_away);
busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
spin_unlock(&inode_lock);
dispose_list(&throw_away);
return busy;
}
+
+int invalidate_device(kdev_t dev, int do_sync)
+{
+ struct super_block *sb;
+ int res;
+
+ if (do_sync)
+ fsync_dev(dev);
+
+ res = 0;
+ sb = get_super(dev);
+ if (sb) {
+ /*
+ * no need to lock the super, get_super holds the
+ * read semaphore so the filesystem cannot go away
+ * under us (->put_super runs with the write lock
+ * hold).
+ */
+ shrink_dcache_sb(sb);
+ res = invalidate_inodes(sb);
+ drop_super(sb);
+ }
+ invalidate_buffers(dev);
+ return res;
+}
+
/*
* This is called with the inode lock held. It searches
@@ -503,13 +665,12 @@
{
LIST_HEAD(list);
struct list_head *entry, *freeable = &list;
- int count = 0;
+ int count;
struct inode * inode;
spin_lock(&inode_lock);
- /* go simple and safe syncing everything before starting */
- sync_all_inodes();
+ count = 0;
entry = inode_unused.prev;
while (entry != &inode_unused)
{
@@ -517,12 +678,12 @@
entry = entry->prev;
inode = INODE(tmp);
- if (inode->i_state & (I_FREEING|I_CLEAR))
- BUG();
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
+ continue;
if (!CAN_UNUSE(inode))
continue;
if (atomic_read(&inode->i_count))
- BUG();
+ continue;
list_del(tmp);
list_del(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_hash);
@@ -536,9 +697,18 @@
spin_unlock(&inode_lock);
dispose_list(freeable);
+
+ /*
+ * If we didn't free enough clean inodes, schedule
+ * a sync of the dirty inodes; we cannot do it
+ * from here or we're either synchronously dogslow
+ * or we deadlock with oom.
+ */
+ if (goal)
+ schedule_task(&unused_inodes_flush_task);
}
-void shrink_icache_memory(int priority, int gfp_mask)
+int shrink_icache_memory(int priority, int gfp_mask)
{
int count = 0;
@@ -549,14 +719,14 @@
* want to recurse into the FS that called us
* in clear_inode() and friends..
*/
- if (!(gfp_mask & __GFP_IO))
- return;
+ if (!(gfp_mask & __GFP_FS))
+ return 0;
- if (priority)
- count = inodes_stat.nr_unused / priority;
+ count = inodes_stat.nr_unused / priority;
prune_icache(count);
kmem_cache_shrink(inode_cachep);
+ return 0;
}
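
prune_icache() no longer syncs everything inline; when it could not free
enough clean inodes it queues unused_inodes_flush_task via
schedule_task(), so the expensive sync runs later in process context
instead of in the memory-pressure path. A toy one-slot version of that
deferred-work pattern:

    #include <stdio.h>

    struct tq_model {
        void (*routine)(void *);
        void *data;
    };

    static struct tq_model *pending;     /* single-slot "task queue" */

    static void schedule_task_model(struct tq_model *t) { pending = t; }

    static void run_pending(void)        /* what keventd would do later */
    {
        if (pending) {
            pending->routine(pending->data);
            pending = NULL;
        }
    }

    static void flush_unused(void *arg)
    {
        (void)arg;
        puts("syncing dirty inodes");
    }

    int main(void)
    {
        static struct tq_model flush_task = { flush_unused, NULL };

        schedule_task_model(&flush_task);   /* cheap: just queue it     */
        run_pending();                      /* heavy work happens later */
        return 0;
    }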
/*
@@ -607,12 +777,15 @@
inode->i_nlink = 1;
atomic_set(&inode->i_writecount, 0);
inode->i_size = 0;
+ inode->i_blocks = 0;
inode->i_generation = 0;
memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
inode->i_pipe = NULL;
inode->i_bdev = NULL;
+ inode->i_cdev = NULL;
inode->i_data.a_ops = &empty_aops;
inode->i_data.host = inode;
+ inode->i_data.gfp_mask = GFP_HIGHUSER;
inode->i_mapping = &inode->i_data;
}
@@ -634,6 +807,8 @@
static unsigned long last_ino;
struct inode * inode;
+ spin_lock_prefetch(&inode_lock);
+
inode = alloc_inode();
if (inode)
{
@@ -642,6 +817,7 @@
list_add(&inode->i_list, &inode_in_use);
inode->i_sb = NULL;
inode->i_dev = 0;
+ inode->i_blkbits = 0;
inode->i_ino = ++last_ino;
inode->i_flags = 0;
atomic_set(&inode->i_count, 1);
@@ -675,6 +851,7 @@
list_add(&inode->i_hash, head);
inode->i_sb = sb;
inode->i_dev = sb->s_dev;
+ inode->i_blkbits = sb->s_blocksize_bits;
inode->i_ino = ino;
inode->i_flags = 0;
atomic_set(&inode->i_count, 1);
@@ -781,8 +958,6 @@
*/
inode = NULL;
spin_unlock(&inode_lock);
- if (inode)
- wait_on_inode(inode);
return inode;
}
@@ -853,10 +1028,14 @@
void iput(struct inode *inode)
{
if (inode) {
+ struct super_block *sb = inode->i_sb;
struct super_operations *op = NULL;
- if (inode->i_sb && inode->i_sb->s_op)
- op = inode->i_sb->s_op;
+ if (inode->i_state == I_CLEAR)
+ BUG();
+
+ if (sb && sb->s_op)
+ op = sb->s_op;
if (op && op->put_inode)
op->put_inode(inode);
@@ -877,6 +1056,8 @@
if (op && op->delete_inode) {
void (*delete)(struct inode *) = op->delete_inode;
+ if (!is_bad_inode(inode))
+ DQUOT_INIT(inode);
/* s_op->delete_inode internally recalls clear_inode() */
delete(inode);
} else
@@ -884,22 +1065,22 @@
if (inode->i_state != I_CLEAR)
BUG();
} else {
- if (!list_empty(&inode->i_hash)) {
- if (!(inode->i_state & I_DIRTY)) {
+ if (!list_empty(&inode->i_hash) && sb && sb->s_root) {
+ if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
list_del(&inode->i_list);
- list_add(&inode->i_list,
- &inode_unused);
+ list_add(&inode->i_list, &inode_unused);
}
inodes_stat.nr_unused++;
spin_unlock(&inode_lock);
return;
} else {
- /* magic nfs path */
- list_del(&inode->i_list);
- INIT_LIST_HEAD(&inode->i_list);
+ list_del_init(&inode->i_list);
+ list_del_init(&inode->i_hash);
inode->i_state|=I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
+ if (inode->i_data.nrpages)
+ truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
}
}
@@ -988,6 +1169,8 @@
NULL);
if (!inode_cachep)
panic("cannot create inode slab cache");
+
+ unused_inodes_flush_task.routine = try_to_sync_unused_inodes;
}
/**
@@ -1018,38 +1201,40 @@
void put_dquot_list(struct list_head *);
int remove_inode_dquot_ref(struct inode *, short, struct list_head *);
-void remove_dquot_ref(kdev_t dev, short type)
+void remove_dquot_ref(struct super_block *sb, short type)
{
- struct super_block *sb = get_super(dev);
struct inode *inode;
struct list_head *act_head;
LIST_HEAD(tofree_head);
- if (!sb || !sb->dq_op)
+ if (!sb->dq_op)
return; /* nothing to do */
-
/* We have to be protected against other CPUs */
- spin_lock(&inode_lock);
+ lock_kernel(); /* This lock is for quota code */
+ spin_lock(&inode_lock); /* This lock is for inodes code */
- for (act_head = inode_in_use.next; act_head != &inode_in_use; act_head = act_head->next) {
+ list_for_each(act_head, &inode_in_use) {
inode = list_entry(act_head, struct inode, i_list);
- if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
- continue;
- remove_inode_dquot_ref(inode, type, &tofree_head);
+ if (inode->i_sb == sb && IS_QUOTAINIT(inode))
+ remove_inode_dquot_ref(inode, type, &tofree_head);
}
- for (act_head = inode_unused.next; act_head != &inode_unused; act_head = act_head->next) {
+ list_for_each(act_head, &inode_unused) {
inode = list_entry(act_head, struct inode, i_list);
- if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
- continue;
- remove_inode_dquot_ref(inode, type, &tofree_head);
+ if (inode->i_sb == sb && IS_QUOTAINIT(inode))
+ remove_inode_dquot_ref(inode, type, &tofree_head);
}
- for (act_head = sb->s_dirty.next; act_head != &sb->s_dirty; act_head = act_head->next) {
+ list_for_each(act_head, &sb->s_dirty) {
inode = list_entry(act_head, struct inode, i_list);
- if (!IS_QUOTAINIT(inode))
- continue;
- remove_inode_dquot_ref(inode, type, &tofree_head);
+ if (IS_QUOTAINIT(inode))
+ remove_inode_dquot_ref(inode, type, &tofree_head);
+ }
+ list_for_each(act_head, &sb->s_locked_inodes) {
+ inode = list_entry(act_head, struct inode, i_list);
+ if (IS_QUOTAINIT(inode))
+ remove_inode_dquot_ref(inode, type, &tofree_head);
}
spin_unlock(&inode_lock);
+ unlock_kernel();
put_dquot_list(&tofree_head);
}
Index: iobuf.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/fs/iobuf.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- iobuf.c 25 Feb 2001 23:14:46 -0000 1.1.1.2
+++ iobuf.c 9 Apr 2002 13:11:16 -0000 1.2
@@ -8,9 +8,7 @@
#include <linux/iobuf.h>
#include <linux/slab.h>
-
-static kmem_cache_t *kiobuf_cachep;
-
+#include <linux/vmalloc.h>
void end_kio_request(struct kiobuf *kiobuf, int uptodate)
{
@@ -24,18 +22,7 @@
}
}
-
-void __init kiobuf_setup(void)
-{
- kiobuf_cachep = kmem_cache_create("kiobuf",
- sizeof(struct kiobuf),
- 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
- if(!kiobuf_cachep)
- panic("Cannot create kernel iobuf cache\n");
-}
-
-void kiobuf_init(struct kiobuf *iobuf)
+static void kiobuf_init(struct kiobuf *iobuf)
{
memset(iobuf, 0, sizeof(*iobuf));
init_waitqueue_head(&iobuf->wait_queue);
@@ -43,19 +30,49 @@
iobuf->maplist = iobuf->map_array;
}
+int alloc_kiobuf_bhs(struct kiobuf * kiobuf)
+{
+ int i;
+
+ for (i = 0; i < KIO_MAX_SECTORS; i++)
+ if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) {
+ while (i--) {
+ kmem_cache_free(bh_cachep, kiobuf->bh[i]);
+ kiobuf->bh[i] = NULL;
+ }
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void free_kiobuf_bhs(struct kiobuf * kiobuf)
+{
+ int i;
+
+ for (i = 0; i < KIO_MAX_SECTORS; i++) {
+ kmem_cache_free(bh_cachep, kiobuf->bh[i]);
+ kiobuf->bh[i] = NULL;
+ }
+}
+
int alloc_kiovec(int nr, struct kiobuf **bufp)
{
int i;
struct kiobuf *iobuf;
for (i = 0; i < nr; i++) {
- iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL);
+ iobuf = vmalloc(sizeof(struct kiobuf));
if (!iobuf) {
free_kiovec(i, bufp);
return -ENOMEM;
}
kiobuf_init(iobuf);
- *bufp++ = iobuf;
+ if (alloc_kiobuf_bhs(iobuf)) {
+ vfree(iobuf);
+ free_kiovec(i, bufp);
+ return -ENOMEM;
+ }
+ bufp[i] = iobuf;
}
return 0;
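
alloc_kiobuf_bhs() grabs all KIO_MAX_SECTORS buffer heads up front and,
on the first failure, walks back over the ones already taken before
returning -ENOMEM, so a kiobuf is never left half-populated. The unwind
idiom in isolation (userspace malloc/free standing in for the slab
cache):

    #include <stdlib.h>
    #include <errno.h>

    #define NBUF 8   /* stand-in for KIO_MAX_SECTORS */

    int alloc_all(void *bh[NBUF])
    {
        int i;

        for (i = 0; i < NBUF; i++) {
            bh[i] = malloc(64);
            if (!bh[i]) {
                while (i--) {        /* unwind the partial allocation */
                    free(bh[i]);
                    bh[i] = NULL;
                }
                return -ENOMEM;
            }
        }
        return 0;
    }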
@@ -72,7 +89,8 @@
unlock_kiovec(1, &iobuf);
if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->maplist);
- kmem_cache_free(kiobuf_cachep, bufp[i]);
+ free_kiobuf_bhs(iobuf);
+ vfree(bufp[i]);
}
}
@@ -115,11 +133,12 @@
add_wait_queue(&kiobuf->wait_queue, &wait);
repeat:
- run_task_queue(&tq_disk);
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (atomic_read(&kiobuf->io_count) != 0) {
+ run_task_queue(&tq_disk);
schedule();
- goto repeat;
+ if (atomic_read(&kiobuf->io_count) != 0)
+ goto repeat;
}
tsk->state = TASK_RUNNING;
remove_wait_queue(&kiobuf->wait_queue, &wait);
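
The reworked wait loop above sets the task state before testing
io_count, re-tests the count after every schedule(), and only kicks
tq_disk while I/O is actually outstanding, closing the window where a
wakeup between the test and the sleep could be lost. The same
re-test-after-wakeup discipline in a userspace analogue (pthread
condition variable instead of a kernel waitqueue):

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
    static int io_count;   /* incremented by the submit path */

    void wait_for_io(void)
    {
        pthread_mutex_lock(&lock);
        while (io_count != 0)          /* re-check after each wakeup */
            pthread_cond_wait(&done, &lock);
        pthread_mutex_unlock(&lock);
    }

    void io_complete(void)             /* completion ("interrupt") side */
    {
        pthread_mutex_lock(&lock);
        if (--io_count == 0)
            pthread_cond_broadcast(&done);
        pthread_mutex_unlock(&lock);
    }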