From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:31
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> Even here, we reuse values from one asm statement to the next without telling this to GCC - so fix this. While at it, a bit of improvements to the generated asm code, with better use of constraints. Still TODO: convert all this to the syscall_stub macros we already have. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/sys-i386/stub_segv.c | 10 ++++++---- arch/um/sys-x86_64/stub_segv.c | 18 ++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c --- a/arch/um/sys-i386/stub_segv.c +++ b/arch/um/sys-i386/stub_segv.c @@ -13,17 +13,19 @@ void __attribute__ ((__section__ (".__sy stub_segv_handler(int sig) { struct sigcontext *sc = (struct sigcontext *) (&sig + 1); + long pid; GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA), sc); - __asm__("movl %0, %%eax ; int $0x80": : "g" (__NR_getpid)); - __asm__("movl %%eax, %%ebx ; movl %0, %%eax ; movl %1, %%ecx ;" - "int $0x80": : "g" (__NR_kill), "g" (SIGUSR1)); + __asm__("movl %1, %%eax ; int $0x80": "=&a" (pid): "i" (__NR_getpid)); + __asm__("movl %0, %%eax ; movl %1, %%ecx ;" + "int $0x80": : "i" (__NR_kill), "i" (SIGUSR1), "b" (pid) + : "eax", "ecx"); /* Load pointer to sigcontext into esp, since we need to leave * the stack in its original form when we do the sigreturn here, by * hand. */ __asm__("mov %0,%%esp ; movl %1, %%eax ; " - "int $0x80" : : "a" (sc), "g" (__NR_sigreturn)); + "int $0x80" : : "r" (sc), "i" (__NR_sigreturn)); } diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c --- a/arch/um/sys-x86_64/stub_segv.c +++ b/arch/um/sys-x86_64/stub_segv.c @@ -31,21 +31,23 @@ void __attribute__ ((__section__ (".__sy stub_segv_handler(int sig) { struct ucontext *uc; + long pid; - __asm__("movq %%rdx, %0" : "=g" (uc) :); + __asm__("movq %%rdx, %0" : "=g" (uc) : ); GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA), &uc->uc_mcontext); - __asm__("movq %0, %%rax ; syscall": : "g" (__NR_getpid)); - __asm__("movq %%rax, %%rdi ; movq %0, %%rax ; movq %1, %%rsi ;" - "syscall": : "g" (__NR_kill), "g" (SIGUSR1) : - "%rdi", "%rax", "%rsi"); + __asm__("movq %0, %%rax ; syscall": "=&a" (pid) : "g" (__NR_getpid) + : "rax", __syscall_clobber); + __asm__("movq %0, %%rax ; movq %1, %%rsi ;" + "syscall": : "i" (__NR_kill), "i" (SIGUSR1), "D" (pid) : + "rdi", "rax", "rsi", __syscall_clobber); /* sys_sigreturn expects that the stack pointer will be 8 bytes into * the signal frame. So, we use the ucontext pointer, which we know * already, to get the signal frame pointer, and add 8 to that. */ - __asm__("movq %0, %%rsp": : - "g" ((unsigned long) container_of(uc, struct rt_sigframe, + __asm__("movq %0, %%rsp": : + "g" ((unsigned long) container_of(uc, struct rt_sigframe, uc) + 8)); - __asm__("movq %0, %%rax ; syscall" : : "g" (__NR_rt_sigreturn)); + __asm__("movq %0, %%rax ; syscall" : : "g" (__NR_rt_sigreturn) : "rax"); } |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:31
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> In a previous patch I shifted an allocation to being atomic. In this patch, a better but more intrusive solution is implemented, i.e. hold the lock only when really needing it, especially not over pipe operations, nor over the culprit allocation. Additionally, while at it, add a missing kfree in the failure path, and make sure that if we fail in forking, write_sigio_pid is -1 and not, say, -ENOMEM. And fix whitespace, at least for things I was touching anyway. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/kernel/sigio_user.c | 84 ++++++++++++++++++++++++++++++------------- 1 files changed, 59 insertions(+), 25 deletions(-) diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c --- a/arch/um/kernel/sigio_user.c +++ b/arch/um/kernel/sigio_user.c @@ -336,70 +336,104 @@ int ignore_sigio_fd(int fd) return(err); } -static int setup_initial_poll(int fd) +static struct pollfd* setup_initial_poll(int fd) { struct pollfd *p; - p = um_kmalloc_atomic(sizeof(struct pollfd)); - if(p == NULL){ + p = um_kmalloc(sizeof(struct pollfd)); + if (p == NULL) { printk("setup_initial_poll : failed to allocate poll\n"); - return(-1); + return NULL; } *p = ((struct pollfd) { .fd = fd, .events = POLLIN, .revents = 0 }); - current_poll = ((struct pollfds) { .poll = p, - .used = 1, - .size = 1 }); - return(0); + return p; } void write_sigio_workaround(void) { unsigned long stack; + struct pollfd *p; int err; + int l_write_sigio_fds[2]; + int l_sigio_private[2]; + int l_write_sigio_pid; + /* We call this *tons* of times - and most ones we must just fail. */ sigio_lock(); - if(write_sigio_pid != -1) - goto out; + l_write_sigio_pid = write_sigio_pid; + sigio_unlock(); + + if (l_write_sigio_pid != -1) + return; - err = os_pipe(write_sigio_fds, 1, 1); + err = os_pipe(l_write_sigio_fds, 1, 1); if(err < 0){ printk("write_sigio_workaround - os_pipe 1 failed, " "err = %d\n", -err); - goto out; + return; } - err = os_pipe(sigio_private, 1, 1); + err = os_pipe(l_sigio_private, 1, 1); if(err < 0){ - printk("write_sigio_workaround - os_pipe 2 failed, " + printk("write_sigio_workaround - os_pipe 1 failed, " "err = %d\n", -err); goto out_close1; } - if(setup_initial_poll(sigio_private[1])) + + p = setup_initial_poll(l_sigio_private[1]); + if(!p) goto out_close2; - write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, + sigio_lock(); + + /* Did we race? Don't try to optimize this, please, it's not so likely + * to happen, and no more than once at the boot. */ + if(write_sigio_pid != -1) + goto out_unlock; + + write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, CLONE_FILES | CLONE_VM, &stack, 0); - if(write_sigio_pid < 0) goto out_close2; + if (write_sigio_pid < 0) + goto out_clear; - if(write_sigio_irq(write_sigio_fds[0])) + if (write_sigio_irq(l_write_sigio_fds[0])) goto out_kill; - out: + /* Success, finally. */ + memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); + memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); + + current_poll = ((struct pollfds) { .poll = p, + .used = 1, + .size = 1 }); + sigio_unlock(); return; out_kill: - os_kill_process(write_sigio_pid, 1); + l_write_sigio_pid = write_sigio_pid; + write_sigio_pid = -1; + sigio_unlock(); + /* Going to call waitpid, avoid holding the lock. */ + os_kill_process(l_write_sigio_pid, 1); + goto out_free; + + out_clear: write_sigio_pid = -1; + out_unlock: + sigio_unlock(); + out_free: + kfree(p); out_close2: - os_close_file(sigio_private[0]); - os_close_file(sigio_private[1]); + os_close_file(l_sigio_private[0]); + os_close_file(l_sigio_private[1]); out_close1: - os_close_file(write_sigio_fds[0]); - os_close_file(write_sigio_fds[1]); - sigio_unlock(); + os_close_file(l_write_sigio_fds[0]); + os_close_file(l_write_sigio_fds[1]); + return; + } int read_sigio_fd(int fd) |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:31
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> Since the 4th param is unused, remove it altogether. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/drivers/chan_kern.c | 5 ++--- arch/um/drivers/chan_user.c | 2 +- arch/um/include/chan_user.h | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -89,8 +89,7 @@ static int not_configged_write(int fd, c return(-EIO); } -static int not_configged_console_write(int fd, const char *buf, int len, - void *data) +static int not_configged_console_write(int fd, const char *buf, int len) { my_puts("Using a channel type which is configured out of " "UML\n"); @@ -299,7 +298,7 @@ int console_write_chan(struct list_head chan = list_entry(ele, struct chan, list); if(!chan->output || (chan->ops->console_write == NULL)) continue; - n = chan->ops->console_write(chan->fd, buf, len, chan->data); + n = chan->ops->console_write(chan->fd, buf, len); if(chan->primary) ret = n; } return(ret); diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -21,7 +21,7 @@ #include "choose-mode.h" #include "mode.h" -int generic_console_write(int fd, const char *buf, int n, void *unused) +int generic_console_write(int fd, const char *buf, int n) { struct termios save, new; int err; diff --git a/arch/um/include/chan_user.h b/arch/um/include/chan_user.h --- a/arch/um/include/chan_user.h +++ b/arch/um/include/chan_user.h @@ -25,7 +25,7 @@ struct chan_ops { void (*close)(int, void *); int (*read)(int, char *, void *); int (*write)(int, const char *, int, void *); - int (*console_write)(int, const char *, int, void *); + int (*console_write)(int, const char *, int); int (*window_size)(int, void *, unsigned short *, unsigned short *); void (*free)(void *); int winch; @@ -37,7 +37,7 @@ extern struct chan_ops fd_ops, null_ops, extern void generic_close(int fd, void *unused); extern int generic_read(int fd, char *c_out, void *unused); extern int generic_write(int fd, const char *buf, int n, void *unused); -extern int generic_console_write(int fd, const char *buf, int n, void *state); +extern int generic_console_write(int fd, const char *buf, int n); extern int generic_window_size(int fd, void *unused, unsigned short *rows_out, unsigned short *cols_out); extern void generic_free(void *data); |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:32
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> Ugly trick to help make malloc not sleeping - we can't do anything else. But this is not yet optimal, since spinlock don't trigger in_atomic() when preemption is disabled. Also, even if ugly, this was already used in one place, and was even more bogus. Fix it. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/include/kern_util.h | 4 +++- arch/um/include/user.h | 1 + arch/um/kernel/helper.c | 4 ++-- arch/um/kernel/main.c | 11 +++++++---- arch/um/kernel/process_kern.c | 21 +++++++++++++-------- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -109,9 +109,11 @@ extern void machine_halt(void); extern int is_syscall(unsigned long addr); extern void arch_switch(void); extern void free_irq(unsigned int, void *); -extern int um_in_interrupt(void); extern int cpu(void); +/* Are we disallowed to sleep? Used to choose between GFP_KERNEL and GFP_ATOMIC. */ +extern int __cant_sleep(void); + #endif /* diff --git a/arch/um/include/user.h b/arch/um/include/user.h --- a/arch/um/include/user.h +++ b/arch/um/include/user.h @@ -18,6 +18,7 @@ extern int open_gdb_chan(void); extern unsigned long strlcpy(char *, const char *, unsigned long); extern unsigned long strlcat(char *, const char *, unsigned long); extern void *um_vmalloc(int size); +extern void *um_vmalloc_atomic(int size); extern void vfree(void *ptr); #endif diff --git a/arch/um/kernel/helper.c b/arch/um/kernel/helper.c --- a/arch/um/kernel/helper.c +++ b/arch/um/kernel/helper.c @@ -61,7 +61,7 @@ int run_helper(void (*pre_exec)(void *), if((stack_out != NULL) && (*stack_out != 0)) stack = *stack_out; - else stack = alloc_stack(0, um_in_interrupt()); + else stack = alloc_stack(0, __cant_sleep()); if(stack == 0) return(-ENOMEM); @@ -124,7 +124,7 @@ int run_helper_thread(int (*proc)(void * unsigned long stack, sp; int pid, status, err; - stack = alloc_stack(stack_order, um_in_interrupt()); + stack = alloc_stack(stack_order, __cant_sleep()); if(stack == 0) return(-ENOMEM); sp = stack + (page_size() << stack_order) - sizeof(void *); diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c --- a/arch/um/kernel/main.c +++ b/arch/um/kernel/main.c @@ -197,11 +197,14 @@ void *__wrap_malloc(int size) { void *ret; - if(!CAN_KMALLOC()) + if (!CAN_KMALLOC()) return(__real_malloc(size)); - else if(size <= PAGE_SIZE) /* finding contiguos pages can be hard*/ - ret = um_kmalloc(size); - else ret = um_vmalloc(size); + else if (size <= PAGE_SIZE) /* finding contiguos pages can be hard*/ + ret = __cant_sleep() ? um_kmalloc_atomic(size) : + um_kmalloc(size); + else + ret = __cant_sleep() ? um_vmalloc_atomic(size) : + um_vmalloc(size); /* glibc people insist that if malloc fails, errno should be * set by malloc as well. So we do. diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -287,17 +287,27 @@ EXPORT_SYMBOL(disable_hlt); void *um_kmalloc(int size) { - return(kmalloc(size, GFP_KERNEL)); + return kmalloc(size, GFP_KERNEL); } void *um_kmalloc_atomic(int size) { - return(kmalloc(size, GFP_ATOMIC)); + return kmalloc(size, GFP_ATOMIC); } void *um_vmalloc(int size) { - return(vmalloc(size)); + return vmalloc(size); +} + +void *um_vmalloc_atomic(int size) +{ + return __vmalloc(size, GFP_ATOMIC | __GFP_HIGHMEM, PAGE_KERNEL); +} + +int __cant_sleep(void) { + return in_atomic() || irqs_disabled() || in_interrupt(); + /* Is in_interrupt() really needed? */ } unsigned long get_fault_addr(void) @@ -373,11 +383,6 @@ int smp_sigio_handler(void) return(0); } -int um_in_interrupt(void) -{ - return(in_interrupt()); -} - int cpu(void) { return(current_thread->cpu); |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:32
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> Jeff Dike noted that the assembly code for syscall stubs is misassembled with GCC 3.2.3: the values copied in registers weren't preserved between one asm() and the following one. So I fixed the thing by rewriting the __asm__ constraints more like unistd.h ones. Note: in syscall6 case I had to add one more instruction (i.e. moving arg6 in eax and shuffling things around) - it's needed for the function to be valid in general (we can't load the value from the stack, relative to ebp, because we change it), but could be avoided since we actually use a constant as param 6. The only fix would be to turn stub_syscall6 to a macro and use a "i" constraint for arg6 (i.e., specify it's a constant value). Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/include/sysdep-i386/stub.h | 64 +++++++++++++++++++++++----------- arch/um/include/sysdep-x86_64/stub.h | 61 ++++++++++++++++++++++++++------ 2 files changed, 92 insertions(+), 33 deletions(-) diff --git a/arch/um/include/sysdep-i386/stub.h b/arch/um/include/sysdep-i386/stub.h --- a/arch/um/include/sysdep-i386/stub.h +++ b/arch/um/include/sysdep-i386/stub.h @@ -16,45 +16,69 @@ extern void stub_clone_handler(void); #define STUB_MMAP_NR __NR_mmap2 #define MMAP_OFFSET(o) ((o) >> PAGE_SHIFT) +static inline long stub_syscall1(long syscall, long arg1) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1)); + + return ret; +} + static inline long stub_syscall2(long syscall, long arg1, long arg2) { long ret; - __asm__("movl %0, %%ecx; " : : "g" (arg2) : "%ecx"); - __asm__("movl %0, %%ebx; " : : "g" (arg1) : "%ebx"); - __asm__("movl %0, %%eax; " : : "g" (syscall) : "%eax"); - __asm__("int $0x80;" : : : "%eax"); - __asm__ __volatile__("movl %%eax, %0; " : "=g" (ret) :); - return(ret); + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2)); + + return ret; } static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) { - __asm__("movl %0, %%edx; " : : "g" (arg3) : "%edx"); - return(stub_syscall2(syscall, arg1, arg2)); + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3)); + + return ret; } static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, long arg4) { - __asm__("movl %0, %%esi; " : : "g" (arg4) : "%esi"); - return(stub_syscall3(syscall, arg1, arg2, arg3)); + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3), "S" (arg4)); + + return ret; +} + +static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)); + + return ret; } static inline long stub_syscall6(long syscall, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6) { long ret; - __asm__("movl %0, %%eax; " : : "g" (syscall) : "%eax"); - __asm__("movl %0, %%ebx; " : : "g" (arg1) : "%ebx"); - __asm__("movl %0, %%ecx; " : : "g" (arg2) : "%ecx"); - __asm__("movl %0, %%edx; " : : "g" (arg3) : "%edx"); - __asm__("movl %0, %%esi; " : : "g" (arg4) : "%esi"); - __asm__("movl %0, %%edi; " : : "g" (arg5) : "%edi"); - __asm__ __volatile__("pushl %%ebp ; movl %1, %%ebp; " - "int $0x80; popl %%ebp ; " - "movl %%eax, %0; " : "=g" (ret) : "g" (arg6) : "%eax"); - return(ret); + + __asm__ volatile ("push %%ebp ; movl %%eax,%%ebp ; movl %1,%%eax ; " + "int $0x80 ; pop %%ebp" + : "=a" (ret) + : "g" (syscall), "b" (arg1), "c" (arg2), "d" (arg3), + "S" (arg4), "D" (arg5), "0" (arg6)); + + return ret; } static inline void trap_myself(void) diff --git a/arch/um/include/sysdep-x86_64/stub.h b/arch/um/include/sysdep-x86_64/stub.h --- a/arch/um/include/sysdep-x86_64/stub.h +++ b/arch/um/include/sysdep-x86_64/stub.h @@ -17,37 +17,72 @@ extern void stub_clone_handler(void); #define STUB_MMAP_NR __NR_mmap #define MMAP_OFFSET(o) (o) +#define __syscall_clobber "r11","rcx","memory" +#define __syscall "syscall" + static inline long stub_syscall2(long syscall, long arg1, long arg2) { long ret; - __asm__("movq %0, %%rsi; " : : "g" (arg2) : "%rsi"); - __asm__("movq %0, %%rdi; " : : "g" (arg1) : "%rdi"); - __asm__("movq %0, %%rax; " : : "g" (syscall) : "%rax"); - __asm__("syscall;" : : : "%rax", "%r11", "%rcx"); - __asm__ __volatile__("movq %%rax, %0; " : "=g" (ret) :); - return(ret); + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2) : __syscall_clobber ); + + return ret; } static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) { - __asm__("movq %0, %%rdx; " : : "g" (arg3) : "%rdx"); - return(stub_syscall2(syscall, arg1, arg2)); + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3) + : __syscall_clobber ); + + return ret; } static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, long arg4) { - __asm__("movq %0, %%r10; " : : "g" (arg4) : "%r10"); - return(stub_syscall3(syscall, arg1, arg2, arg3)); + long ret; + + __asm__ volatile ("movq %5,%%r10 ; " __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4) + : __syscall_clobber, "r10" ); + + return ret; +} + +static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long ret; + + __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4), "g" (arg5) + : __syscall_clobber, "r10", "r8" ); + + return ret; } static inline long stub_syscall6(long syscall, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6) { - __asm__("movq %0, %%r9; " : : "g" (arg6) : "%r9"); - __asm__("movq %0, %%r8; " : : "g" (arg5) : "%r8"); - return(stub_syscall4(syscall, arg1, arg2, arg3, arg4)); + long ret; + + __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " + "movq %7, %%r9; " __syscall : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4), "g" (arg5), "g" (arg6) + : __syscall_clobber, "r10", "r8", "r9" ); + + return ret; } static inline void trap_myself(void) |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:32
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> The access_ok_tt() macro is bogus, in that a read access is unconditionally considered valid. I couldn't find in SCM logs the introduction of this check, but I went back to 2.4.20-1um and the definition was the same. Possibly this was done to avoid problems with missing set_fs() calls, but there can't be any I think because they would fail with SKAS mode. TT-specific code is still to check. Also, this patch joins common code together, and makes the "address range wrapping" check happen for all cases, rather than for only some. This may, possibly, be reoptimized at some time, but the current code doesn't seem clever, just confused. * Important: I've also had to change references to access_ok_{tt,skas} back to access_ok - the kernel wasn't that happy otherwise. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/include/um_uaccess.h | 19 ++++++++++++++++++- arch/um/kernel/skas/include/uaccess-skas.h | 10 ++-------- arch/um/kernel/skas/uaccess.c | 8 ++++---- arch/um/kernel/tt/include/uaccess-tt.h | 8 +------- arch/um/kernel/tt/uaccess.c | 8 ++++---- 5 files changed, 29 insertions(+), 24 deletions(-) diff --git a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h --- a/arch/um/include/um_uaccess.h +++ b/arch/um/include/um_uaccess.h @@ -17,8 +17,25 @@ #include "uaccess-skas.h" #endif +#define __under_task_size(addr, size) \ + (((unsigned long) (addr) < TASK_SIZE) && \ + (((unsigned long) (addr) + (size)) < TASK_SIZE)) + +#define __access_ok_vsyscall(type, addr, size) \ + ((type == VERIFY_READ) && \ + ((unsigned long) (addr) >= FIXADDR_USER_START) && \ + ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ + ((unsigned long) (addr) + (size) >= (unsigned long)(addr))) + +#define __addr_range_nowrap(addr, size) \ + ((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) + #define access_ok(type, addr, size) \ - CHOOSE_MODE_PROC(access_ok_tt, access_ok_skas, type, addr, size) + (__addr_range_nowrap(addr, size) && \ + (__under_task_size(addr, size) || \ + __access_ok_vsyscall(type, addr, size) || \ + segment_eq(get_fs(), KERNEL_DS) || \ + CHOOSE_MODE_PROC(access_ok_tt, access_ok_skas, type, addr, size))) static inline int copy_from_user(void *to, const void __user *from, int n) { diff --git a/arch/um/kernel/skas/include/uaccess-skas.h b/arch/um/kernel/skas/include/uaccess-skas.h --- a/arch/um/kernel/skas/include/uaccess-skas.h +++ b/arch/um/kernel/skas/include/uaccess-skas.h @@ -9,14 +9,8 @@ #include "asm/errno.h" #include "asm/fixmap.h" -#define access_ok_skas(type, addr, size) \ - ((segment_eq(get_fs(), KERNEL_DS)) || \ - (((unsigned long) (addr) < TASK_SIZE) && \ - ((unsigned long) (addr) + (size) <= TASK_SIZE)) || \ - ((type == VERIFY_READ ) && \ - ((unsigned long) (addr) >= FIXADDR_USER_START) && \ - ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ - ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))) +/* No SKAS-specific checking. */ +#define access_ok_skas(type, addr, size) 0 extern int copy_from_user_skas(void *to, const void __user *from, int n); extern int copy_to_user_skas(void __user *to, const void *from, int n); diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -143,7 +143,7 @@ int copy_from_user_skas(void *to, const return(0); } - return(access_ok_skas(VERIFY_READ, from, n) ? + return(access_ok(VERIFY_READ, from, n) ? buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): n); } @@ -164,7 +164,7 @@ int copy_to_user_skas(void __user *to, c return(0); } - return(access_ok_skas(VERIFY_WRITE, to, n) ? + return(access_ok(VERIFY_WRITE, to, n) ? buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : n); } @@ -193,7 +193,7 @@ int strncpy_from_user_skas(char *dst, co return(strnlen(dst, count)); } - if(!access_ok_skas(VERIFY_READ, src, 1)) + if(!access_ok(VERIFY_READ, src, 1)) return(-EFAULT); n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, @@ -221,7 +221,7 @@ int clear_user_skas(void __user *mem, in return(0); } - return(access_ok_skas(VERIFY_WRITE, mem, len) ? + return(access_ok(VERIFY_WRITE, mem, len) ? buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); } diff --git a/arch/um/kernel/tt/include/uaccess-tt.h b/arch/um/kernel/tt/include/uaccess-tt.h --- a/arch/um/kernel/tt/include/uaccess-tt.h +++ b/arch/um/kernel/tt/include/uaccess-tt.h @@ -19,19 +19,13 @@ extern unsigned long end_vm; extern unsigned long uml_physmem; -#define under_task_size(addr, size) \ - (((unsigned long) (addr) < TASK_SIZE) && \ - (((unsigned long) (addr) + (size)) < TASK_SIZE)) - #define is_stack(addr, size) \ (((unsigned long) (addr) < STACK_TOP) && \ ((unsigned long) (addr) >= STACK_TOP - ABOVE_KMEM) && \ (((unsigned long) (addr) + (size)) <= STACK_TOP)) #define access_ok_tt(type, addr, size) \ - ((type == VERIFY_READ) || (segment_eq(get_fs(), KERNEL_DS)) || \ - (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \ - (under_task_size(addr, size) || is_stack(addr, size)))) + (is_stack(addr, size)) extern unsigned long get_fault_addr(void); diff --git a/arch/um/kernel/tt/uaccess.c b/arch/um/kernel/tt/uaccess.c --- a/arch/um/kernel/tt/uaccess.c +++ b/arch/um/kernel/tt/uaccess.c @@ -8,7 +8,7 @@ int copy_from_user_tt(void *to, const void __user *from, int n) { - if(!access_ok_tt(VERIFY_READ, from, n)) + if(!access_ok(VERIFY_READ, from, n)) return(n); return(__do_copy_from_user(to, from, n, ¤t->thread.fault_addr, @@ -17,7 +17,7 @@ int copy_from_user_tt(void *to, const vo int copy_to_user_tt(void __user *to, const void *from, int n) { - if(!access_ok_tt(VERIFY_WRITE, to, n)) + if(!access_ok(VERIFY_WRITE, to, n)) return(n); return(__do_copy_to_user(to, from, n, ¤t->thread.fault_addr, @@ -28,7 +28,7 @@ int strncpy_from_user_tt(char *dst, cons { int n; - if(!access_ok_tt(VERIFY_READ, src, 1)) + if(!access_ok(VERIFY_READ, src, 1)) return(-EFAULT); n = __do_strncpy_from_user(dst, src, count, @@ -47,7 +47,7 @@ int __clear_user_tt(void __user *mem, in int clear_user_tt(void __user *mem, int len) { - if(!access_ok_tt(VERIFY_WRITE, mem, len)) + if(!access_ok(VERIFY_WRITE, mem, len)) return(len); return(__do_clear_user(mem, len, ¤t->thread.fault_addr, |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:32
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> The problems in this area came to light while fixing a compile failure with GCC 4, in commit bcb01b8a67476e6f748086e626df8424cc27036d. I went comparing this code with x86_64 frame construction (which we should ABI compatible with) and resync'ed the code a bit. It isn't yet perfect, because we don't yet save floating point context. But that will come later. Please give a critical eye, even because things currently have no reported misbehaviour, and this code is complex enough. CC: Andi Kleen <ak...@su...> Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/sys-x86_64/signal.c | 19 ++++++++++++++++--- 1 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c --- a/arch/um/sys-x86_64/signal.c +++ b/arch/um/sys-x86_64/signal.c @@ -164,6 +164,7 @@ struct rt_sigframe #define round_down(m, n) (((m) / (n)) * (n)) +/* Taken from arch/x86_64/kernel/signal.c:setup_rt_frame(). */ int setup_signal_stack_si(unsigned long stack_top, int sig, struct k_sigaction *ka, struct pt_regs * regs, siginfo_t *info, sigset_t *set) @@ -173,9 +174,21 @@ int setup_signal_stack_si(unsigned long int err = 0; struct task_struct *me = current; - frame = (struct rt_sigframe __user *) - round_down(stack_top - sizeof(struct rt_sigframe), 16) - 8; - frame = (struct rt_sigframe *) ((unsigned long) frame - 128); + /* Leave space on the stack for the Red Zone, and for saving FP + * registers, even if this doesn't happen. We don't have a way to test + * used_math(), so we do that inconditionally. + * + * XXX: RED-PEN: currently, we're using a Red Zone also for any + * alternate stack set up by sigaltstack(), which x86-64 doesn't do + * (because there shouldn't be any code executing there). This could + * cause failures if user setup a too little alternate stack.*/ + + fp = (struct rt_sigframe *) round_down(stack_top - 128 - + sizeof(struct _fpstate), 16); + + /* Now leave the space for the rest of signal frame. */ + frame = (void __user *) round_down((unsigned long) fp - + sizeof(struct rt_sigframe), 16) - 8; if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto out; |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:32
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> Remove a stone-age comment (UM *does* have a MMU, i.e. the host), and fix a dependency (introduced in commit 02edeb586ae4cdd17778923674700edb732a4741) to do what was intended. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/Kconfig | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/Kconfig b/arch/um/Kconfig --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -7,7 +7,6 @@ config UML bool default y -# XXX: does UM have a mmu/swap? config MMU bool default y @@ -196,7 +195,8 @@ config HOST_2G_2G config SMP bool "Symmetric multi-processing support (EXPERIMENTAL)" default n - depends on (MODE_TT && EXPERIMENTAL && !SMP_BROKEN) || (BROKEN && SMP_BROKEN) + #SMP_BROKEN is for x86_64. + depends on MODE_TT && EXPERIMENTAL && (!SMP_BROKEN || (BROKEN && SMP_BROKEN)) help This option enables UML SMP support. It is NOT related to having a real SMP box. Not directly, at least. |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:32
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> The below warning was added in place of pte_mkyoung(); if (is_write) pte_mkdirty(); In fact, if the PTE is not marked young/dirty, our dirty/accessed bit emulation would cause the TLB permission not to be changed, and so we'd loop, and given we don't support preemption yet, we'd busy-hang here. However, I've seen this warning trigger without crashes during a loop of concurrent kernel builds, at random times (i.e. like a race condition), and I realized that two concurrent faults on the same page, one on read and one on write, can trigger it. The read fault gets serviced and the PTE gets marked writable but clean (it's possible on a shared-writable mapping), while the generic code sees the PTE was already installed and returns without action. In this case, we'll see another fault and service it normally. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/kernel/trap_kern.c | 9 +++++++++ 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -95,7 +95,16 @@ survive: pte = pte_offset_kernel(pmd, address); } while(!pte_present(*pte)); err = 0; + /* The below warning was added in place of + * pte_mkyoung(); if (is_write) pte_mkdirty(); + * If it's triggered, we'd see normally a hang here (a clean pte is + * marked read-only to emulate the dirty bit). + * However, the generic code can mark a PTE writable but clean on a + * concurrent read fault, triggering this harmlessly. So comment it out. + */ +#if 0 WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); +#endif flush_tlb_page(vma, address); out: up_read(&mm->mmap_sem); |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:32
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> printk clears the host errno (I verified this in debugging and it's reasonable enough, given that it ends via a write call on some fd, especially since printk() goes on /dev/tty0 which is often the host stdout). So save errno earlier. There's no reason to change the printk calls to use -err rather than errno - the assignment can't clear errno. And in the first failure path, we used to return 0 too (and this time more clearly), which is totally wrong. 0 is a success fd, which is then registered and gives a "registering fd twice" warning. Finally, fix up some whitespace. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/drivers/mcast_user.c | 20 ++++++++++---------- 1 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/um/drivers/mcast_user.c b/arch/um/drivers/mcast_user.c --- a/arch/um/drivers/mcast_user.c +++ b/arch/um/drivers/mcast_user.c @@ -54,7 +54,7 @@ static int mcast_open(void *data) struct mcast_data *pri = data; struct sockaddr_in *sin = pri->mcast_addr; struct ip_mreq mreq; - int fd, yes = 1, err = 0; + int fd, yes = 1, err = -EINVAL; if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) @@ -63,40 +63,40 @@ static int mcast_open(void *data) fd = socket(AF_INET, SOCK_DGRAM, 0); if (fd < 0){ + err = -errno; printk("mcast_open : data socket failed, errno = %d\n", errno); - err = -errno; goto out; } if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { + err = -errno; printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", errno); - err = -errno; goto out_close; } /* set ttl according to config */ if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl, sizeof(pri->ttl)) < 0) { + err = -errno; printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", errno); - err = -errno; goto out_close; } /* set LOOP, so data does get fed back to local sockets */ if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { + err = -errno; printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", errno); - err = -errno; goto out_close; } /* bind socket to mcast address */ if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { - printk("mcast_open : data bind failed, errno = %d\n", errno); err = -errno; + printk("mcast_open : data bind failed, errno = %d\n", errno); goto out_close; } @@ -105,22 +105,22 @@ static int mcast_open(void *data) mreq.imr_interface.s_addr = 0; if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0) { + err = -errno; printk("mcast_open: IP_ADD_MEMBERSHIP failed, error = %d\n", errno); printk("There appears not to be a multicast-capable network " "interface on the host.\n"); printk("eth0 should be configured in order to use the " "multicast transport.\n"); - err = -errno; - goto out_close; + goto out_close; } return fd; out_close: - os_close_file(fd); + os_close_file(fd); out: - return err; + return err; } static void mcast_close(int fd, void *data) |
From: Paolo 'B. G. <bla...@ya...> - 2005-10-25 22:05:32
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> We were using a long series of (stupid) wrappers which all call generic_console_write(). Since the wrappers only change the 4th param, which is unused by the called proc, remove them and call generic_console_write() directly. If needed at any time in the future to reintroduce this stuff, the member could be moved to a generic struct, to avoid this duplicated handling. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/drivers/fd.c | 9 +-------- arch/um/drivers/port_user.c | 9 +-------- arch/um/drivers/pty.c | 11 ++--------- arch/um/drivers/tty.c | 9 +-------- arch/um/drivers/xterm.c | 9 +-------- 5 files changed, 6 insertions(+), 41 deletions(-) diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c --- a/arch/um/drivers/fd.c +++ b/arch/um/drivers/fd.c @@ -76,13 +76,6 @@ static void fd_close(int fd, void *d) } } -static int fd_console_write(int fd, const char *buf, int n, void *d) -{ - struct fd_chan *data = d; - - return(generic_console_write(fd, buf, n, &data->tt)); -} - struct chan_ops fd_ops = { .type = "fd", .init = fd_init, @@ -90,7 +83,7 @@ struct chan_ops fd_ops = { .close = fd_close, .read = generic_read, .write = generic_write, - .console_write = fd_console_write, + .console_write = generic_console_write, .window_size = generic_window_size, .free = generic_free, .winch = 1, diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c --- a/arch/um/drivers/port_user.c +++ b/arch/um/drivers/port_user.c @@ -101,13 +101,6 @@ static void port_close(int fd, void *d) os_close_file(fd); } -static int port_console_write(int fd, const char *buf, int n, void *d) -{ - struct port_chan *data = d; - - return(generic_console_write(fd, buf, n, &data->tt)); -} - struct chan_ops port_ops = { .type = "port", .init = port_init, @@ -115,7 +108,7 @@ struct chan_ops port_ops = { .close = port_close, .read = generic_read, .write = generic_write, - .console_write = port_console_write, + .console_write = generic_console_write, .window_size = generic_window_size, .free = port_free, .winch = 1, diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c --- a/arch/um/drivers/pty.c +++ b/arch/um/drivers/pty.c @@ -118,13 +118,6 @@ static int pty_open(int input, int outpu return(fd); } -static int pty_console_write(int fd, const char *buf, int n, void *d) -{ - struct pty_chan *data = d; - - return(generic_console_write(fd, buf, n, &data->tt)); -} - struct chan_ops pty_ops = { .type = "pty", .init = pty_chan_init, @@ -132,7 +125,7 @@ struct chan_ops pty_ops = { .close = generic_close, .read = generic_read, .write = generic_write, - .console_write = pty_console_write, + .console_write = generic_console_write, .window_size = generic_window_size, .free = generic_free, .winch = 0, @@ -145,7 +138,7 @@ struct chan_ops pts_ops = { .close = generic_close, .read = generic_read, .write = generic_write, - .console_write = pty_console_write, + .console_write = generic_console_write, .window_size = generic_window_size, .free = generic_free, .winch = 0, diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c --- a/arch/um/drivers/tty.c +++ b/arch/um/drivers/tty.c @@ -60,13 +60,6 @@ static int tty_open(int input, int outpu return(fd); } -static int tty_console_write(int fd, const char *buf, int n, void *d) -{ - struct tty_chan *data = d; - - return(generic_console_write(fd, buf, n, &data->tt)); -} - struct chan_ops tty_ops = { .type = "tty", .init = tty_chan_init, @@ -74,7 +67,7 @@ struct chan_ops tty_ops = { .close = generic_close, .read = generic_read, .write = generic_write, - .console_write = tty_console_write, + .console_write = generic_console_write, .window_size = generic_window_size, .free = generic_free, .winch = 0, diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -195,13 +195,6 @@ static void xterm_free(void *d) free(d); } -static int xterm_console_write(int fd, const char *buf, int n, void *d) -{ - struct xterm_chan *data = d; - - return(generic_console_write(fd, buf, n, &data->tt)); -} - struct chan_ops xterm_ops = { .type = "xterm", .init = xterm_init, @@ -209,7 +202,7 @@ struct chan_ops xterm_ops = { .close = xterm_close, .read = generic_read, .write = generic_write, - .console_write = xterm_console_write, + .console_write = generic_console_write, .window_size = generic_window_size, .free = xterm_free, .winch = 1, |
From: Paolo 'B. G. <bla...@ya...> - 2006-01-18 00:27:38
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> In a previous patch I shifted an allocation to being atomic. In this patch, a better but more intrusive solution is implemented, i.e. hold the lock only when really needing it, especially not over pipe operations, nor over the culprit allocation. Additionally, while at it, add a missing kfree in the failure path, and make sure that if we fail in forking, write_sigio_pid is -1 and not, say, -ENOMEM. And fix whitespace, at least for things I was touching anyway. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/kernel/sigio_user.c | 83 ++++++++++++++++++++++++++++++------------- 1 files changed, 58 insertions(+), 25 deletions(-) diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c index 62e5cfd..55ef415 100644 --- a/arch/um/kernel/sigio_user.c +++ b/arch/um/kernel/sigio_user.c @@ -337,70 +337,103 @@ int ignore_sigio_fd(int fd) return(err); } -static int setup_initial_poll(int fd) +static struct pollfd* setup_initial_poll(int fd) { struct pollfd *p; - p = um_kmalloc_atomic(sizeof(struct pollfd)); - if(p == NULL){ + p = um_kmalloc(sizeof(struct pollfd)); + if (p == NULL) { printk("setup_initial_poll : failed to allocate poll\n"); - return(-1); + return NULL; } *p = ((struct pollfd) { .fd = fd, .events = POLLIN, .revents = 0 }); - current_poll = ((struct pollfds) { .poll = p, - .used = 1, - .size = 1 }); - return(0); + return p; } void write_sigio_workaround(void) { unsigned long stack; + struct pollfd *p; int err; + int l_write_sigio_fds[2]; + int l_sigio_private[2]; + int l_write_sigio_pid; + /* We call this *tons* of times - and most ones we must just fail. */ sigio_lock(); - if(write_sigio_pid != -1) - goto out; + l_write_sigio_pid = write_sigio_pid; + sigio_unlock(); - err = os_pipe(write_sigio_fds, 1, 1); + if (l_write_sigio_pid != -1) + return; + + err = os_pipe(l_write_sigio_fds, 1, 1); if(err < 0){ printk("write_sigio_workaround - os_pipe 1 failed, " "err = %d\n", -err); - goto out; + return; } - err = os_pipe(sigio_private, 1, 1); + err = os_pipe(l_sigio_private, 1, 1); if(err < 0){ - printk("write_sigio_workaround - os_pipe 2 failed, " + printk("write_sigio_workaround - os_pipe 1 failed, " "err = %d\n", -err); goto out_close1; } - if(setup_initial_poll(sigio_private[1])) + + p = setup_initial_poll(l_sigio_private[1]); + if(!p) goto out_close2; - write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, + sigio_lock(); + + /* Did we race? Don't try to optimize this, please, it's not so likely + * to happen, and no more than once at the boot. */ + if(write_sigio_pid != -1) + goto out_unlock; + + write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, CLONE_FILES | CLONE_VM, &stack, 0); - if(write_sigio_pid < 0) goto out_close2; + if (write_sigio_pid < 0) + goto out_clear; - if(write_sigio_irq(write_sigio_fds[0])) + if (write_sigio_irq(l_write_sigio_fds[0])) goto out_kill; - out: + /* Success, finally. */ + memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); + memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); + + current_poll = ((struct pollfds) { .poll = p, + .used = 1, + .size = 1 }); + sigio_unlock(); return; out_kill: - os_kill_process(write_sigio_pid, 1); + l_write_sigio_pid = write_sigio_pid; write_sigio_pid = -1; + sigio_unlock(); + /* Going to call waitpid, avoid holding the lock. */ + os_kill_process(l_write_sigio_pid, 1); + goto out_free; + + out_clear: + write_sigio_pid = -1; + out_unlock: + sigio_unlock(); + out_free: + kfree(p); out_close2: - os_close_file(sigio_private[0]); - os_close_file(sigio_private[1]); + os_close_file(l_sigio_private[0]); + os_close_file(l_sigio_private[1]); out_close1: - os_close_file(write_sigio_fds[0]); - os_close_file(write_sigio_fds[1]); - sigio_unlock(); + os_close_file(l_write_sigio_fds[0]); + os_close_file(l_write_sigio_fds[1]); + return; } int read_sigio_fd(int fd) |
From: Paolo 'B. G. <bla...@ya...> - 2006-01-18 00:27:41
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> In this error path, when the interface has had a problem, we call dev_close(), which is disallowed for two reasons: *) takes again the UML internal spinlock, inside the ->stop method of this device *) can be called in process context only, while we're in interrupt context. I've also thought that calling dev_close() may be a wrong policy to follow, but it's not up to me to decide that. However, we may end up with multiple dev_close() queued on the same device. But the initial test for (dev->flags & IFF_UP) makes this harmless, though - and dev_close() is supposed to care about races with itself. So there's no harm in delaying the shutdown, IMHO. Something to mark the interface as "going to shutdown" would be appreciated, but dev_deactivate has the same problems as dev_close(), so we can't use it either. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/drivers/net_kern.c | 15 +++++++++++++-- 1 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 98350bb..178f68b 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -68,6 +68,11 @@ static int uml_net_rx(struct net_device return pkt_len; } +static void uml_dev_close(void* dev) +{ + dev_close( (struct net_device *) dev); +} + irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) { struct net_device *dev = dev_id; @@ -80,15 +85,21 @@ irqreturn_t uml_net_interrupt(int irq, v spin_lock(&lp->lock); while((err = uml_net_rx(dev)) > 0) ; if(err < 0) { + DECLARE_WORK(close_work, uml_dev_close, dev); printk(KERN_ERR "Device '%s' read returned %d, shutting it down\n", dev->name, err); - dev_close(dev); + /* dev_close can't be called in interrupt context, and takes + * again lp->lock. + * And dev_close() can be safely called multiple times on the + * same device, since it tests for (dev->flags & IFF_UP). So + * there's no harm in delaying the device shutdown. */ + schedule_work(&close_work); goto out; } reactivate_fd(lp->fd, UM_ETH_IRQ); - out: +out: spin_unlock(&lp->lock); return(IRQ_HANDLED); } |
From: Andrew M. <ak...@os...> - 2006-01-18 00:50:25
|
"Paolo 'Blaisorblade' Giarrusso" <bla...@ya...> wrote: > > > From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> > > In this error path, when the interface has had a problem, we call dev_close(), > which is disallowed for two reasons: > > *) takes again the UML internal spinlock, inside the ->stop method of this > device > *) can be called in process context only, while we're in interrupt context. > > I've also thought that calling dev_close() may be a wrong policy to follow, but > it's not up to me to decide that. > > However, we may end up with multiple dev_close() queued on the same device. > But the initial test for (dev->flags & IFF_UP) makes this harmless, though - > and dev_close() is supposed to care about races with itself. So there's no harm > in delaying the shutdown, IMHO. > > Something to mark the interface as "going to shutdown" would be appreciated, but > dev_deactivate has the same problems as dev_close(), so we can't use it either. > > ... > + /* dev_close can't be called in interrupt context, and takes > + * again lp->lock. > + * And dev_close() can be safely called multiple times on the > + * same device, since it tests for (dev->flags & IFF_UP). So > + * there's no harm in delaying the device shutdown. */ > + schedule_work(&close_work); > goto out; > } This callback can be pending for an arbitrary amount of time. I'd have expected to see a flush_sceduled_work() somewhere in the driver to force all such pending work to complete before we destroy things which that callback wil be using. |
From: Blaisorblade <bla...@ya...> - 2006-01-18 11:45:09
|
On Wednesday 18 January 2006 01:52, Andrew Morton wrote: > "Paolo 'Blaisorblade' Giarrusso" <bla...@ya...> wrote: > > From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> > > > > In this error path, when the interface has had a problem, we call > > dev_close(), which is disallowed for two reasons: > > *) takes again the UML internal spinlock, inside the ->stop method of > > this device > > *) can be called in process context only, while we're in interrupt > > context. > > I've also thought that calling dev_close() may be a wrong policy to > > follow, but it's not up to me to decide that. > > However, we may end up with multiple dev_close() queued on the same > > device. But the initial test for (dev->flags & IFF_UP) makes this > > harmless, though - and dev_close() is supposed to care about races with > > itself. So there's no harm in delaying the shutdown, IMHO. > > Something to mark the interface as "going to shutdown" would be > > appreciated, but dev_deactivate has the same problems as dev_close(), so > > we can't use it either. > > ... > > + /* dev_close can't be called in interrupt context, and takes > > + * again lp->lock. > > + * And dev_close() can be safely called multiple times on the > > + * same device, since it tests for (dev->flags & IFF_UP). So > > + * there's no harm in delaying the device shutdown. */ > > + schedule_work(&close_work); > > goto out; > > } > This callback can be pending for an arbitrary amount of time. I'd have > expected to see a flush_sceduled_work() somewhere in the driver to force > all such pending work to complete before we destroy things which that > callback wil be using. Thanks for the tip (it's good I got schedule_work right, it wasn't hard but 1st time I use it), however the device is not later freed, it seems, in this context... even because it's an unlikely event (I reproduced it with a damn-bad configuration). Not going to cause seeable leaks anyway, but yes, to correct. However, network devices are created from the host only, so only the host should free them (as done by net_remove on "hot-unplug" request). Maybe that's to fix too, however, but in a separate patch; Jeff, what about this? -- Inform me of my mistakes, so I can keep imitating Homer Simpson's "Doh!". Paolo Giarrusso, aka Blaisorblade (Skype ID "PaoloGiarrusso", ICQ 215621894) http://www.user-mode-linux.org/~blaisorblade ___________________________________ Yahoo! Messenger with Voice: chiama da PC a telefono a tariffe esclusive http://it.messenger.yahoo.com |
From: Paolo 'B. G. <bla...@ya...> - 2006-01-18 00:27:42
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> Ugly trick to help make malloc not sleeping - we can't do anything else. But this is not yet optimal, since spinlock don't trigger in_atomic() when preemption is disabled. Also, even if ugly, this was already used in one place, and was even more bogus. Fix it. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/include/kern_util.h | 4 +++- arch/um/include/user.h | 1 + arch/um/kernel/process_kern.c | 21 +++++++++++++-------- arch/um/os-Linux/helper.c | 4 ++-- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index 8f4e46d..c649108 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -120,8 +120,10 @@ extern void machine_halt(void); extern int is_syscall(unsigned long addr); extern void arch_switch(void); extern void free_irq(unsigned int, void *); -extern int um_in_interrupt(void); extern int cpu(void); + +/* Are we disallowed to sleep? Used to choose between GFP_KERNEL and GFP_ATOMIC. */ +extern int __cant_sleep(void); extern void segv_handler(int sig, union uml_pt_regs *regs); extern void sigio_handler(int sig, union uml_pt_regs *regs); diff --git a/arch/um/include/user.h b/arch/um/include/user.h index 0f865ef..91b0ac4 100644 --- a/arch/um/include/user.h +++ b/arch/um/include/user.h @@ -18,6 +18,7 @@ extern int open_gdb_chan(void); extern unsigned long strlcpy(char *, const char *, unsigned long); extern unsigned long strlcat(char *, const char *, unsigned long); extern void *um_vmalloc(int size); +extern void *um_vmalloc_atomic(int size); extern void vfree(void *ptr); #endif diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 7f13b85..d852c55 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -288,17 +288,27 @@ EXPORT_SYMBOL(disable_hlt); void *um_kmalloc(int size) { - return(kmalloc(size, GFP_KERNEL)); + return kmalloc(size, GFP_KERNEL); } void *um_kmalloc_atomic(int size) { - return(kmalloc(size, GFP_ATOMIC)); + return kmalloc(size, GFP_ATOMIC); } void *um_vmalloc(int size) { - return(vmalloc(size)); + return vmalloc(size); +} + +void *um_vmalloc_atomic(int size) +{ + return __vmalloc(size, GFP_ATOMIC | __GFP_HIGHMEM, PAGE_KERNEL); +} + +int __cant_sleep(void) { + return in_atomic() || irqs_disabled() || in_interrupt(); + /* Is in_interrupt() really needed? */ } unsigned long get_fault_addr(void) @@ -370,11 +380,6 @@ int smp_sigio_handler(void) return(0); } -int um_in_interrupt(void) -{ - return(in_interrupt()); -} - int cpu(void) { return(current_thread->cpu); diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 36cc847..6490a4f 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -60,7 +60,7 @@ int run_helper(void (*pre_exec)(void *), if((stack_out != NULL) && (*stack_out != 0)) stack = *stack_out; - else stack = alloc_stack(0, um_in_interrupt()); + else stack = alloc_stack(0, __cant_sleep()); if(stack == 0) return(-ENOMEM); @@ -124,7 +124,7 @@ int run_helper_thread(int (*proc)(void * unsigned long stack, sp; int pid, status, err; - stack = alloc_stack(stack_order, um_in_interrupt()); + stack = alloc_stack(stack_order, __cant_sleep()); if(stack == 0) return(-ENOMEM); sp = stack + (page_size() << stack_order) - sizeof(void *); |
From: Andrew M. <ak...@os...> - 2006-01-18 00:54:05
|
"Paolo 'Blaisorblade' Giarrusso" <bla...@ya...> wrote: > > +int __cant_sleep(void) { > + return in_atomic() || irqs_disabled() || in_interrupt(); aww, man, this is awful. Code is supposed to know what context it's running in, not go fishing about in core internals trying to fix up its own confusion. > + /* Is in_interrupt() really needed? */ > } Yes, it is. in_atomic() is a no-op on !PREEMPT and local irq's can be enabled in soft- or hard- interrupt context, so irqs_disabled() will return 0. |
From: Blaisorblade <bla...@ya...> - 2006-01-18 11:47:54
|
On Wednesday 18 January 2006 01:56, Andrew Morton wrote: > "Paolo 'Blaisorblade' Giarrusso" <bla...@ya...> wrote: > > +int __cant_sleep(void) { > > + return in_atomic() || irqs_disabled() || in_interrupt(); > > aww, man, this is awful. Code is supposed to know what context it's > running in, not go fishing about in core internals trying to fix up its own > confusion. Yes, I know. But is libc supposed too? That's our problem. This code is only used for calls to malloc() (not kmalloc()) after the kernel has booted. There are a few calls to malloc in our code, ok, and those should be fixed too. But the main problem are glibc ones (which are called from all kinds of places - we once got a crash because opendir() was calling malloc with big sizes - we solved that by optionally using vmalloc()). -- Inform me of my mistakes, so I can keep imitating Homer Simpson's "Doh!". Paolo Giarrusso, aka Blaisorblade (Skype ID "PaoloGiarrusso", ICQ 215621894) http://www.user-mode-linux.org/~blaisorblade ___________________________________ Yahoo! Messenger with Voice: chiama da PC a telefono a tariffe esclusive http://it.messenger.yahoo.com |
From: Paolo 'B. G. <bla...@ya...> - 2006-01-18 00:27:43
|
Correct a bit of whitespace problems while working here. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/drivers/ubd_kern.c | 31 +++++++++++++++++-------------- 1 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index e498f34..f93af66 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1200,15 +1200,16 @@ int open_ubd_file(char *file, struct ope int fd, err, sectorsize, asked_switch, mode = 0644; fd = os_open_file(file, *openflags, mode); - if(fd < 0){ - if((fd == -ENOENT) && (create_cow_out != NULL)) + if (fd < 0) { + if ((fd == -ENOENT) && (create_cow_out != NULL)) *create_cow_out = 1; - if(!openflags->w || - ((fd != -EROFS) && (fd != -EACCES))) return(fd); + if (!openflags->w || + ((fd != -EROFS) && (fd != -EACCES))) + return fd; openflags->w = 0; fd = os_open_file(file, *openflags, mode); - if(fd < 0) - return(fd); + if (fd < 0) + return fd; } err = os_lock_file(fd, openflags->w); @@ -1218,7 +1219,8 @@ int open_ubd_file(char *file, struct ope } /* Succesful return case! */ - if(backing_file_out == NULL) return(fd); + if(backing_file_out == NULL) + return(fd); err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, &size, §orsize, &align, bitmap_offset_out); @@ -1227,7 +1229,8 @@ int open_ubd_file(char *file, struct ope "errno = %d\n", file, -err); goto out_close; } - if(err) return(fd); + if(err) + return(fd); asked_switch = path_requires_switch(*backing_file_out, backing_file, file); @@ -1236,24 +1239,24 @@ int open_ubd_file(char *file, struct ope printk("Switching backing file to '%s'\n", *backing_file_out); err = write_cow_header(file, fd, *backing_file_out, sectorsize, align, &size); - if(err){ + if (err) { printk("Switch failed, errno = %d\n", -err); goto out_close; } - } - else { + } else { *backing_file_out = backing_file; err = backing_file_mismatch(*backing_file_out, size, mtime); - if(err) goto out_close; + if (err) + goto out_close; } cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, bitmap_len_out, data_offset_out); - return(fd); + return fd; out_close: os_close_file(fd); - return(err); + return err; } int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, |
From: Paolo 'B. G. <bla...@ya...> - 2006-01-18 00:27:47
|
From: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> Pre-clear transport-specific private structure before passing it down. In fact, I just got a slab corruption and kernel panic on exit because kfree() was called on a pointer which probably was never allocated, BUT hadn't been set to NULL by the driver. As the code is full of such errors, I've decided for now to go the safe way (we're talking about drivers), and to do the simple thing. I'm also starting to fix drivers, and already sent a patch for the daemon transport. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/drivers/net_kern.c | 6 +++++- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 5b8c64e..98350bb 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -322,6 +322,11 @@ static int eth_configure(int n, void *in return 1; } + lp = dev->priv; + /* This points to the transport private data. It's still clear, but we + * must memset it to 0 *now*. Let's help the drivers. */ + memset(lp, 0, size); + /* sysfs register */ if (!driver_registered) { platform_driver_register(¨_net_driver); @@ -364,7 +369,6 @@ static int eth_configure(int n, void *in free_netdev(dev); return 1; } - lp = dev->priv; /* lp.user is the first four bytes of the transport data, which * has already been initialized. This structure assignment will |
From: Paolo 'B. G. <bla...@ya...> - 2006-01-18 00:27:47
|
From: Jeff Dike <jd...@ad...>, Paolo 'Blaisorblade' Giarrusso <bla...@ya...> Define a release method for the ubd and network driver so that sysfs doesn't complain when one is removed via: host $ uml_mconsole <umid> remove <dev> Done by Jeff around January for ubd only, later lost, then restored in his tree - however I'm merging it now since there's no reason to leave this here. We don't need to do any cleanup in the new added method, because when hot-unplug is done by uml_mconsole we already handle cleanup in mconsole infrastructure, i.e. mc_device->remove (net_remove/ubd_remove), which is also the calling method. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/drivers/net_kern.c | 6 ++++++ arch/um/drivers/ubd_kern.c | 6 ++++++ 2 files changed, 12 insertions(+), 0 deletions(-) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index fb1f9fb..5b8c64e 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -276,6 +276,11 @@ static struct platform_driver uml_net_dr }; static int driver_registered; +/* Sysfs requires a ->release when mconsole hot-unplug is done */ +static void dummy_device_release(struct device *dev) +{ +} + static int eth_configure(int n, void *init, char *mac, struct transport *transport) { @@ -324,6 +329,7 @@ static int eth_configure(int n, void *in } device->pdev.id = n; device->pdev.name = DRIVER_NAME; + device->pdev.dev.release = dummy_device_release; platform_device_register(&device->pdev); SET_NETDEV_DEV(dev,&device->pdev.dev); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 7696f8d..49dda56 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -617,6 +617,11 @@ static int ubd_open_dev(struct ubd *dev) return(err); } +/* Sysfs requires a ->release when mconsole hot-unplug is done */ +static void dummy_device_release(struct device *dev) +{ +} + static int ubd_new_disk(int major, u64 size, int unit, struct gendisk **disk_out) @@ -652,6 +657,7 @@ static int ubd_new_disk(int major, u64 s if (major == MAJOR_NR) { ubd_dev[unit].pdev.id = unit; ubd_dev[unit].pdev.name = DRIVER_NAME; + ubd_dev[unit].pdev.dev.release = dummy_device_release; platform_device_register(&ubd_dev[unit].pdev); disk->driverfs_dev = &ubd_dev[unit].pdev.dev; } |
From: Jeff D. <jd...@ad...> - 2006-01-18 02:01:55
|
On Wed, Jan 18, 2006 at 01:19:21AM +0100, Paolo 'Blaisorblade' Giarrusso wrote: > Done by Jeff around January for ubd only, later lost, then restored in his tree > - however I'm merging it now since there's no reason to leave this here. The original was dinged by hch for covering over a problem without really fixing it. We need to think about this one some more. Jeff |
From: Blaisorblade <bla...@ya...> - 2006-01-18 11:12:09
|
On Wednesday 18 January 2006 03:53, Jeff Dike wrote: > On Wed, Jan 18, 2006 at 01:19:21AM +0100, Paolo 'Blaisorblade' Giarrusso wrote: > > Done by Jeff around January for ubd only, later lost, then restored in > > his tree - however I'm merging it now since there's no reason to leave > > this here. > > The original was dinged by hch for covering over a problem without really > fixing it. > > We need to think about this one some more. About this, have you reported upstream the "crash with remove ubd2 and ubd3" thing caused by wrong refcounts? Greg and friends tend to fix various drivers all around, I guess it's the right policy to ask them to fix this too. -- Inform me of my mistakes, so I can keep imitating Homer Simpson's "Doh!". Paolo Giarrusso, aka Blaisorblade (Skype ID "PaoloGiarrusso", ICQ 215621894) http://www.user-mode-linux.org/~blaisorblade ___________________________________ Yahoo! Messenger with Voice: chiama da PC a telefono a tariffe esclusive http://it.messenger.yahoo.com |
From: Paolo 'B. G. <bla...@ya...> - 2006-01-18 00:27:47
|
*) mark as "EXPERIMENTAL" various items that either aren't very stable or that are actively crashing the setup of users which don't really need them (i.e. HIGHMEM and 3-level pagetables on x86 - nobody needs either, everybody reports "I'm using it and getting trouble"). *) move net/Kconfig near to the rest of network configurations, and drivers/block/Kconfig near "Block layer" submenu. *) it's useless and doesn't work well to force NETDEVICES on and to disable the prompt like it's done. Better remove the attempt, and change that to a simple "default y if UML". *) drop the warning about "report problems about HPPFS" - it's redundant anyway, as that's the usual procedure, and HPPFS users are especially technical (i.e. they know reporting bugs is _good_). Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/Kconfig | 27 ++++++++++++++------------- arch/um/Kconfig.i386 | 6 +++++- drivers/net/Kconfig | 1 + 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 8ff3bcb..5982fe2 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -143,6 +143,7 @@ config HOSTFS config HPPFS tristate "HoneyPot ProcFS (EXPERIMENTAL)" + depends on EXPERIMENTAL help hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc entries to be overridden, removed, or fabricated from the host. @@ -155,10 +156,6 @@ config HPPFS You only need this if you are setting up a UML honeypot. Otherwise, it is safe to say 'N' here. - If you are actively using it, please report any problems, since it's - getting fixed. In this moment, it is experimental on 2.6 (it works on - 2.4). - config MCONSOLE bool "Management console" default y @@ -243,8 +240,16 @@ config NEST_LEVEL Only change this if you are running nested UMLs. config HIGHMEM - bool "Highmem support" - depends on !64BIT + bool "Highmem support (EXPERIMENTAL)" + depends on !64BIT && EXPERIMENTAL + default n + help + This was used to allow UML to run with big amounts of memory. + Currently it is unstable, so if unsure say N. + + To use big amounts of memory, it is recommended to disable TT mode (i.e. + CONFIG_MODE_TT) and enable static linking (i.e. CONFIG_STATIC_LINK) - + this should allow the guest to use up to 2.75G of memory. config KERNEL_STACK_ORDER int "Kernel stack size order" @@ -269,17 +274,13 @@ endmenu source "init/Kconfig" -source "net/Kconfig" - -source "drivers/base/Kconfig" +source "drivers/block/Kconfig" source "arch/um/Kconfig.char" -source "drivers/block/Kconfig" +source "drivers/base/Kconfig" -config NETDEVICES - bool - default NET +source "net/Kconfig" source "arch/um/Kconfig.net" diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386 index c71b39a..ef79ed2 100644 --- a/arch/um/Kconfig.i386 +++ b/arch/um/Kconfig.i386 @@ -22,13 +22,17 @@ config TOP_ADDR default 0x80000000 if HOST_2G_2G config 3_LEVEL_PGTABLES - bool "Three-level pagetables" + bool "Three-level pagetables (EXPERIMENTAL)" default n + depends on EXPERIMENTAL help Three-level pagetables will let UML have more than 4G of physical memory. All the memory that can't be mapped directly will be treated as high memory. + However, this it experimental on 32-bit architectures, so if unsure say + N (on x86-64 it's automatically enabled, instead, as it's safe there). + config STUB_CODE hex default 0xbfffe000 diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1421941..c0f9351 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -7,6 +7,7 @@ menu "Network device support" config NETDEVICES depends on NET + default y if UML bool "Network device support" ---help--- You can say N here if you don't intend to connect your Linux box to |
From: Paolo 'B. G. <bla...@ya...> - 2006-01-18 00:27:47
|
When the user specifies both a COW file and its backing file, if the previous backing file is not found, currently UML tries again to use it and fails. This can be corrected by changing same_backing_files() return value in that case, so that the caller will try to change the COW file to point to the new location, as already done in other cases. Additionally, given the change in the meaning of the func, change its name, invert its return value, so all values are inverted except when stat(from_cow,&buf2) fails. And add some comments and two minor bugfixes - remove a fd leak (return err rather than goto out) and a repeated check. Tested well. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <bla...@ya...> --- arch/um/drivers/ubd_kern.c | 28 +++++++++++++++------------- 1 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 49dda56..e498f34 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1109,31 +1109,33 @@ static int ubd_ioctl(struct inode * inod return(-EINVAL); } -static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) +static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) { struct uml_stat buf1, buf2; int err; - if(from_cmdline == NULL) return(1); - if(!strcmp(from_cmdline, from_cow)) return(1); + if(from_cmdline == NULL) + return 0; + if(!strcmp(from_cmdline, from_cow)) + return 0; err = os_stat_file(from_cmdline, &buf1); if(err < 0){ printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); - return(1); + return 0; } err = os_stat_file(from_cow, &buf2); if(err < 0){ printk("Couldn't stat '%s', err = %d\n", from_cow, -err); - return(1); + return 1; } if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) - return(1); + return 0; printk("Backing file mismatch - \"%s\" requested,\n" "\"%s\" specified in COW header of \"%s\"\n", from_cmdline, from_cow, cow); - return(0); + return 1; } static int backing_file_mismatch(char *file, __u64 size, time_t mtime) @@ -1195,7 +1197,7 @@ int open_ubd_file(char *file, struct ope unsigned long long size; __u32 version, align; char *backing_file; - int fd, err, sectorsize, same, mode = 0644; + int fd, err, sectorsize, asked_switch, mode = 0644; fd = os_open_file(file, *openflags, mode); if(fd < 0){ @@ -1215,6 +1217,7 @@ int open_ubd_file(char *file, struct ope goto out_close; } + /* Succesful return case! */ if(backing_file_out == NULL) return(fd); err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, @@ -1226,17 +1229,16 @@ int open_ubd_file(char *file, struct ope } if(err) return(fd); - if(backing_file_out == NULL) return(fd); - - same = same_backing_files(*backing_file_out, backing_file, file); + asked_switch = path_requires_switch(*backing_file_out, backing_file, file); - if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){ + /* Allow switching only if no mismatch. */ + if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) { printk("Switching backing file to '%s'\n", *backing_file_out); err = write_cow_header(file, fd, *backing_file_out, sectorsize, align, &size); if(err){ printk("Switch failed, errno = %d\n", -err); - return(err); + goto out_close; } } else { |