From: Richard W. <ric...@gm...> - 2015-05-10 12:36:04
|
On Sun, May 10, 2015 at 1:14 AM, Thomas Meyer <th...@m3...> wrote: > Hi, > > Changes: > - also create posix timer in stub_clone_handler() > - incorporated antons remarks Hm, this patch does a *lot* more than the changelog says. > > diff --git a/arch/um/Makefile b/arch/um/Makefile > index 17d4460..a4a434f 100644 > --- a/arch/um/Makefile > +++ b/arch/um/Makefile > @@ -130,7 +130,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT) > # The wrappers will select whether using "malloc" or the kernel allocator. > LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc > > -LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) > +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt > > # Used by link-vmlinux.sh which has special support for um link > export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) > diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h > index 4a2037f..0f2a5b1 100644 > --- a/arch/um/include/asm/irq.h > +++ b/arch/um/include/asm/irq.h > @@ -16,8 +16,9 @@ > #define TELNETD_IRQ 12 > #define XTERM_IRQ 13 > #define RANDOM_IRQ 14 > +#define HRTIMER_IRQ 15 > > -#define LAST_IRQ RANDOM_IRQ > +#define LAST_IRQ HRTIMER_IRQ > #define NR_IRQS (LAST_IRQ + 1) > > #endif > diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h > index ca1843e..798aa6e 100644 > --- a/arch/um/include/shared/as-layout.h > +++ b/arch/um/include/shared/as-layout.h > @@ -17,7 +17,7 @@ > > /* Some constant macros are used in both assembler and > * C code. Therefore we cannot annotate them always with > - * 'UL' and other type specifiers unilaterally. We > + * 'UL' and other type specifiers unilaterally. We > * use the following macros to deal with this. 
> */ > > @@ -28,6 +28,13 @@ > #define _UML_AC(X, Y) __UML_AC(X, Y) > #endif > > +/** > + * userspace stub address space layout: > + * Below macros define the layout of the stub code and data > + * which are mapped in each userspace process: > + * - one page of code located at 0x100000 followed by > + * - one page of data > + */ > #define STUB_START _UML_AC(, 0x100000) > #define STUB_CODE _UML_AC((unsigned long), STUB_START) > #define STUB_DATA _UML_AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE) > diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h > index 83a91f9..0282b36 100644 > --- a/arch/um/include/shared/kern_util.h > +++ b/arch/um/include/shared/kern_util.h > @@ -37,6 +37,7 @@ extern void initial_thread_cb(void (*proc)(void *), void *arg); > extern int is_syscall(unsigned long addr); > > extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); > +extern void hrtimer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); > > extern int start_uml(void); > extern void paging_init(void); > diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h > index d824528..7f7368b 100644 > --- a/arch/um/include/shared/os.h > +++ b/arch/um/include/shared/os.h > @@ -217,7 +217,8 @@ extern int set_umid(char *name); > extern char *get_umid(void); > > /* signal.c */ > -extern void timer_init(void); > +extern void uml_timer_set_signal_handler(void); > +extern void uml_hrtimer_set_signal_handler(void); > extern void set_sigstack(void *sig_stack, int size); > extern void remove_sigstack(void); > extern void set_handler(int sig); > @@ -238,12 +239,16 @@ extern void um_early_printk(const char *s, unsigned int n); > extern void os_fix_helper_signals(void); > > /* time.c */ > -extern void idle_sleep(unsigned long long nsecs); > -extern int set_interval(void); > -extern int timer_one_shot(int ticks); > -extern long long disable_timer(void); > +extern void os_idle_sleep(unsigned long long 
nsecs); > +extern int os_timer_create(void* timer); > +extern int os_timer_set_interval(void* timer, void* its); > +extern int os_timer_one_shot(int ticks); > +extern long long os_timer_disable(void); > +extern long os_timer_remain(void* timer); > extern void uml_idle_timer(void); > +extern long long os_persistent_clock_emulation(void); > extern long long os_nsecs(void); > +extern long long os_vnsecs(void); > > /* skas/mem.c */ > extern long run_syscall_stub(struct mm_id * mm_idp, > diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h > index f6ed92c..f98b9e2 100644 > --- a/arch/um/include/shared/skas/stub-data.h > +++ b/arch/um/include/shared/skas/stub-data.h > @@ -6,12 +6,12 @@ > #ifndef __STUB_DATA_H > #define __STUB_DATA_H > > -#include <sys/time.h> > +#include <time.h> > > struct stub_data { > - long offset; > + unsigned long offset; > int fd; > - struct itimerval timer; > + struct itimerspec timer; > long err; > }; > > diff --git a/arch/um/include/shared/timer-internal.h b/arch/um/include/shared/timer-internal.h > new file mode 100644 > index 0000000..afdc6dc > --- /dev/null > +++ b/arch/um/include/shared/timer-internal.h > @@ -0,0 +1,18 @@ > +/* > + * Copyright (C) 2012 - 2014 Cisco Systems > + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > + * Licensed under the GPL > + */ > + > +#ifndef __TIMER_INTERNAL_H__ > +#define __TIMER_INTERNAL_H__ > + > +#define TIMER_MULTIPLIER 256 > +#define TIMER_MIN_DELTA 500 > + > +extern void timer_lock(void); > +extern void timer_unlock(void); > + > +extern long long hrtimer_disable(void); > + > +#endif > diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c > index 23cb935..4c1966a 100644 > --- a/arch/um/kernel/irq.c > +++ b/arch/um/kernel/irq.c > @@ -338,20 +338,20 @@ static struct irq_chip normal_irq_type = { > .irq_unmask = dummy, > }; > > -static struct irq_chip SIGVTALRM_irq_type = { > - .name = "SIGVTALRM", > - .irq_disable = dummy, > - 
.irq_enable = dummy, > - .irq_ack = dummy, > - .irq_mask = dummy, > - .irq_unmask = dummy, > +static struct irq_chip SIGUSR2_irq_type = { > + .name = "SIGUSR2", > + .irq_disable = dummy, > + .irq_enable = dummy, > + .irq_ack = dummy, > + .irq_mask = dummy, > + .irq_unmask = dummy, > }; > > void __init init_IRQ(void) > { > int i; > > - irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); > + irq_set_chip_and_handler(HRTIMER_IRQ, &SIGUSR2_irq_type, handle_edge_irq); > > for (i = 1; i < NR_IRQS; i++) > irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); > diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c > index 9034fc8..5f6642d 100644 > --- a/arch/um/kernel/physmem.c > +++ b/arch/um/kernel/physmem.c > @@ -119,14 +119,23 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end, > len - bootmap_size - reserve); > } > > +/** > + * phys_mapping() - maps a physical address to an offset address > + * phys: the physical address > + * offset_out: the offset in the memory map area > + * > + * Returns an file descriptor, or -1 when unknown physical address > + */ > int phys_mapping(unsigned long phys, unsigned long long *offset_out) > { > int fd = -1; > > + /* first check normal memory */ > if (phys < physmem_size) { > fd = physmem_fd; > *offset_out = phys; > } > + /* than check io memory */ > else if (phys < __pa(end_iomem)) { > struct iomem_region *region = iomem_regions; > > @@ -140,6 +149,7 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out) > region = region->next; > } > } > + /* last check highmem */ > else if (phys < __pa(end_iomem) + highmem) { > fd = physmem_fd; > *offset_out = phys - iomem_size; > diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c > index 68b9119..b8a8d10 100644 > --- a/arch/um/kernel/process.c > +++ b/arch/um/kernel/process.c > @@ -27,6 +27,7 @@ > #include <kern_util.h> > #include <os.h> > #include <skas.h> > +#include <timer-internal.h> > > /* > * 
This is a per-cpu array. A processor only modifies its entry and it only > @@ -201,12 +202,8 @@ void initial_thread_cb(void (*proc)(void *), void *arg) > > void arch_cpu_idle(void) > { > - unsigned long long nsecs; > - > cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); > - nsecs = disable_timer(); > - idle_sleep(nsecs); > - local_irq_enable(); > + os_idle_sleep(UM_NSEC_PER_SEC / UM_HZ); > } > > int __cant_sleep(void) { > diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c > index 289771d..5f283b1 100644 > --- a/arch/um/kernel/skas/clone.c > +++ b/arch/um/kernel/skas/clone.c > @@ -20,37 +20,63 @@ > * on some systems. > */ > > +/** > + * stub_clone_handler() - userspace clone handler stub > + * > + * this stub clone hanlder is mmaped(?)/available in all userspace > + * processes. It's used to copy an mm context from an fork syscall in the > + * traced userspace process > + */ > void __attribute__ ((__section__ (".__syscall_stub"))) > stub_clone_handler(void) > { > struct stub_data *data = (struct stub_data *) STUB_DATA; > + struct sigevent sev; > + timer_t timerid; > long err; > > + /* clone "from" process */ > err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, > STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *)); > - if (err != 0) > + /* Parent: exit here, child, continue */ > + if (err != 0) { > goto out; > + } > > + /* set child to ptrace */ > err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); > if (err) > goto out; > > - err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, > - (long) &data->timer, 0); > + /* create a new posix interval timer */ > + sev.sigev_notify = SIGEV_SIGNAL; > + sev.sigev_signo = SIGUSR2; > + sev.sigev_value.sival_ptr = NULL; > + > + err = stub_syscall3(__NR_timer_create, CLOCK_MONOTONIC, > + (long) &sev, (long) &timerid); > if (err) > goto out; > > + /* set interval to the given value from copy_context_skas0() */ > + err = stub_syscall4(__NR_timer_settime, (long) timerid, 0l, > + (long) 
&data->timer, 0l); > + if (err) > + goto out; > + > + /* switch to new stack */ > remap_stack(data->fd, data->offset); > goto done; > > out: > /* > - * save current result. > - * Parent: pid; > - * child: retcode of mmap already saved and it jumps around this > - * assignment > + * Save current result. > + * - Parent: pid from clone() call > + * - Child: "retcode of mmap already saved and it jumps around this > + * assignment"??? > */ > data->err = err; > + > done: > trap_myself(); > } > diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c > index 94abdcc..df9c9ab 100644 > --- a/arch/um/kernel/skas/mmu.c > +++ b/arch/um/kernel/skas/mmu.c > @@ -47,6 +47,13 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, > return -ENOMEM; > } > > +/** > + * init_new_context() - creates or copies an mm context > + * @task: the belonging task > + * @mm: the mm struct to be setup/allocated > + * > + * called by mm_init() (kernel/fork.c) > + */ > int init_new_context(struct task_struct *task, struct mm_struct *mm) > { > struct mm_context *from_mm = NULL; > @@ -59,13 +66,15 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) > goto out; > > to_mm->id.stack = stack; > - if (current->mm != NULL && current->mm != &init_mm) > + if (current->mm != NULL && current->mm != &init_mm) { > from_mm = &current->mm->context; > + } > > - if (from_mm) > - to_mm->id.u.pid = copy_context_skas0(stack, > - from_mm->id.u.pid); > - else to_mm->id.u.pid = start_userspace(stack); > + if (from_mm) { > + to_mm->id.u.pid = copy_context_skas0(stack, from_mm->id.u.pid); > + } else { > + to_mm->id.u.pid = start_userspace(stack); > + } > > if (to_mm->id.u.pid < 0) { > ret = to_mm->id.u.pid; > diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c > index 527fa58..2b0c35a 100644 > --- a/arch/um/kernel/skas/process.c > +++ b/arch/um/kernel/skas/process.c > @@ -43,6 +43,9 @@ int __init start_uml(void) > &init_task.thread.switch_buf); > } > > +/** > + * 
current_stub_stack() - returns the address of the current mm stack > + */ > unsigned long current_stub_stack(void) > { > if (current->mm == NULL) > diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c > index 117568d..ed64037 100644 > --- a/arch/um/kernel/time.c > +++ b/arch/um/kernel/time.c > @@ -1,4 +1,5 @@ > /* > + * Copyright (C) 2012-2014 Cisco Systems > * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > * Licensed under the GPL > */ > @@ -8,32 +9,36 @@ > #include <linux/interrupt.h> > #include <linux/jiffies.h> > #include <linux/threads.h> > +#include <linux/spinlock.h> > #include <asm/irq.h> > #include <asm/param.h> > #include <kern_util.h> > #include <os.h> > +#include <timer-internal.h> > > -void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) > +void hrtimer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) > { > unsigned long flags; > > local_irq_save(flags); > - do_IRQ(TIMER_IRQ, regs); > + do_IRQ(HRTIMER_IRQ, regs); > local_irq_restore(flags); > } > > -static void itimer_set_mode(enum clock_event_mode mode, > +static void timer_set_mode(enum clock_event_mode mode, > struct clock_event_device *evt) > { > switch (mode) { > case CLOCK_EVT_MODE_PERIODIC: > - set_interval(); > + os_timer_set_interval(NULL, NULL); > break; > > + case CLOCK_EVT_MODE_ONESHOT: > + os_timer_one_shot(1); > + > case CLOCK_EVT_MODE_SHUTDOWN: > case CLOCK_EVT_MODE_UNUSED: > - case CLOCK_EVT_MODE_ONESHOT: > - disable_timer(); > + os_timer_disable(); > break; > > case CLOCK_EVT_MODE_RESUME: > @@ -41,68 +46,74 @@ static void itimer_set_mode(enum clock_event_mode mode, > } > } > > -static int itimer_next_event(unsigned long delta, > +static int timer_next_event(unsigned long delta, > struct clock_event_device *evt) > { > - return timer_one_shot(delta + 1); > + return os_timer_one_shot(delta); > } > > -static struct clock_event_device itimer_clockevent = { > - .name = "itimer", > +static struct 
clock_event_device timer_clockevent = { > + .name = "timer", > .rating = 250, > .cpumask = cpu_all_mask, > .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, > - .set_mode = itimer_set_mode, > - .set_next_event = itimer_next_event, > - .shift = 32, > + .set_mode = timer_set_mode, > + .set_next_event = timer_next_event, > + .shift = 0, > + .max_delta_ns = 0xffffffff, > + .min_delta_ns = TIMER_MIN_DELTA, //microsecond resolution should be enough for anyone, same as 640K RAM > .irq = 0, > + .mult = 1, > }; > > -static irqreturn_t um_timer(int irq, void *dev) > +static irqreturn_t um_timer_irq(int irq, void *dev) > { > - (*itimer_clockevent.event_handler)(&itimer_clockevent); > + (*timer_clockevent.event_handler)(&timer_clockevent); > > return IRQ_HANDLED; > } > > -static cycle_t itimer_read(struct clocksource *cs) > +static cycle_t timer_read(struct clocksource *cs) > { > - return os_nsecs() / 1000; > + return os_nsecs() / TIMER_MULTIPLIER; > } > > -static struct clocksource itimer_clocksource = { > - .name = "itimer", > +static struct clocksource timer_clocksource = { > + .name = "timer", > .rating = 300, > - .read = itimer_read, > + .read = timer_read, > .mask = CLOCKSOURCE_MASK(64), > .flags = CLOCK_SOURCE_IS_CONTINUOUS, > }; > > -static void __init setup_itimer(void) > +static void __init timer_setup(void) > { > int err; > > - err = request_irq(TIMER_IRQ, um_timer, 0, "timer", NULL); > - if (err != 0) > + err = request_irq(HRTIMER_IRQ, um_timer_irq, IRQF_TIMER, "hr timer", NULL); > + if (err != 0) { > printk(KERN_ERR "register_timer : request_irq failed - " > "errno = %d\n", -err); > + return; > + } > + > + err = os_timer_create(NULL); > + if (err != 0) { > + printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); > + return; > + } > > - itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32); > - itimer_clockevent.max_delta_ns = > - clockevent_delta2ns(60 * HZ, &itimer_clockevent); > - itimer_clockevent.min_delta_ns = > - clockevent_delta2ns(1, 
&itimer_clockevent); > - err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC); > + err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER); > if (err) { > printk(KERN_ERR "clocksource_register_hz returned %d\n", err); > return; > } > - clockevents_register_device(&itimer_clockevent); > + clockevents_register_device(&timer_clockevent); > } > > void read_persistent_clock(struct timespec *ts) > { > - long long nsecs = os_nsecs(); > + long long nsecs = os_persistent_clock_emulation(); > > set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, > nsecs % NSEC_PER_SEC); > @@ -110,6 +121,6 @@ void read_persistent_clock(struct timespec *ts) > > void __init time_init(void) > { > - timer_init(); > - late_time_init = setup_itimer; > + uml_hrtimer_set_signal_handler(); > + late_time_init = timer_setup; > } > diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h > deleted file mode 100644 > index 0dc2c9f..0000000 > --- a/arch/um/os-Linux/internal.h > +++ /dev/null > @@ -1 +0,0 @@ > -void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc); > diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c > index df9191a..bd5907e 100644 > --- a/arch/um/os-Linux/main.c > +++ b/arch/um/os-Linux/main.c > @@ -168,8 +168,8 @@ int __init main(int argc, char **argv, char **envp) > * some time) and cause a segfault. 
> */ > > - /* stop timers and set SIGVTALRM to be ignored */ > - disable_timer(); > + /* stop timers and set timer signal to be ignored */ > + os_timer_disable(); > > /* disable SIGIO for the fds and set SIGIO to be ignored */ > err = deactivate_all_fds(); > diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c > index 7b605e4..ee6db2e 100644 > --- a/arch/um/os-Linux/signal.c > +++ b/arch/um/os-Linux/signal.c > @@ -13,7 +13,6 @@ > #include <kern_util.h> > #include <os.h> > #include <sysdep/mcontext.h> > -#include "internal.h" > > void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { > [SIGTRAP] = relay_signal, > @@ -23,7 +22,8 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { > [SIGBUS] = bus_handler, > [SIGSEGV] = segv_handler, > [SIGIO] = sigio_handler, > - [SIGVTALRM] = timer_handler }; > + [SIGUSR2] = hrtimer_handler > +}; > > static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) > { > @@ -38,7 +38,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) > } > > /* enable signals if sig isn't IRQ signal */ > - if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) > + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM) && (sig != SIGUSR2)) > unblock_signals(); > > (*sig_info[sig])(sig, si, &r); > @@ -55,8 +55,8 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) > #define SIGIO_BIT 0 > #define SIGIO_MASK (1 << SIGIO_BIT) > > -#define SIGVTALRM_BIT 1 > -#define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) > +#define SIGUSR2_BIT 2 > +#define SIGUSR2_MASK (1 << SIGUSR2_BIT) > > static int signals_enabled; > static unsigned int signals_pending; > @@ -78,46 +78,47 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) > set_signals(enabled); > } > > -static void real_alarm_handler(mcontext_t *mc) > +static void real_hralarm_handler(mcontext_t *mc) > { > struct uml_pt_regs regs; > > if (mc != NULL) > get_regs_from_mc(®s, 
mc); > regs.is_user = 0; > - unblock_signals(); > - timer_handler(SIGVTALRM, NULL, &regs); > + hrtimer_handler(SIGUSR2, NULL, &regs); > } > > -void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) > +void hralarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) > { > int enabled; > > enabled = signals_enabled; > if (!signals_enabled) { > - signals_pending |= SIGVTALRM_MASK; > + signals_pending |= SIGUSR2_MASK; > return; > } > > block_signals(); > - > - real_alarm_handler(mc); > + real_hralarm_handler(mc); > set_signals(enabled); > } > > -void timer_init(void) > +void uml_hrtimer_set_signal_handler(void) > { > - set_handler(SIGVTALRM); > + set_handler(SIGUSR2); > } > > void set_sigstack(void *sig_stack, int size) > { > - stack_t stack = ((stack_t) { .ss_flags = 0, > - .ss_sp = (__ptr_t) sig_stack, > - .ss_size = size - sizeof(void *) }); > + stack_t stack = ((stack_t) { > + .ss_flags = 0, > + .ss_sp = (__ptr_t) sig_stack, > + .ss_size = size - sizeof(void *) > + }); > > - if (sigaltstack(&stack, NULL) != 0) > + if (sigaltstack(&stack, NULL) != 0) { > panic("enabling signal stack failed, errno = %d\n", errno); > + } > } > > static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { > @@ -129,10 +130,9 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { > > [SIGIO] = sig_handler, > [SIGWINCH] = sig_handler, > - [SIGVTALRM] = alarm_handler > + [SIGUSR2] = hralarm_handler > }; > > - > static void hard_handler(int sig, siginfo_t *si, void *p) > { > struct ucontext *uc = p; > @@ -176,6 +176,13 @@ static void hard_handler(int sig, siginfo_t *si, void *p) > } while (pending); > } > > +/** > + * set_handler() - enable signal in process' signal mask > + * @sig: The signal to enable > + * > + * Enable the given signal in the process' signal mask and > + * attach hard_handler() as handler routine > + */ > void set_handler(int sig) > { > struct sigaction action; > @@ -186,9 +193,9 @@ void set_handler(int 
sig) > > /* block irq ones */ > sigemptyset(&action.sa_mask); > - sigaddset(&action.sa_mask, SIGVTALRM); > sigaddset(&action.sa_mask, SIGIO); > sigaddset(&action.sa_mask, SIGWINCH); > + sigaddset(&action.sa_mask, SIGUSR2); > > if (sig == SIGSEGV) > flags |= SA_NODEFER; > @@ -281,8 +288,8 @@ void unblock_signals(void) > if (save_pending & SIGIO_MASK) > sig_handler_common(SIGIO, NULL, NULL); > > - if (save_pending & SIGVTALRM_MASK) > - real_alarm_handler(NULL); > + if (save_pending & SIGUSR2_MASK) > + real_hralarm_handler(NULL); > } > } > > @@ -298,9 +305,11 @@ int set_signals(int enable) > return enable; > > ret = signals_enabled; > - if (enable) > + if (enable) { > unblock_signals(); > - else block_signals(); > + } else { > + block_signals(); > + } > > return ret; > } > diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c > index 7a97775..30065e1 100644 > --- a/arch/um/os-Linux/skas/process.c > +++ b/arch/um/os-Linux/skas/process.c > @@ -45,7 +45,7 @@ static int ptrace_dump_regs(int pid) > * Signals that are OK to receive in the stub - we'll just continue it. > * SIGWINCH will happen when UML is inside a detached screen. > */ > -#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) > +#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH) | (1 << SIGUSR2)) > > /* Signals that the stub will finish with - anything else is an error */ > #define STUB_DONE_MASK (1 << SIGTRAP) > @@ -176,17 +176,59 @@ static void handle_trap(int pid, struct uml_pt_regs *regs, > > extern int __syscall_stub_start; > > +/** > + * userspace_tramp() - userspace trampoline > + * @stack: The address of the stub stack used for the new process > + * (used for SIGSEGV handling). > + * > + * The trampoline does execute as a new process after clone() > + * For each new userspace process the below code sets up > + * all necessary data: > + * 1.) enable ptrace from parent (the uml kernel) > + * 2.) Setup signal handling. 
Signals are inherited by the parent, i.e > + * the uml kernel > + * 3.) Create and start an posix (interval) timer for this process. > + * This timer will emulate the kernel timer ticks. > + * The timer signal will be processed by the kernel process in userspace() > + * 4.) Map stub code page in the new process, i.e. the > + * userspace process: > + * The stub codes is used to catch syscalls from the userspace to > + * the kernel. > + * See linker scripts arch/um/kernel/dyn.lds.S (dynamic) resp. > + * arch/um/kernel/uml.lds.S (static) > + * for __syscall_stub_start defintion and > + * arch/um/kernel/skas/clone.c for the stub_handler itself. > + * 5.) Map stub data page in the new process, i.e. the > + * userspace process: > + * Setup an SIGSEGV handler into the new process. > + * Page faults will be catched and signaled to the kernel via this > + * mechanism. > + * See arch/x86/um/stub_segv.c for the handler itself. > + * 6.) Stop the new process and wait for the kernel to SIGCONT it agian > + * when it will get scheduled() > + */ > static int userspace_tramp(void *stack) > { > void *addr; > int err, fd; > unsigned long long offset; > + timer_t timer; > + > + struct stub_data *data = (struct stub_data *) stack; > > ptrace(PTRACE_TRACEME, 0, 0, 0); > > signal(SIGTERM, SIG_DFL); > signal(SIGWINCH, SIG_IGN); > - err = set_interval(); > + > + err = os_timer_create(&timer); > + if (err) { > + printk(UM_KERN_ERR "userspace_tramp - creation of timer failed, " > + "errno = %d\n", err); > + exit(1); > + } > + > + err = os_timer_set_interval(&timer, &data->timer); > if (err) { > printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " > "errno = %d\n", err); > @@ -246,11 +288,18 @@ static int userspace_tramp(void *stack) > #define NR_CPUS 1 > int userspace_pid[NR_CPUS]; > > +/** > + * start_userspace() - start a new userspace process with a new mm context > + * @stub_stack: Address of the new process' stack > + * > + * called by init_new_context() > + */ > int 
start_userspace(unsigned long stub_stack) > { > void *stack; > unsigned long sp; > int pid, status, n, flags, err; > + struct stub_data *data = (struct stub_data *) stub_stack; > > stack = mmap(NULL, UM_KERN_PAGE_SIZE, > PROT_READ | PROT_WRITE | PROT_EXEC, > @@ -266,6 +315,14 @@ int start_userspace(unsigned long stub_stack) > > flags = CLONE_FILES | SIGCHLD; > > + *data = ((struct stub_data) { > + .timer = ((struct itimerspec) > + { .it_value.tv_sec = 0, > + .it_value.tv_nsec = os_timer_remain(NULL), > + .it_interval.tv_sec = 0, > + .it_interval.tv_nsec = UM_NSEC_PER_SEC / UM_HZ }) > + }); > + > pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); > if (pid < 0) { > err = -errno; > @@ -313,10 +370,15 @@ int start_userspace(unsigned long stub_stack) > return err; > } > > +/** > + * userspace() - user space control loop > + * @regs: the register's save memory > + * > + * The main loop that traces and controls each spwaned userspace > + * process > + */ > void userspace(struct uml_pt_regs *regs) > { > - struct itimerval timer; > - unsigned long long nsecs, now; > int err, status, op, pid = userspace_pid[0]; > /* To prevent races if using_sysemu changes under us.*/ > int local_using_sysemu; > @@ -325,13 +387,8 @@ void userspace(struct uml_pt_regs *regs) > /* Handle any immediate reschedules or signals */ > interrupt_end(); > > - if (getitimer(ITIMER_VIRTUAL, &timer)) > - printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno); > - nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + > - timer.it_value.tv_usec * UM_NSEC_PER_USEC; > - nsecs += os_nsecs(); > - > while (1) { > + > /* > * This can legitimately fail if the process loads a > * bogus value into a segment register. 
It will > @@ -388,32 +445,19 @@ void userspace(struct uml_pt_regs *regs) > switch (sig) { > case SIGSEGV: > if (PTRACE_FULL_FAULTINFO) { > - get_skas_faultinfo(pid, > - &regs->faultinfo); > - (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, > - regs); > + get_skas_faultinfo(pid,&regs->faultinfo); > + (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, regs); > + } else { > + handle_segv(pid, regs); > } > - else handle_segv(pid, regs); > break; > case SIGTRAP + 0x80: > - handle_trap(pid, regs, local_using_sysemu); > + handle_trap(pid, regs, local_using_sysemu); > break; > case SIGTRAP: > relay_signal(SIGTRAP, (struct siginfo *)&si, regs); > break; > - case SIGVTALRM: > - now = os_nsecs(); > - if (now < nsecs) > - break; > - block_signals(); > - (*sig_info[sig])(sig, (struct siginfo *)&si, regs); > - unblock_signals(); > - nsecs = timer.it_value.tv_sec * > - UM_NSEC_PER_SEC + > - timer.it_value.tv_usec * > - UM_NSEC_PER_USEC; > - nsecs += os_nsecs(); > - break; > + case SIGUSR2: > case SIGIO: > case SIGILL: > case SIGBUS: > @@ -448,8 +492,7 @@ static int __init init_thread_regs(void) > thread_regs[REGS_IP_INDEX] = STUB_CODE + > (unsigned long) stub_clone_handler - > (unsigned long) &__syscall_stub_start; > - thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE - > - sizeof(void *); > + thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE - sizeof(void *); > #ifdef __SIGNAL_FRAMESIZE > thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; > #endif > @@ -458,26 +501,51 @@ static int __init init_thread_regs(void) > > __initcall(init_thread_regs); > > +/** > + * copy_context_skas0() - copy an mm context > + * new_stack: void pointer of new stack, a zeroed page > + * pid: the pid of the mm parent, this proces is cloned > + * into a new one > + * > + * Copy an mm context from an existing task > + * 1.) get file descriptor and offset of the mmaped new_stack > + * 2.) set current stub stack's data: file descriptor, offset and timer data > + * 3.) 
Restore parents registers to init_thread_regs() > + * 4.) Continue parent (==from_mm) in stub_clone_handler(), see also > + * init_thread_regs(). This will clone a new process with same > + * mm. > + * 5.) > + * > + * Returns the PID of the new process > + */ > int copy_context_skas0(unsigned long new_stack, int pid) > { > - struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ }; > int err; > unsigned long current_stack = current_stub_stack(); > struct stub_data *data = (struct stub_data *) current_stack; > struct stub_data *child_data = (struct stub_data *) new_stack; > unsigned long long new_offset; > + > int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); > > /* > * prepare offset and fd of child's stack as argument for parent's > * and child's mmap2 calls > */ > - *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), > - .fd = new_fd, > - .timer = ((struct itimerval) > - { .it_value = tv, > - .it_interval = tv }) }); > - > + *data = ((struct stub_data) { > + .offset = MMAP_OFFSET(new_offset), > + .fd = new_fd, > + .timer = ((struct itimerspec) > + { .it_value.tv_sec = 0, > + .it_value.tv_nsec = os_timer_remain(NULL), > + .it_interval.tv_sec = 0, > + .it_interval.tv_nsec = UM_NSEC_PER_SEC / UM_HZ }) > + }); > + > + /* set parents regs > + * this set the registers to the saved registers done in the initcall > + * init_thread_regs() > + */ > err = ptrace_setregs(pid, thread_regs); > if (err < 0) { > err = -errno; > @@ -486,6 +554,7 @@ int copy_context_skas0(unsigned long new_stack, int pid) > return err; > } > > + /* set parents fp registers */ > err = put_fp_registers(pid, thread_fp_regs); > if (err < 0) { > printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers " > @@ -493,7 +562,9 @@ int copy_context_skas0(unsigned long new_stack, int pid) > return err; > } > > - /* set a well known return code for detection of child write failure */ > + /* set a well known return code for detection of child write failure, > + * 
i.e. on the new stack > + */ > child_data->err = 12345678; > > /* > @@ -508,8 +579,10 @@ int copy_context_skas0(unsigned long new_stack, int pid) > return err; > } > > + /* wait for parents stub_clone_handler() to finish */ > wait_stub_done(pid); > > + /* get childs pid, the pid of the cloned parent process */ > pid = data->err; > if (pid < 0) { > printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports " > diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c > index e9824d5..5a7f49c 100644 > --- a/arch/um/os-Linux/time.c > +++ b/arch/um/os-Linux/time.c > @@ -1,4 +1,5 @@ > /* > + * Copyright (C) 2012-2014 Cisco Systems > * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) > * Licensed under the GPL > */ > @@ -10,177 +11,177 @@ > #include <sys/time.h> > #include <kern_util.h> > #include <os.h> > -#include "internal.h" > +#include <string.h> > +#include <timer-internal.h> > > -int set_interval(void) > -{ > - int usec = UM_USEC_PER_SEC / UM_HZ; > - struct itimerval interval = ((struct itimerval) { { 0, usec }, > - { 0, usec } }); > - > - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) > - return -errno; > +static timer_t event_high_res_timer = 0; > > - return 0; > +static inline long long timeval_to_ns(const struct timeval *tv) > +{ > + return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + > + tv->tv_usec * UM_NSEC_PER_USEC; > } > > -int timer_one_shot(int ticks) > +static inline long long timespec_to_ns(const struct timespec *ts) > { > - unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ; > - unsigned long sec = usec / UM_USEC_PER_SEC; > - struct itimerval interval; > - > - usec %= UM_USEC_PER_SEC; > - interval = ((struct itimerval) { { 0, 0 }, { sec, usec } }); > + return ((long long) ts->tv_sec * UM_NSEC_PER_SEC) + > + ts->tv_nsec; > +} > > - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) > - return -errno; > +long long os_persistent_clock_emulation (void) { > + struct timespec realtime_tp; > > - return 0; > + 
clock_gettime(CLOCK_REALTIME, &realtime_tp); > + return timespec_to_ns(&realtime_tp); > } > > /** > - * timeval_to_ns - Convert timeval to nanoseconds > - * @ts: pointer to the timeval variable to be converted > - * > - * Returns the scalar nanosecond representation of the timeval > - * parameter. > - * > - * Ripped from linux/time.h because it's a kernel header, and thus > - * unusable from here. > + * os_timer_create() - create a new POSIX (interval) timer > */ > -static inline long long timeval_to_ns(const struct timeval *tv) > -{ > - return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + > - tv->tv_usec * UM_NSEC_PER_USEC; > -} > +int os_timer_create(void* timer) { > > -long long disable_timer(void) > -{ > - struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } }); > - long long remain, max = UM_NSEC_PER_SEC / UM_HZ; > + struct sigevent sev; > + timer_t* t = timer; > > - if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0) > - printk(UM_KERN_ERR "disable_timer - setitimer failed, " > - "errno = %d\n", errno); > + if(t == NULL) { > + t = &event_high_res_timer; > + } > > - remain = timeval_to_ns(&time.it_value); > - if (remain > max) > - remain = max; > + sev.sigev_notify = SIGEV_SIGNAL; > + sev.sigev_signo = SIGUSR2; /* note - hrtimer now has its own signal */ > + sev.sigev_value.sival_ptr = &event_high_res_timer; > > - return remain; > + if (timer_create( > + CLOCK_MONOTONIC, > + &sev, > + t) == -1) { > + return -1; > + } > + return 0; > } > > -long long os_nsecs(void) > +int os_timer_set_interval(void* timer, void* i) > { > - struct timeval tv; > + struct itimerspec its; > + unsigned long long nsec; > + timer_t* t = timer; > + struct itimerspec* its_in = i; > > - gettimeofday(&tv, NULL); > - return timeval_to_ns(&tv); > -} > + if(t == NULL) { > + t = &event_high_res_timer; > + } > + > + nsec = UM_NSEC_PER_SEC / UM_HZ; > + > + if(its_in != NULL) { > + its.it_value.tv_sec = its_in->it_value.tv_sec; > + its.it_value.tv_nsec = its_in->it_value.tv_nsec; > + }
else { > + its.it_value.tv_sec = 0; > + its.it_value.tv_nsec = nsec; > + } > + > + its.it_interval.tv_sec = 0; > + its.it_interval.tv_nsec = nsec; > + > + if(timer_settime(*t, 0, &its, NULL) == -1) { > + return -errno; > + } > > -#ifdef UML_CONFIG_NO_HZ_COMMON > -static int after_sleep_interval(struct timespec *ts) > -{ > return 0; > } > > -static void deliver_alarm(void) > +/** > + * os_timer_remain() - returns the remaining nanoseconds of the given interval > + * timer > + * Because this is the remaining time of an interval timer, which corresponds > + * to HZ, this value can never be bigger than one second. Just > + * the nanosecond part of the timer is returned. > + * The returned time is relative to the start time of the interval timer. > + * Returns a negative value in an error case. > + */ > +long os_timer_remain(void* timer) > { > - alarm_handler(SIGVTALRM, NULL, NULL); > -} > + struct itimerspec its; > + timer_t* t = timer; > > -static unsigned long long sleep_time(unsigned long long nsecs) > -{ > - return nsecs; > -} > + if(t == NULL) { > + t = &event_high_res_timer; > + } > > -#else > -unsigned long long last_tick; > -unsigned long long skew; > + if(timer_gettime(t, &its) == -1) { > + return -errno; > + } > > -static void deliver_alarm(void) > -{ > - unsigned long long this_tick = os_nsecs(); > - int one_tick = UM_NSEC_PER_SEC / UM_HZ; > + return its.it_value.tv_nsec; > +} > > - /* Protection against the host's time going backwards */ > - if ((last_tick != 0) && (this_tick < last_tick)) > - this_tick = last_tick; > +int os_timer_one_shot(int ticks) > +{ > + struct itimerspec its; > + unsigned long long nsec; > + unsigned long sec; > > - if (last_tick == 0) > - last_tick = this_tick - one_tick; > + nsec = (ticks + 1); > + sec = nsec / UM_NSEC_PER_SEC; > + nsec = nsec % UM_NSEC_PER_SEC; > > - skew += this_tick - last_tick; > + its.it_value.tv_sec = nsec / UM_NSEC_PER_SEC; > + its.it_value.tv_nsec = nsec; > > - while (skew >= one_tick) { > -
alarm_handler(SIGVTALRM, NULL, NULL); > - skew -= one_tick; > - } > + its.it_interval.tv_sec = 0; > + its.it_interval.tv_nsec = 0; // we cheat here > > - last_tick = this_tick; > + timer_settime(event_high_res_timer, 0, &its, NULL); > + return 0; > } > > -static unsigned long long sleep_time(unsigned long long nsecs) > +/** > + * os_timer_disable() - disable the posix (interval) timer > + * Returns the remaining interval timer time in nanoseconds > + */ > +long long os_timer_disable(void) > { > - return nsecs > skew ? nsecs - skew : 0; > + struct itimerspec its; > + > + memset(&its, 0, sizeof(struct itimerspec)); > + timer_settime(event_high_res_timer, 0, &its, &its); > + > + return its.it_value.tv_sec * UM_NSEC_PER_SEC + its.it_value.tv_nsec; > } > > -static inline long long timespec_to_us(const struct timespec *ts) > +long long os_vnsecs(void) > { > - return ((long long) ts->tv_sec * UM_USEC_PER_SEC) + > - ts->tv_nsec / UM_NSEC_PER_USEC; > + struct timespec ts; > + > + clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&ts); > + return timespec_to_ns(&ts); > } > > -static int after_sleep_interval(struct timespec *ts) > +long long os_nsecs(void) > { > - int usec = UM_USEC_PER_SEC / UM_HZ; > - long long start_usecs = timespec_to_us(ts); > - struct timeval tv; > - struct itimerval interval; > - > - /* > - * It seems that rounding can increase the value returned from > - * setitimer to larger than the one passed in. Over time, > - * this will cause the remaining time to be greater than the > - * tick interval. If this happens, then just reduce the first > - * tick to the interval value. 
> - */ > - if (start_usecs > usec) > - start_usecs = usec; > - > - start_usecs -= skew / UM_NSEC_PER_USEC; > - if (start_usecs < 0) > - start_usecs = 0; > - > - tv = ((struct timeval) { .tv_sec = start_usecs / UM_USEC_PER_SEC, > - .tv_usec = start_usecs % UM_USEC_PER_SEC }); > - interval = ((struct itimerval) { { 0, usec }, tv }); > - > - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) > - return -errno; > + struct timespec ts; > > - return 0; > + clock_gettime(CLOCK_MONOTONIC,&ts); > + return timespec_to_ns(&ts); > } > -#endif > > -void idle_sleep(unsigned long long nsecs) > +/** > + * os_idle_sleep() - sleep for a given time of nsecs > + * @nsecs: nanoseconds to sleep > + */ > +void os_idle_sleep(unsigned long long nsecs) > { > struct timespec ts; > > - /* > - * nsecs can come in as zero, in which case, this starts a > - * busy loop. To prevent this, reset nsecs to the tick > - * interval if it is zero. > - */ > - if (nsecs == 0) > - nsecs = UM_NSEC_PER_SEC / UM_HZ; > - > - nsecs = sleep_time(nsecs); > - ts = ((struct timespec) { .tv_sec = nsecs / UM_NSEC_PER_SEC, > - .tv_nsec = nsecs % UM_NSEC_PER_SEC }); > - > - if (nanosleep(&ts, &ts) == 0) > - deliver_alarm(); > - after_sleep_interval(&ts); > + if (nsecs <= 0) { > + return; > + } > + > + ts = ((struct timespec) { > + .tv_sec = nsecs / UM_NSEC_PER_SEC, > + .tv_nsec = nsecs % UM_NSEC_PER_SEC > + }); > + > + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); > } > diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c > index faee55e..10ecc06 100644 > --- a/arch/um/os-Linux/util.c > +++ b/arch/um/os-Linux/util.c > @@ -102,6 +102,7 @@ void os_fix_helper_signals(void) > signal(SIGWINCH, SIG_IGN); > signal(SIGINT, SIG_DFL); > signal(SIGTERM, SIG_DFL); > + signal(SIGUSR2, SIG_IGN); > } > > void os_dump_core(void) > > > > ------------------------------------------------------------------------------ > One dashboard for servers and applications across Physical-Virtual-Cloud > Widest out-of-the-box 
monitoring support with 50+ applications > Performance metrics, stats and reports that give you Actionable Insights > Deep dive visibility with transaction tracing using APM Insight. > http://ad.doubleclick.net/ddm/clk/290420510;117567292;y > _______________________________________________ > User-mode-linux-devel mailing list > Use...@li... > https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel -- Thanks, //richard |