From: Eli C. <eli...@gm...> - 2016-03-19 16:59:01
|
This series first fixes a bug that results in corrupted FPU state after invoking signal handlers. It also adds support for the extended processor state (XSTATE) for x86_64 UML, especially the YMM registers used by AVX(2) instructions. Tested with a minimal multi-threaded FPU-intensive test program (see below). This series supersedes the previous sigreturn fix as that one is incorrect when the process is multi-threaded. Changes since v2: - Add an improved sigreturn fix to this series - Merge the ptrace changes into the last commit - Make the selftest program multi-threaded Changes since v1: - Refactor functions with oversized stack frame - Add a tiny selftest program to the cover letter Eli Cooper (3): um: fix FPU state preservation around signal handlers um: extend fpstate to _xstate to support YMM registers um: add extended processor state save/restore support arch/um/include/shared/registers.h | 2 ++ arch/um/kernel/process.c | 2 +- arch/um/os-Linux/signal.c | 28 ++++++++++++++------ arch/x86/um/os-Linux/registers.c | 49 +++++++++++++++++++++++++++++++++-- arch/x86/um/ptrace_32.c | 5 ++-- arch/x86/um/ptrace_64.c | 16 ++++++------ arch/x86/um/shared/sysdep/ptrace_64.h | 4 +-- arch/x86/um/signal.c | 37 +++++++++----------------- arch/x86/um/user-offsets.c | 2 +- 9 files changed, 95 insertions(+), 50 deletions(-) -- /* Test if context switches preserve YMM registers, multi-threaded version * The main function, threads and the signal handler all have their unique * ymm0 value, and constantly detect if someone steps on their toes. * Should loop forever. */ #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <signal.h> #include <unistd.h> #include <pthread.h> #include <sys/time.h> #include <immintrin.h> #define N 10 void sighandler(int signum) { int n = 0xffff; register uint32_t eax asm("eax"); __m256i m0 = _mm256_set_epi64x(0x1234L << 32, 0, 0, 0); while (n--) { asm("vextracti128 $1,%ymm0,%xmm1"); asm("vpextrd $3,%xmm1,%eax"); if (eax != 0x1234) exit(3); } } void thread(void *arg) { long n = (long)arg; register uint32_t eax asm("eax"); __m256i m0 = _mm256_set_epi64x(n << 32, 0, 0, 0); do { asm("vextracti128 $1,%ymm0,%xmm1"); asm("vpextrd $3,%xmm1,%eax"); } while (eax == n); exit(2); } int main() { register uint32_t eax asm("eax"); pthread_t threads[N]; struct itimerval itv; struct timeval tv; struct sigaction act; tv.tv_sec = 0; tv.tv_usec = 100000; itv.it_interval = tv; itv.it_value = tv; act.sa_handler = sighandler; act.sa_flags = 0; sigemptyset(&act.sa_mask); sigaction(SIGALRM, &act, NULL); setitimer(ITIMER_REAL, &itv, NULL); for (long i = 0; i < N; i++) pthread_create(threads + i, NULL, (void *)thread, (void *)i); __m256i m0 = _mm256_set_epi64x(0xabcdL << 32, 0, 0, 0); do { asm("vextracti128 $1,%ymm0,%xmm1"); asm("vpextrd $3,%xmm1,%eax"); } while (eax == 0xabcd); printf("%lx\n", eax); return 1; } |
From: Eli C. <eli...@gm...> - 2016-03-19 16:59:01
|
This patch makes UML saves/restores FPU state from/to the fpstate in pt_regs when setting up or returning from a signal stack, rather than calling ptrace directly. This ensures that FPU state is correctly preserved around signal handlers in a multi-threaded scenario. Signed-off-by: Eli Cooper <eli...@gm...> --- arch/x86/um/signal.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 14fcd01..dac23ee 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -225,20 +225,10 @@ static int copy_sc_from_user(struct pt_regs *regs, } else #endif { - struct user_i387_struct fp; - - err = copy_from_user(&fp, (void *)sc.fpstate, + err = copy_from_user(regs->regs.fp, (void *)sc.fpstate, sizeof(struct user_i387_struct)); if (err) return 1; - - err = restore_fp_registers(pid, (unsigned long *) &fp); - if (err < 0) { - printk(KERN_ERR "copy_sc_from_user - " - "restore_fp_registers failed, errno = %d\n", - -err); - return 1; - } } return 0; } @@ -325,10 +315,8 @@ static int copy_sc_to_user(struct sigcontext __user *to, } else #endif { - struct user_i387_struct fp; - - err = save_fp_registers(pid, (unsigned long *) &fp); - if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) + if (copy_to_user(to_fp, regs->regs.fp, + sizeof(struct user_i387_struct))) return 1; } -- 2.7.2 |
From: Eli C. <eli...@gm...> - 2016-03-19 16:59:03
|
Extends fpstate to _xstate, in order to hold AVX/YMM registers. To avoid oversized stack frame, the following functions have been refactored by using malloc. - sig_handler_common - timer_real_alarm_handler Signed-off-by: Eli Cooper <eli...@gm...> --- arch/um/os-Linux/signal.c | 28 ++++++++++++++++++++-------- arch/x86/um/signal.c | 23 +++++++++++------------ arch/x86/um/user-offsets.c | 2 +- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 7801666..8acaf4e 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -29,23 +29,29 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { - struct uml_pt_regs r; + struct uml_pt_regs *r; int save_errno = errno; - r.is_user = 0; + r = malloc(sizeof(struct uml_pt_regs)); + if (!r) + panic("out of memory"); + + r->is_user = 0; if (sig == SIGSEGV) { /* For segfaults, we want the data from the sigcontext. */ - get_regs_from_mc(&r, mc); - GET_FAULTINFO_FROM_MC(r.faultinfo, mc); + get_regs_from_mc(r, mc); + GET_FAULTINFO_FROM_MC(r->faultinfo, mc); } /* enable signals if sig isn't IRQ signal */ if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) unblock_signals(); - (*sig_info[sig])(sig, si, &r); + (*sig_info[sig])(sig, si, r); errno = save_errno; + + free(r); } /* @@ -83,11 +89,17 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) static void timer_real_alarm_handler(mcontext_t *mc) { - struct uml_pt_regs regs; + struct uml_pt_regs *regs; + + regs = malloc(sizeof(struct uml_pt_regs)); + if (!regs) + panic("out of memory"); if (mc != NULL) - get_regs_from_mc(®s, mc); - timer_handler(SIGALRM, NULL, ®s); + get_regs_from_mc(regs, mc); + timer_handler(SIGALRM, NULL, regs); + + free(regs); } void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index dac23ee..49e5036 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -226,7 +226,7 @@ static int copy_sc_from_user(struct pt_regs *regs, #endif { err = copy_from_user(regs->regs.fp, (void *)sc.fpstate, - sizeof(struct user_i387_struct)); + sizeof(struct _xstate)); if (err) return 1; } @@ -234,7 +234,7 @@ static int copy_sc_from_user(struct pt_regs *regs, } static int copy_sc_to_user(struct sigcontext __user *to, - struct _fpstate __user *to_fp, struct pt_regs *regs, + struct _xstate __user *to_fp, struct pt_regs *regs, unsigned long mask) { struct sigcontext sc; @@ -300,23 +300,22 @@ static int copy_sc_to_user(struct sigcontext __user *to, return 1; } - err = convert_fxsr_to_user(to_fp, &fpx); + err = convert_fxsr_to_user(&to_fp->fpstate, &fpx); if (err) return 1; - err |= __put_user(fpx.swd, &to_fp->status); - err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic); + err |= __put_user(fpx.swd, &to_fp->fpstate.status); + err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic); if (err) return 1; - if (copy_to_user(&to_fp->_fxsr_env[0], &fpx, + if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx, sizeof(struct user_fxsr_struct))) return 1; } else #endif { - if (copy_to_user(to_fp, regs->regs.fp, - sizeof(struct user_i387_struct))) + if (copy_to_user(to_fp, regs->regs.fp, sizeof(struct _xstate))) return 1; } @@ -325,7 +324,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, #ifdef CONFIG_X86_32 static int copy_ucontext_to_user(struct ucontext __user *uc, - struct _fpstate __user *fp, sigset_t *set, + struct _xstate __user *fp, sigset_t *set, unsigned long sp) { int err = 0; @@ -341,7 +340,7 @@ struct sigframe char __user *pretcode; int sig; struct sigcontext sc; - struct _fpstate fpstate; + struct _xstate fpstate; unsigned long extramask[_NSIG_WORDS-1]; char retcode[8]; }; @@ -354,7 +353,7 @@ struct rt_sigframe void __user *puc; struct siginfo info; struct ucontext uc; - struct _fpstate fpstate; + struct _xstate fpstate; char retcode[8]; }; @@ -483,7 +482,7 @@ struct rt_sigframe char __user *pretcode; struct ucontext uc; struct siginfo info; - struct _fpstate fpstate; + struct _xstate fpstate; }; int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index 470564b..cb3c223 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -50,7 +50,7 @@ void foo(void) DEFINE(HOST_GS, GS); DEFINE(HOST_ORIG_AX, ORIG_EAX); #else - DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long)); + DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long)); DEFINE_LONGS(HOST_BX, RBX); DEFINE_LONGS(HOST_CX, RCX); DEFINE_LONGS(HOST_DI, RDI); -- 2.7.2 |
From: Eli C. <eli...@gm...> - 2016-03-19 16:59:05
|
This patch extends save_fp_registers() and restore_fp_registers() to use PTRACE_GETREGSET and PTRACE_SETREGSET with the XSTATE note type, adding support for new processor state extensions between context switches. When the new ptrace requests are unavailable, it falls back to the old PTRACE_GETFPREGS and PTRACE_SETFPREGS methods, which have been renamed to save_i387_registers() and restore_i387_registers(). Now these functions expect *fp_regs to have the space of an _xstate struct. Thus, this also makes ptrace in UML responde to PTRACE_GETFPREGS/_SETFPREG requests with a user_i387_struct (thus independent from HOST_FP_SIZE), and by calling save_i387_registers() and restore_i387_registers() instead of the extended save_fp_registers() and restore_fp_registers() functions. Signed-off-by: Eli Cooper <eli...@gm...> --- arch/um/include/shared/registers.h | 2 ++ arch/um/kernel/process.c | 2 +- arch/x86/um/os-Linux/registers.c | 49 +++++++++++++++++++++++++++++++++-- arch/x86/um/ptrace_32.c | 5 ++-- arch/x86/um/ptrace_64.c | 16 ++++++------ arch/x86/um/shared/sysdep/ptrace_64.h | 4 +-- 6 files changed, 62 insertions(+), 16 deletions(-) diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index f5b7635..a74449b 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -9,6 +9,8 @@ #include <sysdep/ptrace.h> #include <sysdep/archsetjmp.h> +extern int save_i387_registers(int pid, unsigned long *fp_regs); +extern int restore_i387_registers(int pid, unsigned long *fp_regs); extern int save_fp_registers(int pid, unsigned long *fp_regs); extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern int save_fpx_registers(int pid, unsigned long *fp_regs); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 48af59a..d55a473 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -402,6 +402,6 @@ int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu) { int cpu = current_thread_info()->cpu; - return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu); + return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu); } diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index 41bfe84..00f54a9 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -11,21 +11,56 @@ #endif #include <longjmp.h> #include <sysdep/ptrace_user.h> +#include <sys/uio.h> +#include <asm/sigcontext.h> +#include <linux/elf.h> -int save_fp_registers(int pid, unsigned long *fp_regs) +int have_xstate_support; + +int save_i387_registers(int pid, unsigned long *fp_regs) { if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) return -errno; return 0; } -int restore_fp_registers(int pid, unsigned long *fp_regs) +int save_fp_registers(int pid, unsigned long *fp_regs) +{ + struct iovec iov; + + if (have_xstate_support) { + iov.iov_base = fp_regs; + iov.iov_len = sizeof(struct _xstate); + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + return -errno; + return 0; + } else { + return save_i387_registers(pid, fp_regs); + } +} + +int restore_i387_registers(int pid, unsigned long *fp_regs) { if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) return -errno; return 0; } +int restore_fp_registers(int pid, unsigned long *fp_regs) +{ + struct iovec iov; + + if (have_xstate_support) { + iov.iov_base = fp_regs; + iov.iov_len = sizeof(struct _xstate); + if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + return -errno; + return 0; + } else { + return restore_i387_registers(pid, fp_regs); + } +} + #ifdef __i386__ int have_fpx_regs = 1; int save_fpx_registers(int pid, unsigned long *fp_regs) @@ -85,6 +120,16 @@ int put_fp_registers(int pid, unsigned long *regs) return restore_fp_registers(pid, regs); } +void arch_init_registers(int pid) +{ + struct _xstate fp_regs; + struct iovec iov; + + iov.iov_base = &fp_regs; + iov.iov_len = sizeof(struct _xstate); + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0) + have_xstate_support = 1; +} #endif unsigned long get_thread_reg(int reg, jmp_buf *buf) diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index 47c78d5..ebd4dd6 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -194,7 +194,8 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c int err, n, cpu = ((struct thread_info *) child->stack)->cpu; struct user_i387_struct fpregs; - err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs); + err = save_i387_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); if (err) return err; @@ -214,7 +215,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c if (n > 0) return -EFAULT; - return restore_fp_registers(userspace_pid[cpu], + return restore_i387_registers(userspace_pid[cpu], (unsigned long *) &fpregs); } diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index a629694..faab418 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -222,14 +222,14 @@ int is_syscall(unsigned long addr) static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { int err, n, cpu = ((struct thread_info *) child->stack)->cpu; - long fpregs[HOST_FP_SIZE]; + struct user_i387_struct fpregs; - BUG_ON(sizeof(*buf) != sizeof(fpregs)); - err = save_fp_registers(userspace_pid[cpu], fpregs); + err = save_i387_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); if (err) return err; - n = copy_to_user(buf, fpregs, sizeof(fpregs)); + n = copy_to_user(buf, &fpregs, sizeof(fpregs)); if (n > 0) return -EFAULT; @@ -239,14 +239,14 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { int n, cpu = ((struct thread_info *) child->stack)->cpu; - long fpregs[HOST_FP_SIZE]; + struct user_i387_struct fpregs; - BUG_ON(sizeof(*buf) != sizeof(fpregs)); - n = copy_from_user(fpregs, buf, sizeof(fpregs)); + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); if (n > 0) return -EFAULT; - return restore_fp_registers(userspace_pid[cpu], fpregs); + return restore_i387_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); } long subarch_ptrace(struct task_struct *child, long request, diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h index 919789f..0dc223a 100644 --- a/arch/x86/um/shared/sysdep/ptrace_64.h +++ b/arch/x86/um/shared/sysdep/ptrace_64.h @@ -57,8 +57,6 @@ #define UPT_SYSCALL_ARG5(r) UPT_R8(r) #define UPT_SYSCALL_ARG6(r) UPT_R9(r) -static inline void arch_init_registers(int pid) -{ -} +extern void arch_init_registers(int pid); #endif -- 2.7.2 |
From: Richard W. <ri...@no...> - 2016-04-04 21:42:34
|
Am 19.03.2016 um 17:58 schrieb Eli Cooper: > This series first fixes a bug that results in corrupted FPU state after > invoking signal handlers. It also adds support for the extended processor > state (XSTATE) for x86_64 UML, especially the YMM registers used by AVX(2) > instructions. > > Tested with a minimal multi-threaded FPU-intensive test program (see below). > This series supersedes the previous sigreturn fix as that one is incorrect > when the process is multi-threaded. > > Changes since v2: > - Add an improved sigreturn fix to this series > - Merge the ptrace changes into the last commit > - Make the selftest program multi-threaded > > Changes since v1: > - Refactor functions with oversized stack frame > - Add a tiny selftest program to the cover letter > > Eli Cooper (3): > um: fix FPU state preservation around signal handlers > um: extend fpstate to _xstate to support YMM registers > um: add extended processor state save/restore support Sorry for my late response. I'll put this now into -next and give it some testing. Thanks a lot for fixing! //richard |
From: Eli C. <eli...@gm...> - 2016-05-20 15:29:53
|
On 2016/4/5 5:42, Richard Weinberger wrote: > Sorry for my late response. > I'll put this now into -next and give it some testing. Ping? It has been some time but I don't see this in -next yet. |