From: Eli C. <eli...@gm...> - 2016-03-16 08:48:47
|
This series adds support for the extended processor state (XSTATE) for x86_64 UML, especially the YMM registers used by AVX/AVX2 instructions. Modern userspace programs built with AVX can now run inside x86_64 UML without YMM registers getting corrupted. Tested with a tiny selftest program below, which of course requires the following patch I sent earlier today to run correctly. um: fix FPU register double-restore after sigreturn Also tested with a latest Arch Linux image on a CPU with AVX2 support. Without YMM support in UML, a lot of programs crash unexpectedly, or result in bad results (e.g., calculating SHA256 in OpenSSL). Changes since v1: - Refactor functions with oversized stack frame - Add a tiny selftest program to the cover letter Eli Cooper (3): um: extend _fpstate to _xstate um: add extended processor state save/restore support um: fix ptrace PTRACE_GETFPREGS and PTRACE_SETFPREG support arch/um/include/shared/registers.h | 2 ++ arch/um/kernel/process.c | 2 +- arch/um/os-Linux/signal.c | 28 ++++++++++++----- arch/x86/um/os-Linux/registers.c | 49 ++++++++++++++++++++++++++++-- arch/x86/um/ptrace_32.c | 5 +-- arch/x86/um/ptrace_64.c | 16 +++++----- arch/x86/um/shared/sysdep/ptrace_64.h | 4 +-- arch/x86/um/signal.c | 57 +++++++++++++++++++++++------------ arch/x86/um/user-offsets.c | 2 +- 9 files changed, 121 insertions(+), 44 deletions(-) --- /* test if context switches preserve YMM registers, should exit with code 0 */ #include <stdint.h> #include <unistd.h> #include <signal.h> #include <immintrin.h> int flag; void sighandler(int signum) { if (flag) exit(0); alarm(1); flag = 1; __m256i m0 = _mm256_set_epi64x(0, 0, 0, 0); } int main() { register uint32_t a asm("eax") = 0; signal(SIGALRM, sighandler); alarm(1); __m256i m0 = _mm256_set_epi64x(1L << 32, 0, 0, 0); do { asm("vextracti128 $1,%ymm0,%xmm1"); asm("vpextrd $3,%xmm1,%eax"); } while (a == 1); return 1; } |
From: Eli C. <eli...@gm...> - 2016-03-16 08:48:52
|
This patch makes ptrace in UML responde to PTRACE_GETFPREGS/_SETFPREG requests with a user_i387_struct (thus independent from HOST_FP_SIZE), and by calling save_i387_registers() and restore_i387_registers() instead of the extended save_fp_registers() and restore_fp_registers() functions. Signed-off-by: Eli Cooper <eli...@gm...> --- arch/x86/um/ptrace_32.c | 5 +++-- arch/x86/um/ptrace_64.c | 16 ++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index 47c78d5..ebd4dd6 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -194,7 +194,8 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c int err, n, cpu = ((struct thread_info *) child->stack)->cpu; struct user_i387_struct fpregs; - err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs); + err = save_i387_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); if (err) return err; @@ -214,7 +215,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c if (n > 0) return -EFAULT; - return restore_fp_registers(userspace_pid[cpu], + return restore_i387_registers(userspace_pid[cpu], (unsigned long *) &fpregs); } diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index a629694..faab418 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -222,14 +222,14 @@ int is_syscall(unsigned long addr) static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { int err, n, cpu = ((struct thread_info *) child->stack)->cpu; - long fpregs[HOST_FP_SIZE]; + struct user_i387_struct fpregs; - BUG_ON(sizeof(*buf) != sizeof(fpregs)); - err = save_fp_registers(userspace_pid[cpu], fpregs); + err = save_i387_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); if (err) return err; - n = copy_to_user(buf, fpregs, sizeof(fpregs)); + n = copy_to_user(buf, &fpregs, sizeof(fpregs)); if (n > 0) return -EFAULT; @@ -239,14 +239,14 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { int n, cpu = ((struct thread_info *) child->stack)->cpu; - long fpregs[HOST_FP_SIZE]; + struct user_i387_struct fpregs; - BUG_ON(sizeof(*buf) != sizeof(fpregs)); - n = copy_from_user(fpregs, buf, sizeof(fpregs)); + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); if (n > 0) return -EFAULT; - return restore_fp_registers(userspace_pid[cpu], fpregs); + return restore_i387_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); } long subarch_ptrace(struct task_struct *child, long request, -- 2.7.2 |
From: Eli C. <eli...@gm...> - 2016-03-16 08:48:54
|
Extends _fpstate to _xstate, in order to hold AVX/YMM registers. To avoid oversized stack frame, the following functions has been refactored by using (k)malloc. - copy_sc_to_user - copy_sc_from_user - sig_handler_common - timer_real_alarm_handler Signed-off-by: Eli Cooper <eli...@gm...> --- arch/um/os-Linux/signal.c | 28 ++++++++++++++++------- arch/x86/um/signal.c | 57 ++++++++++++++++++++++++++++++---------------- arch/x86/um/user-offsets.c | 2 +- 3 files changed, 59 insertions(+), 28 deletions(-) diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 7801666..8acaf4e 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -29,23 +29,29 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { - struct uml_pt_regs r; + struct uml_pt_regs *r; int save_errno = errno; - r.is_user = 0; + r = malloc(sizeof(struct uml_pt_regs)); + if (!r) + panic("out of memory"); + + r->is_user = 0; if (sig == SIGSEGV) { /* For segfaults, we want the data from the sigcontext. */ - get_regs_from_mc(&r, mc); - GET_FAULTINFO_FROM_MC(r.faultinfo, mc); + get_regs_from_mc(r, mc); + GET_FAULTINFO_FROM_MC(r->faultinfo, mc); } /* enable signals if sig isn't IRQ signal */ if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) unblock_signals(); - (*sig_info[sig])(sig, si, &r); + (*sig_info[sig])(sig, si, r); errno = save_errno; + + free(r); } /* @@ -83,11 +89,17 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) static void timer_real_alarm_handler(mcontext_t *mc) { - struct uml_pt_regs regs; + struct uml_pt_regs *regs; + + regs = malloc(sizeof(struct uml_pt_regs)); + if (!regs) + panic("out of memory"); if (mc != NULL) - get_regs_from_mc(®s, mc); - timer_handler(SIGALRM, NULL, ®s); + get_regs_from_mc(regs, mc); + timer_handler(SIGALRM, NULL, regs); + + free(regs); } void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 14fcd01..a350535 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -8,6 +8,7 @@ #include <linux/personality.h> #include <linux/ptrace.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <asm/unistd.h> #include <asm/uaccess.h> #include <asm/ucontext.h> @@ -155,6 +156,7 @@ static int copy_sc_from_user(struct pt_regs *regs, { struct sigcontext sc; int err, pid; + struct _xstate *fp; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -225,30 +227,39 @@ static int copy_sc_from_user(struct pt_regs *regs, } else #endif { - struct user_i387_struct fp; + fp = kmalloc(sizeof(struct _xstate), GFP_ATOMIC); + if (!fp) + return 1; - err = copy_from_user(&fp, (void *)sc.fpstate, - sizeof(struct user_i387_struct)); + err = copy_from_user(fp, (void *)sc.fpstate, + sizeof(struct _xstate)); if (err) - return 1; + goto err; - err = restore_fp_registers(pid, (unsigned long *) &fp); + err = restore_fp_registers(pid, (unsigned long *) fp); if (err < 0) { printk(KERN_ERR "copy_sc_from_user - " "restore_fp_registers failed, errno = %d\n", -err); - return 1; + goto err; } + + kfree(fp); } return 0; + +err: + kfree(fp); + return 1; } static int copy_sc_to_user(struct sigcontext __user *to, - struct _fpstate __user *to_fp, struct pt_regs *regs, + struct _xstate __user *to_fp, struct pt_regs *regs, unsigned long mask) { struct sigcontext sc; struct faultinfo * fi = ¤t->thread.arch.faultinfo; + struct _xstate *fp; int err, pid; memset(&sc, 0, sizeof(struct sigcontext)); @@ -310,34 +321,42 @@ static int copy_sc_to_user(struct sigcontext __user *to, return 1; } - err = convert_fxsr_to_user(to_fp, &fpx); + err = convert_fxsr_to_user(&to_fp->fpstate, &fpx); if (err) return 1; - err |= __put_user(fpx.swd, &to_fp->status); - err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic); + err |= __put_user(fpx.swd, &to_fp->fpstate.status); + err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic); if (err) return 1; - if (copy_to_user(&to_fp->_fxsr_env[0], &fpx, + if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx, sizeof(struct user_fxsr_struct))) return 1; } else #endif { - struct user_i387_struct fp; - - err = save_fp_registers(pid, (unsigned long *) &fp); - if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) + fp = kmalloc(sizeof(struct _xstate), GFP_ATOMIC); + if (!fp) return 1; + + err = save_fp_registers(pid, (unsigned long *) fp); + if (copy_to_user(to_fp, fp, sizeof(struct _xstate))) + goto err; + + kfree(fp); } return 0; + +err: + kfree(fp); + return 1; } #ifdef CONFIG_X86_32 static int copy_ucontext_to_user(struct ucontext __user *uc, - struct _fpstate __user *fp, sigset_t *set, + struct _xstate __user *fp, sigset_t *set, unsigned long sp) { int err = 0; @@ -353,7 +372,7 @@ struct sigframe char __user *pretcode; int sig; struct sigcontext sc; - struct _fpstate fpstate; + struct _xstate fpstate; unsigned long extramask[_NSIG_WORDS-1]; char retcode[8]; }; @@ -366,7 +385,7 @@ struct rt_sigframe void __user *puc; struct siginfo info; struct ucontext uc; - struct _fpstate fpstate; + struct _xstate fpstate; char retcode[8]; }; @@ -495,7 +514,7 @@ struct rt_sigframe char __user *pretcode; struct ucontext uc; struct siginfo info; - struct _fpstate fpstate; + struct _xstate fpstate; }; int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index 470564b..cb3c223 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -50,7 +50,7 @@ void foo(void) DEFINE(HOST_GS, GS); DEFINE(HOST_ORIG_AX, ORIG_EAX); #else - DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long)); + DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long)); DEFINE_LONGS(HOST_BX, RBX); DEFINE_LONGS(HOST_CX, RCX); DEFINE_LONGS(HOST_DI, RDI); -- 2.7.2 |
From: Eli C. <eli...@gm...> - 2016-03-16 08:48:55
|
This patch extends save_fp_registers() and restore_fp_registers() to use PTRACE_GETREGSET and PTRACE_SETREGSET with the XSTATE note type, adding support for new processor state extensions. Now these functions expect *fp_regs to have the space of an _xstate struct. When the new ptrace requests are unavailable, it falls back to the old PTRACE_GETFPREGS and PTRACE_SETFPREGS methods, which have been renamed to save_i387_registers() and restore_i387_registers(). Signed-off-by: Eli Cooper <eli...@gm...> --- arch/um/include/shared/registers.h | 2 ++ arch/um/kernel/process.c | 2 +- arch/x86/um/os-Linux/registers.c | 49 +++++++++++++++++++++++++++++++++-- arch/x86/um/shared/sysdep/ptrace_64.h | 4 +-- 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index f5b7635..a74449b 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -9,6 +9,8 @@ #include <sysdep/ptrace.h> #include <sysdep/archsetjmp.h> +extern int save_i387_registers(int pid, unsigned long *fp_regs); +extern int restore_i387_registers(int pid, unsigned long *fp_regs); extern int save_fp_registers(int pid, unsigned long *fp_regs); extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern int save_fpx_registers(int pid, unsigned long *fp_regs); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 48af59a..d55a473 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -402,6 +402,6 @@ int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu) { int cpu = current_thread_info()->cpu; - return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu); + return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu); } diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index 41bfe84..00f54a9 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -11,21 +11,56 @@ #endif #include <longjmp.h> #include <sysdep/ptrace_user.h> +#include <sys/uio.h> +#include <asm/sigcontext.h> +#include <linux/elf.h> -int save_fp_registers(int pid, unsigned long *fp_regs) +int have_xstate_support; + +int save_i387_registers(int pid, unsigned long *fp_regs) { if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) return -errno; return 0; } -int restore_fp_registers(int pid, unsigned long *fp_regs) +int save_fp_registers(int pid, unsigned long *fp_regs) +{ + struct iovec iov; + + if (have_xstate_support) { + iov.iov_base = fp_regs; + iov.iov_len = sizeof(struct _xstate); + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + return -errno; + return 0; + } else { + return save_i387_registers(pid, fp_regs); + } +} + +int restore_i387_registers(int pid, unsigned long *fp_regs) { if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) return -errno; return 0; } +int restore_fp_registers(int pid, unsigned long *fp_regs) +{ + struct iovec iov; + + if (have_xstate_support) { + iov.iov_base = fp_regs; + iov.iov_len = sizeof(struct _xstate); + if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + return -errno; + return 0; + } else { + return restore_i387_registers(pid, fp_regs); + } +} + #ifdef __i386__ int have_fpx_regs = 1; int save_fpx_registers(int pid, unsigned long *fp_regs) @@ -85,6 +120,16 @@ int put_fp_registers(int pid, unsigned long *regs) return restore_fp_registers(pid, regs); } +void arch_init_registers(int pid) +{ + struct _xstate fp_regs; + struct iovec iov; + + iov.iov_base = &fp_regs; + iov.iov_len = sizeof(struct _xstate); + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0) + have_xstate_support = 1; +} #endif unsigned long get_thread_reg(int reg, jmp_buf *buf) diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h index 919789f..0dc223a 100644 --- a/arch/x86/um/shared/sysdep/ptrace_64.h +++ b/arch/x86/um/shared/sysdep/ptrace_64.h @@ -57,8 +57,6 @@ #define UPT_SYSCALL_ARG5(r) UPT_R8(r) #define UPT_SYSCALL_ARG6(r) UPT_R9(r) -static inline void arch_init_registers(int pid) -{ -} +extern void arch_init_registers(int pid); #endif -- 2.7.2 |