|
From: Eli C. <eli...@gm...> - 2016-03-19 16:59:01
|
This series first fixes a bug that results in corrupted FPU state after
invoking signal handlers. It also adds support for the extended processor
state (XSTATE) for x86_64 UML, especially the YMM registers used by AVX(2)
instructions.
Tested with a minimal multi-threaded FPU-intensive test program (see below).
This series supersedes the previous sigreturn fix as that one is incorrect
when the process is multi-threaded.
Changes since v2:
- Add an improved sigreturn fix to this series
- Merge the ptrace changes into the last commit
- Make the selftest program multi-threaded
Changes since v1:
- Refactor functions with oversized stack frame
- Add a tiny selftest program to the cover letter
Eli Cooper (3):
um: fix FPU state preservation around signal handlers
um: extend fpstate to _xstate to support YMM registers
um: add extended processor state save/restore support
arch/um/include/shared/registers.h | 2 ++
arch/um/kernel/process.c | 2 +-
arch/um/os-Linux/signal.c | 28 ++++++++++++++------
arch/x86/um/os-Linux/registers.c | 49 +++++++++++++++++++++++++++++++++--
arch/x86/um/ptrace_32.c | 5 ++--
arch/x86/um/ptrace_64.c | 16 ++++++------
arch/x86/um/shared/sysdep/ptrace_64.h | 4 +--
arch/x86/um/signal.c | 37 +++++++++-----------------
arch/x86/um/user-offsets.c | 2 +-
9 files changed, 95 insertions(+), 50 deletions(-)
--
/* Test if context switches preserve YMM registers, multi-threaded version
* The main function, threads and the signal handler all have their unique
* ymm0 value, and constantly detect if someone steps on their toes.
* Should loop forever.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/time.h>
#include <immintrin.h>
#define N 10
void sighandler(int signum)
{
int n = 0xffff;
register uint32_t eax asm("eax");
__m256i m0 = _mm256_set_epi64x(0x1234L << 32, 0, 0, 0);
while (n--) {
asm("vextracti128 $1,%ymm0,%xmm1");
asm("vpextrd $3,%xmm1,%eax");
if (eax != 0x1234)
exit(3);
}
}
void thread(void *arg)
{
long n = (long)arg;
register uint32_t eax asm("eax");
__m256i m0 = _mm256_set_epi64x(n << 32, 0, 0, 0);
do {
asm("vextracti128 $1,%ymm0,%xmm1");
asm("vpextrd $3,%xmm1,%eax");
} while (eax == n);
exit(2);
}
int main()
{
register uint32_t eax asm("eax");
pthread_t threads[N];
struct itimerval itv;
struct timeval tv;
struct sigaction act;
tv.tv_sec = 0;
tv.tv_usec = 100000;
itv.it_interval = tv;
itv.it_value = tv;
act.sa_handler = sighandler;
act.sa_flags = 0;
sigemptyset(&act.sa_mask);
sigaction(SIGALRM, &act, NULL);
setitimer(ITIMER_REAL, &itv, NULL);
for (long i = 0; i < N; i++)
pthread_create(threads + i, NULL, (void *)thread, (void *)i);
__m256i m0 = _mm256_set_epi64x(0xabcdL << 32, 0, 0, 0);
do {
asm("vextracti128 $1,%ymm0,%xmm1");
asm("vpextrd $3,%xmm1,%eax");
} while (eax == 0xabcd);
printf("%lx\n", eax);
return 1;
}
|
|
From: Eli C. <eli...@gm...> - 2016-03-19 16:59:01
|
This patch makes UML saves/restores FPU state from/to the fpstate in
pt_regs when setting up or returning from a signal stack, rather than
calling ptrace directly. This ensures that FPU state is correctly
preserved around signal handlers in a multi-threaded scenario.
Signed-off-by: Eli Cooper <eli...@gm...>
---
arch/x86/um/signal.c | 18 +++---------------
1 file changed, 3 insertions(+), 15 deletions(-)
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 14fcd01..dac23ee 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -225,20 +225,10 @@ static int copy_sc_from_user(struct pt_regs *regs,
} else
#endif
{
- struct user_i387_struct fp;
-
- err = copy_from_user(&fp, (void *)sc.fpstate,
+ err = copy_from_user(regs->regs.fp, (void *)sc.fpstate,
sizeof(struct user_i387_struct));
if (err)
return 1;
-
- err = restore_fp_registers(pid, (unsigned long *) &fp);
- if (err < 0) {
- printk(KERN_ERR "copy_sc_from_user - "
- "restore_fp_registers failed, errno = %d\n",
- -err);
- return 1;
- }
}
return 0;
}
@@ -325,10 +315,8 @@ static int copy_sc_to_user(struct sigcontext __user *to,
} else
#endif
{
- struct user_i387_struct fp;
-
- err = save_fp_registers(pid, (unsigned long *) &fp);
- if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
+ if (copy_to_user(to_fp, regs->regs.fp,
+ sizeof(struct user_i387_struct)))
return 1;
}
--
2.7.2
|
|
From: Eli C. <eli...@gm...> - 2016-03-19 16:59:03
|
Extends fpstate to _xstate, in order to hold AVX/YMM registers.
To avoid oversized stack frame, the following functions have been
refactored by using malloc.
- sig_handler_common
- timer_real_alarm_handler
Signed-off-by: Eli Cooper <eli...@gm...>
---
arch/um/os-Linux/signal.c | 28 ++++++++++++++++++++--------
arch/x86/um/signal.c | 23 +++++++++++------------
arch/x86/um/user-offsets.c | 2 +-
3 files changed, 32 insertions(+), 21 deletions(-)
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 7801666..8acaf4e 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -29,23 +29,29 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
{
- struct uml_pt_regs r;
+ struct uml_pt_regs *r;
int save_errno = errno;
- r.is_user = 0;
+ r = malloc(sizeof(struct uml_pt_regs));
+ if (!r)
+ panic("out of memory");
+
+ r->is_user = 0;
if (sig == SIGSEGV) {
/* For segfaults, we want the data from the sigcontext. */
- get_regs_from_mc(&r, mc);
- GET_FAULTINFO_FROM_MC(r.faultinfo, mc);
+ get_regs_from_mc(r, mc);
+ GET_FAULTINFO_FROM_MC(r->faultinfo, mc);
}
/* enable signals if sig isn't IRQ signal */
if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM))
unblock_signals();
- (*sig_info[sig])(sig, si, &r);
+ (*sig_info[sig])(sig, si, r);
errno = save_errno;
+
+ free(r);
}
/*
@@ -83,11 +89,17 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
static void timer_real_alarm_handler(mcontext_t *mc)
{
- struct uml_pt_regs regs;
+ struct uml_pt_regs *regs;
+
+ regs = malloc(sizeof(struct uml_pt_regs));
+ if (!regs)
+ panic("out of memory");
if (mc != NULL)
- get_regs_from_mc(®s, mc);
- timer_handler(SIGALRM, NULL, ®s);
+ get_regs_from_mc(regs, mc);
+ timer_handler(SIGALRM, NULL, regs);
+
+ free(regs);
}
void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index dac23ee..49e5036 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -226,7 +226,7 @@ static int copy_sc_from_user(struct pt_regs *regs,
#endif
{
err = copy_from_user(regs->regs.fp, (void *)sc.fpstate,
- sizeof(struct user_i387_struct));
+ sizeof(struct _xstate));
if (err)
return 1;
}
@@ -234,7 +234,7 @@ static int copy_sc_from_user(struct pt_regs *regs,
}
static int copy_sc_to_user(struct sigcontext __user *to,
- struct _fpstate __user *to_fp, struct pt_regs *regs,
+ struct _xstate __user *to_fp, struct pt_regs *regs,
unsigned long mask)
{
struct sigcontext sc;
@@ -300,23 +300,22 @@ static int copy_sc_to_user(struct sigcontext __user *to,
return 1;
}
- err = convert_fxsr_to_user(to_fp, &fpx);
+ err = convert_fxsr_to_user(&to_fp->fpstate, &fpx);
if (err)
return 1;
- err |= __put_user(fpx.swd, &to_fp->status);
- err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic);
+ err |= __put_user(fpx.swd, &to_fp->fpstate.status);
+ err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic);
if (err)
return 1;
- if (copy_to_user(&to_fp->_fxsr_env[0], &fpx,
+ if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx,
sizeof(struct user_fxsr_struct)))
return 1;
} else
#endif
{
- if (copy_to_user(to_fp, regs->regs.fp,
- sizeof(struct user_i387_struct)))
+ if (copy_to_user(to_fp, regs->regs.fp, sizeof(struct _xstate)))
return 1;
}
@@ -325,7 +324,7 @@ static int copy_sc_to_user(struct sigcontext __user *to,
#ifdef CONFIG_X86_32
static int copy_ucontext_to_user(struct ucontext __user *uc,
- struct _fpstate __user *fp, sigset_t *set,
+ struct _xstate __user *fp, sigset_t *set,
unsigned long sp)
{
int err = 0;
@@ -341,7 +340,7 @@ struct sigframe
char __user *pretcode;
int sig;
struct sigcontext sc;
- struct _fpstate fpstate;
+ struct _xstate fpstate;
unsigned long extramask[_NSIG_WORDS-1];
char retcode[8];
};
@@ -354,7 +353,7 @@ struct rt_sigframe
void __user *puc;
struct siginfo info;
struct ucontext uc;
- struct _fpstate fpstate;
+ struct _xstate fpstate;
char retcode[8];
};
@@ -483,7 +482,7 @@ struct rt_sigframe
char __user *pretcode;
struct ucontext uc;
struct siginfo info;
- struct _fpstate fpstate;
+ struct _xstate fpstate;
};
int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index 470564b..cb3c223 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -50,7 +50,7 @@ void foo(void)
DEFINE(HOST_GS, GS);
DEFINE(HOST_ORIG_AX, ORIG_EAX);
#else
- DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
+ DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long));
DEFINE_LONGS(HOST_BX, RBX);
DEFINE_LONGS(HOST_CX, RCX);
DEFINE_LONGS(HOST_DI, RDI);
--
2.7.2
|
|
From: Eli C. <eli...@gm...> - 2016-03-19 16:59:05
|
This patch extends save_fp_registers() and restore_fp_registers() to use
PTRACE_GETREGSET and PTRACE_SETREGSET with the XSTATE note type, adding
support for new processor state extensions between context switches.
When the new ptrace requests are unavailable, it falls back to the old
PTRACE_GETFPREGS and PTRACE_SETFPREGS methods, which have been renamed to
save_i387_registers() and restore_i387_registers().
Now these functions expect *fp_regs to have the space of an _xstate struct.
Thus, this also makes ptrace in UML responde to PTRACE_GETFPREGS/_SETFPREG
requests with a user_i387_struct (thus independent from HOST_FP_SIZE), and
by calling save_i387_registers() and restore_i387_registers() instead of
the extended save_fp_registers() and restore_fp_registers() functions.
Signed-off-by: Eli Cooper <eli...@gm...>
---
arch/um/include/shared/registers.h | 2 ++
arch/um/kernel/process.c | 2 +-
arch/x86/um/os-Linux/registers.c | 49 +++++++++++++++++++++++++++++++++--
arch/x86/um/ptrace_32.c | 5 ++--
arch/x86/um/ptrace_64.c | 16 ++++++------
arch/x86/um/shared/sysdep/ptrace_64.h | 4 +--
6 files changed, 62 insertions(+), 16 deletions(-)
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
index f5b7635..a74449b 100644
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -9,6 +9,8 @@
#include <sysdep/ptrace.h>
#include <sysdep/archsetjmp.h>
+extern int save_i387_registers(int pid, unsigned long *fp_regs);
+extern int restore_i387_registers(int pid, unsigned long *fp_regs);
extern int save_fp_registers(int pid, unsigned long *fp_regs);
extern int restore_fp_registers(int pid, unsigned long *fp_regs);
extern int save_fpx_registers(int pid, unsigned long *fp_regs);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 48af59a..d55a473 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -402,6 +402,6 @@ int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
{
int cpu = current_thread_info()->cpu;
- return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu);
+ return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu);
}
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index 41bfe84..00f54a9 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -11,21 +11,56 @@
#endif
#include <longjmp.h>
#include <sysdep/ptrace_user.h>
+#include <sys/uio.h>
+#include <asm/sigcontext.h>
+#include <linux/elf.h>
-int save_fp_registers(int pid, unsigned long *fp_regs)
+int have_xstate_support;
+
+int save_i387_registers(int pid, unsigned long *fp_regs)
{
if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
return -errno;
return 0;
}
-int restore_fp_registers(int pid, unsigned long *fp_regs)
+int save_fp_registers(int pid, unsigned long *fp_regs)
+{
+ struct iovec iov;
+
+ if (have_xstate_support) {
+ iov.iov_base = fp_regs;
+ iov.iov_len = sizeof(struct _xstate);
+ if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
+ return -errno;
+ return 0;
+ } else {
+ return save_i387_registers(pid, fp_regs);
+ }
+}
+
+int restore_i387_registers(int pid, unsigned long *fp_regs)
{
if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
return -errno;
return 0;
}
+int restore_fp_registers(int pid, unsigned long *fp_regs)
+{
+ struct iovec iov;
+
+ if (have_xstate_support) {
+ iov.iov_base = fp_regs;
+ iov.iov_len = sizeof(struct _xstate);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
+ return -errno;
+ return 0;
+ } else {
+ return restore_i387_registers(pid, fp_regs);
+ }
+}
+
#ifdef __i386__
int have_fpx_regs = 1;
int save_fpx_registers(int pid, unsigned long *fp_regs)
@@ -85,6 +120,16 @@ int put_fp_registers(int pid, unsigned long *regs)
return restore_fp_registers(pid, regs);
}
+void arch_init_registers(int pid)
+{
+ struct _xstate fp_regs;
+ struct iovec iov;
+
+ iov.iov_base = &fp_regs;
+ iov.iov_len = sizeof(struct _xstate);
+ if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0)
+ have_xstate_support = 1;
+}
#endif
unsigned long get_thread_reg(int reg, jmp_buf *buf)
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 47c78d5..ebd4dd6 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -194,7 +194,8 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
struct user_i387_struct fpregs;
- err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
+ err = save_i387_registers(userspace_pid[cpu],
+ (unsigned long *) &fpregs);
if (err)
return err;
@@ -214,7 +215,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
if (n > 0)
return -EFAULT;
- return restore_fp_registers(userspace_pid[cpu],
+ return restore_i387_registers(userspace_pid[cpu],
(unsigned long *) &fpregs);
}
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index a629694..faab418 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -222,14 +222,14 @@ int is_syscall(unsigned long addr)
static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
- long fpregs[HOST_FP_SIZE];
+ struct user_i387_struct fpregs;
- BUG_ON(sizeof(*buf) != sizeof(fpregs));
- err = save_fp_registers(userspace_pid[cpu], fpregs);
+ err = save_i387_registers(userspace_pid[cpu],
+ (unsigned long *) &fpregs);
if (err)
return err;
- n = copy_to_user(buf, fpregs, sizeof(fpregs));
+ n = copy_to_user(buf, &fpregs, sizeof(fpregs));
if (n > 0)
return -EFAULT;
@@ -239,14 +239,14 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
int n, cpu = ((struct thread_info *) child->stack)->cpu;
- long fpregs[HOST_FP_SIZE];
+ struct user_i387_struct fpregs;
- BUG_ON(sizeof(*buf) != sizeof(fpregs));
- n = copy_from_user(fpregs, buf, sizeof(fpregs));
+ n = copy_from_user(&fpregs, buf, sizeof(fpregs));
if (n > 0)
return -EFAULT;
- return restore_fp_registers(userspace_pid[cpu], fpregs);
+ return restore_i387_registers(userspace_pid[cpu],
+ (unsigned long *) &fpregs);
}
long subarch_ptrace(struct task_struct *child, long request,
diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h
index 919789f..0dc223a 100644
--- a/arch/x86/um/shared/sysdep/ptrace_64.h
+++ b/arch/x86/um/shared/sysdep/ptrace_64.h
@@ -57,8 +57,6 @@
#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
-static inline void arch_init_registers(int pid)
-{
-}
+extern void arch_init_registers(int pid);
#endif
--
2.7.2
|
|
From: Richard W. <ri...@no...> - 2016-04-04 21:42:34
|
Am 19.03.2016 um 17:58 schrieb Eli Cooper: > This series first fixes a bug that results in corrupted FPU state after > invoking signal handlers. It also adds support for the extended processor > state (XSTATE) for x86_64 UML, especially the YMM registers used by AVX(2) > instructions. > > Tested with a minimal multi-threaded FPU-intensive test program (see below). > This series supersedes the previous sigreturn fix as that one is incorrect > when the process is multi-threaded. > > Changes since v2: > - Add an improved sigreturn fix to this series > - Merge the ptrace changes into the last commit > - Make the selftest program multi-threaded > > Changes since v1: > - Refactor functions with oversized stack frame > - Add a tiny selftest program to the cover letter > > Eli Cooper (3): > um: fix FPU state preservation around signal handlers > um: extend fpstate to _xstate to support YMM registers > um: add extended processor state save/restore support Sorry for my late response. I'll put this now into -next and give it some testing. Thanks a lot for fixing! //richard |
|
From: Eli C. <eli...@gm...> - 2016-05-20 15:29:53
|
On 2016/4/5 5:42, Richard Weinberger wrote: > Sorry for my late response. > I'll put this now into -next and give it some testing. Ping? It has been some time but I don't see this in -next yet. |