You can subscribe to this list here.
1999 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(8) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2000 |
Jan
(19) |
Feb
(11) |
Mar
(56) |
Apr
(31) |
May
(37) |
Jun
(21) |
Jul
(30) |
Aug
(31) |
Sep
(25) |
Oct
(60) |
Nov
(28) |
Dec
(57) |
2001 |
Jan
(47) |
Feb
(119) |
Mar
(279) |
Apr
(198) |
May
(336) |
Jun
(201) |
Jul
(136) |
Aug
(123) |
Sep
(123) |
Oct
(185) |
Nov
(66) |
Dec
(97) |
2002 |
Jan
(318) |
Feb
(101) |
Mar
(167) |
Apr
(233) |
May
(249) |
Jun
(134) |
Jul
(195) |
Aug
(99) |
Sep
(278) |
Oct
(435) |
Nov
(326) |
Dec
(325) |
2003 |
Jan
(214) |
Feb
(309) |
Mar
(142) |
Apr
(141) |
May
(210) |
Jun
(86) |
Jul
(133) |
Aug
(218) |
Sep
(315) |
Oct
(152) |
Nov
(162) |
Dec
(288) |
2004 |
Jan
(277) |
Feb
(267) |
Mar
(182) |
Apr
(168) |
May
(254) |
Jun
(131) |
Jul
(168) |
Aug
(177) |
Sep
(262) |
Oct
(309) |
Nov
(262) |
Dec
(255) |
2005 |
Jan
(258) |
Feb
(169) |
Mar
(282) |
Apr
(208) |
May
(262) |
Jun
(187) |
Jul
(207) |
Aug
(171) |
Sep
(283) |
Oct
(216) |
Nov
(307) |
Dec
(107) |
2006 |
Jan
(207) |
Feb
(82) |
Mar
(192) |
Apr
(165) |
May
(121) |
Jun
(108) |
Jul
(120) |
Aug
(126) |
Sep
(101) |
Oct
(216) |
Nov
(95) |
Dec
(125) |
2007 |
Jan
(176) |
Feb
(117) |
Mar
(240) |
Apr
(120) |
May
(81) |
Jun
(82) |
Jul
(62) |
Aug
(120) |
Sep
(103) |
Oct
(109) |
Nov
(181) |
Dec
(87) |
2008 |
Jan
(145) |
Feb
(69) |
Mar
(31) |
Apr
(98) |
May
(91) |
Jun
(43) |
Jul
(68) |
Aug
(135) |
Sep
(48) |
Oct
(18) |
Nov
(29) |
Dec
(16) |
2009 |
Jan
(26) |
Feb
(15) |
Mar
(83) |
Apr
(39) |
May
(23) |
Jun
(35) |
Jul
(11) |
Aug
(3) |
Sep
(11) |
Oct
(2) |
Nov
(28) |
Dec
(8) |
2010 |
Jan
(4) |
Feb
(40) |
Mar
(4) |
Apr
(46) |
May
(35) |
Jun
(46) |
Jul
(10) |
Aug
(4) |
Sep
(50) |
Oct
(70) |
Nov
(31) |
Dec
(24) |
2011 |
Jan
(17) |
Feb
(8) |
Mar
(35) |
Apr
(50) |
May
(75) |
Jun
(55) |
Jul
(72) |
Aug
(272) |
Sep
(10) |
Oct
(9) |
Nov
(11) |
Dec
(15) |
2012 |
Jan
(36) |
Feb
(49) |
Mar
(54) |
Apr
(47) |
May
(8) |
Jun
(82) |
Jul
(20) |
Aug
(50) |
Sep
(51) |
Oct
(20) |
Nov
(10) |
Dec
(25) |
2013 |
Jan
(34) |
Feb
(4) |
Mar
(24) |
Apr
(40) |
May
(101) |
Jun
(30) |
Jul
(55) |
Aug
(84) |
Sep
(53) |
Oct
(49) |
Nov
(61) |
Dec
(36) |
2014 |
Jan
(26) |
Feb
(22) |
Mar
(30) |
Apr
(4) |
May
(43) |
Jun
(33) |
Jul
(44) |
Aug
(61) |
Sep
(46) |
Oct
(154) |
Nov
(16) |
Dec
(12) |
2015 |
Jan
(18) |
Feb
(2) |
Mar
(122) |
Apr
(23) |
May
(56) |
Jun
(29) |
Jul
(35) |
Aug
(15) |
Sep
|
Oct
(45) |
Nov
(94) |
Dec
(38) |
2016 |
Jan
(50) |
Feb
(39) |
Mar
(39) |
Apr
(1) |
May
(14) |
Jun
(12) |
Jul
(19) |
Aug
(12) |
Sep
(9) |
Oct
(1) |
Nov
(13) |
Dec
(7) |
2017 |
Jan
(6) |
Feb
(1) |
Mar
(16) |
Apr
(5) |
May
(61) |
Jun
(18) |
Jul
(43) |
Aug
(1) |
Sep
(8) |
Oct
(25) |
Nov
(30) |
Dec
(6) |
2018 |
Jan
(5) |
Feb
(2) |
Mar
(25) |
Apr
(15) |
May
(2) |
Jun
(1) |
Jul
|
Aug
|
Sep
|
Oct
(1) |
Nov
|
Dec
|
2019 |
Jan
|
Feb
(2) |
Mar
|
Apr
(1) |
May
|
Jun
(1) |
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Vegard N. <veg...@or...> - 2015-12-06 15:34:30
|
Hi, I've been running into some odd crashes when starting my UML instance from Python. This is my script: import subprocess subprocess.check_call(['path/to/vmlinux', 'mem=2048M', 'rootfstype=hostfs', 'rw', 'init=/bin/bash']) This will crash 9 out of 10 times with various strange messages on the console: [ 1.890000] devtmpfs: mounted [ 1.960000] mount (947) used greatest stack depth: 5592 bytes left [ 1.990000] mount (948) used greatest stack depth: 5496 bytes left #�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#�#��+J@��o��#7%����2Z����.�j o ��h� -^(�l0�w8�\@�}P�)o`o-�p)CS�-!���8��,��Ҋ8�>)DV9 � �9�����$��#�#�#�#[ 6.870000] [ 6.870000] Pid: 1, comm: init Not tainted 4.4.0-rc3 [ 6.870000] RIP: 0033:[<00000000018d4848>] [ 6.870000] RSP: 00000000dff3efa8 EFLAGS: 00010216 [ 6.870000] RAX: 0000000000000001 RBX: 00000000600748c0 RCX: 00007fb9d87dacd8 [ 6.870000] RDX: ffffffffffffffff RSI: 0000000000000001 RDI: 00000000dfeba3c8 [ 6.870000] RBP: 00000000dff3efe8 R08: 00000000dff3ee38 R09: 0000000000001382 [ 6.870000] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000dff3e000 [ 6.870000] R13: 0000000060050f90 R14: 0000000000000000 R15: 0000000060050a20 [ 6.870000] Kernel panic - not syncing: Kernel tried to access user memory at addr 0x18d4848, ip 0x18d4848 [ 6.870000] CPU: 0 PID: 1 Comm: init Not tainted 4.4.0-rc3 #1 [ 6.870000] [ 6.870000] Pid: -538516280, comm: Not tainted 4.4.0-rc3 [ 6.870000] RIP: 0033:[<000000006005b54a>] [ 6.870000] RSP: 00000000dff3c060 EFLAGS: 00010206 [ 6.870000] RAX: 0000000002556c00 RBX: 0000000002557610 RCX: 0000000000000001 [ 6.870000] RDX: 0000000000000006 RSI: 0000000000000000 RDI: 000000006005b54a [ 6.870000] RBP: 00000000dff3c120 R08: 00000000dff3e0a0 R09: 0000000000000001 [ 6.870000] R10: 0000000000000008 R11: 0000000000000246 R12: 00000000dff3c190 [ 6.870000] R13: 0000000000000000 R14: 000000006005b54a R15: 00000000dff3c578 [ 6.870000] Kernel panic - not syncing: Kernel tried to access user memory at addr 0x2557610, ip 0x6005b54a I think the biggest clue is perhaps this message that occurs occasionally: [ 1.880000] winch_thread : TIOCSCTTY failed on fd 1 err = 1 I've also seen these traces: [ 1.940000] Kernel panic - not syncing: Kernel mode signal 4 [ 1.940000] CPU: 0 PID: 1 Comm: swapper Not tainted 4.4.0-rc3 #1 [ 1.940000] Stack: [ 1.940000] e032f4e0 601099cb ffffffffff0a0110 6187c3c0 [ 1.940000] 611ad811 601ad6c6 e032f500 60acf171 [ 1.940000] 60acf134 60aeec40 e032f620 601ad207 [ 1.940000] Call Trace: [ 1.940000] [<60076f89>] ? os_is_signal_stack+0x29/0x40 [ 1.940000] [<601ad6c6>] ? printk+0x0/0xfd [ 1.940000] [<6005850e>] show_stack+0x1fe/0x430 [ 1.940000] [<601099cb>] ? dump_stack_print_info+0x17b/0x1f0 [ 1.940000] [<601ad6c6>] ? printk+0x0/0xfd [ 1.940000] [<60acf171>] dump_stack+0x3d/0x4c [ 1.940000] [<60acf134>] ? dump_stack+0x0/0x4c [ 1.940000] [<60aeec40>] ? bust_spinlocks+0x0/0xe0 [ 1.940000] [<601ad207>] panic+0x206/0x5be [ 1.940000] [<601ad001>] ? panic+0x0/0x5be [ 1.940000] [<6005bfde>] relay_signal+0x23e/0x290 [ 1.940000] [<60076c08>] sig_handler_common+0x98/0x1a0 [ 1.940000] [<60076db4>] sig_handler+0xa4/0x160 [ 1.940000] [<600761e1>] hard_handler+0xf1/0x1e0 [ 1.940000] [<601182ab>] ? __call_rcu.isra.1+0x9b/0x140 [ 1.940000] [<60ae48bf>] ? put_dec+0x4f/0x140 [ 1.940000] [<60ae7ce0>] ? number.isra.10+0x720/0x7e0 [ 1.940000] [<601182ab>] ? __call_rcu.isra.1+0x9b/0x140 [ 1.940000] [<600eecec>] ? __enqueue_entity+0xec/0x130 [ 1.940000] [<600f06a0>] ? update_cfs_shares.isra.11+0x0/0x1e0 [ 1.940000] [<600ef3f8>] ? check_preempt_wakeup+0x468/0x6a0 [ 1.940000] [<600748c0>] ? write_sigio_thread+0x0/0x500 [ 1.940000] [<6007494e>] ? write_sigio_thread+0x8e/0x500 [ 1.940000] [<600e625d>] ? schedule_tail+0x2d/0x290 [ 1.940000] [<600748c0>] ? write_sigio_thread+0x0/0x500 and [ 1.900000] Pid: 943, comm: deferwq Not tainted 4.4.0-rc3 [ 1.900000] RIP: 0033:[<0000000000000000>] (On Ctrl-C:) [ 239.840000] Kernel panic - not syncing: Kernel tried to access user memory at addr 0x1844248, ip 0x1844248 [ 239.840000] CPU: 0 PID: 1 Comm: init Not tainted 4.4.0-rc3 #1 [ 239.840000] [ 239.840000] Pid: -538516280, comm: Not tainted 4.4.0-rc3 [ 239.840000] RIP: 0033:[<000000006005b54a>] When I start my instance from the command line (but inside a screen -- the Python script was also started in a screen) there is no problem AFAICT. I am not passing any con*= parameters, so it should be using all the defaults. I can test patches. Thanks, Vegard |
From: Tristan S. <tsc...@go...> - 2015-12-04 17:27:16
|
Acked-by: Tristan Schmelcher <tsc...@go...> |
From: Tristan S. <tsc...@go...> - 2015-12-04 17:13:38
|
File permissions are checked at time of open, so I think this fchmod call has never had any effect. If there is a concern that the mkstemp implementation may be insecure, why not set and restore the umask? |
From: Richard W. <ri...@no...> - 2015-11-29 20:14:18
|
The x86 FPU cleanup changed fpstate to a plain integer. UML on x86 has to deal with that too. Signed-off-by: Richard Weinberger <ri...@no...> diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 06934a8..e5f854c 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -211,7 +211,7 @@ static int copy_sc_from_user(struct pt_regs *regs, if (err) return 1; - err = convert_fxsr_from_user(&fpx, sc.fpstate); + err = convert_fxsr_from_user(&fpx, (void *)sc.fpstate); if (err) return 1; @@ -227,7 +227,7 @@ static int copy_sc_from_user(struct pt_regs *regs, { struct user_i387_struct fp; - err = copy_from_user(&fp, sc.fpstate, + err = copy_from_user(&fp, (void *)sc.fpstate, sizeof(struct user_i387_struct)); if (err) return 1; @@ -291,7 +291,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, #endif #undef PUTREG sc.oldmask = mask; - sc.fpstate = to_fp; + sc.fpstate = (unsigned long)to_fp; err = copy_to_user(to, &sc, sizeof(struct sigcontext)); if (err) @@ -468,12 +468,10 @@ long sys_sigreturn(void) struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); sigset_t set; struct sigcontext __user *sc = &frame->sc; - unsigned long __user *oldmask = &sc->oldmask; - unsigned long __user *extramask = frame->extramask; int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); - if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) || - copy_from_user(&set.sig[1], extramask, sig_size)) + if (copy_from_user(&set.sig[0], (void *)sc->oldmask, sizeof(set.sig[0])) || + copy_from_user(&set.sig[1], frame->extramask, sig_size)) goto segfault; set_current_blocked(&set); @@ -505,6 +503,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, { struct rt_sigframe __user *frame; int err = 0, sig = ksig->sig; + unsigned long fp_to; frame = (struct rt_sigframe __user *) round_down(stack_top - sizeof(struct rt_sigframe), 16); @@ -526,7 +525,10 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs)); err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); - err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate); + + fp_to = (unsigned long)&frame->fpstate; + + err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate); if (sizeof(*set) == 16) { err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); -- 1.8.4.5 |
From: Richard W. <ri...@no...> - 2015-11-28 23:11:48
|
Am 29.11.2015 um 00:00 schrieb Mickaël Salaün: > > > On 28/11/2015 23:55, Richard Weinberger wrote: >> Am 28.11.2015 um 23:52 schrieb Mickaël Salaün: >>> >>> On 28/11/2015 22:40, Richard Weinberger wrote: >>>> Am 28.11.2015 um 22:32 schrieb Mickaël Salaün: >>>>> Replace the default insecure mode 0777 with 0700 for temporary file. >>>>> >>>>> Prohibit other users to change the executable mapped code. >>>> >>>> Hmm, isn't the tmp file already unlinked at this stage? >>>> >>> >>> Yes, but if someone could open it before the unlink e.g. because of the umask (which does not seems to be the case thanks to mkstemp, but remains unspecified [1]), this user should then be able to have write access to the file descriptor/description. >> >> Yes, someone can open it before the unlink. But you change the file mode after that. >> How does it improve the situation? The attacker has already the file handle. > > The attacker could have the file handle only in a read-only mode, which is a bit different than being able to write and execute arbitrary code thanks to a file descriptor mapped RWX :) Fair point. Please describe this in detail in the patch changelog. :-) Thanks, //richard |
From: Richard W. <ri...@no...> - 2015-11-28 22:59:11
|
Am 28.11.2015 um 23:56 schrieb Mickaël Salaün: > > On 28/11/2015 23:07, Richard Weinberger wrote: >> Am 28.11.2015 um 22:32 schrieb Mickaël Salaün: >>> Open the memory mapped file with the O_TMPFILE flag when available. >>> >>> Signed-off-by: Mickaël Salaün <mi...@di...> >>> --- >>> arch/um/os-Linux/mem.c | 12 ++++++++++++ >>> 1 file changed, 12 insertions(+) >>> >>> diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c >>> index 798aeb4..fe52e2d 100644 >>> --- a/arch/um/os-Linux/mem.c >>> +++ b/arch/um/os-Linux/mem.c >>> @@ -106,6 +106,18 @@ static int __init make_tempfile(const char *template) >>> } >>> } >>> >>> +#ifdef O_TMPFILE >>> + fd = open(tempdir, O_CLOEXEC | O_RDWR | O_EXCL | O_TMPFILE, 0700); >>> + /* >>> + * If the running system does not support O_TMPFILE flag then retry >>> + * without it. >>> + */ >>> + if (fd != -1 || (errno != EINVAL && errno != EISDIR && >> >> Why are you handling EISDIR? > > I follow the man page for open [1], I think it was a workaround needed for some kernel versions just after the O_TMPFILE was added but before the support for EOPNOTSUPP. > We may need to add the EACCES too for some version of glibc [2, 3]? Makes sense! :) > 1. http://man7.org/linux/man-pages/man2/openat.2.html#BUGS > 2. Commit 69a91c237ab0ebe4e9fdeaf6d0090c85275594ec and https://sourceware.org/bugzilla/show_bug.cgi?id=17523 > 3. https://bugs.gentoo.org/529044 > >> >>> + errno != EOPNOTSUPP)) >>> + return fd; >>> + errno = 0; >> >> Why are you resetting errno? > > It's to ignore/reset the error code from open, but it may not be needed because of the next call to malloc? But then you'd have to reset errno after every syscall. :-) Thanks, //richard |
From: Richard W. <ri...@no...> - 2015-11-28 22:55:33
|
Am 28.11.2015 um 23:52 schrieb Mickaël Salaün: > > On 28/11/2015 22:40, Richard Weinberger wrote: >> Am 28.11.2015 um 22:32 schrieb Mickaël Salaün: >>> Replace the default insecure mode 0777 with 0700 for temporary file. >>> >>> Prohibit other users to change the executable mapped code. >> >> Hmm, isn't the tmp file already unlinked at this stage? >> > > Yes, but if someone could open it before the unlink e.g. because of the umask (which does not seems to be the case thanks to mkstemp, but remains unspecified [1]), this user should then be able to have write access to the file descriptor/description. Yes, someone can open it before the unlink. But you change the file mode after that. How does it improve the situation? The attacker has already the file handle. Thanks, //richard |
From: Richard W. <ri...@no...> - 2015-11-28 22:07:18
|
Am 28.11.2015 um 22:32 schrieb Mickaël Salaün: > Open the memory mapped file with the O_TMPFILE flag when available. > > Signed-off-by: Mickaël Salaün <mi...@di...> > --- > arch/um/os-Linux/mem.c | 12 ++++++++++++ > 1 file changed, 12 insertions(+) > > diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c > index 798aeb4..fe52e2d 100644 > --- a/arch/um/os-Linux/mem.c > +++ b/arch/um/os-Linux/mem.c > @@ -106,6 +106,18 @@ static int __init make_tempfile(const char *template) > } > } > > +#ifdef O_TMPFILE > + fd = open(tempdir, O_CLOEXEC | O_RDWR | O_EXCL | O_TMPFILE, 0700); > + /* > + * If the running system does not support O_TMPFILE flag then retry > + * without it. > + */ > + if (fd != -1 || (errno != EINVAL && errno != EISDIR && Why are you handling EISDIR? > + errno != EOPNOTSUPP)) > + return fd; > + errno = 0; Why are you resetting errno? Thanks, //richard |
From: Richard W. <ri...@no...> - 2015-11-28 21:40:36
|
Am 28.11.2015 um 22:32 schrieb Mickaël Salaün: > Replace the default insecure mode 0777 with 0700 for temporary file. > > Prohibit other users to change the executable mapped code. Hmm, isn't the tmp file already unlinked at this stage? Thanks, //richard |
From: Anton I. <ant...@ko...> - 2015-11-27 11:03:10
|
On 27/11/15 09:59, Richard Weinberger wrote: > Hi! > > Am 26.11.2015 um 10:49 schrieb Anton Ivanov: >> Hi List, hi Richard, >> >> While working on the EPOLL I managed to consistently reproduce and get down to the bottom of the process in D state bug which you occasionally see with UML. I recall asking >> Richard's help on this for the first time nearly 5 years ago ;-). > O_o > >> It is extremely rare with the POLL based controller, timers and the stock UBD drivers. As you make things go faster (anywhere in UML) it rares its ugly head. So improving the IRQs, >> improving UBD itself, etc - all make it easier to trigger. >> >> It looks like it is possible to end up in a state where the restart list is not empty (an earlier transaction to the disk io thread failed with EAGAIN), but with no pending IO on >> the UBD IPC thread fd. So the restart list is never re-triggered and the UBD device ends up with a non-empty queue. The process that requested the IO ends up in D state. Any other >> processes trying IO to the same disk join it. As the requests to the same UBD queue up, ultimately, UML goes belly up. >> >> Pinging the UML process with SIGIO does not help as there is no IO pending on the fd. So it is not a lost interrupt. It somehow manages to race forming the restart queue. >> >> If, however, you have more than one UBD device IO to the other one unstucks it by re-running the restart queue out of the ubd interrupt handler. >> >> Once again - this is extremely rare at present, but possible (I have seen it a few times over the last 5 years). >> >> So it needs a viable fix or a workaround. I will have to get this one out of the door first as it constantly gets in the way in debugging both the Epoll and the signals stuff. > Okay, let's collect some facts first. > Is a guest or a host process in state D? Guest > If it is a guest process, you can use "/proc/<pid>/stack" to find out where in the UML kernel it is blocking. Done that already using the uml_mconsole functionality :) Based on that it looks like block IO. It has issued a read request, that has gone through the guts of ext3fs and has gotten as far as the block layer. It is not inside UBD, it is reported as a couple of layers up. I can roll back my tree to a state where I can replicate that reliably and re-take the trace (sorry, did not keep it). > 5 years ago UML didn't have this feature. I already did some experiments and did some re-reading of the source: 1. You can have a failed atomic allocation. This is rare, but it happens. This is why it is atomic - it does not wait. 2. If you fail the atomic allocation in the ubd submit routine you have the request bounced up and the device scheduled for a restart (it is added for a restart list). 3. The restarts themselves will happen only if you have IO - they are done in the bottom of the IRQ routine. There is no other place to restart the IO. So what happens if you have no IO pending from the disk IO thread and have a failed allocation so a device is scheduled for a restart? That however may not be the only way you get in this state - I stuck a few debug printks and they are extremely difficult to trigger and it is possible to trigger this in a different way. I have yet to figure out what triggers it in "the different way" use case. I am also trying a couple of potential solutions: 1. Start a timer on IO submission and update it on last IO so we have an efficient timeout mechanism. Rerun the event loop and the restart queue if the timer expires. This is much closer to how a real disk operates - we can even recover from a failed IO thread this way. I tried that already. It works 100%, but looks a bit expensive. I do not like it. I also need to do a few experiments to ensure that the effect is real and not a Heisenbug from triggering more timer interrupts. 2. Replace blocking IO in the disk IO thread with non-blocking, do a timeouted poll and throw a NULL or pointer to a static "keepalive" transaction when timeouts expire. This retriggers the IO interrupt and reruns both IO and the restart queue. This is much better aligned with what is needed to make the disk IO faster by bulking transactions as you can now read N disk io requests at a time in the io thread. I will try that over the weekend and report back with traces. A. > > Thanks, > //richard > |
From: Richard W. <ri...@no...> - 2015-11-27 10:00:02
|
Hi! Am 26.11.2015 um 10:49 schrieb Anton Ivanov: > Hi List, hi Richard, > > While working on the EPOLL I managed to consistently reproduce and get down to the bottom of the process in D state bug which you occasionally see with UML. I recall asking > Richard's help on this for the first time nearly 5 years ago ;-). O_o > It is extremely rare with the POLL based controller, timers and the stock UBD drivers. As you make things go faster (anywhere in UML) it rares its ugly head. So improving the IRQs, > improving UBD itself, etc - all make it easier to trigger. > > It looks like it is possible to end up in a state where the restart list is not empty (an earlier transaction to the disk io thread failed with EAGAIN), but with no pending IO on > the UBD IPC thread fd. So the restart list is never re-triggered and the UBD device ends up with a non-empty queue. The process that requested the IO ends up in D state. Any other > processes trying IO to the same disk join it. As the requests to the same UBD queue up, ultimately, UML goes belly up. > > Pinging the UML process with SIGIO does not help as there is no IO pending on the fd. So it is not a lost interrupt. It somehow manages to race forming the restart queue. > > If, however, you have more than one UBD device IO to the other one unstucks it by re-running the restart queue out of the ubd interrupt handler. > > Once again - this is extremely rare at present, but possible (I have seen it a few times over the last 5 years). > > So it needs a viable fix or a workaround. I will have to get this one out of the door first as it constantly gets in the way in debugging both the Epoll and the signals stuff. Okay, let's collect some facts first. Is a guest or a host process in state D? If it is a guest process, you can use "/proc/<pid>/stack" to find out where in the UML kernel it is blocking. 5 years ago UML didn't have this feature. Thanks, //richard |
From: Anton I. <ant...@ko...> - 2015-11-26 09:49:18
|
Hi List, hi Richard, While working on the EPOLL I managed to consistently reproduce and get down to the bottom of the process in D state bug which you occasionally see with UML. I recall asking Richard's help on this for the first time nearly 5 years ago ;-). It is extremely rare with the POLL based controller, timers and the stock UBD drivers. As you make things go faster (anywhere in UML) it rares its ugly head. So improving the IRQs, improving UBD itself, etc - all make it easier to trigger. It looks like it is possible to end up in a state where the restart list is not empty (an earlier transaction to the disk io thread failed with EAGAIN), but with no pending IO on the UBD IPC thread fd. So the restart list is never re-triggered and the UBD device ends up with a non-empty queue. The process that requested the IO ends up in D state. Any other processes trying IO to the same disk join it. As the requests to the same UBD queue up, ultimately, UML goes belly up. Pinging the UML process with SIGIO does not help as there is no IO pending on the fd. So it is not a lost interrupt. It somehow manages to race forming the restart queue. If, however, you have more than one UBD device IO to the other one unstucks it by re-running the restart queue out of the ubd interrupt handler. Once again - this is extremely rare at present, but possible (I have seen it a few times over the last 5 years). So it needs a viable fix or a workaround. I will have to get this one out of the door first as it constantly gets in the way in debugging both the Epoll and the signals stuff. A. |
From: Anton I. <ant...@ko...> - 2015-11-24 17:00:51
|
I got to the bottom of this now: 1. This has been around for a long time. 2. It was not showing up because: 2.1. The deactivate_fd/reactivate_fd in the poll based IRQ controller double up as per-fd reentrancy guard for SIGIO. So while SIGIO handler was reentrant (potentially bad), the actual device reading and writing were not. This is why this was not blowing up regularly so far. Even if there are potential races in modifying the poll structures, etc, they are very difficult to trigger. 2.2. The VTALRM based timer system produced timer interrupts only when UML was running or out of idle. This ensured that there is only one timer interrupt in flight at any given time. The new timer subsystem + me trying to move to EPOLL brought the gremlins out of the woodwork (which is good, we know that they are there now). I am going to reissue the timer errata + IRQ guard setting a guard only around the timer. The per-fd disable/reenable behaviour provides a sufficient guard for SIGIO so while you can crudely guard for both (as I have in the first version of the reentrancy patch), you do not need to do that. A guard for the timer is sufficient. I now have a fixed version for the EPOLL patch which replicates the per-fd old reentrancy guard behavior and has a timer guard. That has killed all WARN() and BUG() in my testing so far. I am going to give all of these some testing for 1-2 days and if I am happy with the results resubmit the errata patchset and the revised EPOLL IRQ controller patch (as an incremental on top of the errata). The Epoll controller with the fixes still manages ~ 10%+ better performance than the old POLL based one and it will also allow further optimizations later on. A. On 20/11/15 12:05, Anton Ivanov wrote: > I have gotten to the bottom of this. > > 1. The IRQ handler re-entrancy issue predates the timer patch. Adding a > simple guard with a WARN_ON_ONCE around the device loop in the > sig_io_handler catches it in plain 4.3 > > diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c > index 23cb935..ac0bbce 100644 > --- a/arch/um/kernel/irq.c > +++ b/arch/um/kernel/irq.c > @@ -30,12 +30,17 @@ static struct irq_fd **last_irq_ptr = &active_fds; > > extern void free_irqs(void); > > +static int in_poll_handler = 0; > + > void sigio_handler(int sig, struct siginfo *unused_si, struct > uml_pt_regs *regs) > { > struct irq_fd *irq_fd; > int n; > > + WARN_ON_ONCE(in_poll_handler == 1); > + > while (1) { > + in_poll_handler = 1; > n = os_waiting_for_events(active_fds); > if (n <= 0) { > if (n == -EINTR) > @@ -51,6 +56,7 @@ void sigio_handler(int sig, struct siginfo *unused_si, > struct uml_pt_regs *regs) > } > } > } > + in_poll_handler = 0; > > free_irqs(); > } > > This is dangerously broken - you can under heavy IO exhaust the stack, > you can get packets out of order, etc. Most IO is reasonably atomic so > corruption is not likely, but not impossible (especially if one or more > drivers are optimized to use multi-read/multi-write). > > 2. I cannot catch what is wrong with the current code in signal.c. When > I read it, it should not produce re-entrancy. But it does. > > 3. I found 2-3 minor issues with signal handling and the timer patch > which I will submit a hot-fix for, including a proper fix for the > hang-in-sleep issue. > > 4. While I can propose a brutal patch for signal.c which sets guards > against reentrancy which works fine, I suggest we actually get to the > bottom of this. Why the code in unblock_signals() does not guard > correctly against that? > > A. > > ------------------------------------------------------------------------------ > _______________________________________________ > User-mode-linux-devel mailing list > Use...@li... > https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel > |
From: Anton I. <ant...@ko...> - 2015-11-24 08:02:30
|
On 23/11/15 22:24, Richard Weinberger wrote: > On Fri, Nov 20, 2015 at 7:47 PM, Anton Ivanov > <ant...@ko...> wrote: >> Hi list, hi Richard. >> >> Have you had time to review this one? I think it is not contentious - it >> is something QEMU has been doing for a very long time now. It also gives >> a measurable speed up, especially for random IO. >> >> I am going to hold off on the rest of the ubd patches because they bulk >> up the transactions. This is dangerous if you end up in a reentrant >> situation. > Sorry for the delay. > The patch looks useful! Thanks a lot! > > Please CC me in future such that patches don't get lost. OK. I have a few that follow it to improve UBD even further, but I am going to hold them off for now until we have the IRQ controller sorted. Multi-read/write and irq/io handler reentrancy do not get along very well :( A. > I handle CC'ed mails with higher priority. > (This is my way to deal with the LKML storm ;)) > |
From: Richard W. <ric...@gm...> - 2015-11-23 22:25:06
|
On Fri, Nov 20, 2015 at 7:47 PM, Anton Ivanov <ant...@ko...> wrote: > Hi list, hi Richard. > > Have you had time to review this one? I think it is not contentious - it > is something QEMU has been doing for a very long time now. It also gives > a measurable speed up, especially for random IO. > > I am going to hold off on the rest of the ubd patches because they bulk > up the transactions. This is dangerous if you end up in a reentrant > situation. Sorry for the delay. The patch looks useful! Thanks a lot! Please CC me in future such that patches don't get lost. I handle CC'ed mails with higher priority. (This is my way to deal with the LKML storm ;)) -- Thanks, //richard |
From: Richard W. <ric...@gm...> - 2015-11-22 12:55:43
|
On Sun, Nov 22, 2015 at 10:49 AM, Vegard Nossum <veg...@gm...> wrote: > On 22 November 2015 at 10:23, Lorenzo Colitti <lo...@go...> wrote: >> >> On Sun, Nov 22, 2015 at 5:08 PM, Anton Ivanov >> <ant...@ko...> wrote: >> > set_timer needs -lrt which the patch adds to the link arguments. For some >> > reason on your config it is not in the correct location in the command line. >> > >> > This is being discussed on the list, lo...@go... was working on a >> > patch. >> >> I sent out the patch a few days ago: >> http://sourceforge.net/p/user-mode-linux/mailman/message/34629247/ > > That patch fixes the problem for me, thanks! I'll send a pull request as soon as I'm at home. /me is traveling... -- Thanks, //richard |
From: Vegard N. <veg...@gm...> - 2015-11-22 09:49:24
|
On 22 November 2015 at 10:23, Lorenzo Colitti <lo...@go...> wrote: > > On Sun, Nov 22, 2015 at 5:08 PM, Anton Ivanov > <ant...@ko...> wrote: > > set_timer needs -lrt which the patch adds to the link arguments. For some > > reason on your config it is not in the correct location in the command line. > > > > This is being discussed on the list, lo...@go... was working on a > > patch. > > I sent out the patch a few days ago: > http://sourceforge.net/p/user-mode-linux/mailman/message/34629247/ That patch fixes the problem for me, thanks! Vegard |
From: Lorenzo C. <lo...@go...> - 2015-11-22 09:23:56
|
On Sun, Nov 22, 2015 at 5:08 PM, Anton Ivanov <ant...@ko...> wrote: > set_timer needs -lrt which the patch adds to the link arguments. For some > reason on your config it is not in the correct location in the command line. > > This is being discussed on the list, lo...@go... was working on a > patch. I sent out the patch a few days ago: http://sourceforge.net/p/user-mode-linux/mailman/message/34629247/ |
From: Anton I. <ant...@ko...> - 2015-11-22 08:08:51
|
set_timer needs -lrt which the patch adds to the link arguments. For some reason on your config it is not in the correct location in the command line. This is being discussed on the list, lo...@go... was working on a patch. In any case, can you send the actual gcc line leading to it so we see why -lrt is not in the right place. A. On 21/11/15 19:09, Vegard Nossum wrote: > On 10 November 2015 at 23:08, Richard Weinberger <ri...@no... > <mailto:ri...@no...>> wrote: > > Linus, > > the following changes since commit > 6a13feb9c82803e2b815eca72fa7a9f5561d7861: > > Linux 4.3 (2015-11-01 16:05:25 -0800) > > are available in the git repository at: > > git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git > <http://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git> > for-linus-4.4-rc1 > > for you to fetch changes up to > 2eb5f31bc4ea24bb293e82934cfa1cce9573304b: > > um: Switch clocksource to hrtimers (2015-11-06 22:54:49 +0100) > > ---------------------------------------------------------------- > This pull request includes the following UML changes: > * A new hrtimer based clocksource by Anton Ivanov > * ptrace() enhancments by Richard Weinberger > * random cleanups and bug fixes all over the place > > ---------------------------------------------------------------- > Anton Ivanov (1): > um: Switch clocksource to hrtimers > > > This commit (2eb5f31bc4) seems to have broken the UML defconfig build > in latest Linus master for me: > > [...] > LINK vmlinux > LD vmlinux.o > MODPOST vmlinux.o > GEN .version > CHK include/generated/compile.h > UPD include/generated/compile.h > CC init/version.o > LD init/built-in.o > arch/um/os-Linux/built-in.o: In function `os_timer_create': > /home/vegard/linux/arch/um/os-Linux/time.c:51: undefined reference to > `timer_create' > arch/um/os-Linux/built-in.o: In function `os_timer_set_interval': > /home/vegard/linux/arch/um/os-Linux/time.c:84: undefined reference to > `timer_settime' > arch/um/os-Linux/built-in.o: In function `os_timer_remain': > /home/vegard/linux/arch/um/os-Linux/time.c:109: undefined reference to > `timer_gettime' > arch/um/os-Linux/built-in.o: In function `os_timer_one_shot': > /home/vegard/linux/arch/um/os-Linux/time.c:132: undefined reference to > `timer_settime' > arch/um/os-Linux/built-in.o: In function `os_timer_disable': > /home/vegard/linux/arch/um/os-Linux/time.c:145: undefined reference to > `timer_settime' > collect2: error: ld returned 1 exit status > make: *** [vmlinux] Error 1 > > > Vegard > > > ------------------------------------------------------------------------------ > > > _______________________________________________ > User-mode-linux-devel mailing list > Use...@li... > https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel |
From: Vegard N. <veg...@gm...> - 2015-11-21 19:09:37
|
On 10 November 2015 at 23:08, Richard Weinberger <ri...@no...> wrote: > Linus, > > the following changes since commit > 6a13feb9c82803e2b815eca72fa7a9f5561d7861: > > Linux 4.3 (2015-11-01 16:05:25 -0800) > > are available in the git repository at: > > git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git > for-linus-4.4-rc1 > > for you to fetch changes up to 2eb5f31bc4ea24bb293e82934cfa1cce9573304b: > > um: Switch clocksource to hrtimers (2015-11-06 22:54:49 +0100) > > ---------------------------------------------------------------- > This pull request includes the following UML changes: > * A new hrtimer based clocksource by Anton Ivanov > * ptrace() enhancments by Richard Weinberger > * random cleanups and bug fixes all over the place > > ---------------------------------------------------------------- > Anton Ivanov (1): > um: Switch clocksource to hrtimers > This commit (2eb5f31bc4) seems to have broken the UML defconfig build in latest Linus master for me: [...] LINK vmlinux LD vmlinux.o MODPOST vmlinux.o GEN .version CHK include/generated/compile.h UPD include/generated/compile.h CC init/version.o LD init/built-in.o arch/um/os-Linux/built-in.o: In function `os_timer_create': /home/vegard/linux/arch/um/os-Linux/time.c:51: undefined reference to `timer_create' arch/um/os-Linux/built-in.o: In function `os_timer_set_interval': /home/vegard/linux/arch/um/os-Linux/time.c:84: undefined reference to `timer_settime' arch/um/os-Linux/built-in.o: In function `os_timer_remain': /home/vegard/linux/arch/um/os-Linux/time.c:109: undefined reference to `timer_gettime' arch/um/os-Linux/built-in.o: In function `os_timer_one_shot': /home/vegard/linux/arch/um/os-Linux/time.c:132: undefined reference to `timer_settime' arch/um/os-Linux/built-in.o: In function `os_timer_disable': /home/vegard/linux/arch/um/os-Linux/time.c:145: undefined reference to `timer_settime' collect2: error: ld returned 1 exit status make: *** [vmlinux] Error 1 Vegard |
From: Anton I. <ant...@ko...> - 2015-11-20 18:48:11
|
Hi list, hi Richard. Have you had time to review this one? I think it is not contentious - it is something QEMU has been doing for a very long time now. It also gives a measurable speed up, especially for random IO. I am going to hold off on the rest of the ubd patches because they bulk up the transactions. This is dangerous if you end up in a reentrant situation. A. On 08/11/15 15:20, Anton Ivanov wrote: > This decreases the number of syscalls per read/write by half. > > Signed-off-by: Anton Ivanov <ai...@br...> > --- > arch/um/drivers/ubd_kern.c | 27 +++++---------------------- > arch/um/include/shared/os.h | 2 ++ > arch/um/os-Linux/file.c | 19 +++++++++++++++++++ > 3 files changed, 26 insertions(+), 22 deletions(-) > > diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c > index e8ab93c..39ba207 100644 > --- a/arch/um/drivers/ubd_kern.c > +++ b/arch/um/drivers/ubd_kern.c > @@ -535,11 +535,7 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len) > { > int err; > > - err = os_seek_file(fd, offset); > - if (err < 0) > - return err; > - > - err = os_read_file(fd, buf, len); > + err = os_pread_file(fd, buf, len, offset); > if (err < 0) > return err; > > @@ -1377,14 +1373,8 @@ static int update_bitmap(struct io_thread_req *req) > if(req->cow_offset == -1) > return 0; > > - n = os_seek_file(req->fds[1], req->cow_offset); > - if(n < 0){ > - printk("do_io - bitmap lseek failed : err = %d\n", -n); > - return 1; > - } > - > - n = os_write_file(req->fds[1], &req->bitmap_words, > - sizeof(req->bitmap_words)); > + n = os_pwrite_file(req->fds[1], &req->bitmap_words, > + sizeof(req->bitmap_words), req->cow_offset); > if(n != sizeof(req->bitmap_words)){ > printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, > req->fds[1]); > @@ -1399,7 +1389,6 @@ static void do_io(struct io_thread_req *req) > char *buf; > unsigned long len; > int n, nsectors, start, end, bit; > - int err; > __u64 off; > > if (req->op == UBD_FLUSH) { > @@ -1428,18 +1417,12 @@ static void do_io(struct io_thread_req *req) > len = (end - start) * req->sectorsize; > buf = &req->buffer[start * req->sectorsize]; > > - err = os_seek_file(req->fds[bit], off); > - if(err < 0){ > - printk("do_io - lseek failed : err = %d\n", -err); > - req->error = 1; > - return; > - } > if(req->op == UBD_READ){ > n = 0; > do { > buf = &buf[n]; > len -= n; > - n = os_read_file(req->fds[bit], buf, len); > + n = os_pread_file(req->fds[bit], buf, len, off); > if (n < 0) { > printk("do_io - read failed, err = %d " > "fd = %d\n", -n, req->fds[bit]); > @@ -1449,7 +1432,7 @@ static void do_io(struct io_thread_req *req) > } while((n < len) && (n != 0)); > if (n < len) memset(&buf[n], 0, len - n); > } else { > - n = os_write_file(req->fds[bit], buf, len); > + n = os_pwrite_file(req->fds[bit], buf, len, off); > if(n != len){ > printk("do_io - write failed err = %d " > "fd = %d\n", -n, req->fds[bit]); > diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h > index 21d704b..de5d572 100644 > --- a/arch/um/include/shared/os.h > +++ b/arch/um/include/shared/os.h > @@ -146,6 +146,8 @@ extern int os_read_file(int fd, void *buf, int len); > extern int os_write_file(int fd, const void *buf, int count); > extern int os_sync_file(int fd); > extern int os_file_size(const char *file, unsigned long long *size_out); > +extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset); > +extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset); > extern int os_file_modtime(const char *file, unsigned long *modtime); > extern int os_pipe(int *fd, int stream, int close_on_exec); > extern int os_set_fd_async(int fd); > diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c > index 26e0164..2db18cb 100644 > --- a/arch/um/os-Linux/file.c > +++ b/arch/um/os-Linux/file.c > @@ -264,6 +264,15 @@ int os_read_file(int fd, void *buf, int len) > return n; > } > > +int os_pread_file(int fd, void *buf, int len, unsigned long long offset) > +{ > + int n = pread(fd, buf, len, offset); > + > + if (n < 0) > + return -errno; > + return n; > +} > + > int os_write_file(int fd, const void *buf, int len) > { > int n = write(fd, (void *) buf, len); > @@ -282,6 +291,16 @@ int os_sync_file(int fd) > return n; > } > > +int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset) > +{ > + int n = pwrite(fd, (void *) buf, len, offset); > + > + if (n < 0) > + return -errno; > + return n; > +} > + > + > int os_file_size(const char *file, unsigned long long *size_out) > { > struct uml_stat buf; |
From: Anton I. <ai...@Br...> - 2015-11-20 18:12:31
|
This one came up with a messed up formatting, I will resubmit at some point (hopefully as we refine this). A. On 20/11/15 16:45, Anton Ivanov wrote: > The signals should be restored to their pre-off state > not turned on. > > Signed-off-by: Anton Ivanov <ai...@br...> > --- > arch/um/kernel/skas/mmu.c | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c > index 9591a66..a845de6 100644 > --- a/arch/um/kernel/skas/mmu.c > +++ b/arch/um/kernel/skas/mmu.c > @@ -53,6 +53,7 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) > struct mm_context *to_mm = &mm->context; > unsigned long stack = 0; > int ret = -ENOMEM; > + unsigned long int flags; > > stack = get_zeroed_page(GFP_KERNEL); > if (stack == 0) > @@ -62,12 +63,12 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) > if (current->mm != NULL && current->mm != &init_mm) > from_mm = ¤t->mm->context; > > - block_signals(); > + local_irq_save(flags); > if (from_mm) > to_mm->id.u.pid = copy_context_skas0(stack, > from_mm->id.u.pid); > else to_mm->id.u.pid = start_userspace(stack); > - unblock_signals(); > + local_irq_restore(flags); > > if (to_mm->id.u.pid < 0) { > ret = to_mm->id.u.pid; |
From: Anton I. <ai...@br...> - 2015-11-20 16:45:32
|
Fixes: IRQ Reentrancy The code in signal.c used in irq controller emulation does not prevent IRQ reentrancy which can result in all types of issues as IRQs including ones on the same device can be executed in a nested manner Signed-off-by: Anton Ivanov <ai...@br...> --- arch/um/kernel/irq.c | 8 ++++++++ arch/um/os-Linux/signal.c | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 23cb935..4813263 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -30,11 +30,17 @@ static struct irq_fd **last_irq_ptr = &active_fds; extern void free_irqs(void); +static int in_poll_handler = 0; + void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { struct irq_fd *irq_fd; int n; + WARN_ON_ONCE(in_poll_handler == 1); + + in_poll_handler = 1; + while (1) { n = os_waiting_for_events(active_fds); if (n <= 0) { @@ -52,6 +58,8 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) } } + in_poll_handler = 0; + free_irqs(); } diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index c211153..9aa7097 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -27,6 +27,8 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGALRM] = timer_handler }; +static int irq_guard = 0; + static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { struct uml_pt_regs r; @@ -40,11 +42,17 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) } /* enable signals if sig isn't IRQ signal */ - if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) { unblock_signals(); + } else { + irq_guard = 1; + } (*sig_info[sig])(sig, si, &r); + if (!((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM))) + irq_guard = 0; + errno = save_errno; } @@ -86,7 +94,9 @@ static void timer_real_alarm_handler(mcontext_t *mc) if (mc != NULL) get_regs_from_mc(®s, mc); + irq_guard = 1; timer_handler(SIGALRM, NULL, ®s); + irq_guard = 0; } void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) @@ -243,6 +253,9 @@ void unblock_signals(void) if (signals_enabled == 1) return; + if (irq_guard == 1) + return; + /* * We loop because the IRQ handler returns with interrupts off. So, * interrupts may have arrived and we need to re-enable them and -- 2.1.4 |
From: Anton I. <ai...@br...> - 2015-11-20 16:45:32
|
Fix signal handling to use store/restore instead of block/unblock as that may cause IRQ reentrancy Signed-off-by: Anton Ivanov <ai...@br...> --- arch/um/os-Linux/skas/process.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 13c5a2c..5916267 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -313,6 +313,7 @@ void userspace(struct uml_pt_regs *regs) int err, status, op, pid = userspace_pid[0]; /* To prevent races if using_sysemu changes under us.*/ int local_using_sysemu; + unsigned long flags; siginfo_t si; /* Handle any immediate reschedules or signals */ @@ -396,9 +397,9 @@ void userspace(struct uml_pt_regs *regs) case SIGBUS: case SIGFPE: case SIGWINCH: - block_signals(); + flags = set_signals(0); (*sig_info[sig])(sig, (struct siginfo *)&si, regs); - unblock_signals(); + set_signals(flags); break; default: printk(UM_KERN_ERR "userspace - child stopped " @@ -586,15 +587,16 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf) void initial_thread_cb_skas(void (*proc)(void *), void *arg) { jmp_buf here; + unsigned long int flags; cb_proc = proc; cb_arg = arg; cb_back = &here; - block_signals(); + flags = set_signals(0); if (UML_SETJMP(&here) == 0) UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); - unblock_signals(); + set_signals(flags); cb_proc = NULL; cb_arg = NULL; -- 2.1.4 |
From: Anton I. <ai...@br...> - 2015-11-20 16:45:32
|
The signals should be restored to their pre-off state not turned on. Signed-off-by: Anton Ivanov <ai...@br...> --- arch/um/kernel/skas/mmu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 9591a66..a845de6 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -53,6 +53,7 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) struct mm_context *to_mm = &mm->context; unsigned long stack = 0; int ret = -ENOMEM; + unsigned long int flags; stack = get_zeroed_page(GFP_KERNEL); if (stack == 0) @@ -62,12 +63,12 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) if (current->mm != NULL && current->mm != &init_mm) from_mm = ¤t->mm->context; - block_signals(); + local_irq_save(flags); if (from_mm) to_mm->id.u.pid = copy_context_skas0(stack, from_mm->id.u.pid); else to_mm->id.u.pid = start_userspace(stack); - unblock_signals(); + local_irq_restore(flags); if (to_mm->id.u.pid < 0) { ret = to_mm->id.u.pid; -- 2.1.4 |