libexecstream has worked well in a couple projects, but we did run into a problem when there were multiple threads started simultaneously and there was likely thread/process contention. Calls in exec-stream-impl.cpp and exec-stream-helpers.cpp to select(...) would occasionally be interrupted by an EINTR error. As far as we can tell, this does not represent an error condition. The select call is just waiting until the status descriptor has something to read, at which point it will return +1. Restarting the status descriptor with the same time delay will extend the possible timeout (or, possibly, on some systems, continue with the remaining time as previously requested). This solution seems to work for our applications.
Specifically, around each call to select(), we replaced the if() with a while() loop to restart the select() call in the event of errno==EINTR.
A diff of exec-stream-impl.cpp follows (including the changes listed in sansay0's sourceforge comment above):
Index: exec-stream-impl.cpp
--- exec-stream-impl.cpp (revision 821)
+++ exec-stream-impl.cpp (working copy)
@@ -259,7 +259,18 @@
struct timeval timeout;
timeout.tv_sec=3;
timeout.tv_usec=0;
- if( select( status_pipe.r()+1, &status_fds, 0, 0, &timeout )==-1 ) {
+ / Modified JPT 2014-08-07 - converted the if statement to a while loop to handle EINTR.
+ * From the best that I can tell, select() == -1 w/errno == EINTR does not actually represent
+ * an error condition, but only that there was a long wait that got interrupted. The select call
+ * is just waiting until the status descriptor has something to read, at which point it will return +1.
+ * Restarting the status descriptor with the same time delay will extend the possible timeout (or, possibly,
+ * on some systems, continue with the same time as previously requested).
+ *
+ * This error was only manifested when many processes were started nearly simultaneously, and is
+ * likely due to thread/process contention.
+ * /
+ while ( select( status_pipe.r()+1, &status_fds, 0, 0, &timeout )==-1 ) {
+ if (errno == EINTR); continue;
throw os_error_t( "exec_stream_t::start: select on status_pipe failed" );
}
if( !FD_ISSET( status_pipe.r(), &status_fds ) ) {
@@ -336,7 +347,8 @@
struct timeval select_timeout;
select_timeout.tv_sec=m_impl->m_child_timeout/1000;
select_timeout.tv_usec=(m_impl->m_child_timeout%1000)*1000;
- if( (code=select( 0, 0, 0, 0, &select_timeout ))==-1 ) {
+ while( (code=select( 0, 0, 0, 0, &select_timeout ))==-1 ) {
+ if (errno == EINTR) continue;
throw os_error_t( "exec_stream_t::close: select failed" );
}
@@ -358,6 +370,34 @@
return true;
}
+
+void exec_stream_t::kill()
+{
+ if( m_impl->m_child_pid!=-1 )
+ {
+ if( ::kill( m_impl->m_child_pid, SIGKILL )==-1 )
+ {
+ throw os_error_t( "exec_stream_t::kill: kill failed" );
+ }
+ // Updated via Sourceforge sansay0 comment as of 2010-08-20
+ // The correct way to handle killing a child process is to wait for the OS
+ // to release the child process resources before assigning -1 to m_child_pid.
+ // Assigning -1 to m_child_pid without waiting will result in a "defunct" process
+ // (aka zombie process) remaining in the process table.
+ pid_t code=waitpid( m_impl->m_child_pid, &m_impl->m_exit_code, 0 );
+ if( code == m_impl->m_child_pid )
+ {
+ m_impl->m_child_pid=-1;
+ m_impl->m_exit_code=0;
+ }
+ else if( code == -1 )
+ {
+ throw os_error_t( "exec_stream_t::kill: waitpid failed" );
+ }
+ }
+}
+
+/
void exec_stream_t::kill()
{
if( m_impl->m_child_pid!=-1 ) {
@@ -367,7 +407,7 @@
m_impl->m_child_pid=-1;
m_impl->m_exit_code=0;
}
-}
+}/
int exec_stream_t::exit_code()
{
A diff for exec-stream-helpers.cpp follows as well:
libexecstream has worked well in a couple projects, but we did run into a problem when there were multiple threads started simultaneously and there was likely thread/process contention. Calls in exec-stream-impl.cpp and exec-stream-helpers.cpp to select(...) would occasionally be interrupted by an EINTR error. As far as we can tell, this does not represent an error condition. The select call is just waiting until the status descriptor has something to read, at which point it will return +1. Restarting the status descriptor with the same time delay will extend the possible timeout (or, possibly, on some systems, continue with the remaining time as previously requested). This solution seems to work for our applications.
Specifically, around each call to select(), we replaced the if() with a while() loop to restart the select() call in the event of errno==EINTR.
A diff of exec-stream-impl.cpp follows (including the changes listed in sansay0's sourceforge comment above):
Index: exec-stream-impl.cpp
--- exec-stream-impl.cpp (revision 821)
+++ exec-stream-impl.cpp (working copy)
@@ -259,7 +259,18 @@
struct timeval timeout;
timeout.tv_sec=3;
timeout.tv_usec=0;
- if( select( status_pipe.r()+1, &status_fds, 0, 0, &timeout )==-1 ) {
+ / Modified JPT 2014-08-07 - converted the if statement to a while loop to handle EINTR.
+ * From the best that I can tell, select() == -1 w/errno == EINTR does not actually represent
+ * an error condition, but only that there was a long wait that got interrupted. The select call
+ * is just waiting until the status descriptor has something to read, at which point it will return +1.
+ * Restarting the status descriptor with the same time delay will extend the possible timeout (or, possibly,
+ * on some systems, continue with the same time as previously requested).
+ *
+ * This error was only manifested when many processes were started nearly simultaneously, and is
+ * likely due to thread/process contention.
+ * /
+ while ( select( status_pipe.r()+1, &status_fds, 0, 0, &timeout )==-1 ) {
+ if (errno == EINTR); continue;
throw os_error_t( "exec_stream_t::start: select on status_pipe failed" );
}
if( !FD_ISSET( status_pipe.r(), &status_fds ) ) {
@@ -336,7 +347,8 @@
struct timeval select_timeout;
select_timeout.tv_sec=m_impl->m_child_timeout/1000;
select_timeout.tv_usec=(m_impl->m_child_timeout%1000)*1000;
- if( (code=select( 0, 0, 0, 0, &select_timeout ))==-1 ) {
+ while( (code=select( 0, 0, 0, 0, &select_timeout ))==-1 ) {
+ if (errno == EINTR) continue;
throw os_error_t( "exec_stream_t::close: select failed" );
}
@@ -358,6 +370,34 @@
return true;
}
+
+void exec_stream_t::kill()
+{
+ if( m_impl->m_child_pid!=-1 )
+ {
+ if( ::kill( m_impl->m_child_pid, SIGKILL )==-1 )
+ {
+ throw os_error_t( "exec_stream_t::kill: kill failed" );
+ }
+ // Updated via Sourceforge sansay0 comment as of 2010-08-20
+ // The correct way to handle killing a child process is to wait for the OS
+ // to release the child process resources before assigning -1 to m_child_pid.
+ // Assigning -1 to m_child_pid without waiting will result in a "defunct" process
+ // (aka zombie process) remaining in the process table.
+ pid_t code=waitpid( m_impl->m_child_pid, &m_impl->m_exit_code, 0 );
+ if( code == m_impl->m_child_pid )
+ {
+ m_impl->m_child_pid=-1;
+ m_impl->m_exit_code=0;
+ }
+ else if( code == -1 )
+ {
+ throw os_error_t( "exec_stream_t::kill: waitpid failed" );
+ }
+ }
+}
+
+/
void exec_stream_t::kill()
{
if( m_impl->m_child_pid!=-1 ) {
@@ -367,7 +407,7 @@
m_impl->m_child_pid=-1;
m_impl->m_exit_code=0;
}
-}
+}/
int exec_stream_t::exit_code()
{
A diff for exec-stream-helpers.cpp follows as well:
Index: exec-stream-helpers.cpp
--- exec-stream-helpers.cpp (revision 821)
+++ exec-stream-helpers.cpp (working copy)
@@ -714,11 +714,16 @@
select_timeout.tv_sec=0;
select_timeout.tv_usec=100000;
int nfds=std::max( p->m_in_pipe.w(), std::max( p->m_out_pipe.r(), p->m_err_pipe.r() ) )+1;
- if( select( nfds, &read_fds, &write_fds, 0, &select_timeout )==-1 ) {
+ bool dobreak = false;
+ //Modified JPT 2014-08-07 (see exec-stream-inpl.cpp exec_stream_t::impl_t::start for details)
+ while( select( nfds, &read_fds, &write_fds, 0, &select_timeout )==-1 ) {
+ if (errno == EINTR) continue;
p->m_error_code=errno;
p->m_error_prefix="thread_buffer_t::thread_func: select failed";
+ dobreak = true;
break;
}
+ if (dobreak) break;
}