There is a bug in _st_iterate_threads(). The bug can be demonstrated with
the following program:
#include <stdio.h>
#include "public.h"
/*
 * Library-internal symbols used by this test, declared by hand
 * (presumably because public.h does not expose them -- confirm against
 * the header).  Explicit types are required: implicit int and implicit
 * function declarations are not valid C since C99.
 */
extern int _st_iterate_threads_flag;
extern void _st_iterate_threads(void);

/* a is the write target; ap initially points at it and is cleared by thread_fn. */
int a, *ap = &a;
/*
 * Thread start function used to expose the re-entry bug.
 *
 * It writes through ap once and then clears ap, so a second execution of
 * this function dereferences NULL and crashes.  The statement order below
 * is deliberate.  arg is unused; the function always returns NULL.
 */
static void * thread_fn(void *arg) {
printf("enter thread_fn\n");
/* First entry: ap still points at a, so this store is safe. */
*ap = 1000;
ap = NULL; // re-entry of this thread will cause a core dump
/* Iterate over the threads; with the bug, control re-enters thread_fn here. */
_st_iterate_threads();
printf("exit thread_fn\n");
return NULL;
}
/*
 * Driver for the _st_iterate_threads() reproduction: initializes the
 * State Threads library, sets the iteration flag, runs thread_fn in a
 * separate thread and waits for it to finish.
 *
 * Returns 0 on success, or 1 if the library cannot be initialized or the
 * thread cannot be created or joined.  argc and argv are unused.
 */
int main(int argc, char *argv[])
{
    st_thread_t t;

    (void)argc;
    (void)argv;

    if (st_init() < 0) {
        perror("st_init");
        return 1;
    }

    /*
     * The flag is normally meant to be set from a debugger; it is set by
     * hand here so that the call made inside thread_fn performs an
     * iteration.
     */
    _st_iterate_threads_flag = 1;

    /* Third argument 1: joinable thread.  Fourth argument 0: default stack size. */
    t = st_thread_create(thread_fn, NULL, 1, 0);
    if (t == NULL) {
        perror("st_thread_create");
        return 1;
    }

    if (st_thread_join(t, NULL) < 0) {
        perror("st_thread_join");
        return 1;
    }

    return 0;
}
With release 1.8, the program produces the following outcome:
my_machine > gcc try.c LINUX_2.6.24.5-smp_DBG/libst.a
my_machine > ./a.out
enter thread_fn
enter thread_fn
Segmentation fault (core dumped)
The bug in _st_iterate_threads() causes thread_fn to be re-entered. The
following diff fixes the problem. With this diff, the output of the above
test program is
my_machine > ./a.out
enter thread_fn
show thread msg: Iteration started
show thread msg: Iteration: a new thread
show thread msg: Iteration: a running thread
show thread msg: Iteration completed
exit thread_fn
The diff is explained with the comments in _st_iterate_threads(). I added
some non-essential messages to show the control flow in the test program.
--- common.h.save 2009-05-26 14:46:21.000000000 -0400
+++ common.h 2009-05-26 14:47:05.000000000 -0400
@@ -310,6 +310,7 @@
#define _ST_FL_ON_SLEEPQ 0x04
#define _ST_FL_INTERRUPT 0x08
#define _ST_FL_TIMEDOUT 0x10
+#define _ST_FL_STARTING 0x20
--- sched.c.save 2009-05-26 14:46:00.000000000 -0400
+++ sched.c 2009-05-26 14:57:29.000000000 -0400
@@ -168,7 +168,7 @@
NULL, 0, 0);
if (!_st_this_vp.idle_thread)
return -1;
- _st_this_vp.idle_thread->flags = _ST_FL_IDLE_THREAD;
+ _st_this_vp.idle_thread->flags |= _ST_FL_IDLE_THREAD;
_st_active_count--;
_ST_DEL_RUNQ(_st_this_vp.idle_thread);
@@ -316,6 +316,9 @@
{
_st_thread_t *thread = _ST_CURRENT_THREAD();
+ assert(thread->flags & _ST_FL_STARTING);
+ thread->flags &= ~_ST_FL_STARTING;
+
/*
* Cap the stack by zeroing out the saved return address register
* value. This allows some debugging/profiling tools to know when
@@ -587,6 +590,13 @@
thread->start = start;
thread->arg = arg;
+ /*
+ * _ST_FL_STARTING indicates that the thread has been created but has
+ * not started its execution yet. See _st_iterate_threads() for more
+ * info.
+ */
+ thread->flags |= _ST_FL_STARTING;
+
#ifndef __ia64__
_ST_INIT_CONTEXT(thread, stack->sp, _st_thread_main);
#else
@@ -624,7 +634,7 @@
/* ARGSUSED */
void _st_show_thread_stack(_st_thread_t *thread, const char *messg)
{
-
+ printf("show thread msg: %s\n", messg);
}
/* To be set from debugger */
@@ -646,7 +656,7 @@
if (thread) {
memcpy(thread->context, save_jb, sizeof(jmp_buf));
- _st_show_thread_stack(thread, NULL);
+ _st_show_thread_stack(thread, "Iteration: a running thread");
} else {
if (MD_SETJMP(orig_jb)) {
_st_iterate_threads_flag = 0;
@@ -658,13 +668,28 @@
_st_show_thread_stack(thread, "Iteration started");
}
- q = thread->tlink.next;
- if (q == &_ST_THREADQ)
- q = q->next;
- ST_ASSERT(q != &_ST_THREADQ);
- thread = _ST_THREAD_THREADQ_PTR(q);
- if (thread == _ST_CURRENT_THREAD())
- MD_LONGJMP(orig_jb, 1);
+ while (1) {
+ q = thread->tlink.next;
+ if (q == &_ST_THREADQ) // _ST_THREADQ is part of _st_this_vp, skip
+ q = q->next;
+ ST_ASSERT(q != &_ST_THREADQ);
+ thread = _ST_THREAD_THREADQ_PTR(q);
+ if (thread == _ST_CURRENT_THREAD()) {
+ MD_LONGJMP(orig_jb, 1); // Looped back to current thread. Done.
+ }
+ /*
+ * When a thread has been created but hasn't started running yet (it
+ * hasn't reached _st_thread_main), simply longjmp-ing to its context (set
+ * by MD_INIT_CONTEXT) causes _ST_CURRENT_THREAD's start function to be
+ * re-executed. The _ST_FL_STARTING flag marks a thread in that state,
+ * and we avoid the longjmp for it.
+ */
+ if (thread->flags & _ST_FL_STARTING) {
+ _st_show_thread_stack(thread, "Iteration: a new thread");
+ } else {
+ break;
+ }
+ }
memcpy(save_jb, thread->context, sizeof(jmp_buf));
MD_LONGJMP(thread->context, 1);
}
Thanks for your bug report and patch. _st_iterate_threads() is intended to be used from the debugger, not from within the running program. Can you explain why you're using it within the program? Thanks.