|
From: Paul A. <pa...@vi...> - 2011-11-10 20:17:31
|
Ok, here's a small(ish) code sample that reproduces the issue. There are
two levels of threading going on, to model a multi-instance program in which
each instance has multiple threads. As you can see, there is a single
thread-local variable, which Helgrind claims is being raced on...
Code:
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <vector>
// Shutdown request flag: set to true by handle_sigint() and read by
// Shutdown(); both accesses are made while holding g_lock.
bool g_shutdown = false;
// Mutex guarding g_shutdown; initialised and destroyed in main().
pthread_mutex_t g_lock;
// Upper bound on worker threads per round: Instance::Run() starts
// (rand() % g_max_threads) + 1 threads each time round its loop.
int g_max_threads = 4;
// Number of Controller objects (one thread each) that main() creates.
int g_max_controllers = 2;
// Thread-local variable written by Thread::Start(); this is the variable
// the Helgrind report is about.
__thread __attribute__ ((aligned (8))) unsigned long l_inst_id = 0;
// Bound, in milliseconds, on the random waits: callers sleep for
// (rand() % g_max_wait) * 1000 microseconds.
int g_max_wait = 100;
/**
 * Report whether shutdown has been requested.
 *
 * Reads g_shutdown while holding g_lock and returns the value observed.
 */
bool Shutdown()
{
    pthread_mutex_lock(&g_lock);
    const bool requested = g_shutdown;
    pthread_mutex_unlock(&g_lock);
    return requested;
}
/**
 * Write a fatal error message to stderr and abort the process.
 *
 * @param message NUL-terminated text, emitted exactly as given.
 *
 * Does not return: abort() is called after the message is written.
 */
void Error(const char *message)
{
    // Emit the text verbatim. Passing it to fprintf() as the format string
    // would make any '%' in the message consume non-existent arguments
    // (undefined behaviour, and a classic format-string vulnerability).
    fputs(message, stderr);
    abort();
}
/**
 * One worker thread.
 *
 * Run() starts a pthread that executes Start() on this object; Wait() joins
 * it. The object must stay alive until Wait() has returned, because the new
 * thread is handed a pointer to it.
 */
class Thread
{
public:
    /** @param id Identifier printed by the thread and stored in l_inst_id. */
    Thread(int id)
        : m_thread(), m_started(false), m_id(id) {}

    /**
     * Start the thread. If pthread_create() fails, a diagnostic is written
     * to stderr and the object is left in the "not started" state.
     */
    void Run()
    {
        const int rc = pthread_create(&m_thread, NULL, &Thread::EntryPoint,
                                      this);
        m_started = (rc == 0);
        if (!m_started)
        {
            fprintf(stderr, "thread %d: pthread_create failed: %s\n",
                    m_id, strerror(rc));
        }
    }

    /** Join the thread. Does nothing if Run() never started one. */
    void Wait()
    {
        // Joining a handle that pthread_create() never filled in is
        // undefined, so only join threads that really exist.
        if (!m_started)
            return;
        pthread_join(m_thread, NULL);
        m_started = false;
    }

private:
    pthread_t m_thread;
    bool m_started;  // true between a successful Run() and the matching Wait()
    int m_id;

    /** pthread start routine: forwards to Start() on the Thread passed in. */
    static void *EntryPoint(void *ptr)
    {
        Thread *thread = static_cast<Thread *>(ptr);
        thread->Start();
        return NULL;
    }

    /** Thread body: prints start/done markers around the thread-local write. */
    void Start()
    {
        printf("thread %d start\n", m_id);
        fflush(stdout);
        l_inst_id = m_id; // <----- offending line of code
        printf("thread %d done\n", m_id);
        fflush(stdout);
    }
};
class Instance
{
public:
void Run()
{
while (!Shutdown())
{
// wait a random time
int wait = rand() % g_max_wait;
usleep(wait * 1000);
Thread *threads[g_max_threads];
printf("inst start\n");
// create and run a random number of threads
int num_threads = (rand() % g_max_threads) + 1;
for (int i = 0; i < num_threads; i++)
{
threads[i] = new Thread(i);
if (threads[i])
threads[i]->Run();
}
// wait for threads to terminate
for (int i = 0; i < num_threads; i++)
{
if (threads[i])
{
threads[i]->Wait();
delete threads[i];
}
}
printf("inst done\n");
fflush(stdout);
}
}
};
/**
 * Top-level controller thread.
 *
 * Run() starts a pthread that executes Start() on this object; Wait() joins
 * it. Start() repeatedly sleeps a random time and runs a fresh Instance
 * until Shutdown() reports true.
 */
class Controller
{
public:
    Controller()
        : m_thread(), m_started(false)
    {
    }

    /**
     * Start the controller thread. If pthread_create() fails, a diagnostic
     * is written to stderr and the object is left in the "not started" state.
     */
    void Run()
    {
        const int rc = pthread_create(&m_thread, NULL,
                                      &Controller::EntryPoint, this);
        m_started = (rc == 0);
        if (!m_started)
        {
            fprintf(stderr, "controller: pthread_create failed: %s\n",
                    strerror(rc));
        }
    }

    /** Join the controller thread. Does nothing if Run() never started one. */
    void Wait()
    {
        // Joining a handle that pthread_create() never filled in is
        // undefined, so only join threads that really exist.
        if (!m_started)
            return;
        pthread_join(m_thread, NULL);
        m_started = false;
    }

private:
    pthread_t m_thread;
    bool m_started;  // true between a successful Run() and the matching Wait()

    /** pthread start routine: forwards to Start() on the Controller passed in. */
    static void *EntryPoint(void *ptr)
    {
        Controller *controller = static_cast<Controller *>(ptr);
        controller->Start();
        return NULL;
    }

    /** Thread body: run instances until shutdown is requested. */
    void Start()
    {
        while (!Shutdown())
        {
            // wait a random time
            const int wait = rand() % g_max_wait;
            usleep(wait * 1000);
            // try to create and run an instance
            Instance().Run();
        }
        printf("controller shutdown\n");
    }
};
/**
 * SIGINT handler: announce the signal and request shutdown by setting
 * g_shutdown to true under g_lock.
 *
 * @param signum Signal number (unused).
 */
static void
handle_sigint(int signum)
{
    (void)signum;

    // printf() is not async-signal-safe (it may take stdio locks or allocate),
    // so the message is emitted with write(), which is.
    static const char msg[] = "got ctrl-c\n";
    const ssize_t written = write(STDOUT_FILENO, msg, sizeof msg - 1);
    (void)written;  // nothing useful can be done about a failed write here

    // NOTE(review): pthread_mutex_lock() is not async-signal-safe either. If
    // the signal is delivered to a thread that is holding g_lock inside
    // Shutdown(), this call can deadlock. Removing the lock requires changing
    // how g_shutdown is declared and read, which is outside this function.
    pthread_mutex_lock(&g_lock);
    g_shutdown = true;
    pthread_mutex_unlock(&g_lock);
}
int main(int argc, char *argv[])
{
pthread_mutex_init(&g_lock, NULL);
// set up signal handlers
signal(SIGINT, handle_sigint);
// create a number of controllers
Controller *controllers[g_max_controllers];
for (int i = 0; i < g_max_controllers; i++)
{
controllers[i] = new Controller;
if (controllers[i])
controllers[i]->Run();
}
// wait for controllers to terminate
for (int i = 0; i < g_max_controllers; i++)
{
if (controllers[i])
{
controllers[i]->Wait();
delete controllers[i];
}
}
pthread_mutex_destroy(&g_lock);
return 0;
}
Valgrind output:
==9879== Helgrind, a thread error detector
==9879== Copyright (C) 2007-2011, and GNU GPL'd, by OpenWorks LLP et al.
==9879== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==9879== Command: ./insttest
==9879==
==9879== ---Thread-Announcement------------------------------------------
==9879==
==9879== Thread #7 was created
==9879== at 0x376D2E0BEE: clone (in /lib64/libc-2.13.so)
==9879== by 0x376D605D9F: do_clone.clone.2 (in
/lib64/libpthread-2.13.so)
==9879== by 0x376D60731A: pthread_create@@GLIBC_2.2.5 (in
/lib64/libpthread-2.13.so)
==9879== by 0x4A0950F: pthread_create_WRK (hg_intercepts.c:255)
==9879== by 0x4A096B2: pthread_create@* (hg_intercepts.c:286)
==9879== by 0x400EA6: Thread::Run() (insttest.cpp:40)
==9879== by 0x401062: Instance::Run() (insttest.cpp:92)
==9879== by 0x4011D2: Controller::Start() (insttest.cpp:148)
==9879== by 0x40118B: Controller::EntryPoint(void*) (insttest.cpp:135)
==9879== by 0x4A0969B: mythread_wrapper (hg_intercepts.c:219)
==9879== by 0x376D606CCA: start_thread (in /lib64/libpthread-2.13.so)
==9879== by 0x376D2E0C2C: clone (in /lib64/libc-2.13.so)
==9879==
==9879== ---Thread-Announcement------------------------------------------
==9879==
==9879== Thread #4 was created
==9879== at 0x376D2E0BEE: clone (in /lib64/libc-2.13.so)
==9879== by 0x376D605D9F: do_clone.clone.2 (in
/lib64/libpthread-2.13.so)
==9879== by 0x376D60731A: pthread_create@@GLIBC_2.2.5 (in
/lib64/libpthread-2.13.so)
==9879== by 0x4A0950F: pthread_create_WRK (hg_intercepts.c:255)
==9879== by 0x4A096B2: pthread_create@* (hg_intercepts.c:286)
==9879== by 0x400EA6: Thread::Run() (insttest.cpp:40)
==9879== by 0x401062: Instance::Run() (insttest.cpp:92)
==9879== by 0x4011D2: Controller::Start() (insttest.cpp:148)
==9879== by 0x40118B: Controller::EntryPoint(void*) (insttest.cpp:135)
==9879== by 0x4A0969B: mythread_wrapper (hg_intercepts.c:219)
==9879== by 0x376D606CCA: start_thread (in /lib64/libpthread-2.13.so)
==9879== by 0x376D2E0C2C: clone (in /lib64/libc-2.13.so)
==9879==
==9879== ----------------------------------------------------------------
==9879==
==9879== Possible data race during write of size 8 at 0x6CDE6F8 by thread
#7
==9879== Locks held: none
==9879== at 0x400F30: Thread::Start() (insttest.cpp:64)
==9879== by 0x400EEB: Thread::EntryPoint(void*) (insttest.cpp:55)
==9879== by 0x4A0969B: mythread_wrapper (hg_intercepts.c:219)
==9879== by 0x376D606CCA: start_thread (in /lib64/libpthread-2.13.so)
==9879== by 0x376D2E0C2C: clone (in /lib64/libc-2.13.so)
==9879==
==9879== This conflicts with a previous write of size 8 by thread #4
==9879== Locks held: none
==9879== at 0x400F30: Thread::Start() (insttest.cpp:64)
==9879== by 0x400EEB: Thread::EntryPoint(void*) (insttest.cpp:55)
==9879== by 0x4A0969B: mythread_wrapper (hg_intercepts.c:219)
==9879== by 0x376D606CCA: start_thread (in /lib64/libpthread-2.13.so)
==9879== by 0x376D2E0C2C: clone (in /lib64/libc-2.13.so)
==9879==
==9879==
==9879== For counts of detected and suppressed errors, rerun with: -v
==9879== Use --history-level=approx or =none to gain increased speed, at
==9879== the cost of reduced accuracy of conflicting-access information
==9879== ERROR SUMMARY: 149 errors from 1 contexts (suppressed: 39932 from
419)
Thanks,
Paul
> -----Original Message-----
> From: Bart Van Assche [mailto:bar...@gm...]
> Sent: Wednesday, November 09, 2011 11:11 AM
> To: Paul Archard
> Cc: val...@li...
> Subject: Re: [Valgrind-users] Thread local storage (TLS) support
>
> On Wed, Nov 9, 2011 at 7:18 PM, Paul Archard
> <pa...@vi...> wrote:
> > Could someone in the know please clarify the support of TLS in
> Valgrind/Helgrind?
> >
> > I am running Helgrind (version 3.7.0) on our code, which makes heavy
> use of TLS on GCC 4.5.1 with GLIBC 2.13. I am seeing a lot of what I
> believe are false positives, where a thread local variable is read on
> one thread and set on another "simultaneously". I don't see any
> possible way that there is contention since different threads are
> involved and by definition this access is safe. This leads me to
> believe that Helgrind is not recognizing the fact that the variables
> are thread-local.
> >
> > I have tried using VALGRIND_HG_DISABLE_CHECKING( ) on some of these
> variables, but even that seems to not work consistently. It's
> important to us for Valgrind tests to pass since we need to hand off
> the binaries to another group and they use Valgrind to validate their
> releases.
>
> TLS should be supported by Helgrind and DRD. If you can post a minimal
> example that allows to reproduce the issue you observed with TLS we
> can have a closer look at it.
>
> Bart.
|