|
From: Paul A. <pa...@vi...> - 2011-11-09 18:40:59
|
Hello fellow Valgrind users! Could someone in the know please clarify the support of TLS in Valgrind/Helgrind? I am running Helgrind (version 3.7.0) on our code, which makes heavy use of TLS on GCC 4.5.1 with GLIBC 2.13. I am seeing a lot of what I believe are false positives, where a thread local variable is read on one thread and set on another "simultaneously". I don't see any possible way that there is contention since different threads are involved and by definition this access is safe. This leads me to believe that Helgrind is not recognizing the fact that the variables are thread-local. I have tried using VALGRIND_HG_DISABLE_CHECKING( ) on some of these variables, but even that seems to not work consistently. It's important to us for Valgrind tests to pass since we need to hand off the binaries to another group and they use Valgrind to validate their releases. Thanks in advance, Paul |
|
From: Julian S. <js...@ac...> - 2011-11-09 19:12:08
|
> access is safe. This leads me to believe that Helgrind is not recognizing > the fact that the variables are thread-local. Possibly so; but it's too hard to diagnose without a specific test case. Please send a small program that shows the problem, or (better) file a bug report and attach the file to it. J |
|
From: Paul A. <pa...@vi...> - 2011-11-10 20:17:31
|
OK, here's a small(ish) code sample that reproduces the issue. There are
two levels of threading going on to model a multi-instance program with
each instance having multiple threads. As you can see, there is a single
thread-local variable which Helgrind claims is being raced...
Code:
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <vector>
// Shutdown request flag. Set to true by handle_sigint() and read through
// Shutdown(); both sides access it while holding g_lock.
bool g_shutdown = false;
// Guards g_shutdown. Initialised and destroyed in main().
pthread_mutex_t g_lock;
// Upper bound on the number of worker threads an Instance starts per round.
int g_max_threads = 4;
// Number of Controller threads main() creates.
int g_max_controllers = 2;
// Thread-local id; every worker thread stores its own id here in
// Thread::Start().
__thread __attribute__ ((aligned (8))) unsigned long l_inst_id = 0;
// Exclusive upper bound, in milliseconds, of the random sleeps taken by
// Instance::Run() and Controller::Start().
int g_max_wait = 100;
// Returns a snapshot of the global shutdown flag.
// The flag is read while holding g_lock so the read is ordered with respect
// to the writer that sets it.
bool Shutdown()
{
    pthread_mutex_lock(&g_lock);
    const bool stop_requested = g_shutdown;
    pthread_mutex_unlock(&g_lock);
    return stop_requested;
}
// Writes `message` verbatim to stderr and terminates the process with abort().
//
// The text is emitted with fputs() rather than being handed to fprintf() as
// the format string, so '%' characters in `message` are printed literally
// instead of being interpreted as conversion specifications (which would read
// non-existent variadic arguments).
//
// message: NUL-terminated text to print; must not be NULL.
// Does not return.
void Error(const char *message)
{
    fputs(message, stderr);
    abort();
}
// One worker thread. When run, the worker prints a start message, stores its
// id into the thread-local l_inst_id, prints a completion message and exits.
class Thread
{
public:
    // id: value the worker prints and stores into l_inst_id.
    Thread(int id)
        : m_thread(), m_id(id), m_started(false) {}

    // Starts the worker thread. If pthread_create() fails, the error code is
    // reported on stderr and the object stays in the "not started" state so
    // that Wait() will not try to join a thread that was never created.
    void Run()
    {
        const int rc = pthread_create(&m_thread, NULL, &Thread::EntryPoint, this);
        if (rc != 0)
        {
            fprintf(stderr, "thread %d: pthread_create failed (error %d)\n", m_id, rc);
            return;
        }
        m_started = true;
    }

    // Blocks until the worker has finished. Does nothing if no worker is
    // outstanding (Run() was never called, failed, or was already waited for).
    void Wait()
    {
        if (!m_started)
            return;
        pthread_join(m_thread, NULL);
        m_started = false;
    }

private:
    pthread_t m_thread;
    int m_id;
    bool m_started; // true between a successful Run() and the matching Wait()

    // pthread entry trampoline: `ptr` is the Thread object passed by Run().
    static void *EntryPoint(void *ptr)
    {
        Thread *thread = static_cast<Thread *>(ptr);
        thread->Start();
        return 0;
    }

    // Body of the worker thread.
    void Start()
    {
        printf("thread %d start\n", m_id);
        fflush(stdout);
        l_inst_id = m_id; // <----- offending line of code
        printf("thread %d done\n", m_id);
        fflush(stdout);
    }
};
class Instance
{
public:
void Run()
{
while (!Shutdown())
{
// wait a random time
int wait = rand() % g_max_wait;
usleep(wait * 1000);
Thread *threads[g_max_threads];
printf("inst start\n");
// create and run a random number of threads
int num_threads = (rand() % g_max_threads) + 1;
for (int i = 0; i < num_threads; i++)
{
threads[i] = new Thread(i);
if (threads[i])
threads[i]->Run();
}
// wait for threads to terminate
for (int i = 0; i < num_threads; i++)
{
if (threads[i])
{
threads[i]->Wait();
delete threads[i];
}
}
printf("inst done\n");
fflush(stdout);
}
}
};
// A controller thread. Until shutdown is requested it repeatedly sleeps a
// random time and then runs an Instance on its own thread.
class Controller
{
public:
    Controller()
        : m_thread(), m_started(false)
    {
    }

    // Starts the controller thread. If pthread_create() fails, the error code
    // is reported on stderr and the object stays in the "not started" state so
    // that Wait() will not try to join a thread that was never created.
    void Run()
    {
        const int rc = pthread_create(&m_thread, NULL, &Controller::EntryPoint, this);
        if (rc != 0)
        {
            fprintf(stderr, "controller: pthread_create failed (error %d)\n", rc);
            return;
        }
        m_started = true;
    }

    // Blocks until the controller thread has finished. Does nothing if no
    // thread is outstanding (Run() was never called, failed, or was already
    // waited for).
    void Wait()
    {
        if (!m_started)
            return;
        pthread_join(m_thread, NULL);
        m_started = false;
    }

private:
    pthread_t m_thread;
    bool m_started; // true between a successful Run() and the matching Wait()

    // pthread entry trampoline: `ptr` is the Controller passed by Run().
    static void *EntryPoint(void *ptr)
    {
        Controller *controller = static_cast<Controller *>(ptr);
        controller->Start();
        return 0;
    }

    // Body of the controller thread.
    void Start()
    {
        while (!Shutdown())
        {
            // wait a random time
            int wait = rand() % g_max_wait;
            usleep(wait * 1000);
            // try to create and run an instance
            Instance().Run();
        }
        printf("controller shutdown\n");
    }
};
// SIGINT handler: announces the interrupt on stdout and raises the global
// shutdown flag under g_lock.
//
// signum: signal number delivered (unused).
static void
handle_sigint(int signum)
{
    (void)signum;

    // write(2) is async-signal-safe; printf() is not, and could deadlock on
    // the stdout lock if the signal interrupts a thread that is printing.
    static const char msg[] = "got ctrl-c\n";
    const ssize_t written = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
    (void)written; // nothing useful can be done about a failed write here

    // NOTE(review): pthread_mutex_lock() is not on the POSIX list of
    // async-signal-safe functions; if this handler interrupts a thread that
    // already holds g_lock it will deadlock. Kept because g_shutdown is a
    // plain bool whose readers rely on g_lock for synchronisation.
    pthread_mutex_lock(&g_lock);
    g_shutdown = true;
    pthread_mutex_unlock(&g_lock);
}
int main(int argc, char *argv[])
{
pthread_mutex_init(&g_lock, NULL);
// set up signal handlers
signal(SIGINT, handle_sigint);
// create a number of controllers
Controller *controllers[g_max_controllers];
for (int i = 0; i < g_max_controllers; i++)
{
controllers[i] = new Controller;
if (controllers[i])
controllers[i]->Run();
}
// wait for controllers to terminate
for (int i = 0; i < g_max_controllers; i++)
{
if (controllers[i])
{
controllers[i]->Wait();
delete controllers[i];
}
}
pthread_mutex_destroy(&g_lock);
return 0;
}
Valgrind output:
==9879== Helgrind, a thread error detector
==9879== Copyright (C) 2007-2011, and GNU GPL'd, by OpenWorks LLP et al.
==9879== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==9879== Command: ./insttest
==9879==
==9879== ---Thread-Announcement------------------------------------------
==9879==
==9879== Thread #7 was created
==9879== at 0x376D2E0BEE: clone (in /lib64/libc-2.13.so)
==9879== by 0x376D605D9F: do_clone.clone.2 (in
/lib64/libpthread-2.13.so)
==9879== by 0x376D60731A: pthread_create@@GLIBC_2.2.5 (in
/lib64/libpthread-2.13.so)
==9879== by 0x4A0950F: pthread_create_WRK (hg_intercepts.c:255)
==9879== by 0x4A096B2: pthread_create@* (hg_intercepts.c:286)
==9879== by 0x400EA6: Thread::Run() (insttest.cpp:40)
==9879== by 0x401062: Instance::Run() (insttest.cpp:92)
==9879== by 0x4011D2: Controller::Start() (insttest.cpp:148)
==9879== by 0x40118B: Controller::EntryPoint(void*) (insttest.cpp:135)
==9879== by 0x4A0969B: mythread_wrapper (hg_intercepts.c:219)
==9879== by 0x376D606CCA: start_thread (in /lib64/libpthread-2.13.so)
==9879== by 0x376D2E0C2C: clone (in /lib64/libc-2.13.so)
==9879==
==9879== ---Thread-Announcement------------------------------------------
==9879==
==9879== Thread #4 was created
==9879== at 0x376D2E0BEE: clone (in /lib64/libc-2.13.so)
==9879== by 0x376D605D9F: do_clone.clone.2 (in
/lib64/libpthread-2.13.so)
==9879== by 0x376D60731A: pthread_create@@GLIBC_2.2.5 (in
/lib64/libpthread-2.13.so)
==9879== by 0x4A0950F: pthread_create_WRK (hg_intercepts.c:255)
==9879== by 0x4A096B2: pthread_create@* (hg_intercepts.c:286)
==9879== by 0x400EA6: Thread::Run() (insttest.cpp:40)
==9879== by 0x401062: Instance::Run() (insttest.cpp:92)
==9879== by 0x4011D2: Controller::Start() (insttest.cpp:148)
==9879== by 0x40118B: Controller::EntryPoint(void*) (insttest.cpp:135)
==9879== by 0x4A0969B: mythread_wrapper (hg_intercepts.c:219)
==9879== by 0x376D606CCA: start_thread (in /lib64/libpthread-2.13.so)
==9879== by 0x376D2E0C2C: clone (in /lib64/libc-2.13.so)
==9879==
==9879== ----------------------------------------------------------------
==9879==
==9879== Possible data race during write of size 8 at 0x6CDE6F8 by thread
#7
==9879== Locks held: none
==9879== at 0x400F30: Thread::Start() (insttest.cpp:64)
==9879== by 0x400EEB: Thread::EntryPoint(void*) (insttest.cpp:55)
==9879== by 0x4A0969B: mythread_wrapper (hg_intercepts.c:219)
==9879== by 0x376D606CCA: start_thread (in /lib64/libpthread-2.13.so)
==9879== by 0x376D2E0C2C: clone (in /lib64/libc-2.13.so)
==9879==
==9879== This conflicts with a previous write of size 8 by thread #4
==9879== Locks held: none
==9879== at 0x400F30: Thread::Start() (insttest.cpp:64)
==9879== by 0x400EEB: Thread::EntryPoint(void*) (insttest.cpp:55)
==9879== by 0x4A0969B: mythread_wrapper (hg_intercepts.c:219)
==9879== by 0x376D606CCA: start_thread (in /lib64/libpthread-2.13.so)
==9879== by 0x376D2E0C2C: clone (in /lib64/libc-2.13.so)
==9879==
==9879==
==9879== For counts of detected and suppressed errors, rerun with: -v
==9879== Use --history-level=approx or =none to gain increased speed, at
==9879== the cost of reduced accuracy of conflicting-access information
==9879== ERROR SUMMARY: 149 errors from 1 contexts (suppressed: 39932 from
419)
Thanks,
Paul
> -----Original Message-----
> From: Bart Van Assche [mailto:bar...@gm...]
> Sent: Wednesday, November 09, 2011 11:11 AM
> To: Paul Archard
> Cc: val...@li...
> Subject: Re: [Valgrind-users] Thread local storage (TLS) support
>
> On Wed, Nov 9, 2011 at 7:18 PM, Paul Archard
> <pa...@vi...> wrote:
> > Could someone in the know please clarify the support of TLS in
> Valgrind/Helgrind?
> >
> > I am running Helgrind (version 3.7.0) on our code, which makes heavy
> use of TLS on GCC 4.5.1 with GLIBC 2.13. I am seeing a lot of what I
> believe are false positives, where a thread local variable is read on
> one thread and set on another "simultaneously". I don't see any
> possible way that there is contention since different threads are
> involved and by definition this access is safe. This leads me to
> believe that Helgrind is not recognizing the fact that the variables
> are thread-local.
> >
> > I have tried using VALGRIND_HG_DISABLE_CHECKING( ) on some of these
> variables, but even that seems to not work consistently. It's
> important to us for Valgrind tests to pass since we need to hand off
> the binaries to another group and they use Valgrind to validate their
> releases.
>
> TLS should be supported by Helgrind and DRD. If you can post a minimal
> example that allows to reproduce the issue you observed with TLS we
> can have a closer look at it.
>
> Bart.
|
|
From: Paul A. <pa...@vi...> - 2011-11-10 23:02:34
|
I've also found that if I collapse the "Instance" layer and create the threads directly from main() instead, the problem seems to go away. This leads me to the possible conclusion that HG is having an issue with data access from a thread created by a (non-main) thread. Paul > -----Original Message----- > From: Bart Van Assche [mailto:bar...@gm...] > Sent: Wednesday, November 09, 2011 11:11 AM > To: Paul Archard > Cc: val...@li... > Subject: Re: [Valgrind-users] Thread local storage (TLS) support > > On Wed, Nov 9, 2011 at 7:18 PM, Paul Archard > <pa...@vi...> wrote: > > Could someone in the know please clarify the support of TLS in > Valgrind/Helgrind? > > > > I am running Helgrind (version 3.7.0) on our code, which makes heavy > use of TLS on GCC 4.5.1 with GLIBC 2.13. I am seeing a lot of what I > believe are false positives, where a thread local variable is read on > one thread and set on another "simultaneously". I don't see any > possible way that there is contention since different threads are > involved and by definition this access is safe. This leads me to > believe that Helgrind is not recognizing the fact that the variables > are thread-local. > > > > I have tried using VALGRIND_HG_DISABLE_CHECKING( ) on some of these > variables, but even that seems to not work consistently. It's > important to us for Valgrind tests to pass since we need to hand off > the binaries to another group and they use Valgrind to validate their > releases. > > TLS should be supported by Helgrind and DRD. If you can post a minimal > example that allows to reproduce the issue you observed with TLS we > can have a closer look at it. > > Bart. |