|
From: Matthias A. <gu...@un...> - 2014-07-28 13:07:58
|
Hello,
While analyzing a complex C-written database layer with valgrind 3.9.0, I
encounter the following problem in some statement; the called functions
are putting together a database SELECT statement:
...
==17454== Conditional jump or move depends on uninitialised value(s)
==17454== at 0x5921F10: strchrnul (in /lib/libc-2.11.3.so)
==17454== by 0x58E55D6: vfprintf (in /lib/libc-2.11.3.so)
==17454== by 0x59076FB: vsprintf (in /lib/libc-2.11.3.so)
==17454== by 0x58EF96A: sprintf (in /lib/libc-2.11.3.so)
==17454== by 0x5555A0C: select_record (sisisinst.c:1397)
==17454== by 0x55553F4: sisisinst (sisisinst.c:933)
==17454== by 0x553AC45: DB_rdir (dbcall.c:1894)
==17454== by 0x5539C20: DB_ChkVer (dbcall.c:604)
==17454== by 0x553A098: DB_opdbP (dbcall.c:955)
==17454== by 0x5539D39: DB_opdb (dbcall.c:654)
==17454== by 0x804BF69: InitVDaemon (ZFLVDaemon.c:715)
==17454== by 0x804BAAC: main (ZFLVDaemon.c:413)
==17454== Uninitialised value was created by a stack allocation
==17454== at 0x553AA48: DB_rdir (dbcall.c:1827)
the involved fuctions are shown below; the statement in question (see below)
is
sprintf (select_anw, sel_anw, name, name); <********* sisisinst.c:1397
I have checked carefully the code and the 4 args to sprintf() are
all correct defined on the stack; when I change the code to:
select_anw[0] = '\0';
sprintf (select_anw, sel_anw, name, name);
then is valgrind happy, i.e, does not raise the messages any more;
I have two questions:
1) What makes valgrind complaining about this code exactly?
2) When I write a small example like:
#include <stdio.h>
main() {
char select_anw[1024];
sprintf (select_anw, "SELECT %s.* from %s ", "bla", "bla");
}
I'm not able to reproduce the valgrind warning, why?
Thanks
matthias
DB_rdir():
DB_rdir(int (*tabmodul)(), /* Name des Moduls, das die DB-Operationen
* fuer die gewuenschte Tabelle realisiert */
int key, /* Bezeichner f. Schluessel */
int scroll, /* SCROLL-Cursor anlegen oder nicht */
int lock, /* Satz sperren oder nicht */
void *p_daten /* Zeiger auf die Struktur der jeweiligen
* Datenbanktabelle. Vor Aufruf m?ssen die Werte
* der Felder, die zum Key geh?ren, gef?llt
* werden. Nach Ablauf der Funktion ist Struktur
* bei erfolgreicher Abarbeitung mit erstem Satz
* der Ergebnismenge gef?llt */
)
{ <******* dbcall.c:1827
/*----------------------------------------------------------------------*
* DB_rdir() *
*----------------------------------------------------------------------*/
int db_ret=RC_OK;
char sel_anw[LEN_SELECT];
^^^^^^^^^^^^^^^^^^^^^^^^^
....
/*
* SELECT-Anweisung in Teilen zusammen setzen
* DB_NORID: keine rowids lesen
*/
if(lock == DB_NORID) {
/* no rowid und scroll cursor passen nicht zusammen: Fehler */
if(scroll == DB_SCROLL) {
db_ret = db_errfill(DBCALL_FEHLER, RC_NORIDSCR);
if(db_report_is_on) db_report(REPORT_FCT_OUT, "DB_rdir", db_ret);
return(db_ret);
}
strcpy(sel_anw, SELECT0);
} else {
strcpy(sel_anw, SELECT1);
}
strcpy(where_anw, WHERE1);
/*
* Aufruf tabellenspezifisches Modul
*/
if(sql_trace_is_on) sybDebug("now entering tabmodul");
db_ret = ((* tabmodul) (RDIR, scroll, lock, key, (int)DB_NOPARA, p_daten,
sel_anw, where_anw, (void *)NULL,
(char *)NULL, (char *)NULL, (char *)NULL, (char *)NULL, (char *)NULL, (long *)&count));
sisisinst():
/*------------------------------------------------------------------------*/
/* tabmodul */
/*------------------------------------------------------------------------*/
int sisisinst (
int zugriff, /* Art der Zugriffsanforderung */
int scroll, /* Typ des Satzzeigers SCROLL/NOSCROLL */
int lock, /* Datensatz sperren DB_LOCK/DB_NOLOCK */
int key, /* Nummer des Schluessels f. <zugriff> */
int sto, /* ein Satz oder alle (nur DB_dlet()) */
void *p_daten, /* Pointer auf Struktur d. DB-Tabelle */
char *sel_anw, /* 1.Teil der SELECT-Anweisung */
char *where_anw, /* 2.Teil der SELECT-Anw (WHERE-Bedingung) */
void *p_btw_daten, /* Pointer auf zustzl. Struktur *
* nur fuer DB_rbtw(). *
* beschreibt Obergrenze f.Bereichssuche */
char *order_by, /* Liste der Spaltennamen fuer ORDER BY */
char *auf_ab, /* Sortierung erfolgt aufsteigend (DB_ASC) *
* bzw. absteigend (DB_DESC) */
char *group_by, /* Liste der Spaltennamen fuer GROUP BY */
char *having, /* Bedingung fuer HAVING */
char *into_temp, /* Name der temp. Tabelle fuer INTO TEMP */
long *count /* fuer Lese-Operationen Rxxx :
* wenn Wert bei Aufruf
* DO_COUNT : Anzahl Saetze der Ergebnismenge
* ermitteln und in count
* zurueckgeliefern
* NULL : Leseoperation durchfuehren
*/
)
...
/*
* Verzeigung zu DB-Operation
*/
switch (zugriff)
{
case RGEQ :
case RGRT :
case RLEQ :
case RLES :
case RWHR :
case RDIR : db_ret = select_record(scroll, lock, key,
sel_anw, where_anw, p_tabdaten,
NOBTW, p_oben,
order_by, auf_ab, group_by,
having, into_temp,
count
);
break;
select_record():
static int
select_record (
int scroll, /* Angabe, ob SCROLL-Cursor oder sequent. *
* Cursor */
int lock, /* Satz sperren oder nicht */
int key, /* Lesen mit einem Key oder der ganzen *
* Tabelle (NOKEY) */
char *sel_anw, /* erster Teil der Select-Anweisung */
char *where_anw, /* where- Klausel */
t_sisisinst_ec *p_daten, /* Datensatz : enthaelt *
* - Werte d. Schluesselfelder, nach denen *
* gesucht wird *
* - nach Suchen : 1. Satz d. Ergebnismenge *
* - bei Bereichssuche : Untergrenze */
int i_between, /* Bereichssuche, oder nicht (BTW/NOBTW) */
t_sisisinst_ec *p_oben, /* Datensatz, Obergrenze des Suchbereichs *
* bei Bereichssuche (i_between==BTW) */
char *order_by, /* Liste der Spaltennamen fuer ORDER BY */
char *auf_ab, /* Sortierung erfolgt aufsteigend (DB_ASC) *
* bzw. absteigend (DB_DESC) */
char *group_by, /* Liste der Spaltennamen fuer GROUP BY */
char *having, /* Bedingung fuer HAVING */
char *into_temp, /* Name der temp. Tabelle fuer INTO TEMP */
long *count /* wenn Wert bei Aufruf
* DO_COUNT : Anzahl Saetze der Ergebnismenge
* ermitteln und in count
* zurueckgeliefern
* NULL : Leseoperation durchfuehren
*/
)
{
....
char select_anw[MAX_SEL_LEN]; /* Bereich fuer aufbereitete */
....
#ifdef ORACLE
{
/* how to make valgrind happy */
char *name = TAB_SISISINST;
sprintf (select_anw, sel_anw, name, name); <********* sisisinst.c:1397
}
#endif
--
Matthias Apitz | /"\ ASCII Ribbon Campaign:
E-mail: gu...@un... | \ / - No HTML/RTF in E-mail
WWW: http://www.unixarea.de/ | X - No proprietary attachments
phone: +49-170-4527211 | / \ - Respect for open standards
| en.wikipedia.org/wiki/ASCII_Ribbon_Campaign
|
|
From: John R. <jr...@Bi...> - 2014-07-28 14:11:13
|
> ==17454== Conditional jump or move depends on uninitialised value(s) > ==17454== at 0x5921F10: strchrnul (in /lib/libc-2.11.3.so) > ==17454== by 0x58E55D6: vfprintf (in /lib/libc-2.11.3.so) > the involved fuctions are shown below; the statement in question (see below) > is > > sprintf (select_anw, sel_anw, name, name); <********* sisisinst.c:1397 > > I have checked carefully the code and the 4 args to sprintf() are > all correct defined on the stack; when I change the code to: > > > select_anw[0] = '\0'; > sprintf (select_anw, sel_anw, name, name); > > then is valgrind happy, i.e, does not raise the messages any more; You say that all 4 args are on the stack. What are their actual addresses? Run with --db-attach=yes, say 'y' when asked, and use gdb to look around. One possibility is that sel_anw (the format string) has been overwritten because the string being built into select_anw (the buffer) has overflowed. Try changing the code to use snprintf(select_anw, LEN_SELECT, sel_anw, name, name); which is much safer. |
|
From: Philippe W. <phi...@sk...> - 2014-07-28 21:03:44
|
On Mon, 2014-07-28 at 07:11 -0700, John Reiser wrote: > > ==17454== Conditional jump or move depends on uninitialised value(s) > > ==17454== at 0x5921F10: strchrnul (in /lib/libc-2.11.3.so) > > ==17454== by 0x58E55D6: vfprintf (in /lib/libc-2.11.3.so) > > > the involved fuctions are shown below; the statement in question (see below) > > is > > > > sprintf (select_anw, sel_anw, name, name); <********* sisisinst.c:1397 > > > > I have checked carefully the code and the 4 args to sprintf() are > > all correct defined on the stack; when I change the code to: > > > > > > select_anw[0] = '\0'; > > sprintf (select_anw, sel_anw, name, name); > > > > then is valgrind happy, i.e, does not raise the messages any more; > > You say that all 4 args are on the stack. What are their actual addresses? > Run with --db-attach=yes, say 'y' when asked, and use gdb to look around. --db-attach=yes should be considered as (is?) obsolete. You could instead use --vgdb-error=1 (to just attach when the error is reported) or better use --vgdb-error=0, put breakpoints and verify the (un-)definedness of the relevant variables at various points between their declaration and their usage. Philippe |
|
From: John R. <jr...@Bi...> - 2014-07-29 00:24:01
|
> --db-attach=yes should be considered as (is?) obsolete. > > You could instead use --vgdb-error=1 (to just attach when the error is > reported) or better use --vgdb-error=0, put breakpoints > and verify the (un-)definedness of the relevant variables at various > points between their declaration and their usage. https://bugs.kde.org/show_bug.cgi?id=337871 deprecate --db-attach=yes in favor of --vgdb-debug=1 |
|
From: Philippe W. <phi...@sk...> - 2014-07-29 18:35:39
|
On Tue, 2014-07-29 at 08:15 +0200, Matthias Apitz wrote: > El día Monday, July 28, 2014 a las 07:11:02AM -0700, John Reiser escribió: > > > > ==17454== Conditional jump or move depends on uninitialised value(s) > > > ==17454== at 0x5921F10: strchrnul (in /lib/libc-2.11.3.so) > > > ==17454== by 0x58E55D6: vfprintf (in /lib/libc-2.11.3.so) ... > All was fine. Why is valgrind complaining? Here is an hypothesis: Looking at (in the SVN version) shared/vg_replace_strmem.c strchrnul should have been replaced by the implementation in vg_replace_strmem.c. >From the stacktrace above, it looks like strchrnul was not replaced. Often, the glibc implementations of the str* functions are highly optimised, and causes false positive (e.g. by assuming they can read a few more bytes than the end of the string). That might be the case for you. What you could do is to redo your GDB session, but using the valgrind gdbserver monitor command to check the definedness of the printf args at various momenets and just before the print call. Note that using --track-origins=yes should indicate where this unitialised byte is coming from. Would be nice to understand why strchrnul was not replaced (using e.g. -v -v -v --trace-redir=yes). Philippe |
|
From: Matthias A. <gu...@un...> - 2014-07-30 06:17:25
|
El día Tuesday, July 29, 2014 a las 08:35:51PM +0200, Philippe Waroquiers escribió:
> On Tue, 2014-07-29 at 08:15 +0200, Matthias Apitz wrote:
> > El día Monday, July 28, 2014 a las 07:11:02AM -0700, John Reiser escribió:
> >
> > > > ==17454== Conditional jump or move depends on uninitialised value(s)
> > > > ==17454== at 0x5921F10: strchrnul (in /lib/libc-2.11.3.so)
> > > > ==17454== by 0x58E55D6: vfprintf (in /lib/libc-2.11.3.so)
> ...
> > All was fine. Why is valgrind complaining?
>
> Here is an hypothesis:
>
> Looking at (in the SVN version) shared/vg_replace_strmem.c
> strchrnul should have been replaced by the implementation
> in vg_replace_strmem.c.
>
> From the stacktrace above, it looks like strchrnul was not replaced.
This (your hypothesis) matches with the fact that I was not able to
reproduce the same problem with a small C-pgm doing exactly the same
sprintf() call:
#include <stdio.h>
#define MAX_SEL_LEN 1024
static int select_record();
main()
{
select_record("SELECT rowid, sisisinst.* from sisisinst");
}
static int select_record (char *sel_anw)
{
char select_anw[MAX_SEL_LEN];
char *name = "sisisinst";
sprintf (select_anw, sel_anw, name, name);
}
in this case the valgrind does not complain with the sprintf() ...
strchrnul() stack trace; and in addition, the output shows the
replacement of strchrnul() to some valgrind' function implementation; I
have the output here:
the big C-server (where strchrnul() is not replaced):
http://www.unixarea.de/valg1.txt
the small C-code aboce (where strchrnul() is replaced):
http://www.unixarea.de/valg2.txt
I'm not a valgrind expert and can not say what is causing the
difference; let me know if you want me to file a bug report with the data.
> Often, the glibc implementations of the str* functions
> are highly optimised, and causes false positive
> (e.g. by assuming they can read a few more bytes than
> the end of the string).
>
> That might be the case for you.
>
> What you could do is to redo your GDB session, but using
> the valgrind gdbserver monitor command to check the definedness
> of the printf args at various momenets and
> just before the print call.
>
> Note that using --track-origins=yes should indicate where
> this unitialised byte is coming from.
>
> Would be nice to understand why strchrnul was not replaced
> (using e.g. -v -v -v --trace-redir=yes).
yes, would be nice; let me know if you need more data to understand this
problem of not replacing strchrnul()
Thx
matthias
--
Matthias Apitz | /"\ ASCII Ribbon Campaign:
E-mail: gu...@un... | \ / - No HTML/RTF in E-mail
WWW: http://www.unixarea.de/ | X - No proprietary attachments
phone: +49-170-4527211 | / \ - Respect for open standards
| en.wikipedia.org/wiki/ASCII_Ribbon_Campaign
|
|
From: Matthias A. <gu...@un...> - 2014-07-30 10:25:23
|
El día Wednesday, July 30, 2014 a las 08:17:11AM +0200, Matthias Apitz escribió: > > the big C-server (where strchrnul() is not replaced): > > http://www.unixarea.de/valg1.txt > > the small C-code above (where strchrnul() is replaced): > > http://www.unixarea.de/valg2.txt > > I'm not a valgrind expert and can not say what is causing the > difference; let me know if you want me to file a bug report with the data. I found the reason by reading and comparing both files line by line; the problem is visible in valg1.txt: ERROR: ld.so: object '/usr/local/lib/valgrind/vgpreload_core-x86-linux.so' from LD_PRELOAD cannot be preloaded: ignored. this was caused by a Linux problem; someone did an error and updated the SuSE Linux to x86_64 while it should stay 32bit; valgrind was was compiled on it; later the box was resetted to 32bit x86 but valgrind was not re-compiled again; I did this now and now all the C-lib functions are redirected correctly. Thread closed :-) Thanks matthias -- Matthias Apitz | /"\ ASCII Ribbon Campaign: E-mail: gu...@un... | \ / - No HTML/RTF in E-mail WWW: http://www.unixarea.de/ | X - No proprietary attachments phone: +49-170-4527211 | / \ - Respect for open standards | en.wikipedia.org/wiki/ASCII_Ribbon_Campaign |
|
From: Matthias A. <gu...@un...> - 2014-07-29 06:15:35
|
El día Monday, July 28, 2014 a las 07:11:02AM -0700, John Reiser escribió:
> > ==17454== Conditional jump or move depends on uninitialised value(s)
> > ==17454== at 0x5921F10: strchrnul (in /lib/libc-2.11.3.so)
> > ==17454== by 0x58E55D6: vfprintf (in /lib/libc-2.11.3.so)
>
> > the involved fuctions are shown below; the statement in question (see below)
> > is
> >
> > sprintf (select_anw, sel_anw, name, name); <********* sisisinst.c:1397
> >
> > I have checked carefully the code and the 4 args to sprintf() are
> > all correct defined on the stack; when I change the code to:
> >
> >
> > select_anw[0] = '\0';
> > sprintf (select_anw, sel_anw, name, name);
> >
> > then is valgrind happy, i.e, does not raise the messages any more;
>
> You say that all 4 args are on the stack. What are their actual addresses?
> Run with --db-attach=yes, say 'y' when asked, and use gdb to look around.
>
> One possibility is that sel_anw (the format string) has been overwritten
> because the string being built into select_anw (the buffer) has overflowed.
>
> Try changing the code to use
> snprintf(select_anw, LEN_SELECT, sel_anw, name, name);
> which is much safer.
Thanks for your hints. Before I will change the code (yes, your proposal is
much safer), I will try to understand why valgrind is complaining;
I grabbed the gdb and debugged through the code:
(gdb) where
#0 DB_rdir (tabmodul=0xf6a68170 <sisisinst>, key=2, scroll=1, lock=0, p_daten=0xffffc860) at dbcall.c:1834
#1 0xf6a4cc21 in DB_ChkVer () at dbcall.c:604
#2 0xf6a4d099 in DB_opdbP (mode=1) at dbcall.c:955
#3 0xf6a4cd3a in DB_opdb () at dbcall.c:654
#4 0x0804bf6a in InitVDaemon () at ZFLVDaemon.c:715
#5 0x0804baad in main (argc=1, argv=0xffffce14) at ZFLVDaemon.c:413
(gdb) p &sel_anw
$3 = (char (*)[1000]) 0xffffc3c0
sel_anw is an automatic char[1000] area and will now be initialized from
some static string 'SELECT1':
1885 strcpy(sel_anw, SELECT1);
(gdb)
1887 strcpy(where_anw, WHERE1);
(gdb)
'sel_anw' and 'where_anw' both are set correctly:
(gdb) p sel_anw
$4 = "SELECT rowid, %s.* from %s", '\000' <repeats 46 times> ...
(gdb) p where_anw
$5 = "%s = :v1", '\000' <repeats 24 times> ...
(gdb) p &sel_anw
$6 = (char (*)[1000]) 0xffffc3c0
(gdb) p &where_anw
$7 = (char (*)[5000]) 0xffffb030
the pointers are passed correctly to sisisinst() function:
(gdb) s
sisisinst (zugriff=1, scroll=1, lock=0, key=2, sto=-20000, p_daten=0xffffc860,
sel_anw=0xffffc3c0 "SELECT rowid, %s.* from %s", where_anw=0xffffb030 "%s = :v1", p_btw_daten=0x0,
order_by=0x0, auf_ab=0x0, group_by=0x0, having=0x0, into_temp=0x0, count=0xffffb02c) at sisisinst.c:799
933 case RDIR : db_ret = select_record(scroll, lock, key,
(gdb) s
and passed further to select_record() function:
Breakpoint 2, select_record (scroll=1, lock=0, key=2, sel_anw=0xffffc3c0 "SELECT rowid, %s.* from %s",
where_anw=0xffffb030 "%s = :v1", p_daten=0xf6ae04a0 <hrec_sisisinst>, i_between=0, p_oben=0xffffaf30,
order_by=0x0, auf_ab=0x0, group_by=0x0, having=0x0, into_temp=0x0, count=0xffffb02c) at sisisinst.c:1353
(gdb) p sel_anw
$8 = 0xffffc3c0 "SELECT rowid, %s.* from %s"
(gdb) p where_anw
$9 = 0xffffb030 "%s = :v1"
(gdb)
1396 char *name = TAB_SISISINST;
(gdb)
this is now the call to sprintf() which was identified by valgrind:
1397 sprintf (select_anw, sel_anw, name, name);
(gdb) p name
$10 = 0xf6ac8f3e "sisisinst"
(gdb) p sel_anw
$11 = 0xffffc3c0 "SELECT rowid, %s.* from %s"
(gdb) p &select_anw
$12 = (char (*)[5000]) 0xffff9ac0
now executing the sprintf() ...
(gdb) n
1401 switch (key)
the result is fine and the target buffer of sprintf(), the 'select_anw'
is corretcly filled:
(gdb) p select_anw
$13 = "SELECT rowid, sisisinst.* from sisisinst", '\000' <repeats 536 times>, "ALTER SESSION SET NLS_LANGUAGE= 'GERMAN' NLS_TERRITORY= 'GERMANY' NLS_CURRENCY= '??' NLS_ISO_CURRENCY= 'GERMANY' NLS_NUMERIC_CHARACTERS= ',.' NLS_CALEN"...
(gdb) p &select_anw
$14 = (char (*)[5000]) 0xffff9ac0
All was fine. Why is valgrind complaining?
Thanks
matthias
--
Matthias Apitz | /"\ ASCII Ribbon Campaign:
E-mail: gu...@un... | \ / - No HTML/RTF in E-mail
WWW: http://www.unixarea.de/ | X - No proprietary attachments
phone: +49-170-4527211 | / \ - Respect for open standards
| en.wikipedia.org/wiki/ASCII_Ribbon_Campaign
|