|
From: <sv...@va...> - 2006-10-17 01:07:24
|
Author: sewardj
Date: 2006-10-17 02:07:21 +0100 (Tue, 17 Oct 2006)
New Revision: 6249
Log:
Merge r6100:
Code for reading XCOFF32 and XCOFF64 symbol tables and line numbers.
Added:
trunk/coregrind/m_debuginfo/priv_readxcoff.h
trunk/coregrind/m_debuginfo/readxcoff.c
Added: trunk/coregrind/m_debuginfo/priv_readxcoff.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/coregrind/m_debuginfo/priv_readxcoff.h =
(rev 0)
+++ trunk/coregrind/m_debuginfo/priv_readxcoff.h 2006-10-17 01:07:21 UTC =
(rev 6249)
@@ -0,0 +1,46 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Read XCOFF format debug info. priv_readxcoff.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2006-2006 OpenWorks LLP
+ in...@op...
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __PRIV_READXCOFF_H
+#define __PRIV_READXCOFF_H
+
+
+/* Read whatever info we can from an XCOFF object file. */
+extern
+Bool ML_(read_xcoff_debug_info) ( struct _SegInfo* si,
+ Addr data_addr,
+ SSizeT data_len,
+ Bool is_mainexe );
+
+#endif /* ndef __PRIV_READXCOFF_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
Added: trunk/coregrind/m_debuginfo/readxcoff.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/coregrind/m_debuginfo/readxcoff.c (rev =
0)
+++ trunk/coregrind/m_debuginfo/readxcoff.c 2006-10-17 01:07:21 UTC (rev =
6249)
@@ -0,0 +1,2680 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Read XCOFF debug info. readxcoff.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2006-2006 OpenWorks LLP
+ in...@op...
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+/* This file reads XCOFF symbol tables and debug info.
+ Known limitations:
+
+ * only one text section per object file is handled
+
+ * C_BINCL/C_EINCL handling is wrong, so functions defined in files
+ included from other files will end up with the wrong file name
+ and possibly line numbers. Fixable.
+
+ * The line number reader leans heavily on the fact that the generic
+ line number canonicaliser in storage.c truncates overlapping
+ ranges.
+*/
+
+#include "pub_core_basics.h"
+#include "pub_core_vki.h" /* struct vki_stat et al */
+#include "pub_core_debuginfo.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_mallocfree.h"
+#include "pub_core_libcfile.h" /* stat, open, close */
+#include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
+#include "pub_core_options.h" /* VG_(clo_trace_symtab) */
+#include "priv_storage.h"
+#include "priv_readxcoff.h" /* self */
+
+/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
+#if defined(VGP_ppc32_aix5)
+# define __XCOFF32__ 1
+# undef __XCOFF64__
+#elif defined(VGP_ppc64_aix5)
+# define __XCOFF64__ 1
+# undef __XCOFF32__
+#else
+# error "This file should only be compiled on AIX"
+#endif
+#include <xcoff.h>
+
+#undef __AR_SMALL__
+#define __AR_BIG__ 1
+#include <ar.h>
+/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
+
+/* Debug stuff */
+#define SHOW_LD_STRTAB 1 /* loader string tables */
+#define SHOW_LD_SYMTAB 1 /* loader symbol table */
+#define SHOW_LD_RELTAB 1 /* loader reloc table */
+#define SHOW_STRTAB 1 /* main string table */
+#define SHOW_SYMS_P1 1 /* P1: find text sym starts */
+#define SHOW_SYMS_P2 1 /* P2: find text sym ends */
+#define SHOW_SYMS_P3 1 /* P3: src filenames & fn start/end line #s *=
/
+#define SHOW_SYMS_P4 1 /* P4: line numbers */
+#define SHOW_SYMS_P5 1 /* P5: find TOC pointers */
+#define SHOW_SYMS_P6 1 /* P6: finalise symbol info */
+
+#define SHOW_AR_DETAILS 0 /* show details of .a file internals */
+
+#define SHOW VG_(clo_trace_symtab)
+
+/* A small stack of filenames is maintained for dealing
+ with BINCL/EINCL symbol table entries. */
+
+#define N_FILENAME_STACK 16
+
+/* Phase 5 (find TOC pointers) has two implementations, the official
+ version, which involves reading the data segment symbols, and the
+ kludgey version, which basically scans the (actual loaded) data
+ segment to find structs which look like function descriptors. */
+
+#if 1
+# undef OFFICIAL_PHASE5
+#else
+# define OFFICIAL_PHASE5 1
+#endif
+
+/*------------------------------------------------------------*/
+/*--- Read XCOFF format debug info. ---*/
+/*------------------------------------------------------------*/
+
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+
+// priv: concrete representation behind the opaque XArray handle
+struct _XArray {
+ void* (*alloc) ( SizeT ); /* allocator handed to newXA */
+ void (*free) ( void* ); /* deallocator handed to newXA */
+ Word (*cmpFn) ( void*, void* ); /* ordering fn; NULL until set */
+ Word elemSzB; /* size of one element, in bytes */
+ void* arr; /* element storage; NULL while totsize is 0 */
+ Word usedsize; /* number of elements currently held */
+ Word totsize; /* number of elements arr has room for */
+ Bool sorted; /* set by sortXA; cleared by addToXA/setCmpFnXA */
+};
+
+// public
+typedef void XArray;
+
+/* Create new XArray, using given allocation and free function, and
+ for elements of the specified size. Alloc fn must not fail. */
+extern=20
+XArray* newXA ( void*(*alloc_fn)(SizeT),=20
+ void(*free_fn)(void*),
+ Word elemSzB );
+
+/* Free all memory associated with an XArray. */
+void deleteXA ( XArray* );
+
+/* Set the comparison function for this XArray. */
+void setCmpFnXA ( XArray*, Word (*compar)(void*,void*) );
+
+/* Add an element to an XArray. Element is copied into the XArray. */
+void addToXA ( XArray*, void* elem );
+
+/* Sort an XArray using its comparison function, if set; else bomb. */
+void sortXA ( XArray* );
+
+/* Lookup (by binary search) 'key' in the array. Set *first to be the
+ index of the first, and *last to be the index of the last matching
+ value found. If any values are found, return True, else return
+ False, and don't change *first or *last. Bomb if the array is not
+ sorted. */
+Bool lookupXA ( XArray*, void* key, Word* first, Word* last );
+
+/* How big is the XArray now? */
+Word sizeXA ( XArray* );
+
+/* Index into the XArray. */
+void* indexXA ( XArray*, Word );
+
+/* Drop the last n elements of an XArray. */
+void dropTailXA ( XArray*, Word );
+
+///////////////////////
+
+XArray* newXA ( void*(*alloc_fn)(SizeT),
+                void(*free_fn)(void*),
+                Word elemSzB )
+{
+   struct _XArray* fresh;   /* header of the array being built */
+   vg_assert(alloc_fn);
+   vg_assert(free_fn);
+   vg_assert(elemSzB > 0);
+   fresh =3D alloc_fn( sizeof(struct _XArray) );
+   vg_assert(fresh);
+   fresh->arr      =3D NULL;   /* storage is allocated by addToXA */
+   fresh->usedsize =3D 0;
+   fresh->totsize  =3D 0;
+   fresh->elemSzB  =3D elemSzB;
+   fresh->alloc    =3D alloc_fn;
+   fresh->free     =3D free_fn;
+   fresh->cmpFn    =3D NULL;
+   fresh->sorted   =3D False;
+   return fresh;
+}
+
+void deleteXA ( XArray* xao )   /* free the elements, then the header */
+{
+   struct _XArray* xa =3D (struct _XArray*)xao;
+   vg_assert(xa);
+   vg_assert(xa->free);
+   if (xa->arr)   /* no ';' here: an empty array has arr NULL */
+      xa->free(xa->arr);
+   xa->free(xa);
+}
+
+void setCmpFnXA ( XArray* xao, Word (*compar)(void*,void*) )
+{
+   struct _XArray* self =3D (struct _XArray*)xao;
+   vg_assert(self);
+   vg_assert(compar);
+   self->sorted =3D False;   /* any existing order is no longer valid */
+   self->cmpFn  =3D compar;
+}
+
+void addToXA ( XArray* xao, void* elem ) /* append a copy of *elem */
+{
+ struct _XArray* xa =3D (struct _XArray*)xao;
+ vg_assert(xa);
+ vg_assert(elem);
+ vg_assert(xa->totsize >=3D 0);
+ vg_assert(xa->usedsize >=3D 0 && xa->usedsize <=3D xa->totsize);
+ if (xa->usedsize =3D=3D xa->totsize) { /* full: grow before appending */
+ void* tmp;
+ Word newsz;
+ if (xa->totsize =3D=3D 0)
+ vg_assert(!xa->arr);
+ if (xa->totsize > 0)
+ vg_assert(xa->arr);
+ newsz =3D xa->totsize=3D=3D0 ? 2 : 2 * xa->totsize; /* 2, then double */
+ if (0)=20
+ VG_(printf)("addToXA: increasing from %ld to %ld\n",=20
+ xa->totsize, newsz);
+ tmp =3D xa->alloc(newsz * xa->elemSzB);
+ vg_assert(tmp);
+ if (xa->usedsize > 0)=20
+ VG_(memcpy)(tmp, xa->arr, xa->usedsize * xa->elemSzB);
+ if (xa->arr)
+ xa->free(xa->arr); /* old storage is released after the copy */
+ xa->arr =3D tmp;
+ xa->totsize =3D newsz;
+ }
+ vg_assert(xa->usedsize < xa->totsize);
+ vg_assert(xa->arr);
+ VG_(memcpy)( ((UChar*)xa->arr) + xa->usedsize * xa->elemSzB,
+ elem, xa->elemSzB ); /* elemSzB bytes go into the first free slot */
+ xa->usedsize++;
+ xa->sorted =3D False; /* the new tail element may be out of order */
+}
+
+// Generic in-place shell sort; same calling style as stdlib's qsort().
+static void ssort( void* base, Word nmemb, Word size,
+                   Word (*compar)(void*, void*) )
+{
+   Int incs[14] =3D { 1, 4, 13, 40, 121, 364, 1093, 3280,
+                    9841, 29524, 88573, 265720,
+                    797161, 2391484 };   /* gap sequence, ascending */
+   Word lo =3D 0;   /* indices are Word so that nmemb is never truncated */
+   Word hi =3D nmemb-1;
+   Word i, j, h, bigN; Int hp;   /* hp indexes incs[] */
+
+   bigN =3D hi - lo + 1; if (bigN < 2) return;
+   hp =3D 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
+
+   #define SORT \
+      for ( ; hp >=3D 0; hp--) { \
+         h =3D incs[hp]; \
+         for (i =3D lo + h; i <=3D hi; i++) { \
+            ASSIGN(v,0, a,i); \
+            j =3D i; \
+            while (COMPAR(a,(j-h), v,0) > 0) { \
+               ASSIGN(a,j, a,(j-h)); \
+               j =3D j - h; \
+               if (j <=3D (lo + h - 1)) break; \
+            } \
+            ASSIGN(a,j, v,0); \
+         } \
+      }
+
+   // General case: elements are moved bytewise with VG_(memcpy)
+   {
+      char* a =3D base;
+      char v[size]; // scratch slot holding one element ('size' bytes)
+
+      #define ASSIGN(dst, dsti, src, srci) \
+         VG_(memcpy)( &dst[size*(dsti)], &src[size*(srci)], size );
+
+      #define COMPAR(dst, dsti, src, srci) \
+         compar( &dst[size*(dsti)], &src[size*(srci)] )
+
+      SORT;
+
+      #undef ASSIGN
+      #undef COMPAR
+   }
+   #undef SORT
+}
+
+void sortXA ( XArray* xao )
+{
+   struct _XArray* self =3D (struct _XArray*)xao;
+   vg_assert(self);
+   vg_assert(self->cmpFn);   /* cannot sort without an ordering */
+   ssort( self->arr, self->usedsize, self->elemSzB, self->cmpFn );
+   self->sorted =3D True;
+}
+
+Bool lookupXA ( XArray* xao, void* key, Word* first, Word* last )
+{
+ Word lo, mid, hi, cres; /* cres: result of cmpFn(key, element) */
+ void* midv;
+ struct _XArray* xa =3D (struct _XArray*)xao;
+ vg_assert(xa);
+ vg_assert(xa->cmpFn);
+ vg_assert(xa->sorted); /* binary search needs a sorted array */
+ lo =3D 0;
+ hi =3D xa->usedsize-1; /* -1 when empty, so the loop returns at once */
+ while (True) {
+ /* current unsearched space is from lo to hi, inclusive. */
+ if (lo > hi) return False; /* not found */
+ mid =3D (lo + hi) / 2;
+ midv =3D indexXA( xa, mid );
+ cres =3D xa->cmpFn( key, midv );
+ if (cres < 0) { hi =3D mid-1; continue; } /* key is below mid */
+ if (cres > 0) { lo =3D mid+1; continue; } /* key is above mid */
+ /* Found it, at mid. See how far we can expand this. */
+ vg_assert(xa->cmpFn( key, indexXA(xa, lo) ) >=3D 0);
+ vg_assert(xa->cmpFn( key, indexXA(xa, hi) ) <=3D 0);
+ *first =3D *last =3D mid;
+ while (*first > 0=20
+ && 0 =3D=3D xa->cmpFn( key, indexXA(xa, (*first)-1)))
+ (*first)--; /* extend down over equal neighbours */
+ while (*last < xa->usedsize-1
+ && 0 =3D=3D xa->cmpFn( key, indexXA(xa, (*last)+1)))
+ (*last)++; /* extend up over equal neighbours */
+ return True;
+ }
+}
+
+Word sizeXA ( XArray* xao )
+{
+   struct _XArray* self =3D (struct _XArray*)xao;
+   vg_assert(self);
+   return self->usedsize;   /* elements in use, not the capacity */
+}
+
+void* indexXA ( XArray* xao, Word n )
+{
+   struct _XArray* self =3D (struct _XArray*)xao;
+   vg_assert(self);
+   vg_assert(n >=3D 0);
+   vg_assert(n < self->usedsize);
+   return ((char*)self->arr) + n * self->elemSzB;   /* &element[n] */
+}
+
+void dropTailXA ( XArray* xao, Word n )
+{
+   struct _XArray* self =3D (struct _XArray*)xao;
+   vg_assert(self);
+   vg_assert(n >=3D 0);
+   vg_assert(n <=3D self->usedsize);
+   self->usedsize -=3D n;   /* storage is kept; only the count shrinks */
+}
+
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+
+/* COFF uses a strange way to represent symbol names. A symbol name
+ is nominally an eight-byte field.
+
+ In 32-bit mode: if the first four bytes are zero, then the second
+ four bytes give the offset into the string table where the string
+ really is. Otherwise, the whole 8-byte thing is itself the name.
+
+ In 64-bit mode: a four-byte field at offset 8 is always interpreted
+ as an offset into the string table.
+
+ For a symbol of length 8, in 32-bit mode, there is no obvious way
+ to zero-terminate it. One solution is to copy the name into
+ dynamically allocated memory, but that complicates storage
+ management.
+
+ An alternative solution, used here, is to represent a name as a
+ (data, length) pair instead of the traditional zero-terminated
+ string. Such a pair can be constructed for any XCOFF symbol name,
+ and has the advantages that (1) no dynamic memory is required, and
+ (2) the name is guaranteed to be accessible as long as the object
+ image is mapped in.
+
+ What the .vec points at must not be modified; if you want to do
+ that, copy it elsewhere first.
+*/
+
+typedef
+ struct {
+ UChar* vec; /* the text of the name */
+ UInt len; /* length of the text */
+ }
+ Name;
+
+static Name maybeDerefStrTab( SYMENT* sym,
+ UChar* oi_strtab, UWord oi_n_strtab)
+{ /* Return the name of 'sym' as a (data, length) pair. */
+ Name res; /* .vec ends up pointing at existing bytes; nothing is copied */
+ static UChar* bogus=20
+ =3D (UChar*)"**_Error_Dereferencing_COFF_String_Table_**";
+ UChar* bytes =3D (UChar*)sym; /* raw byte view of the entry */
+
+# if defined(VGP_ppc32_aix5)
+ if (bytes[0]=3D=3D0 && bytes[1]=3D=3D0 && bytes[2]=3D=3D0 && bytes[3]=
=3D=3D0) {
+ UInt off =3D *(UInt*)&bytes[4]; /* bytes 4..7: string table offset */
+ if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) {
+ res.vec =3D &oi_strtab[off];
+ res.len =3D VG_(strlen)(res.vec); /* NOTE(review): not bounded by oi_n_strtab */
+ return res;
+ } else
+ goto bad;
+ } else {
+ Int i;
+ res.vec =3D bytes; /* the 8 bytes are themselves the name */
+ res.len =3D 8;
+ for (i =3D 0; i < 8; i++)
+ if (bytes[i] =3D=3D 0)
+ res.len--; /* length is 8 minus the number of zero bytes */
+ return res;
+ }
+
+# elif defined(VGP_ppc64_aix5)
+ ULong off =3D (ULong)( *(UInt*)&bytes[8] ); /* bytes 8..11: offset */
+ if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) {
+ res.vec =3D &oi_strtab[off];
+ res.len =3D VG_(strlen)(res.vec); /* NOTE(review): not bounded by oi_n_strtab */
+ return res;
+ } else
+ goto bad;
+
+# else
+# error "Unknown platform"
+# endif
+
+ bad: /* no usable string table entry: return a fixed marker name */
+ res.vec =3D bogus;
+ res.len =3D VG_(strlen)(bogus);
+ return res;
+}
+
+
+/* Similar scheme for extracting names from C_FILE auxiliary entries,
+ except that the 32-bit scheme appears to be always used, even for
+ XCOFF64. */
+
+static Name maybeDerefStrTab_fname ( UChar* bytes,
+ UChar* oi_strtab, UWord oi_n_strtab=
)
+{ /* Return the name held in the 8 bytes at 'bytes' as (data, length). */
+ Name res; /* .vec ends up pointing at existing bytes; nothing is copied */
+ static UChar* bogus=20
+ =3D (UChar*)"**_Error_Dereferencing_COFF_String_Table_**";
+
+ if (bytes[0]=3D=3D0 && bytes[1]=3D=3D0 && bytes[2]=3D=3D0 && bytes[3]=
=3D=3D0) {
+ UInt off =3D *(UInt*)&bytes[4]; /* bytes 4..7: string table offset */
+ if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) {
+ res.vec =3D &oi_strtab[off];
+ res.len =3D VG_(strlen)(res.vec); /* NOTE(review): not bounded by oi_n_strtab */
+ return res;
+ } else
+ goto bad;
+ } else {
+ Int i;
+ res.vec =3D bytes; /* the 8 bytes are themselves the name */
+ res.len =3D 8;
+ for (i =3D 0; i < 8; i++)
+ if (bytes[i] =3D=3D 0)
+ res.len--; /* length is 8 minus the number of zero bytes */
+ return res;
+ }
+
+ bad: /* no usable string table entry: return a fixed marker name */
+ res.vec =3D bogus;
+ res.len =3D VG_(strlen)(bogus);
+ return res;
+}
+
+
+static Name mk_const_Name ( HChar* str )
+{
+   Name wrapped;   /* refers to 'str' itself; no copy is made */
+   wrapped.vec =3D str;
+   wrapped.len =3D VG_(strlen)(wrapped.vec);
+   return wrapped;
+}
+
+static Name mk_empty_Name ( void )
+{
+   Name none;   /* the zero-length name */
+   none.len =3D 0;
+   none.vec =3D "";
+   return none;
+}
+
+static Bool is_empty_Name ( Name name )
+{
+   return !(name.len > 0);   /* emptiness is decided by length alone */
+}
+
+static Bool eq_string_Name ( Name name, UChar* str )
+{
+   /* True iff the NUL-terminated 'str' spells exactly the name.len
+      bytes held in name.vec. */
+   UInt pos =3D 0;
+   while (pos < name.len) {
+      /* 'str' ended early, or the bytes differ. */
+      if (str[pos] =3D=3D 0 || str[pos] !=3D name.vec[pos])
+         return False;
+      pos++;
+   }
+   /* Every name byte matched; 'str' must stop here as well. */
+   return str[name.len] =3D=3D 0 ? True : False;
+}
+
+static Word cmp_Names ( Name n1, Name n2 )
+{
+   /* Bytewise lexicographic comparison of two (data, length) names,
+      returning -1, 0 or 1.  A name that is a proper prefix of the
+      other one sorts first. */
+   UInt pos;
+   for (pos =3D 0; ; pos++) {
+      Bool end1 =3D pos =3D=3D n1.len;
+      Bool end2 =3D pos =3D=3D n2.len;
+      vg_assert(pos <=3D n1.len);
+      vg_assert(pos <=3D n2.len);
+      if (end1 && end2) return 0;
+      if (end1) return -1;
+      if (end2) return 1;
+      /* Both names still have a byte at 'pos'. */
+      if (n1.vec[pos] < n2.vec[pos]) return -1;
+      if (n1.vec[pos] > n2.vec[pos]) return 1;
+   }
+}
+
+static void print_Name ( Name name )
+{
+   UInt pos =3D 0;   /* printed bytewise; only name.len bytes are read */
+   while (pos < name.len)
+      VG_(printf)("%c", name.vec[pos++]);
+}
+
+
+static UChar sanitiseChar ( UChar c )
+{
+   /* Bytes in 32..127 are returned unchanged (this includes 127,
+      DEL); every other byte is replaced by '?'. */
+   return (c >=3D 32 && c <=3D 127) ? c : '?';
+}
+
+static HChar* name_of_filhdr_f_magic ( Int magic )
+{
+ switch (magic) {
+ case 0x01DF: return "xcoff32";
+ case 0x01EF: return "xcoff64-upto-aix43";
+ case 0x01F7: return "xcoff64-from-aix51";
+ default: return "unknown-xcoff-header-magic";
+ }
+}
+
+static HChar* name_of_scnhdr_s_flags ( Int flags )
+{ /* Map section header flags to a printable constant string. */
+ switch (flags & 0xFFFF) { /* only the low 16 bits are examined */
+ case STYP_REG: return "\"regular\"";
+ case STYP_PAD: return "\"padding\"";
+ case STYP_TEXT: return "text only";
+ case STYP_DATA: return "data only";
+ case STYP_BSS: return "bss only";
+ case STYP_EXCEPT: return "Exception";
+ case STYP_INFO: return "Comment";
+ case STYP_LOADER: return "Loader";
+ case STYP_DEBUG: return "Debug";
+ case STYP_TYPCHK: return "Typecheck";
+ case STYP_OVRFLO: return "Overflow";
+ default: return "unknown-section-header-name"; /* no exact match */
+ }
+}
+
+static HChar* name_of_syment_n_sclass ( Int sclass )
+{  /* Map a symbol storage class to a short printable name. */
+   static HChar buf[16];   /* holds "??%d??" for any Int, plus the NUL */
+   switch (sclass) {
+      /* dbx ones (>=3D 0x80) */
+      case C_GSYM: return "gsym";
+      case C_LSYM: return "lsym";
+      case C_PSYM: return "psym";
+      case C_RSYM: return "rsym";
+      case C_RPSYM: return "rpsym";
+      case C_STSYM: return "stsym";
+      case C_DECL: return "decl";
+      case C_FUN: return "fun";
+      case C_BSTAT: return "bstat";
+      case C_ESTAT: return "estat";
+      /* non-dbx ones (< 0x80) */
+      case C_STAT: return "STAT";
+      case C_FILE: return "FILE";
+      case C_HIDEXT: return "HIDEXT";
+      case C_EXT: return "EXT";
+      case C_FCN: return "FCN";
+      case C_BINCL: return "BINCL";
+      case C_EINCL: return "EINCL";
+      case C_BLOCK: return "BLOCK";
+      case C_WEAKEXT: return "WEAKEXT";
+      default:   /* unknown class: format it into the shared buffer */
+         VG_(sprintf)(buf, "??%d??", sclass);
+         return buf;   /* overwritten by the next unknown class */
+   }
+}
+
+typedef=20
+ struct {
+ Name name; /* symbol's name */
+ Addr first; /* first address; always known */
+ Addr last; /* last address; may be an overestimate */
+
+ Name fname; /* source file name, if known */
+ Int slnno; /* starting line #, or 0 if unknown */
+ Int elnno; /* ending line #, or 0 if unknown */
+
+ UWord r2value; /* what r2 should be for this fn (tocptr) */
+ Bool r2known; /* do we have a r2 value? */
+ }=20
+ XCoffSym;
+
+static void init_XCoffSym( XCoffSym* sym )   /* reset all fields */
+{
+   sym->name    =3D mk_empty_Name();
+   sym->first   =3D 0;
+   sym->last    =3D 0;
+   sym->fname   =3D mk_empty_Name();   /* source file not known yet */
+   sym->slnno   =3D 0;                 /* 0 means unknown line number */
+   sym->elnno   =3D 0;
+   sym->r2known =3D False;
+   sym->r2value =3D 0;   /* a UWord, so zero rather than a Bool constant */
+}
+
+/* Order XCoffSyms by ascending start address only. */
+static Word cmp_XCoffSym_by_start ( void* v1, void* v2 )
+{
+   Addr start1 =3D ((XCoffSym*)v1)->first;
+   Addr start2 =3D ((XCoffSym*)v2)->first;
+   if (start1 > start2) return 1;
+   if (start1 < start2) return -1;
+   return 0;
+}
+
+/* Weaker ordering on XCoffSyms: two symbols whose [first,last] ranges
+   share any address compare as equal (0); otherwise -1 or 1. */
+static Word cmp_XCoffSym_by_overlap ( void* v1, void* v2 )
+{
+   XCoffSym* lhs =3D (XCoffSym*)v1;
+   XCoffSym* rhs =3D (XCoffSym*)v2;
+   if (lhs->last < rhs->first) return -1;   /* lhs wholly below rhs */
+   if (rhs->last < lhs->first) return 1;    /* lhs wholly above rhs */
+   return 0;
+}
+
+/* Order XCoffSyms by start address; symbols with equal start
+   addresses are ordered by name, using cmp_Names. */
+static Word cmp_XCoffSym_by_start_then_name ( void* v1, void* v2 )
+{
+   XCoffSym* lhs =3D (XCoffSym*)v1;
+   XCoffSym* rhs =3D (XCoffSym*)v2;
+   if (lhs->first !=3D rhs->first)
+      return lhs->first < rhs->first ? -1 : 1;
+   return cmp_Names(lhs->name, rhs->name);
+}
+
+
+/* csect_idx is an index in the symbol table (start, n_entries) to a
+ symbol defining a csect. If possible, find the bounds of the csect
+ and assign them to *first and *last, and return True; else return
+ False. sntext_1based_if_known is the 1-based number of the text
+ section. Note: computes stated VMAs, not actual VMAs. */
+
+#if defined(VGP_ppc32_aix5)
+# define SMTYP_SMTYP(x) ((x) & 0x7) /* symbol type */
+# define CSECT(PP) (((AUXENT*)(PP))->x_csect)
+# define CSECT_LEN(PP) (CSECT(PP).x_scnlen)
+# define CSECT_ALIGN(PP) (SMTYP_ALIGN(CSECT(PP).x_smtyp))
+# define CSECT_SMTYP(PP) (SMTYP_SMTYP(CSECT(PP).x_smtyp))
+# define CSECT_SCLAS(PP) (CSECT(PP).x_smclas)
+
+#elif defined(VGP_ppc64_aix5)
+# define SMTYP_SMTYP(x) ((x) & 0x7) /* symbol type */
+# define CSECT(PP) (((AUXENT*)(PP))->x_csect)
+# define CSECT_LEN(PP) ((((ULong)(CSECT(PP).x_scnlen_hi)) << 32) \
+ | ((ULong)(CSECT(PP).x_scnlen_lo)))
+# define CSECT_ALIGN(PP) (SMTYP_ALIGN(CSECT(PP).x_smtyp))
+# define CSECT_SMTYP(PP) (SMTYP_SMTYP(CSECT(PP).x_smtyp))
+# define CSECT_SCLAS(PP) (CSECT(PP).x_smclas)
+
+#else
+# error "Unknown platform"
+
+#endif
+
+
+#define SYM_IX(_tab,_n) ((SYMENT*)(((UChar*)(_tab)) + SYMESZ * (_n)))
+
+static=20
+Bool get_csect_bounds ( UChar* start, UWord n_entries,
+ UWord csect_idx,=20
+ Int sntext_1based_if_known,
+ /*OUT*/UChar** first, /*OUT*/UChar** last )
+{ /* *first and *last are written only when True is returned. */
+ Bool is_text;
+ SYMENT* cssym; /* the symbol entry at csect_idx */
+ AUXENT* csaux; /* the entry right after it, viewed as an auxiliary */
+
+ vg_assert(SYMESZ =3D=3D 18); /* both for XCOFF32 and XCOFF64 */
+
+ if (n_entries < 2) /* need room for a symbol plus one auxiliary */
+ return False;
+ if (csect_idx+1 >=3D n_entries) /* auxiliary would lie past the table */
+ return False;
+ cssym =3D (SYMENT*)SYM_IX(start, csect_idx);
+ csaux =3D (AUXENT*)SYM_IX(start, csect_idx+1);
+ is_text =3D sntext_1based_if_known !=3D -1
+ && (Int)cssym->n_scnum =3D=3D sntext_1based_if_known;
+
+ if (!is_text) /* only symbols in the text section are handled */
+ return False;
+
+ if (cssym->n_sclass =3D=3D C_EXT || cssym->n_sclass =3D=3D C_HIDEXT) =
{
+ if (cssym->n_numaux =3D=3D 1) {
+ if (CSECT_SMTYP(csaux) =3D=3D XTY_SD) {
+ if (0) VG_(printf)("GCB: SD: len is %ld\n", CSECT_LEN(csaux)=
);
+ *first =3D (UChar*)(cssym->n_value); /* stated VMA of the start */
+ *last =3D *first + CSECT_LEN(csaux)-1; /* inclusive end address */
+ return True;
+ }
+ } else {
+ /* Possibly complain or take evasive action here. In fact
+ I've yet to see a case where a csect definition symbol has
+ n_numaux !=3D 1. */
+ }
+ }
+ return False; /* not a csect definition we can size */
+}
+
+static void* malloc_AR_SYMTAB ( SizeT nbytes ) {   /* VG_AR_SYMTAB alloc */
+   return VG_(arena_malloc)(VG_AR_SYMTAB, nbytes);
+}
+static void free_AR_SYMTAB ( void* ptr ) {   /* matching deallocator */
+   VG_(arena_free)(VG_AR_SYMTAB, ptr);   /* void fn: no 'return expr' */
+}
+
+/* Read symbol and line number info for the given text section. (This
+ is the central routine for XCOFF reading.) Returns NULL on
+ success, or the text of an error message otherwise. */
+static=20
+HChar* read_symbol_table (=20
+ /*MOD*/SegInfo* si,
+
+ /* location of symbol table */
+ UChar* oi_symtab, UWord oi_nent_symtab,
+
+ /* location of string table */
+ UChar* oi_strtab, UWord oi_n_strtab,
+
+ /* location of debug section (stabs strings, if any) */
+ UChar* oi_debug, UWord oi_n_debug,
+
+ /* location of line number info, if any */
+ UChar* oi_lnos, UWord oi_nent_lnos,
+
+ /* section indices */
+ Int sntext_1based_if_known,
+ Int sndata_1based_if_known,
+
+ /* where the mapped data section is */
+ Addr data_avma,=20
+ UWord data_alen,
+ UWord data_alen_from_auxhdr,
+
+ /* where the mapped toc is (in the data section,
+ presumably), if known */
+ Addr toc_avma,
+
+ /* stated-to-actual VMA offsets */=20
+ Word text_bias,
+ Word data_bias=20
+ )
+{
+ SYMENT* sym;
+ SYMENT* aux;
+ UInt i, j, nsyms, k, m;
+ Name name;
+ Bool is_text, is_data;
+ XArray* syms =3D NULL; /* XArray of XCoffSyms */
+
+ /* If the TOC avma is obviously bogus, get rid of it */
+ {=20
+ UWord data_maxlen =3D data_alen;
+ if (data_maxlen < data_alen_from_auxhdr)
+ data_maxlen =3D data_alen_from_auxhdr;
+
+ //VG_(printf)(" toc_avma %p\n", toc_avma);
+ //VG_(printf)("data_avma %p\n", data_avma);
+ //VG_(printf)("dxxx_avma %p\n", data_avma + data_maxlen);
+
+ if (toc_avma !=3D 0
+ && (toc_avma < data_avma || toc_avma >=3D data_avma + data_maxl=
en))
+ toc_avma =3D 0;
+ //VG_(printf)("2toc_avma %p\n", toc_avma);
+ }
+
+ /* We can't just treat this as an array of SYMENTs, because C
+ thinks they have size 20 whereas the spec says they have size 18
+ (alignment padding) so doing the obvious thing screws up. Hence
+ we have to calculate the offset of each entry manually. */
+
+ if (0) VG_(printf)("size of SYMENT =3D %ld\n", sizeof(SYMENT));
+
+ /* ----------------------------------------------------------
+ Phase 1: first make a pass through the symbols, looking for
+ stuff in the text segment. Calculate their actual VMAs,
+ dump any outside the text segment actual VMA bounds, and=20
+ add the rest to 'syms'.
+ ---------------------------------------------------------- */
+
+ syms =3D newXA( malloc_AR_SYMTAB, free_AR_SYMTAB, sizeof(XCoffSym) );
+
+ if (SHOW && SHOW_SYMS_P1) {
+ VG_(printf)("--- BEGIN Phase1 (find text symbol starts) ---\n");
+ VG_(printf)("--- note: shown addresses are STATED VMAs ---\n");
+ }
+
+ i =3D 0;
+ while (1) {
+
+ if (i >=3D oi_nent_symtab)
+ break;
+
+ sym =3D SYM_IX(oi_symtab, i);
+ is_text =3D sntext_1based_if_known !=3D -1
+ && (Int)sym->n_scnum =3D=3D sntext_1based_if_known;
+ is_data =3D sndata_1based_if_known !=3D -1
+ && (Int)sym->n_scnum =3D=3D sndata_1based_if_known;
+
+ if (SHOW && SHOW_SYMS_P1)
+ VG_(printf)("Phase1: %5d+%d ", i, (Int)sym->n_numaux);
+
+ name =3D mk_const_Name("(unknown)");
+ if (sym->n_scnum =3D=3D N_DEBUG && sym->n_sclass =3D=3D C_FUN)
+ name =3D maybeDerefStrTab( sym, oi_debug, oi_n_debug );
+ else=20
+ if (sym->n_sclass & DBXMASK)
+ name =3D mk_const_Name("(dbxstr)");
+ else
+ name =3D maybeDerefStrTab( sym, oi_strtab, oi_n_strtab);
+
+ if (SHOW && SHOW_SYMS_P1) {
+ VG_(printf)("%5s(%2d) %6s 0x%016llx ",=20
+ is_text ? "text" : is_data ? "data" : "other",
+ (Int)sym->n_scnum,=20
+ name_of_syment_n_sclass(sym->n_sclass),=20
+ (ULong)sym->n_value);
+ print_Name(name);
+ VG_(printf)("\n");
+ }
+
+ i++;
+ i +=3D sym->n_numaux;
+
+ if (!is_text)
+ continue;
+
+ /* --- BEGIN regular(ish) symbol --- */
+ if ((sym->n_sclass =3D=3D C_EXT || sym->n_sclass =3D=3D C_HIDEXT)
+ && (sym->n_numaux =3D=3D 1 || sym->n_numaux =3D=3D 2)) {
+ /* Dealing with a symbol with a csect entry. By convention
+ (according to IBM docs) the csect entry is the last
+ auxiliary for this symbol, if there is more than one
+ auxiliary present; hence "SYM_IX(oi_symtab, i-1)" below. */
+
+ aux =3D SYM_IX(oi_symtab, i-1);
+ if (0) VG_(printf)("symtype is %d\n", CSECT_SMTYP(aux));
+
+ if (CSECT_SMTYP(aux) =3D=3D XTY_SD) {
+ /* Aux is a csect definition. This is relatively rare,
+ but at least it is simple: the CSECT_LEN(aux) field
+ contains its length, so we just heave that into the
+ pot for phase 2. */
+ XCoffSym cand;
+ if (0) VG_(printf)("SD: len is %d\n", (Int)CSECT_LEN(aux));
+ if (0) VG_(printf)("SD: proposed %p\n", sym->n_value);
+ init_XCoffSym(&cand);
+ cand.first =3D sym->n_value;
+ cand.last =3D cand.first + (UWord)CSECT_LEN(aux) - 1;
+
+ cand.first +=3D text_bias;
+ cand.last +=3D text_bias;
+ cand.name =3D name;
+
+ if (cand.last < si->start || cand.first >=3D si->start+si->s=
ize)
+ continue;
+ if (cand.last < cand.first)
+ continue;
+ if (is_empty_Name(name))
+ continue;
+ addToXA(syms, &cand);
+ }
+
+ if (CSECT_SMTYP(aux) =3D=3D XTY_LD) {
+ /* Aux is a label definition. This is the common case. */
+ XCoffSym cand;
+ Bool ok;
+ UChar *csect_first, *csect_last;
+ /* x_scnlen contains the symbol table entry of the
+ containing csect. Use the symbol's stated vma and csect
+ end as the initial approximation of this symbol's start
+ and length. The length will get revised downwards in
+ Phase 2. */
+ init_XCoffSym(&cand);
+ ok =3D get_csect_bounds( oi_symtab, oi_nent_symtab,=20
+ CSECT_LEN(aux),=20
+ sntext_1based_if_known,
+ &csect_first, &csect_last );
+ if (0 && ok)
+ VG_(printf)("new csect svma %p %p\n", csect_first, csect_=
last);
+ if (ok && ((UWord)csect_first) <=3D ((UWord)sym->n_value)
+ && ((UWord)sym->n_value) <=3D ((UWord)csect_last)) {
+ if (0) {
+ VG_(printf)("LD: in a csect %p %p\n",=20
+ csect_first, csect_last);
+ VG_(printf)("CAND: %p .. %p %s\n",=20
+ (void*)sym->n_value, (void*)csect_last, na=
me);
+ }
+ cand.first =3D sym->n_value;
+ cand.last =3D (Addr)csect_last;
+ } else {
+ if (0) {
+ VG_(printf)("LD: can't compute csect bounds?!\n");
+ VG_(printf)("CAND: %p .. %p %s\n",=20
+ (HChar*)sym->n_value,
+ (HChar*)sym->n_value+1, name);
+ }
+ cand.first =3D sym->n_value;
+ cand.last =3D cand.first + 1;
+ }
+
+ /* cand.first is a stated VMA; turn it into an actual VMA
+ and ignore it if not in the actual text segment. */
+
+ cand.first +=3D text_bias;
+ cand.last +=3D text_bias;
+ cand.name =3D name;
+
+ if (cand.last < si->start || cand.first >=3D si->start+si->s=
ize)
+ continue;
+ if (cand.last < cand.first)
+ continue;
+ if (is_empty_Name(name))
+ continue;
+
+ addToXA(syms, &cand);
+ }
+ }
+ /* --- END regular(ish) symbol --- */
+
+ }
+
+ /* ----------------------------------------------------------
+ Phase 2: suitable text symbols have been put into 'syms'. Their
+ start addresses are correct, but end addresses are those of the
+ containing csect, which is in general way too long. This phase
+ clips the ends so that the ranges no longer overlap, and thereby
+ constrains each symbol's range to something which, for the most
+ part, is correct.
+ ---------------------------------------------------------- */
+
+ nsyms =3D sizeXA(syms);
+
+ if (SHOW && SHOW_SYMS_P1)
+ VG_(printf)("Phase1 acquired %d text symbols\n", nsyms);
+
+ if (SHOW && SHOW_SYMS_P2) {
+ VG_(printf)("--- BEGIN Phase2 (find text symbol ends) ---\n");
+ VG_(printf)("--- note: shown addresses are ACTUAL VMAs ---\n");
+ }
+
+ setCmpFnXA(syms, cmp_XCoffSym_by_start_then_name);
+ sortXA(syms);
+
+ /* We only know for sure the start addresses (actual VMAs) of
+ symbols, and an overestimation of their end addresses. So sort
+ by start address, then clip each symbol so that its end address
+ does not overlap with the next one along.
+
+ There is a small refinement: if a group of symbols have the same
+ address, treat them as a group: find the next symbol along that
+ has a higher start address, and clip all of the group
+ accordingly. This clips the group as a whole so as not to
+ overlap following symbols. This leaves prefersym() in
+ storage.c, which is not XCOFF-specific, to later decide which of
+ the symbols in the group to keep.=20
+
+ Another refinement is that we need to get rid of symbols which,
+ after clipping, have identical starts, ends, and names. So the
+ sorting uses the name as a secondary key.
+ */
+
+ for (i =3D 0; i < nsyms; i++) {
+ for (k =3D i+1;=20
+ k < nsyms=20
+ && ((XCoffSym*)indexXA(syms,i))->first=20
+ =3D=3D ((XCoffSym*)indexXA(syms,k))->first;=20
+ k++)
+ ;
+ /* So now [i .. k-1] is a group all with the same start address.
+ Clip their ending addresses so they don't overlap [k]. In
+ the normal case (no overlaps), k =3D=3D i+1. */
+ if (k < nsyms) {
+ XCoffSym* next =3D (XCoffSym*)indexXA(syms,k);
+ for (m =3D i; m < k; m++) {
+ XCoffSym* here =3D (XCoffSym*)indexXA(syms,m);
+ vg_assert(here->first < next->first);
+ if (here->last >=3D next->first)
+ here->last =3D next->first-1;
+ }
+ }
+ i =3D k-1;
+ vg_assert(i <=3D nsyms);
+ }
+
+ j =3D 0;
+ if (nsyms > 0) {
+ j =3D 1;
+ for (i =3D 1; i < nsyms; i++) {
+ vg_assert(j <=3D i);
+ XCoffSym* s_j1 =3D (XCoffSym*)indexXA(syms, j-1);
+ XCoffSym* s_j =3D (XCoffSym*)indexXA(syms, j);
+ XCoffSym* s_i =3D (XCoffSym*)indexXA(syms, i);
+ if (s_i->first !=3D s_j1->first
+ || s_i->last !=3D s_j1->last
+ || 0 !=3D cmp_Names(s_i->name, s_j1->name)) {
+ *s_j =3D *s_i;
+ j++;
+ } else {
+ if (SHOW && SHOW_SYMS_P2) {
+ VG_(printf)("Phase2: dump duplicate ");=20
+ print_Name(s_i->name);
+ VG_(printf)("\n");
+ }
+ }
+ }
+ }
+ vg_assert(j >=3D 0 && j <=3D nsyms);
+ dropTailXA(syms, nsyms - j);
+ nsyms =3D j;
+
+ if (1) {
+ for (i =3D 0; i < nsyms; i++) {
+ XCoffSym* s =3D (XCoffSym*)indexXA(syms, i);
+ if (SHOW && SHOW_SYMS_P2) {
+ VG_(printf)("Phase2: %d 0x%lx 0x%lx ",=20
+ i, s->first, s->last);
+ print_Name(s->name);
+ VG_(printf)("\n");
+ }
+ }
+ }
+
+ /* ----------------------------------------------------------
+ Phase 3: rescan the symbol table, looking for info on function
+ start/end line numbers and source file names. Generally
+ this will be absent for sources compiled without -g.
+ ---------------------------------------------------------- */
+
+ if (SHOW && SHOW_SYMS_P3) {
+ VG_(printf)("--- BEGIN Phase3 (find src filenames "
+ "& fn start/end line #s) ---\n");
+ VG_(printf)("--- note: shown addresses are STATED VMAs ---\n");
+ }
+
+ /* The lookupXAs in the C_FCN(.bf) part have to operate by
+ inclusion. Hence: */
+ setCmpFnXA(syms, cmp_XCoffSym_by_overlap);
+ sortXA(syms);
+
+ /* In this loop, p3currsym is maintained as a pointer to the most
+ recent XCoffSym identified as FCN(.bf) (function start).
+ Subsequent FCN(.ef) (function end) indications are compared
+ against said symbol. This assumes that function start/end
+ indications are not nested. */
+
+ XCoffSym* p3currsym =3D NULL;
+
+ /* Maintain a stack of filenames. We allow the stack pointer to go
+ beyond the end, but obviously nothing is stored in this
+ imaginary part of the stack. */
+ Name filenames[N_FILENAME_STACK];
+ Int filenames_used =3D 1;
+
+ Name name_unknown =3D mk_empty_Name();
+ Name name_overflow =3D mk_const_Name("(filename_stack_overflow)");
+
+ for (i =3D 0; i < N_FILENAME_STACK; i++)
+ filenames[i] =3D name_unknown;
+
+# define FNAME_PUSH(_fname) \
+ do { \
+ vg_assert(filenames_used >=3D 1);\
+ if (filenames_used < N_FILENAME_STACK)\
+ filenames[filenames_used] =3D (_fname);\
+ filenames_used++;\
+ } while (0)
+
+# define FNAME_POP \
+ do {\
+ vg_assert(filenames_used >=3D 1);\
+ if (filenames_used > 1 && filenames_used <=3D N_FILENAME_STACK)=
\
+ filenames[filenames_used-1] =3D name_unknown; \
+ if (filenames_used > 1)\
+ filenames_used--;\
+ } while (0)
+
+# define FNAME_GET_TOP \
+ (filenames_used > N_FILENAME_STACK \
+ ? name_overflow \
+ : filenames[filenames_used-1])
+
+# define FNAME_SET_TOP(_fname) \
+ do {\
+ vg_assert(filenames_used >=3D 1);\
+ filenames[filenames_used-1] =3D (_fname);\
+ } while (0)
+
+
+ i =3D 0;
+ while (1) {
+
+ if (i >=3D oi_nent_symtab)
+ break;
+
+ sym =3D SYM_IX(oi_symtab, i);
+ is_text =3D sntext_1based_if_known !=3D -1
+ && (Int)sym->n_scnum =3D=3D sntext_1based_if_known;
+ is_data =3D sndata_1based_if_known !=3D -1
+ && (Int)sym->n_scnum =3D=3D sndata_1based_if_known;
+
+ if (0 && SHOW && SHOW_SYMS_P3)
+ VG_(printf)("Phase3: %5d+%d ", i, (Int)sym->n_numaux);
+
+ name =3D mk_const_Name("(unknown)");
+ if (sym->n_scnum =3D=3D N_DEBUG && sym->n_sclass =3D=3D C_FUN)
+ name =3D maybeDerefStrTab( sym, oi_debug, oi_n_debug );
+ else=20
+ if (sym->n_sclass & DBXMASK)
+ name =3D mk_const_Name("(dbxstr)");
+ else
+ name =3D maybeDerefStrTab( sym, oi_strtab, oi_n_strtab);
+
+ if (0 && SHOW && SHOW_SYMS_P3) {
+ VG_(printf)("%5s(%2d) %6s 0x%016llx ",=20
+ is_text ? "text" : is_data ? "data" : "other",
+ (Int)sym->n_scnum,=20
+ name_of_syment_n_sclass(sym->n_sclass),=20
+ (ULong)sym->n_value);
+ print_Name(name);
+ VG_(printf)("\n");
+ }
+
+ i++;
+ i +=3D sym->n_numaux;
+
+ /* --- BEGIN C_FILE [source file] --- */
+ /* There are two variants of C_FILE: a simple one with n_numaux
+ == 0, where the primary name is what we're after, and another
+ variant with n_numaux == 3, in which we have to hunt around
+ in the auxiliary entries to find the file name. gcc produces
+ exclusively the first kind, and xlc a mixture of both. */
+ if (sym->n_sclass =3D=3D C_FILE && sym->n_numaux =3D=3D 0) {
+ if (!is_empty_Name(name))
+ FNAME_SET_TOP(name);
+ if (SHOW && SHOW_SYMS_P3) {
+ VG_(printf)("Phase3: %5d+%d FILE ",
+ i-1-sym->n_numaux, (Int)sym->n_numaux );
+ print_Name(name);
+ VG_(printf)("\n");
+ }
+ continue;
+ }
+ if (sym->n_sclass =3D=3D C_FILE && sym->n_numaux > 1=20
+ && sym->n_numaux <=3D 5 /*stay sane*/)=
{
+ for (k =3D 0; k < sym->n_numaux; k++) {
+ aux =3D SYM_IX(oi_symtab, i - sym->n_numaux + k);
+ Name fname
+ =3D maybeDerefStrTab_fname(=20
+ (UChar*)&((AUXENT*)aux)->x_file.x_fname,
+ oi_strtab, oi_n_strtab);
+ if (((AUXENT*)aux)->x_file._x.x_ftype =3D=3D XFT_FN) {
+ if (!is_empty_Name(fname))
+ FNAME_SET_TOP(fname);
+ if (SHOW && SHOW_SYMS_P3) {
+ VG_(printf)("Phase3: %5d+%d FILE ",
+ i-1-sym->n_numaux, (Int)sym->n_numaux );
+ print_Name(fname);
+ VG_(printf)("\n");
+ }
+ break;
+ }
+ }
+ continue;
+ }
+ /* --- END C_FILE [source file] --- */
+
+ /* --- BEGIN C_BINCL [beginning of include] --- */
+ if (sym->n_sclass =3D=3D C_BINCL && sym->n_numaux =3D=3D 0) {
+ FNAME_PUSH(name);
+ if (SHOW && SHOW_SYMS_P3)
+ VG_(printf)("Phase3: %5d+%d BINCL %s\n",
+ i-1-sym->n_numaux, (Int)sym->n_numaux,=20
+ name );
+ continue;
+ }
+ /* --- END C_BINCL [beginning of include] --- */
+
+ /* --- BEGIN C_EINCL [end of include] --- */
+ if (sym->n_sclass =3D=3D C_EINCL && sym->n_numaux =3D=3D 0) {
+ FNAME_POP;
+ if (SHOW && SHOW_SYMS_P3)
+ VG_(printf)("Phase3: %5d+%d EINCL %s\n",
+ i-1-sym->n_numaux, (Int)sym->n_numaux,=20
+ name );
+ continue;
+ }
+ /* --- END C_EINCL [end of include] --- */
+
+ /* everything else that is interesting is in the text
+ section. */
+ if (!is_text)
+ continue;
+=20
+ /* --- BEGIN C_FCN(.bf) [function begin mark] --- */
+ if (sym->n_sclass =3D=3D C_FCN=20
+ && sym->n_numaux =3D=3D 1=20
+ && eq_string_Name(name, ".bf")) {
+ /* aux is BLOCK */
+ aux =3D SYM_IX(oi_symtab, i-1);
+ Addr fn_start_avma =3D ((Addr)sym->n_value) + text_bias;
+ Int fn_start_lnno =3D ((AUXENT*)aux)->x_sym.x_misc.x_lnsz.x_ln=
no;
+ /* Look in 'syms' to see if we have anything for address
+ fn_start_avma. */
+ XCoffSym key;
+ VG_(memset)(&key, 0, sizeof(key));
+ key.first =3D fn_start_avma;
+ key.last =3D fn_start_avma;
+ Word ix_lo, ix_hi;
+
+ /* Search for all symbols intersecting fn_start_avma. */
+ Bool found =3D lookupXA(syms, &key, &ix_lo, &ix_hi);
+ if (found) {
+ /* All the 'syms' entries from ix_lo to ix_hi match. */
+
+ for (k =3D ix_lo; k <=3D ix_hi; k++) {
+ XCoffSym* tsym =3D (XCoffSym*)indexXA(syms,k);
+
+ /* note the start line number */
+ if (tsym->slnno =3D=3D 0 && fn_start_lnno > 0)
+ tsym->slnno =3D fn_start_lnno;
+
+ /* also the current filename, if we know it */
+ if (is_empty_Name(tsym->fname)=20
+ && !is_empty_Name(FNAME_GET_TOP))=20
+ tsym->fname =3D FNAME_GET_TOP;
+
+ /* remember the first in the range as the new current
+ (I've never seen a range with > 1) */
+ if (k =3D=3D ix_lo)
+ p3currsym =3D tsym;
+ if (SHOW && SHOW_SYMS_P3) {
+ VG_(printf)("Phase3: %5d+%d FCN(.bf) 0x%016llx "
+ "lnno=3D%-4d ",=20
+ i-1-sym->n_numaux, (Int)sym->n_numaux,=20
+ (ULong)sym->n_value,
+ fn_start_lnno );
+ print_Name(tsym->name);
+ VG_(printf)("\n");
+ if (!is_empty_Name(tsym->fname)) {
+ VG_(printf)("Phase3: ");
+ print_Name(tsym->fname);
+ VG_(printf)("\n");
+ }
+ }
+ }
+ }
+ continue;
+ }
+ /* --- END C_FCN(.bf) [function begin mark] --- */
+
+ /* --- BEGIN C_FCN(.ef) [function end mark] --- */
+ if (sym->n_sclass =3D=3D C_FCN=20
+ && sym->n_numaux =3D=3D 1=20
+ && eq_string_Name(name, ".ef")) {
+ /* aux is BLOCK */
+ aux =3D SYM_IX(oi_symtab, i-1);
+ /* In this case the n_value field appears to give the address
+ of the first insn following the end of the function.
+ Hence the - 1. */
+ Addr fn_end_avma =3D ((Addr)sym->n_value) + text_bias - 1;
+ Int fn_end_lnno =3D ((AUXENT*)aux)->x_sym.x_misc.x_lnsz.x_lnno=
;
+
+ if (p3currsym
+ && fn_end_avma >=3D p3currsym->first
+ && fn_end_avma <=3D p3currsym->last) {
+ if (p3currsym->elnno =3D=3D 0 && fn_end_lnno > 0)
+ p3currsym->elnno =3D fn_end_lnno;
+ if (SHOW && SHOW_SYMS_P3) {
+ VG_(printf)("Phase3: %5d+%d FCN(.ef) 0x%016llx "
+ "lnno=3D%-4d ",=20
+ i-1-sym->n_numaux, (Int)sym->n_numaux,=20
+ (ULong)sym->n_value,
+ fn_end_lnno );
+ print_Name(p3currsym->name);
+ VG_(printf)("\n");
+ }
+ if (fn_end_avma < p3currsym->last) {
+ /* also take the opportunity to trim the symbol's
+ length to something less than established by the
+ initial estimation done by Phases 1 and 2. */
+ if (0) VG_(printf)("trim end from %p to %p\n",=20
+ p3currsym->last, fn_end_avma);
+ p3currsym->last =3D fn_end_avma;
+ }
+ }
+ continue;
+ }
+ /* --- END C_FCN(.ef) [function end mark] --- */
+
+ }
+
+ /* ----------------------------------------------------------
+ Phase 4: read and enumerate the line number entries, if=20
+ there are any. This depends on knowing the function start/end
+ line numbers established in Phase 3.
+ ---------------------------------------------------------- */
+
+ if (SHOW && SHOW_SYMS_P4) {
+ VG_(printf)("--- BEGIN Phase4 (read line number info) ---\n");
+ VG_(printf)("--- note: shown addresses are ACTUAL VMAs ---\n");
+ }
+
+ /* Re-sort 'syms' using the compare-start-addresses ordering, so we
+ can use that in subsequent searches. */
+ setCmpFnXA(syms, cmp_XCoffSym_by_start);
+ sortXA(syms);
+
+ if (oi_lnos && oi_nent_lnos > 0) {
+
+# if defined(VGP_ppc32_aix5)
+ vg_assert(LINESZ =3D=3D 6); /* XCOFF32 */
+# elif defined(VGP_ppc64_aix5)
+ vg_assert(LINESZ =3D=3D 12); /* XCOFF64 */
+# else
+# error "Unknown plat"
+# endif
+
+# define LNO_IX(_tab,_n) \
+ ((LINENO*)(((UChar*)(_tab)) + LINESZ * (_n)))
+
+ /* Current fn that we are processing line numbers for */
+ XCoffSym* p4currsym =3D NULL;
+
+ /* SegInfo's string table pointer for p4currsym's file name.
+ Allocated on demand, so as not to waste space in the
+ SegInfo's string table. */
+ UChar* si_fname_str =3D NULL;
+
+ /* Ditto the directory name, if we can manage it. */
+ UChar* si_dname_str =3D NULL;
+
+ for (i =3D 0; i < oi_nent_lnos; i++) {
+ LINENO* lno =3D LNO_IX(oi_lnos,i);
+
+ if (lno->l_lnno =3D=3D 0) {
+ /* New fn. We get given the index in the symbol table of
+ the relevant function. It should be a C_EXT, C_WEAKEXT
+ or C_HIDEXT flavour, according to the IBM docs. */
+ Int sym_ix =3D (Int)lno->l_addr.l_symndx;
+ sym =3D SYM_IX(oi_symtab, sym_ix);
+ if (!(sym->n_sclass =3D=3D C_EXT=20
+ || sym->n_sclass =3D=3D C_WEAKEXT=20
+ || sym->n_sclass =3D=3D C_HIDEXT))
+ return "readxcoff.c: invalid symbol reference"
+ " in line number info";
+ /* For these 3 symbol kinds, the n_value field is the
+ symbol's stated VMA. Convert this to an actual VMA and
+ use that to find the associated XCoffSym. */
+ Addr sym_avma =3D ((Addr)sym->n_value) + text_bias;
+
+ XCoffSym key;
+ VG_(memset)(&key, 0, sizeof(key));
+ key.first =3D sym_avma;
+ Word ix_lo, ix_hi;
+
+ Bool found =3D lookupXA(syms, &key, &ix_lo, &ix_hi);
+ if (found) {
+ /* All the 'syms' entries from ix_lo to ix_hi match.
+ Just use the lowest (sigh ..) */
+ p4currsym =3D (XCoffSym*)indexXA(syms, ix_lo);
+ } else {
+ /* We can't find the relevant sym, but we still have to
+ wade through the line number info for this function
+ until we get to the starting record for the next
+ one. */
+ p4currsym =3D NULL;
+ }
+
+ /* If we decide to add any line info for this fn to the
+ SegInfo, we'll allocate this. Otherwise don't
+ bother. */
+ si_fname_str =3D NULL;
+ si_dname_str =3D NULL;
+
+ if (SHOW && SHOW_SYMS_P4) {
+ VG_(printf)("Phase4: new fn (%d found), avma 0x%016llx "=
,=20
+ (Int)(ix_hi-ix_lo+1),
+ (ULong)sym_avma );
+ if (p4currsym)
+ print_Name(p4currsym->name);
+ else
+ VG_(printf)("UNKNOWN");
+ VG_(printf)("\n");
+ }
+
+ } else {
+ /* Line number entry for the current fn. */
+ if (!p4currsym)
+ continue;
+ Int line_no =3D (Int)(UInt)lno->l_lnno;
+ line_no +=3D (p4currsym->slnno - 1);
+ Addr line_first_avma =3D ((Addr)lno->l_addr.l_paddr) + text_=
bias;
+ if (line_first_avma < p4currsym->first
+ || line_first_avma > p4currsym->last)
+ continue;
+ Addr line_last_avma =3D p4currsym->last;
+ /* Try to refine the last_avma by looking at the next
+ line's entry. */
+
+ /* XXX: TODO. What we have currently works only because
+ the generic line number canonicaliser truncates
+ overlapping address ranges in the way which we happen
+ to need anyway. */
+ if (SHOW && SHOW_SYMS_P4)
+ VG_(printf)("Phase4: line %d 0x%016llx - 0x%016llx\n",=20
+ line_no, (ULong)line_first_avma,=20
+ (ULong)line_last_avma);
+
+ /* This now has to be allocated. Try and figure out the
+ dir name at the same time. This is a bit ugly in that
+ it involves messing with the string after it's been
+ copied into the SegInfo's string table, but seems
+ harmless enough. */
+ if ((!si_fname_str) && !is_empty_Name(p4currsym->fname)) {
+ si_dname_str =3D NULL;
+ si_fname_str =3D ML_(addStr)(si, p4currsym->fname.vec,
+ p4currsym->fname.len);
+ UChar* lastslash =3D VG_(strrchr)(si_fname_str, '/');
+ if (lastslash)
+ vg_assert(lastslash[0] =3D=3D '/');
+ if (lastslash[1] !=3D 0) {
+ si_dname_str =3D si_fname_str;
+ lastslash[0] =3D 0; /* replace the / with a NUL
+ terminator */
+ si_fname_str =3D lastslash+1;
+ if (0) VG_(printf)("XXX %s %s\n", si_dname_str,=20
+ si_fname_str);
+ }
+ }
+ /* finally .. */
+ if (line_no >=3D 0)
+ ML_(addLineInfo)(si, si_fname_str, si_dname_str,
+ line_first_avma, line_last_avma+1,
+ line_no, i/*debugging only*/);
+ }
+ }
+
+# undef LNO_IX
+ }
+
+#if defined(OFFICIAL_PHASE5)
+ /* ----------------------------------------------------------
+ Phase 5: Do another trawl of the XCOFF symbol table, looking
+ for TOC entries for the entries we've already placed in 'syms'.
+ ---------------------------------------------------------- */
+
+ if (SHOW && SHOW_SYMS_P5)
+ VG_(printf)("--- BEGIN official Phase5 (find TOC pointers) ---\n")=
;
+
+ Bool is_cfun;
+
+ i =3D 0;
+ while (1) {
+
+ if (i >=3D oi_nent_symtab)
+ break;
+
+ sym =3D SYM_IX(oi_symtab, i);
+ is_text =3D sntext_1based_if_known !=3D -1
+ && (Int)sym->n_scnum =3D=3D sntext_1based_if_known;
+ is_data =3D sndata_1based_if_known !=3D -1
+ && (Int)sym->n_scnum =3D=3D sndata_1based_if_known;
+ is_cfun =3D sym->n_scnum =3D=3D N_DEBUG=20
+ && sym->n_sclass =3D=3D C_FUN;
+
+ i++;
+ i +=3D sym->n_numaux;
+
+ if (!is_cfun && !is_data)
+ continue;
+
+ if (SHOW && SHOW_SYMS_P5)
+ VG_(printf)("Phase5o: %5d+%d ", i-1-sym->n_numaux,=20
+ (Int)sym->n_numaux);
+
+ name =3D mk_const_Name("(unknown)");
+ if (is_cfun)
+ name =3D maybeDerefStrTab( sym, oi_debug, oi_n_debug );
+ else=20
+ if (sym->n_sclass & DBXMASK)
+ name =3D mk_const_Name("(dbxstr)");
+ else
+ name =3D maybeDerefStrTab( sym, oi_strtab, oi_n_strtab);
+
+ if (SHOW && SHOW_SYMS_P5) {
+ VG_(printf)("%5s(%2d) %6s svma 0x%016llx ",=20
+ is_text ? "text" : is_data ? "data" : "other",
+ (Int)sym->n_scnum,=20
+ name_of_syment_n_sclass(sym->n_sclass),=20
+ (ULong)sym->n_value);
+ print_Name(name);
+ VG_(printf)("\n");
+ }
+
+ Addr avma =3D (Addr)sym->n_value + data_bias;
+ if (0) VG_(printf)("data sym: avma %p, limits %p-%p\n",=20
+ avma, data_avma,data_avma + data_alen);
+
+ /* Does avma point to 3 valid words inside the actual data
+ segment? iow, can it possibly be a valid function
+ descriptor? If not, move on. */
+ if (! (avma >=3D data_avma=20
+ && avma + 3 * sizeof(Word) <=3D data_avma + data_alen) )
+ continue;
+
+ UWord* fndescr =3D (UWord*)avma;
+
+ if (SHOW && SHOW_SYMS_P5)=20
+ VG_(printf)(" fndescr =3D {0x%lx,0x%lx}\n",=20
+ fndescr[0], fndescr[1]);
+
+ /* Another check: fndescr[0], the entry point, must point inside
+ the actual text segment. Discard any that don't. */
+
+ Addr fndescr_0 =3D (Addr)fndescr[0];
+ if (fndescr_0 < si->start || fndescr_0 >=3D si->start+si->size)
+ continue;
+
+ /* Let's suppose that fndescr is the descriptor for a
+ function with name NAME. If that's so, then 'syms'
+ acquired by stage 2 should have an entry of name '.NAME'
+ whose address is fndescr[0]. If so, then fndescr[1] must
+ be the relevant r2 value for it. */
+ /* Look in 'syms' to see if we have anything for address
+ fndescr[0]. */
+ XCoffSym key;
+ VG_(memset)(&key, 0, sizeof(key));
+ key.first =3D fndescr_0;
+ Word ix_lo, ix_hi;
+ Bool found =3D lookupXA(syms, &key, &ix_lo, &ix_hi);
+ if (found) {
+ /* So all the 'syms' entries from ix_lo to ix_hi have an
+ address which matches the entry point address stated in
+ this descriptor. For each one, as a final sanity
+ check, see if the 'syms' entry has a name .NAME where
+ NAME is that of the data symbol currently under
+ consideration. If so, it's a pretty good bet that this
+ descriptor matches the text symbol we already have, and
+ so we have a valid tocptr value from fndescr[1]. */
+ for (k =3D ix_lo; k <=3D ix_hi; k++) {
+ XCoffSym* tsym =3D (XCoffSym*)indexXA(syms,k);
+ vg_assert(!is_empty_Name(tsym->name));
+ /* VG_(printf)("cmp %s %s\n", name, tsym->name); */
+ /* VG_(printf)("found matching %d %s\n", k, tsym->name); */
+ if (tsym->name.len =3D=3D 1 + name.len
+ && tsym->name.vec[0] =3D=3D '.'
+ && 0 =3D=3D VG_(memcmp)(&tsym->name.vec[1],
+ &name.vec[0], name.len)) {
+ Addr r2val =3D fndescr[1];
+ if (tsym->r2known) {
+ if (tsym->r2value !=3D r2val)
+ /* COMPLAIN - conflicting r2 values*/ ;
+ } else {
+ tsym->r2known =3D True;
+ tsym->r2value =3D r2val;
+ }
+ }
+ }
+ }
+
+ }
+
+#else /* !defined(OFFICIAL_PHASE5) */
+ /* ----------------------------------------------------------
+ Alternative kludgey Phase 5: find TOC entries for 'syms' by the
+ blunt-instrument approach of scanning the actual data section
+ and noting anything that looks like a function descriptor.
+ This is dangerous in the sense that if there are any 3 word
+ structs which are not real function descriptors but just happen
+ to look like them, then those will be included too.
+ Seems unlikely though.
+ ---------------------------------------------------------- */
+
+ if (SHOW && SHOW_SYMS_P5)
+ VG_(printf)("--- BEGIN kludged Phase5 (find TOC pointers) ---\n");
+
+ if (SHOW)
+ VG_(printf)("Phase5: actual data segment: %p %p\n",
+ data_avma, data_avma + data_alen);
+
+ /* Skip obviously-missing data sections. */
+ if (data_avma !=3D 0 && data_alen >=3D sizeof(UWord)) {
+
+ /* set up for inspecting all the aligned words in the actual
+ data section. */
+
+ Addr tmp =3D (Addr)data_avma;
+ while (tmp & (sizeof(UWord)-1))
+ tmp++;
+
+ UWord* first_data_word =3D (UWord*)tmp;
+ tmp =3D data_avma + data_alen - sizeof(UWord);
+ while (tmp & (sizeof(UWord)-1))
+ tmp--;
+ UWord* last_data_word =3D (UWord*)tmp;
+
+ if (SHOW)=20
+ VG_(printf)("Phase5: data segment conservatively aligned %p %p\=
n",=20
+ first_data_word, last_data_word);
+
+ UWord* wP =3D first_data_...
[truncated message content] |