|
From: <sv...@va...> - 2006-10-01 16:26:25
|
Author: sewardj
Date: 2006-10-01 17:26:22 +0100 (Sun, 01 Oct 2006)
New Revision: 6111
Log:
AIX5 changes. Perhaps this isn't quite the right place to add a
sqrt() replacement. Hmm.
Modified:
branches/AIX5/memcheck/mc_replace_strmem.c
Modified: branches/AIX5/memcheck/mc_replace_strmem.c
===================================================================
--- branches/AIX5/memcheck/mc_replace_strmem.c	2006-10-01 16:19:17 UTC (rev 6110)
+++ branches/AIX5/memcheck/mc_replace_strmem.c	2006-10-01 16:26:22 UTC (rev 6111)
@@ -116,8 +116,23 @@
RECORD_OVERLAP_ERROR( s, &extra );
}
=20
-// Some handy Z-encoded names
-#define m_libc_so_star libcZdsoZa // libc.so*
+/* --------- Some handy Z-encoded names. --------- */
+
+/* --- Soname of the standard C library. --- */
+
+#if defined(VGO_linux)
+# define m_libc_soname libcZdsoZa // libc.so*
+#elif defined(VGP_ppc32_aix5)
+ /* AIX has both /usr/lib/libc.a and /usr/lib/libc_r.a. */
+# define m_libc_soname libcZaZdaZLshrZdoZR // libc*.a(shr.o)
+#elif defined(VGP_ppc64_aix5)
+# define m_libc_soname libcZaZdaZLshrZu64ZdoZR // libc*.a(shr_64.o=
)
+#else
+# error "Unknown platform"
+#endif
+
+/* --- Sonames for Linux ELF linkers. --- */
+
#define m_ld_linux_so_2 ldZhlinuxZdsoZd2 // ld-linux.=
so.2
#define m_ld_linux_x86_64_so_2 ldZhlinuxZhx86Zh64ZdsoZd2 // ld-linux-=
x86-64.so.2
#define m_ld64_so_1 ld64ZdsoZd1 // ld64.so.1
@@ -139,8 +154,8 @@
}
=20
// Apparently rindex() is the same thing as strrchr()
-STRRCHR(m_libc_so_star, strrchr)
-STRRCHR(m_libc_so_star, rindex)
+STRRCHR(m_libc_soname, strrchr)
+STRRCHR(m_libc_soname, rindex)
STRRCHR(m_ld_linux_so_2, rindex)
=20
=20
@@ -158,10 +173,10 @@
}
=20
// Apparently index() is the same thing as strchr()
-STRCHR(m_libc_so_star, strchr)
+STRCHR(m_libc_soname, strchr)
STRCHR(m_ld_linux_so_2, strchr)
STRCHR(m_ld_linux_x86_64_so_2, strchr)
-STRCHR(m_libc_so_star, index)
+STRCHR(m_libc_soname, index)
STRCHR(m_ld_linux_so_2, index)
STRCHR(m_ld_linux_x86_64_so_2, index)
=20
@@ -187,7 +202,7 @@
return dst_orig; \
}
=20
-STRCAT(m_libc_so_star, strcat)
+STRCAT(m_libc_soname, strcat)
=20
=20
#define STRNCAT(soname, fnname) \
@@ -215,9 +230,9 @@
return dst_orig; \
}
=20
-STRNCAT(m_libc_so_star, strncat)
- =20
+STRNCAT(m_libc_soname, strncat)
=20
+
#define STRNLEN(soname, fnname) \
SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* str, SizeT =
n ); \
SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* str, SizeT =
n ) \
@@ -227,7 +242,7 @@
return i; \
}
=20
-STRNLEN(m_libc_so_star, strnlen)
+STRNLEN(m_libc_soname, strnlen)
=20
=20
// Note that this replacement often doesn't get used because gcc inlines
@@ -243,11 +258,11 @@
return i; \
}
=20
-STRLEN(m_libc_so_star, strlen)
+STRLEN(m_libc_soname, strlen)
STRLEN(m_ld_linux_so_2, strlen)
STRLEN(m_ld_linux_x86_64_so_2, strlen)
- =20
=20
+
#define STRCPY(soname, fnname) \
char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char*=
src ); \
char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char*=
src ) \
@@ -269,7 +284,7 @@
return dst_orig; \
}
=20
-STRCPY(m_libc_so_star, strcpy)
+STRCPY(m_libc_soname, strcpy)
=20
=20
#define STRNCPY(soname, fnname) \
@@ -292,7 +307,7 @@
return dst_orig; \
}
=20
-STRNCPY(m_libc_so_star, strncpy)
+STRNCPY(m_libc_soname, strncpy)
=20
=20
#define STRNCMP(soname, fnname) \
@@ -315,7 +330,7 @@
} \
}
=20
-STRNCMP(m_libc_so_star, strncmp)
+STRNCMP(m_libc_soname, strncmp)
=20
=20
#define STRCMP(soname, fnname) \
@@ -338,7 +353,7 @@
return 0; \
}
=20
-STRCMP(m_libc_so_star, strcmp)
+STRCMP(m_libc_soname, strcmp)
STRCMP(m_ld_linux_x86_64_so_2, strcmp)
STRCMP(m_ld64_so_1, strcmp)
=20
@@ -355,7 +370,7 @@
return NULL; \
}
=20
-MEMCHR(m_libc_so_star, memchr)
+MEMCHR(m_libc_soname, memchr)
=20
=20
#define MEMCPY(soname, fnname) \
@@ -403,10 +418,10 @@
return dst; \
}
=20
-MEMCPY(m_libc_so_star, memcpy)
-MEMCPY(m_ld_so_1, memcpy) /* ld.so.1 */
- =20
+MEMCPY(m_libc_soname, memcpy)
+MEMCPY(m_ld_so_1, memcpy) /* ld.so.1 */
=20
+
#define MEMCMP(soname, fnname) \
int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
( const void *s1V, const void *s2V, SizeT n ); \
@@ -432,8 +447,8 @@
return 0; \
}
=20
-MEMCMP(m_libc_so_star, memcmp)
-MEMCMP(m_libc_so_star, bcmp)
+MEMCMP(m_libc_soname, memcmp)
+MEMCMP(m_libc_soname, bcmp)
=20
=20
/* Copy SRC to DEST, returning the address of the terminating '\0' in
@@ -459,7 +474,7 @@
return dst; \
}
=20
-STPCPY(m_libc_so_star, stpcpy)
+STPCPY(m_libc_soname, stpcpy)
STPCPY(m_ld_linux_so_2, stpcpy)
STPCPY(m_ld_linux_x86_64_so_2, stpcpy)
=20
@@ -476,7 +491,7 @@
return s; \
}
=20
-MEMSET(m_libc_so_star, memset)
+MEMSET(m_libc_soname, memset)
=20
=20
#define MEMMOVE(soname, fnname) \
@@ -500,7 +515,7 @@
return dst; \
}
=20
-MEMMOVE(m_libc_so_star, memmove)
+MEMMOVE(m_libc_soname, memmove)
=20
=20
/* Find the first occurrence of C in S or the final NUL byte. */
@@ -517,7 +532,7 @@
} \
}
=20
-GLIBC232_STRCHRNUL(m_libc_so_star, strchrnul)
+GLIBC232_STRCHRNUL(m_libc_soname, strchrnul)
=20
=20
/* Find the first occurrence of C in S. */
@@ -533,9 +548,311 @@
} \
}
=20
-GLIBC232_RAWMEMCHR(m_libc_so_star, rawmemchr)
+GLIBC232_RAWMEMCHR(m_libc_soname, rawmemchr)
=20
=20
+/*------------------------------------------------------------*/
+/*--- AIX stuff only after this point ---*/
+/*------------------------------------------------------------*/
+
+/* Generate replacements for strcat, strncat, strcpy, strncpy,
+ in the given soname. */
+#define Str4FNs(_soname) \
+ STRCAT(_soname, strcat) \
+ STRNCAT(_soname, strncat) \
+ STRCPY(_soname, strcpy) \
+ STRNCPY(_soname, strncpy)
+
+#if defined(VGP_ppc32_aix5)
+Str4FNs(NONE) /* in main exe */
+Str4FNs(libCZdaZLshrcoreZdoZR) /* libC.a(shrcore.o) */
+Str4FNs(libX11ZdaZLshr4ZdoZR) /* libX11.a(shr4.o) */
+Str4FNs(libXmZdaZLshrZaZdoZR) /* libXm.a(shr*.o) */
+Str4FNs(libXtZdaZLshr4ZdoZR) /* libXt.a(shr4.o) */
+Str4FNs(libppeZurZdaZLdynamicZdoZR) /* libppe_r.a(dynamic.o) */
+Str4FNs(libodmZdaZLshrZdoZR) /* libodm.a(shr.o) */
+Str4FNs(libmpiZurZdaZLmpicoreZurZdoZR) /* libmpi_r.a(mpicore_r.o) */
+Str4FNs(libmpiZurZdaZLmpipoeZurZdoZR) /* libmpi_r.a(mpipoe_r.o) */
+Str4FNs(libmpiZurZdaZLmpciZurZdoZR) /* libmpi_r.a(mpci_r.o) */
+Str4FNs(libslurmZdso) /* libslurm.so */
+Str4FNs(libglibZdso) /* libglib.so */
+Str4FNs(libIMZdaZLshrZdoZR) /* libIM.a(shr.o) */
+Str4FNs(libiconvZdaZLshr4ZdoZR) /* libiconv.a(shr4.o) */
+Str4FNs(libGLZdaZLshrZdoZR) /* libGL.a(shr.o) */
+Str4FNs(libgdkZdso) /* libgdk.so */
+Str4FNs(libcursesZdaZLshr42ZdoZR) /* libcurses.a(shr42.o) */
+Str4FNs(libqtZda) /* libqt.a */
+#endif
+#if defined(VGP_ppc64_aix5)
+Str4FNs(NONE) /* in main exe */
+Str4FNs(libX11ZdaZLshrZu64ZdoZR) /* libX11.a(shr_64.o) */
+Str4FNs(libGLZdaZLshrZu64ZdoZR) /* libGL.a(shr_64.o) */
+Str4FNs(libppeZurZdaZLdynamic64ZdoZR) /* libppe_r.a(dynamic64.o) */
+Str4FNs(libodmZdaZLshrZu64ZdoZR) /* libodm.a(shr_64.o) */
+Str4FNs(libmpiZurZdaZLmpicore64ZurZdoZR) /* libmpi_r.a(mpicore64_r.o) *=
/
+Str4FNs(libmpiZurZdaZLmpipoe64ZurZdoZR) /* libmpi_r.a(mpipoe64_r.o) */
+Str4FNs(libCZdaZLshrcoreZu64ZdoZR) /* libC.a(shrcore_64.o) */
+Str4FNs(libmpiZurZdaZLmpci64ZurZdoZR) /* libmpi_r.a(mpci64_r.o) */
+Str4FNs(libqtZda) /* libqt.a */
+#endif
+
+
+/* AIX's libm contains a sqrt implementation which does a nasty thing:
+ it loads the initial estimate of the root into a FP register, but
+ only the upper half of the number is initialised data. Hence the
+ least significant 32 mantissa bits are undefined, and it then uses
+ Newton-Raphson iteration to compute the final, defined result.
+ This fools memcheck completely; the only solution I can think of is
+ provide our own substitute. The _FAST variant is almost right
+ except the result is not correctly rounded. The _EXACT variant,
+ which is selected by default, is always right; but it's also pretty
+ darn slow. */
+
+#if defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
+#define SQRT_FAST(soname, fnname) \
+ double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ); \
+ double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ) \
+ { \
+ static UInt T1[32] =3D \
+ { 0, 1024, 3062, 5746, 9193, 13348, \
+ 18162, 23592, 29598, 36145, 43202, 50740, \
+ 58733, 67158, 75992, 85215, 83599, 71378, \
+ 60428, 50647, 41945, 34246, 27478, 21581, \
+ 16499, 12183, 8588, 5674, 3403, 1742, \
+ 661, 130 }; \
+ UInt x0, x1, sign, expo, mant0, bIGENDIAN =3D 1; \
+ union { UInt w[2]; double d; } u; \
+ u.d =3D x; \
+ x0 =3D u.w[1 - bIGENDIAN]; /* high half */ \
+ x1 =3D u.w[bIGENDIAN]; /* low half */ \
+ sign =3D x0 >> 31; \
+ expo =3D (x0 >> 20) & 0x7FF; \
+ mant0 =3D x0 & 0xFFFFF; \
+ if ( (sign =3D=3D 0 && expo >=3D 1 && expo <=3D 0x7FE) /* +normal =
*/ \
+ || (sign =3D=3D 0 && expo =3D=3D 0 \
+ && (mant0 | x1) > 0) /* +denorm */) { \
+ /* common case; do Newton-Raphson */ \
+ /* technically k should be signed int32, but since we're \
+ always entering here with x > 0, doesn't matter that it's \
+ unsigned. */ \
+ double y; \
+ UInt k =3D (x0>>1) + 0x1ff80000; \
+ u.w[1 - bIGENDIAN] =3D k - T1[31&(k>>15)]; \
+ u.w[bIGENDIAN] =3D 0; \
+ y =3D u.d; \
+ y =3D (y+x/y)/2.0 ; \
+ y =3D (y+x/y)/2.0 ; \
+ y =3D y-(y-x/y)/2.0 ; \
+ return y; \
+ } \
+ if ( (sign =3D=3D 1 && expo >=3D 1 && expo <=3D 0x7FE) /* -normal =
*/ \
+ || (sign =3D=3D 1 && expo =3D=3D 0 \
+ && (mant0 | x1) > 0) /* -denorm */) { \
+ u.w[1 - bIGENDIAN] =3D 0xFFF00000; \
+ u.w[bIGENDIAN] =3D 0x1; \
+ return u.d; /* -Inf -> NaN */ \
+ } \
+ if ((expo | mant0 | x1) =3D=3D 0) \
+ return x; /* +/-zero -> self */ \
+ if (expo =3D=3D 0x7FF && (mant0 | x1) =3D=3D 0) { \
+ if (sign =3D=3D 0) \
+ return x; /* +Inf -> self */ \
+ u.w[1 - bIGENDIAN] =3D 0xFFF00000; \
+ u.w[bIGENDIAN] =3D 0x1; \
+ return u.d; /* -Inf -> NaN */ \
+ } \
+ /* must be +/- NaN */ \
+ return x; /* +/-NaN -> self */ \
+ }
+
+#define SQRT_EXACT(soname, fnname) \
+ /* \
+ * =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D \
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. =
\
+ * \
+ * Developed at SunPro, a Sun Microsystems, Inc. business. \
+ * Permission to use, copy, modify, and distribute this \
+ * software is freely granted, provided that this notice \
+ * is preserved. \
+ * =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D \
+ */ \
+ /* \
+ * Return correctly rounded sqrt. \
+ * ------------------------------------------ \
+ * | Use the hardware sqrt if you have one | \
+ * ------------------------------------------ \
+ * Method: \
+ * Bit by bit method using integer arithmetic. (Slow, but portable)=
\
+ * 1. Normalization \
+ * Scale x to y in [1,4) with even powers of 2: \
+ * find an integer k such that 1 <=3D (y=3Dx*2^(2k)) < 4, then =
\
+ * sqrt(x) =3D 2^k * sqrt(y) \
+ * 2. Bit by bit computation \
+ * Let q =3D sqrt(y) truncated to i bit after binary point (q =3D=
1), \
+ * i 0 \
+ * i+1 2 \
+ * s =3D 2*q , and y =3D 2 * ( y - q ). (=
1) \
+ * i i i i \
+ * \
+ * To compute q from q , one checks whether \
+ * i+1 i \
+ * \
+ * -(i+1) 2 \
+ * (q + 2 ) <=3D y. (2)=
\
+ * i \
+ * -(i+1) =
\
+ * If (2) is false, then q =3D q ; otherwise q =3D q + 2 =
. \
+ * i+1 i i+1 i \
+ * \
+ * With some algebric manipulation, it is not difficult to see \
+ * that (2) is equivalent to \
+ * -(i+1) \
+ * s + 2 <=3D y (3)=
\
+ * i i \
+ * \
+ * The advantage of (3) is that s and y can be computed by \
+ * i i \
+ * the following recurrence formula: \
+ * if (3) is false \
+ * \
+ * s =3D s , y =3D y ; (=
4) \
+ * i+1 i i+1 i \
+ * \
+ * otherwise, \
+ * -i -(i+1) \
+ * s =3D s + 2 , y =3D y - s - 2 (=
5) \
+ * i+1 i i+1 i i \
+ * \
+ * \
+ * One may easily use induction to prove (4) and (5). \
+ * Note. Since the left hand side of (3) contain only i+2 bits, =
\
+ * it does not necessary to do a full (53-bit) comparison =
\
+ * in (3). \
+ * 3. Final rounding \
+ * After generating the 53 bits result, we compute one more bit.=
\
+ * Together with the remainder, we can decide whether the \
+ * result is exact, bigger than 1/2ulp, or less than 1/2ulp \
+ * (it will never equal to 1/2ulp). \
+ * The rounding mode can be detected by checking whether \
+ * huge + tiny is equal to huge, and whether huge - tiny is \
+ * equal to huge for some floating point number "huge" and "tiny=
". \
+ * \
+ * Special cases: \
+ * sqrt(+-0) =3D +-0 ... exact \
+ * sqrt(inf) =3D inf \
+ * sqrt(-ve) =3D NaN ... with invalid signal \
+ * sqrt(NaN) =3D NaN ... with invalid signal for signali=
ng NaN \
+ * \
+ */ \
+ double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ); \
+ double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ) \
+ { \
+ const Int bIGENDIAN =3D 1; \
+ const double one =3D 1.0, tiny=3D1.0e-300; \
+ double z; \
+ Int sign =3D (Int)0x80000000; \
+ Int ix0,s0,q,m,t,i; \
+ UInt r,t1,s1,ix1,q1; \
+ union { UInt w[2]; double d; } u; \
+ u.d =3D x; \
+ ix0 =3D u.w[1-bIGENDIAN]; \
+ ix1 =3D u.w[bIGENDIAN]; \
+ \
+ /* take care of Inf and NaN */ \
+ if((ix0&0x7ff00000)=3D=3D0x7ff00000) { \
+ return x*x+x; /* sqrt(NaN)=3DNaN, sqrt(+inf)=3D+i=
nf \
+ sqrt(-inf)=3DsNaN */ \
+ } \
+ /* take care of zero */ \
+ if(ix0<=3D0) { \
+ if(((ix0&(~sign))|ix1)=3D=3D0) return x;/* sqrt(+-0) =3D +-0 */=
\
+ else if(ix0<0) \
+ return (x-x)/(x-x); /* sqrt(-ve) =3D sNaN */ \
+ } \
+ /* normalize x */ \
+ m =3D (ix0>>20); \
+ if(m=3D=3D0) { /* subnormal x */ \
+ while(ix0=3D=3D0) { \
+ m -=3D 21; \
+ ix0 |=3D (ix1>>11); ix1 <<=3D 21; \
+ } \
+ for(i=3D0;(ix0&0x00100000)=3D=3D0;i++) ix0<<=3D1; \
+ m -=3D i-1; \
+ ix0 |=3D (ix1>>(32-i)); \
+ ix1 <<=3D i; \
+ } \
+ m -=3D 1023; /* unbias exponent */ \
+ ix0 =3D (ix0&0x000fffff)|0x00100000; \
+ if(m&1){ /* odd m, double x to make it even */ \
+ ix0 +=3D ix0 + ((ix1&sign)>>31); \
+ ix1 +=3D ix1; \
+ } \
+ m >>=3D 1; /* m =3D [m/2] */ \
+ /* generate sqrt(x) bit by bit */ \
+ ix0 +=3D ix0 + ((ix1&sign)>>31); \
+ ix1 +=3D ix1; \
+ q =3D q1 =3D s0 =3D s1 =3D 0; /* [q,q1] =3D sqrt(x) */ \
+ r =3D 0x00200000; /* r =3D moving bit from right to left *=
/ \
+ while(r!=3D0) { \
+ t =3D s0+r; \
+ if(t<=3Dix0) { \
+ s0 =3D t+r; \
+ ix0 -=3D t; \
+ q +=3D r; \
+ } \
+ ix0 +=3D ix0 + ((ix1&sign)>>31); \
+ ix1 +=3D ix1; \
+ r>>=3D1; \
+ } \
+ r =3D sign; \
+ while(r!=3D0) { \
+ t1 =3D s1+r; \
+ t =3D s0; \
+ if((t<ix0)||((t=3D=3Dix0)&&(t1<=3Dix1))) { \
+ s1 =3D t1+r; \
+ if(((t1&sign)=3D=3Dsign)&&(s1&sign)=3D=3D0) s0 +=3D 1; \
+ ix0 -=3D t; \
+ if (ix1 < t1) ix0 -=3D 1; \
+ ix1 -=3D t1; \
+ q1 +=3D r; \
+ } \
+ ix0 +=3D ix0 + ((ix1&sign)>>31); \
+ ix1 +=3D ix1; \
+ r>>=3D1; \
+ } \
+ /* use floating add to find out rounding direction */ \
+ if((ix0|ix1)!=3D0) { \
+ z =3D one-tiny; /* trigger inexact flag */ \
+ if (z>=3Done) { \
+ z =3D one+tiny; \
+ if (q1=3D=3D(UInt)0xffffffff) { q1=3D0; q +=3D 1;} \
+ else if (z>one) { \
+ if (q1=3D=3D(UInt)0xfffffffe) q+=3D1; \
+ q1+=3D2; \
+ } else \
+ q1 +=3D (q1&1); \
+ } \
+ } \
+ ix0 =3D (q>>1)+0x3fe00000; \
+ ix1 =3D q1>>1; \
+ if ((q&1)=3D=3D1) ix1 |=3D sign; \
+ ix0 +=3D (m <<20); \
+ ix0 =3D u.w[1-bIGENDIAN] =3D ix0; \
+ ix1 =3D u.w[bIGENDIAN] =3D ix1; \
+ z =3D u.d; \
+ return z; \
+ }
+
+#if 0
+SQRT_FAST(NONE, sqrt) /* xlC generates these */
+SQRT_FAST(NONE, _sqrt) /* xlf generates these */
+#else
+SQRT_EXACT(NONE, sqrt) /* xlC generates these */
+SQRT_EXACT(NONE, _sqrt) /* xlf generates these */
+#endif
+
+#endif /* defined(VGP_ppc32_aix5) */
+
/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/
|