From: Josh V. <ho...@na...> - 2001-05-13 02:54:20
|
/*
 * The current IFLOOR/ICEIL gets the wrong answer on .99999 + any odd
 * integer.  Here are some better routines.  IFLOOR_4M and ICEIL_4M are
 * 70% faster but have less range.
 *
 * Josh
 */

/*
 * Floor of 'f' as an int.
 *
 * Use fast rounding mode and correct result afterward: IROUND (defined
 * elsewhere) produces a rounded integer, which is stepped down by one
 * when it ended up above 'f'.
 * NOTE(review): this relies on IROUND returning an integer within 1 of
 * 'f' -- confirm against the IROUND definition in use.
 */
static inline int IFLOOR(float f)
{
    int i = IROUND(f);

    return (i > f) ? i - 1 : i;
}

/*
 * IEEE floor for computers that round to nearest or even.
 * 'f' must be between -4194304 and 4194303.
 *
 * This floor operation is done by
 *     "(iround(f + .5) + iround(f - .5)) >> 1",
 * but uses some IEEE specific tricks for better speed: the bias
 * (3 << 22) is 1.5 * 2^23, and single-precision values in
 * [2^23, 2^24) are spaced exactly 1 apart, so storing "bias + 0.5 +/- f"
 * as a float rounds it to an integer whose offset from the bias can be
 * read straight out of the float's bit pattern.  Subtracting the two bit
 * patterns cancels the bias and leaves
 * iround(f + .5) - iround(.5 - f).
 *
 * Assumes 32-bit int, 32-bit IEEE float, and an arithmetic right shift
 * for negative ints (implementation-defined in C).
 */
static inline int IFLOOR_4M(float f)
{
    int ai, bi, r;
    double af, bf;

    af = (3 << 22) + 0.5 + (double)f;
    bf = (3 << 22) + 0.5 - (double)f;
#if defined(__GNUC__) && defined(__i386__)
    /* GCC generates an extra fstp/fld without this. */
    __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
    __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
#else
    {
        /* Round to single precision, then reinterpret the bits. */
        union { int i; float f; } u;

        u.f = (float)af;
        ai = u.i;
        u.f = (float)bf;
        bi = u.i;
    }
#endif
    r = (ai - bi) >> 1;

    /*
     * A non-zero 'f' of very small magnitude is lost entirely when it is
     * added to the bias, so both sums round to the same value and r comes
     * out as 0 even for tiny negative 'f' (whose floor is -1).  Step down
     * when the result overshot; this never fires when r is already the
     * correct floor.
     */
    return (r > f) ? r - 1 : r;
}

/*
 * Ceiling of 'f' as an int.
 *
 * Use fast rounding mode and correct result afterward: IROUND (defined
 * elsewhere) produces a rounded integer, which is stepped up by one when
 * it ended up below 'f'.
 * NOTE(review): this relies on IROUND returning an integer within 1 of
 * 'f' -- confirm against the IROUND definition in use.
 */
static inline int ICEIL(float f)
{
    int i = IROUND(f);

    return (i < f) ? i + 1 : i;
}

/*
 * IEEE ceil for computers that round to nearest or even.
 * 'f' must be between -4194304 and 4194303.
 *
 * This ceil operation is done by
 *     "(iround(f + .5) + iround(f - .5) + 1) >> 1",
 * but uses some IEEE specific tricks for better speed: the bias
 * (3 << 22) is 1.5 * 2^23, and single-precision values in
 * [2^23, 2^24) are spaced exactly 1 apart, so storing "bias + 0.5 +/- f"
 * as a float rounds it to an integer whose offset from the bias can be
 * read straight out of the float's bit pattern.
 *
 * Assumes 32-bit int, 32-bit IEEE float, and an arithmetic right shift
 * for negative ints (implementation-defined in C).
 */
static inline int ICEIL_4M(float f)
{
    int ai, bi, r;
    double af, bf;

    af = (3 << 22) + 0.5 + (double)f;
    bf = (3 << 22) + 0.5 - (double)f;
#if defined(__GNUC__) && defined(__i386__)
    /* GCC generates an extra fstp/fld without this. */
    __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
    __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
#else
    {
        /* Round to single precision, then reinterpret the bits. */
        union { int i; float f; } u;

        u.f = (float)af;
        ai = u.i;
        u.f = (float)bf;
        bi = u.i;
    }
#endif
    r = (ai - bi + 1) >> 1;

    /*
     * A non-zero 'f' of very small magnitude is lost entirely when it is
     * added to the bias, so r comes out as 0 even for tiny positive 'f'
     * (whose ceiling is 1).  Step up when the result undershot; this
     * never fires when r is already the correct ceiling.
     */
    return (r < f) ? r + 1 : r;
}