[Mesa3d-dev] IFLOOR(1.99999) == 2

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

The current IFLOOR/ICEIL get the wrong answer for any odd integer
plus .99999 (for example, IFLOOR(1.99999) returns 2).

Here are some better routines.  IFLOOR_4M and ICEIL_4M are 70% faster
but accept a smaller input range (-4194304 to 4194303).

Josh

/*
 Floor of 'f' as an int.

 The value is first converted with IROUND (defined elsewhere; presumably
 the fast round-to-nearest conversion).  If that conversion landed above
 'f', the result is stepped down by one; otherwise it is returned as is.
*/
static inline int
IFLOOR(float f)
{
	int rounded = IROUND(f);

	if (rounded > f) {
		/* Rounding went up past 'f'; the floor is one lower. */
		return rounded - 1;
	}
	return rounded;
}

/*
 Fast floor for machines whose double -> float conversion rounds to
 nearest (ties to even).

 'f' must be between -4194304 and 4194303.

 Conceptually this computes "(iround(f + .5) + iround(f - .5)) >> 1".
 Instead of calling a rounding routine, both (bias + f) and (bias - f)
 are narrowed to single precision, where bias = 1.5 * 2^23 + 0.5.  At
 that magnitude a float has a spacing of 1, so the narrowing itself does
 the rounding and the rounded integer shows up in the low bits of the
 float's bit pattern.  Subtracting the two bit patterns cancels the
 shared bias and exponent bits.

 Assumes IEEE-754 single precision, an int of the same size as float,
 and an arithmetic right shift for negative values.
*/
static inline int
IFLOOR_4M(float f)
{
	const double bias = (3 << 22) + 0.5;
	double above = bias + (double)f;
	double below = bias - (double)f;
	int above_bits, below_bits;

#if defined(__GNUC__) && defined(__i386__)
	/*
	 Store straight from the x87 stack as single precision; GCC
	 generates an extra fstp/fld without this.
	*/
	asm ("fstps %0" : "=m" (above_bits) : "t" (above) : "st");
	asm ("fstps %0" : "=m" (below_bits) : "t" (below) : "st");
#else
	{
		/* Narrow to float, then read the raw bits back as an int. */
		union { int i; float f; } pun;

		pun.f = above;
		above_bits = pun.i;
		pun.f = below;
		below_bits = pun.i;
	}
#endif

	/* The difference is iround(f + .5) + iround(f - .5); halve it. */
	return (above_bits - below_bits) >> 1;
}

/*
 Ceiling of 'f' as an int.

 The value is first converted with IROUND (defined elsewhere; presumably
 the fast round-to-nearest conversion).  If that conversion landed below
 'f', the result is stepped up by one; otherwise it is returned as is.
*/
static inline int
ICEIL(float f)
{
	int rounded = IROUND(f);

	if (rounded < f) {
		/* Rounding went down past 'f'; the ceiling is one higher. */
		return rounded + 1;
	}
	return rounded;
}

/*
 Fast ceil for machines whose double -> float conversion rounds to
 nearest (ties to even).

 'f' must be between -4194304 and 4194303.

 Conceptually this computes
 "(iround(f + .5) + iround(f - .5) + 1) >> 1".
 Instead of calling a rounding routine, both (bias + f) and (bias - f)
 are narrowed to single precision, where bias = 1.5 * 2^23 + 0.5.  At
 that magnitude a float has a spacing of 1, so the narrowing itself does
 the rounding and the rounded integer shows up in the low bits of the
 float's bit pattern.  Subtracting the two bit patterns cancels the
 shared bias and exponent bits.

 Assumes IEEE-754 single precision, an int of the same size as float,
 and an arithmetic right shift for negative values.
*/
static inline int
ICEIL_4M(float f)
{
	const double bias = (3 << 22) + 0.5;
	double above = bias + (double)f;
	double below = bias - (double)f;
	int above_bits, below_bits;

#if defined(__GNUC__) && defined(__i386__)
	/*
	 Store straight from the x87 stack as single precision; GCC
	 generates an extra fstp/fld without this.
	*/
	asm ("fstps %0" : "=m" (above_bits) : "t" (above) : "st");
	asm ("fstps %0" : "=m" (below_bits) : "t" (below) : "st");
#else
	{
		/* Narrow to float, then read the raw bits back as an int. */
		union { int i; float f; } pun;

		pun.f = above;
		above_bits = pun.i;
		pun.f = below;
		below_bits = pun.i;
	}
#endif

	/* Add one before halving so that fractional inputs round upward. */
	return (above_bits - below_bits + 1) >> 1;
}