/* This file is part of libcoding, a library handling (re-)coding of
* datatypes with arithmetic, huffman and various other coders
* Copyright (C) 2004-2011 Niels Fröhling
*
* Interval-Arithmetic.hpp: class for interval-arithmetics
* Copyright 2004-2011 Niels Fröhling
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef INTERVAL_ARITHMETIC_HPP
#define INTERVAL_ARITHMETIC_HPP
#define DATATYPES_INCLUDE_0D
#include "libdatatypes.hpp"
#define INTERVAL_ARITHMETIC_RECIPROCAL
#undef INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
#define INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
#undef prolog_first
#define prolog_first()
#undef prolog_after
#define prolog_after()
#if defined(COMPILER_NOASMSSE42b)
#define popcnt error
#define lzcnt error
#endif
/* this should not be disabled in release mode */
#if !defined(COMPILER_NOASMX86)
#ifdef NDEBUG
#undef prolog_first
#define prolog_first() \
/* __asm mov edx, edx *; ivl */ \
__asm lea esi, [ecx]this.ast /*; this*/
#undef prolog_after
#define prolog_after() \
__asm mov ecx, this /*; this*/ \
__asm mov edx, ivl /*; ivl */ \
__asm lea esi, [ecx]this.ast /*; this*/
#endif
/* make things short:
*
* a) ebp as base-register requirees an offset in the encoding
* as an additional register not!, so [ebp+edx] is 1 byte longer than [edx+ebp]
* and the assembler does NOT! choose the shorter form automatically
* b) adc/add/and/cmp/or/sbb/sub to eax is 1 byte shorter
* adc/add/sbb/sub as 1 byte form is mostly not produced (because the imm is
* not the same size as the register)
*/
#define jnbe(label, pad, offset) \
/* __asm jnbe label */ \
__asm _emit 0x77 \
__asm _emit pad + offset
#define jnzo(label, pad, offset) \
/* __asm jnbe label */ \
__asm _emit 0x75 \
__asm _emit pad + offset
#define jnzf(label, pad, offset) \
/* __asm jnbe label */ \
__asm _emit 0x0F \
__asm _emit 0x85 \
__asm _emit pad + offset \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00
#define jncy(label, pad, offset) \
/* __asm jnbe label */ \
__asm _emit 0x73 \
__asm _emit pad + offset
#define jncf(label, pad, offset) \
/* __asm jnbe label */ \
__asm _emit 0x0F \
__asm _emit 0x83 \
__asm _emit pad + offset \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00
#define jmpn(label, pad, offset) \
/* __asm jmp label */ \
__asm _emit 0xEB \
__asm _emit pad + offset
#define jmpf(label, pad, offset) \
/* __asm jmp label */ \
__asm _emit 0xE9 \
__asm _emit pad + offset \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00
#define jmpF(label, pad, off1, off2) \
/* __asm jmp label */ \
__asm _emit 0xE9 \
__asm _emit pad + off1 \
__asm _emit pad + off2 \
__asm _emit 0x00 \
__asm _emit 0x00
#define pad0() \
#define pad3() \
/* __asm align 16 ; 32 */ \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00
#define pad5() \
/* __asm align 16 ; 32 */ \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00
#define pad9() \
/* __asm align 16 ; 32 */ \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00 \
__asm _emit 0x00
#define labl(label, _pad_) \
/*label: */ \
pad##_pad_()
#else
/* make things short:
*
* a) ebp as base-register requirees an offset in the encoding
* as an additional register not!, so [ebp+edx] is 1 byte longer than [edx+ebp]
* and the assembler does NOT! choose the shorter form automatically
* b) add to eax is 1 byte shorter
*/
#define jnbe(label, pad, offset) \
__asm jnbe label
#define jnzo(label, pad, offset) \
__asm jnz label
#define jnzf(label, pad, offset) \
__asm jnz label
#define jncy(label, pad, offset) \
__asm jnc label
#define jncf(label, pad, offset) \
__asm jnc label
#define jmpn(label, pad, offset) \
__asm jmp label
#define jmpf(label, pad, offset) \
__asm jmp label
#define jmpF(label, pad, off1, off2) \
__asm jmp label
#define labl(label, _pad_) \
label:
#endif
#if defined(INTERVAL_ARITHMETIC_RECIPROCAL)
#define rec_prolog_first() prolog_first()
#define rec_prolog_after() prolog_after()
#define div_prolog_first() 0
#define div_prolog_after() 0
#else
#define rec_prolog_first() 0
#define rec_prolog_after() 0
#define div_prolog_first() prolog_first()
#define div_prolog_after() prolog_after()
#endif
/* =============================================================================
*/
template<typename ityp>
class IntervalArithmetic {
public:
/* dtyp = destination datatype
* atyp = accumulator datatype
*
* reg = arithmetic range structure
* ivl = interval structure
*/
IntervalArithmetic() {}
public:
// powerof2:
// f = (v & (v - 1)) == 0;
template<typename typ>
static forceinline
typ zeroBits(typ n);
template<typename typ>
static forceinline
typ oneBits(typ n);
#if !defined(COMPILER_TARGETX86)
const unsigned short int BitCounts[256];
#endif
/* ------------------------------------------------------------------------ */
/* Determine number of leading zero bits on unsigned value */
template<>
static forceinline
unsigned short int zeroBits<unsigned short int>(unsigned short int n) {
unsigned short int z;
#if defined(COMPILER_TARGETSSE42b) && !defined(__GNUC__)
// fast hardware support
// __asm {
// lzcnt ax, n
// }
z = __lzcnt16(n);
#elif defined(COMPILER_TARGETX86) && defined(__GNUC__)
// fast hardware support
// __asm {
// lzcnt ax, n
// }
// z = __builtin_clzs(n);
z = __builtin_clz((unsigned int)n) - 16;
#elif defined(COMPILER_TARGETX86)
// slow hardware support
// __asm {
// mov cx, -1
// mov ax, 15
// bsr cx, n
// sub ax, cx
// }
// temporaries
DWORD t;
if (!_BitScanReverse(&t, n))
t = -1;
z = (unsigned short int)(15 - t);
#else
#if 0
// slow non-hardware support
z = 0;
while ((!(n & 0x8000)) && (z < 16))
z++, n <<= 1;
#else
// fast non-hardware support
// temporaries
unsigned short int t;
z = (t = n >> 8) ? 0 + BitCounts[t] : 8 + BitCounts[n];
#endif
#endif
assume(z >= 0);
assume(z <= 16);
return z;
}
/* Determine number of leading zero bits on unsigned value */
template<>
static forceinline
unsigned long int zeroBits<unsigned long int>(unsigned long int n) {
unsigned long int z;
#if defined(COMPILER_TARGETSSE42b) && !defined(__GNUC__)
// fast hardware support
// __asm {
// lzcnt eax, n
// }
z = __lzcnt(n);
#elif defined(COMPILER_TARGETX86) && defined(__GNUC__)
// fast hardware support
// __asm {
// lzcnt ax, n
// }
z = __builtin_clz(n);
#elif defined(COMPILER_TARGETX86)
// slow hardware support
// __asm {
// mov ecx, -1
// mov eax, 31
// bsr ecx, n
// sub eax, ecx
// }
// temporaries
DWORD t;
if (!_BitScanReverse(&t, n))
t = -1;
z = (unsigned long int)(31 - t);
#else
#if 0
// slow non-hardware support
z = 0;
while ((!(n & 0x80000000)) && (z < 32))
z++, n <<= 1;
#else
// fast non-hardware support
// temporaries
unsigned short int t;
if ((tt = n >> 16))
z = (t = tt >> 8) ? 0 + BitCounts[t] : 8 + BitCounts[tt];
else
z = (t = n >> 8) ? 16 + BitCounts[t] : 24 + BitCounts[n ];
#endif
#endif
assume(z >= 0);
assume(z <= 32);
return z;
}
/* Determine number of leading zero bits on unsigned value */
static forceinline
unsigned long int zeroBits(unsigned_int64 n) {
unsigned long int z;
#if defined(COMPILER_TARGETSSE42b) && !defined(__GNUC__) && defined(COMPILER_TARGETX64)
// fast hardware support
// __asm {
// lzcnt rax, n
// }
z = (unsigned long int)__lzcnt64(n);
#elif defined(COMPILER_TARGETSSE42b) && !defined(__GNUC__)
// fast hardware support
// __asm {
// lzcnt eax, n
// lzcnt eax, n
// }
z = __lzcnt((unsigned long int)(n >> 32));
if (z == 32)
z += __lzcnt((unsigned long int)(n >> 0));
#elif defined(COMPILER_TARGETX86) && defined(__GNUC__)
// fast hardware support
// __asm {
// lzcnt eax, n
// lzcnt eax, n
// }
z = __builtin_clzll(n);
#elif defined(COMPILER_TARGETX86)
// slow hardware support
// __asm {
// mov ecx, -1
// mov eax, 31
// bsr ecx, n
// sub eax, ecx
// }
// temporaries
DWORD t;
if (_BitScanReverse(&t, (n >> 32)))
t = t + 32;
else if (_BitScanReverse(&t, (n >> 0)))
t = t + 0;
else
t = -1;
z = (unsigned long int)(63 - t);
#else
#if 0
// slow non-hardware support
z = 0;
while ((!(n & 0x8000000000000000ULL)) && (z < 64))
z++, n <<= 1;
#else
// fast non-hardware support
// temporaries
unsigned short int t;
abort();
if ((tt = n >> 16))
z = (t = tt >> 8) ? 0 + Table256[t] : 8 + Table256[tt];
else
z = (t = n >> 8) ? 16 + Table256[t] : 24 + Table256[n ];
#endif
#endif
assume(z >= 0);
assume(z <= 64);
return z;
}
/* ------------------------------------------------------------------------ */
/* Determine number of one bits on unsigned value */
template<>
static forceinline
unsigned short int oneBits<unsigned short int>(unsigned short int n) {
unsigned short int p;
#if defined(COMPILER_TARGETSSE42b) && !defined(__GNUC__)
// __asm {
// popcnt ax, n
// }
p = __popcnt16(n);
#elif defined(COMPILER_TARGETSSE4) && !defined(__GNUC__)
p = _mm_popcnt_u32(n);
#elif defined(COMPILER_TARGETX86) && defined(__GNUC__)
// __asm {
// popcnt ax, n
// }
// p = __builtin_popcounts(n);
p = __builtin_popcount((unsigned long int)n);
#else
// slow non-hardware support
p = 0; int i = 0;
while ((i < 16)) {
i++; p += (n & 0x8000) ? 1 : 0; n <<= 1; }
#endif
assume(p >= 0);
assume(p <= 16);
return p;
}
/* Determine number of one bits on unsigned value */
template<>
static forceinline
unsigned long int oneBits<unsigned long int>(unsigned long int n) {
unsigned long int p;
#if defined(COMPILER_TARGETSSE42b) && !defined(__GNUC__)
// __asm {
// popcnt eax, n
// }
p = __popcnt(n);
#elif defined(COMPILER_TARGETSSE4) && !defined(__GNUC__)
p = _mm_popcnt_u32(n);
#elif defined(COMPILER_TARGETX86) && defined(__GNUC__)
// __asm {
// popcnt ax, n
// }
p = __builtin_popcount(n);
#else
// slow non-hardware support
p = 0; int i = 0;
while ((i < 32)) {
i++; p += (n & 0x80000000) ? 1 : 0; n <<= 1; }
#endif
assume(p >= 0);
assume(p <= 32);
return p;
}
/* Determine number of one bits on unsigned value */
static forceinline
unsigned long int oneBits(unsigned_int64 n) {
unsigned long int p;
#if defined(COMPILER_TARGETSSE42b) && !defined(__GNUC__) && defined(COMPILER_TARGETX64)
// __asm {
// popcnt rax, n
// }
p = (unsigned long int)
__popcnt64(n);
#elif defined(COMPILER_TARGETSSE42b) && !defined(__GNUC__)
// __asm {
// popcnt eax, n
// popcnt eax, n
// }
p =
__popcnt((unsigned long int)(n >> 32)) +
__popcnt((unsigned long int)(n >> 0));
#elif defined(COMPILER_TARGETSSE4) && !defined(__GNUC__) && defined(COMPILER_TARGETX64)
p =
_mm_popcnt_u64(n);
#elif defined(COMPILER_TARGETSSE4) && !defined(__GNUC__)
p =
_mm_popcnt_u32((unsigned long int)(n >> 32)) +
_mm_popcnt_u32((unsigned long int)(n >> 0));
#elif defined(COMPILER_TARGETX86) && defined(__GNUC__)
// __asm {
// popcnt ax, n
// }
p = __builtin_popcountll(n);
#else
// slow non-hardware support
p = 0; int i = 0;
while ((i < 64)) {
i++; p += (n & 0x8000000000000000ULL) ? 1 : 0; n <<= 1; }
#endif
assume(p >= 0);
assume(p <= 64);
return p;
}
/* ------------------------------------------------------------------------ */
static forceinline
unsigned short sign(unsigned short int v) {
return (signed short int)v >> 15;
}
static forceinline
unsigned long sign(unsigned long int v) {
return (signed long int)v >> 31;
}
static forceinline
unsigned_int64 sign(unsigned_int64 v) {
return (signed_int64)v >> 63;
}
/* ------------------------------------------------------------------------ */
static forceinline
unsigned long dn(unsigned short int v) {
return (unsigned long )(v) << 0;
}
static forceinline
unsigned_int64 dn(unsigned long int v) {
return (unsigned_int64)(v) << 0;
}
static forceinline
unsigned long up(unsigned short int v) {
return (unsigned long )(v) << 16;
}
static forceinline
unsigned_int64 up(unsigned long int v) {
return (unsigned_int64)(v) << 32;
}
static forceinline
unsigned_int64 up(unsigned_int64 v) {
return (v) << 32;
}
/* ------------------------------------------------------------------------ */
//static forceinline
//unsigned short sr(unsigned long int v, unsigned long int bits) {
// assert(bits <= 15);
// assume(bits <= 15);
//
// return (unsigned short)(v >> bits);
//}
//
//static forceinline
//unsigned long sr(unsigned_int64 v, unsigned long int bits) {
// assert(bits <= 31);
// assume(bits <= 31);
//
// return (hi(v) << (32 - bits)) + (lo(v) >> bits);
//}
static forceinline
unsigned_int64 sr(unsigned_int64 v, unsigned long int bits) {
#if !defined(COMPILER_TARGETX64)
assert(bits <= 31);
assume(bits <= 31);
#endif
return __ull_rshift(v, bits);
}
//static forceinline
//unsigned long sl(unsigned short int v, unsigned short int bits) {
// assert(bits <= 15);
// assume(bits <= 15);
//
// return (unsigned long)(v) << bits;
//}
static forceinline
unsigned_int64 sl(unsigned long int v, unsigned long int bits) {
#if !defined(COMPILER_TARGETX64)
assert(bits <= 31);
assume(bits <= 31);
return up(v >> (32 - bits)) + (v << bits);
#else
return __ll_lshift(v, bits);
#endif
}
static forceinline
unsigned_int64 sl(unsigned_int64 v, unsigned long int bits) {
#if !defined(COMPILER_TARGETX64)
assert(bits <= 31);
assume(bits <= 31);
#endif
return __ll_lshift(v, bits);
}
/* ------------------------------------------------------------------------ */
static forceinline
unsigned short lo(unsigned long int v) {
return (unsigned short)(v >> 0);
}
static forceinline
unsigned short hi(unsigned long int v) {
return (unsigned short)(v >> 16);
}
static forceinline
unsigned long lo(unsigned_int64 v) {
return (unsigned long)(v >> 0);
}
static forceinline
unsigned long hi(unsigned_int64 v) {
return (unsigned long)(v >> 32);
}
/* ------------------------------------------------------------------------ */
static forceinline
void divide(unsigned long int ÷nd , unsigned short int divisor) {
dividend /= divisor; }
static forceinline
void divide(unsigned long int ÷ndA, unsigned long int ÷ndB, unsigned short int divisor) {
dividendA /= divisor;
dividendB /= divisor; }
static forceinline
void divide(unsigned long int ÷nd , unsigned long int divisor) {
dividend /= divisor; }
static forceinline
void divide(unsigned long int ÷ndA, unsigned long int ÷ndB, unsigned long int divisor) {
dividendA /= divisor;
dividendB /= divisor; }
static forceinline
void divide(unsigned_int64 ÷nd , unsigned long int divisor) {
dividend /= divisor; }
static forceinline
void divide(unsigned_int64 ÷ndA, unsigned_int64 ÷ndB, unsigned long int divisor) {
dividendA /= divisor;
dividendB /= divisor; }
#if 0
static forceinline
void divide(unsigned_int64 ÷nd , unsigned_int64 divisor) {
dividend /= divisor; }
static forceinline
void divide(unsigned_int64 ÷ndA, unsigned_int64 ÷ndB, unsigned_int64 divisor) {
dividendA /= divisor;
dividendB /= divisor; }
#endif
/* ------------------------------------------------------------------------ */
static forceinline
unsigned long umul(unsigned short int a, unsigned short int b) {
return ((unsigned long int)b * a);
}
static forceinline
unsigned_int64 umul(unsigned short int a, unsigned long int b) {
return __emulu(b, a);
}
static forceinline
unsigned_int64 umul(unsigned long int a, unsigned long int b) {
return __emulu(b, a);
}
/* ------------------------------------------------------------------------ */
static forceinline
unsigned short umull(unsigned short int a, unsigned short int b) {
return (b *= a);
}
static forceinline
unsigned long umull(unsigned short int a, unsigned long int b) {
return (b *= a);
}
static forceinline
unsigned_int64 umull(unsigned short int a, unsigned_int64 b) {
#if !defined(COMPILER_TARGETX64)
return up(umull(a, hi(b))) +
umul (a, lo(b));
#else
return (b *= a);
#endif
}
static forceinline
unsigned long umull(unsigned long int a, unsigned long int b) {
return (b *= a);
}
static forceinline
unsigned_int64 umull(unsigned long int a, unsigned_int64 b) {
#if !defined(COMPILER_TARGETX64)
return up(umull(hi(b), a)) +
umul (lo(b), a);
#else
return (b *= a);
#endif
}
static forceinline
unsigned_int64 umull(unsigned_int64 a, unsigned_int64 b) {
#if !defined(COMPILER_TARGETX64)
return up(umull(hi(b), lo(a))) +
up(umull(lo(b), hi(a))) +
umul (lo(b), lo(a));
#else
return (b *= a);
#endif
}
/* ------------------------------------------------------------------------ */
static forceinline
unsigned short umulh(unsigned short int a, unsigned short int b) {
return ((unsigned long int)a * b) >> 16;
}
static forceinline
unsigned long umulh(unsigned long int a, unsigned short int b) {
return __emulu(a, b) >> 16;
}
static forceinline
unsigned long umulh(unsigned long int a, unsigned long int b) {
return __emulu(a, b) >> 32;
}
static forceinline
unsigned_int64 umulh(unsigned_int64 a, unsigned short int b) {
#if !defined(COMPILER_TARGETX64)
return
(__emulu(a >> 32, b) << 16) +
(__emulu(a >> 0, b) >> 16);
#else
unsigned_int64 c;
a = _umul128(a, b, &c);
return ((a >> 16) | (c << 48));
#endif
}
static forceinline
unsigned_int64 umulh(unsigned_int64 a, unsigned long int b) {
#if !defined(COMPILER_TARGETX64)
return
(__emulu(a >> 32, b) << 0) +
(__emulu(a >> 0, b) >> 32);
#else
unsigned_int64 c;
a = _umul128(a, b, &c);
return ((a >> 32) | (c << 32));
#endif
}
static forceinline
unsigned_int64 umulh(unsigned_int64 a, unsigned_int64 b) {
#if !defined(COMPILER_TARGETX64)
unsigned_int64 prodA =
(__emulu(a >> 32, b >> 32) >> 0);
unsigned_int64 prodB =
(__emulu(a >> 0, b >> 32) >> 0) +
(__emulu(a >> 32, b >> 0) >> 0);
unsigned_int64 prodC =
(__emulu(a >> 0, b >> 0) >> 0);
return prodA + ((prodB + (prodC >> 32)) >> 32);
#else
unsigned_int64 c;
a = _umul128(a, b, &c);
return c;
#endif
}
/*
public:
template<class state>
void shrink(state &st, const struct Interval *ivl) const {
}
template<class state>
void center(const state &st, struct Interval *ivl) const {
}
template<class state>
void expand(state &st, const struct Interval *ivl) const {
}
public:
template<class state>
void shrink(state &st, const struct BitInterval *ivl) const {
}
template<class state>
void center(const state &st, struct BitInterval *ivl) const {
}
template<class state>
void expand(const state &st, struct BitInterval *ivl) const {
}
*/
};
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL
/* =============================================================================
*/
template<typename ityp>
class loIntervalArithmetic;
template<typename ityp>
class lxIntervalArithmetic;
template<>
class loIntervalArithmetic<unsigned short int> : protected IntervalArithmetic<char> {
friend class lxIntervalArithmetic<unsigned short int>;
public:
/* dtyp = destination datatype
* atyp = accumulator datatype
*
* reg = arithmetic range structure
* ivl = interval structure
*/
loIntervalArithmetic<unsigned short int>() {
for (int i = 0x8000; i <= 0xFFFF; i++) {
sLUT[i - 0x8000] = inverse_div(i);
// fprintf(stderr, " [0x%4x] = 0x%4x\n", i - 0x8000, sLUT[i - 0x8000]);
}
#ifdef REPORT_CODING_STATISTICS
memset(&stats, 0, sizeof(stats));
#endif
}
#ifdef REPORT_CODING_STATISTICS
~loIntervalArithmetic<unsigned short int>() {
fprintf(stderr, "hits %d %.5f%%\n", stats[0].hits , 100.0 * stats[0].hits / (stats[0].hits ));
fprintf(stderr, "divz %d %.5f%%\n", stats[0].divz , 100.0 * stats[0].divz / (stats[0].hits ));
fprintf(stderr, "divp %d %.5f%%\n", stats[0].divp , 100.0 * stats[0].divp / (stats[0].hits ));
fprintf(stderr, "div <= 8 %d %.5f%%\n", stats[0].div08 , 100.0 * stats[0].div08 / (stats[0].hits ));
fprintf(stderr, "div <= 16 %d %.5f%%\n", stats[0].div16 , 100.0 * stats[0].div16 / (stats[0].hits ));
fprintf(stderr, "div <= 32 %d %.5f%%\n", stats[0].div32 , 100.0 * stats[0].div32 / (stats[0].hits ));
fprintf(stderr, "div <= 64 %d %.5f%%\n", stats[0].div64 , 100.0 * stats[0].div64 / (stats[0].hits ));
fprintf(stderr, "counter %d %.5f%%\n", stats[0].counter , 100.0 * stats[0].counter / (stats[0].counter));
fprintf(stderr, "branch1 %d %.5f%%\n", stats[0].branch1 , 100.0 * stats[0].branch1 / (stats[0].counter));
fprintf(stderr, "branch2 %d %.5f%%\n", stats[0].branch2 , 100.0 * stats[0].branch2 / (stats[0].counter));
}
#endif
protected:
#ifdef REPORT_CODING_STATISTICS
struct {
unsigned long int hits;
unsigned long int divz;
unsigned long int divp;
unsigned long int div08;
unsigned long int div16;
unsigned long int div32;
unsigned long int div64;
unsigned long int counter;
unsigned long int branch1;
unsigned long int branch2;
} stats[2];
#endif
// (2) * 32768 => 65536 (L1 cache full)
static unsigned short int sLUT[0x8000];
private:
/* ------------------------------------------------------------------------ */
/* calculate the "unsigned short" inverse of a "unsigned short" divisor */
static forceinline
unsigned short int inverse_div(unsigned short int divisor) {
unsigned short int inv;
inv = lo(~up(divisor) / divisor);
return inv;
}
/* calculate the "unsigned short" inverse of a "unsigned long" divisor */
static forceinline
unsigned short int inverse_adj(unsigned short inv, unsigned long int divisor) {
unsigned short int d1, d0;
unsigned short int p, t1, t0, mask;
/* 16.16 divisor */ {
d1 = hi(divisor);
d0 = lo(divisor);
}
p = d1 * inv;
p += d0;
if (p < d0) {
inv--;
mask = -(p >= d1);
p -= d1;
inv += mask;
p -= mask & d1;
}
/* 16.16 product */ {
unsigned long int t;
t = umul(d0, inv);
t1 = hi(t);
t0 = lo(t);
}
p += t1;
if (p < t1) {
inv--;
if (p >= d1) {
if ((p > d1) || (t0 >= d0))
inv--;
}
}
return inv;
}
/* calculate the "unsigned short" inverse of a "unsigned short" divisor */
static forceinline
unsigned short int inverse_lut(unsigned short int divisor) {
return sLUT[divisor - 0x8000];
}
protected:
/* ------------------------------------------------------------------------ */
/* calculate the division of a "unsigned long" dividend by a
* "unsigned short" inverse of a "unsigned short" divisor
*/
forceinline
unsigned short int divrem(
// unsigned short int &remainder,
unsigned short int dividendh,
unsigned short int dividendl,
unsigned short int divisor,
unsigned short int inverse
) /*const*/ {
unsigned short int div, rest, rem;
unsigned long int product, dividend;
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
unsigned short int dmask, dadj, dwraph, dwrapl; // dividendh 0x7FFC, dividendl 0x0000, divisor 0xFFF8, inverse 0x0008
// dividendh 0x0004, dividendl 0x0000, divisor 0xFFF8, inverse 0x0008
// dividendh 0xA900, dividendl 0x0000, divisor 0xC800, inverse 0x47AE
// dividendh 0x0100, dividendl 0x0000, divisor 0xC800, inverse 0x47AE
profile(stats[0].counter++);
dmask = sign(dividendl); // 0x0000 0x0000 0x0000 0x0000
dadj = (dmask & (divisor)); // 0x0000 0x0000 0x0000 0x0000
dwraph = dividendh - dmask; // 0x7FFC 0x0004 0xA900 0x0100
dwrapl = dividendl + dadj; // 0x0000 0x0000 0x0000 0x0000
profile(stats[0].branch1 += !dmask);
dividend = up(dividendh) + dwrapl; // 0x7FFC0000 0x00040000 0xA9000000 0x01000000
product = umul(dwraph, inverse) + dividend; // 0x7FFFFFE0 0x00040020 0xD851DE00 0x0147AE00
div = ~hi(product); // 0x8000 0xFFFB 0x27AE 0xFEB8
dividend = up(dividendh) + dividendl; // 0x7FFC0000 0x00040000 0xA9000000 0x01000000
product = umul(div, divisor) + dividend; // 0xFFF80000 0xFFF70028 0xC7FFF000 0xC7FFC000
rest = hi(product); // 0xFFF8 0xFFF7 0xC7FF 0xC7FF
rem = lo(product); // 0x0000 0x0028 0xF000 0xC000
rest -= divisor; /* xh = 0 or -1 */ // 0x0000 0xFFFF 0xFFFF 0xFFFF
rem += divisor & rest; // 0x0000 0x0020 0xB800 0x8800
div = rest - div; // 0x8000 0x0004 0xD851 0x0147
#else
dividend = up((unsigned short int)(dividendh + 1)) + dividendl;
product = umul(dividendh, inverse) + dividend;
div = hi(product);
rest = lo(product);
rem = dividendl - umull(div, divisor);
profile(stats[0].counter++);
/* Conditionally adjust q and the remainders */
if (rem > rest) {
rem += divisor;
div--;
profile(stats[0].branch1++);
}
if (rem >= divisor) {
rem -= divisor;
div++;
profile(stats[0].branch2++);
}
#endif
// remainder = rem;
return div;
}
/* calculate the division of a "unsigned long" dividend by a
* "unsigned short" inverse of a "unsigned short" divisor
*/
forceinline
void divrem(
unsigned short int "ientA,
unsigned short int "ientB,
// unsigned short int &remainderA,
// unsigned short int &remainderB,
unsigned short int dividendAh,
unsigned short int dividendAl,
unsigned short int dividendBh,
unsigned short int dividendBl,
unsigned short int divisor,
unsigned short int inverse
) /*const*/ {
unsigned short int divA, restA, remA;
unsigned short int divB, restB, remB;
unsigned long int productA/*, dividendA*/;
unsigned long int productB/*, dividendB*/;
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
unsigned short int dmaskA, dadjA, dwrapAh, dwrapAl;
unsigned short int dmaskB, dadjB, dwrapBh, dwrapBl;
profile(stats[0].counter += 2);
dmaskA = sign(dividendAl);
dmaskB = sign(dividendBl);
dadjA = (dmaskA & (divisor));
dadjB = (dmaskB & (divisor));
dwrapAh = dividendAh - dmaskA;
dwrapBh = dividendBh - dmaskB;
dwrapAl = dividendAl + dadjA;
dwrapBl = dividendBl + dadjB;
profile(stats[0].branch1 += !dmaskA + !dmaskB);
productA = umul(dwrapAh, inverse);
productB = umul(dwrapBh, inverse);
productA += up(dividendAh);
productB += up(dividendBh);
productA += dwrapAl;
productB += dwrapBl;
divA = ~hi(productA);
divB = ~lo(productB);
productA = umul(divA, divisor);
productB = umul(divB, divisor);
productA += up(dividendAh);
productB += up(dividendBh);
productA += dividendAl;
productB += dividendBl;
restA = hi(productA);
restB = hi(productB);
remA = lo(productA);
remB = lo(productB);
restA -= divisor; /* xh = 0 or -1 */
restB -= divisor; /* xh = 0 or -1 */
remA += divisor & restA;
remB += divisor & restB;
divA = restA - divA;
divB = restB - divB;
#else
productA = umull(dividendAh, (1UL << 16) + inverse);
productB = umull(dividendBh, (1UL << 16) + inverse);
productA += 1UL << 16;
productB += 1UL << 16;
productA += dividendAl;
productB += dividendBl;
divA = hi(productA);
divB = hi(productB);
remA = dividendAl;
remB = dividendBl;
remA -= umull(divA, divisor);
remB -= umull(divB, divisor);
profile(stats[0].counter += 2);
/* Conditionally adjust q and the remainders */
{
restA = lo(productA);
restB = lo(productB);
if (remA > restA) {
remA += divisor;
divA--;
profile(stats[0].branch1++);
}
if (remB > restB) {
remB += divisor;
divB--;
profile(stats[0].branch1++);
}
}
if (remA >= divisor) {
remA -= divisor;
divA++;
profile(stats[0].branch2++);
}
if (remB >= divisor) {
remB -= divisor;
divB++;
profile(stats[0].branch2++);
}
#endif
// remainderA = remA;
// remainderB = remB;
quotientA = divA;
quotientB = divB;
}
/* calculate the division of a "unsigned medium long" dividend by a
* "unsigned short" inverse of a "unsigned long" divisor
*/
forceinline
unsigned short int divrem(
// unsigned short int &remainderh,
// unsigned short int &remainderl,
unsigned short int dividendh,
unsigned short int dividendm,
unsigned short int dividendl,
unsigned short int divisorh,
unsigned short int divisorl,
unsigned short int inverse
) /*const*/ {
unsigned short int div, rest, remh, reml;
unsigned long int product, dividend, divisor, rem;
dividend = up(dividendh) + dividendm;
product = umul(dividendh, inverse) + dividend;
/* Compute the two most significant limbs of n - q'd */
div = hi(product);
rest = lo(product);
{
remh = dividendm - umull(div, divisorh);
reml = dividendl;
rem = up(remh) + reml;
divisor = up(divisorh) + divisorl;
product = umul(divisorl, div) + divisor;
rem = rem - product;
remh = hi(rem);
reml = lo(rem);
div += 1;
}
/* Conditionally adjust q and the remainders */
if (remh > rest) {
rem += divisor;
div--;
}
if (rem >= divisor) {
rem -= divisor;
div++;
}
// remh = rem >> 16;
// reml = rem >> 0;
//
// remainderh = remh;
// remainderl = reml;
return div;
}
public:
forceinline
void divide(unsigned long int ÷nd , unsigned short int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned short int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
// stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividend >>= (16); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividend >>= (15 - zeroBits(divisor)); return; }
#endif
/* range-contraction:
*
* R = r * p / t; p <= t
*
* dividend == 32bit
* divisor == 16bit
*
* 2by1
*/
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividend / divisor) <= /*0xFFFF*/ 0x10000);
// dividend /= divisor;
unsigned short int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 15);
unsigned long int _dividend = dividend << shift;
unsigned short int _divisor = divisor << shift;
unsigned short int _inverse = inverse_lut(_divisor);
unsigned short int _quotient = divrem(hi(_dividend), lo(_dividend), _divisor, _inverse);
assert((unsigned short int)(dividend / divisor) == _quotient );
dividend = _quotient ;
}
forceinline
void divide(unsigned long int ÷ndA, unsigned long int ÷ndB, unsigned short int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned short int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
// stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividendA >>= (16);
dividendB >>= (16); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividendA >>= (15 - zeroBits(divisor));
dividendB >>= (15 - zeroBits(divisor)); return; }
#endif
/* range-contraction:
*
* R = r * p / t; p <= t
*
* dividend == 32bit
* divisor == 16bit
*
* 2by1
*/
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividendA / divisor) <= /*0xFFFF*/ 0x10000);
assert((dividendB / divisor) <= /*0xFFFF*/ 0x10000);
// dividendA /= divisor;
// dividendB /= divisor;
unsigned short int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 15);
unsigned long int _dividendA = dividendA << shift;
unsigned long int _dividendB = dividendB << shift;
unsigned short int _divisor = divisor << shift;
unsigned short int _inverse = inverse_lut(_divisor);
unsigned short int _quotientA;
unsigned short int _quotientB;
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
_quotientA = divrem(hi(_dividendA), lo(_dividendA), _divisor, _inverse);
_quotientB = divrem(hi(_dividendB), lo(_dividendB), _divisor, _inverse);
#else
divrem(_quotientA,
_quotientB,
hi(_dividendA), lo(_dividendA),
hi(_dividendB), lo(_dividendB), _divisor, _inverse);
#endif
assert((unsigned short int)(dividendA / divisor) == _quotientA);
assert((unsigned short int)(dividendB / divisor) == _quotientB);
dividendA = _quotientA;
dividendB = _quotientB;
}
forceinline
void divide(unsigned_int64 ÷nd , unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividend = hi(dividend); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividend = sr(dividend, 31 - zeroBits(divisor)); return; }
#endif
/* range-contraction:
*
* C = c * t / r; c <= r
*
* dividend == 48bit
* divisor == 32bit
*
* 3by2
*/
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividend / divisor) <= /*0xFFFF*/ 0x10000);
// dividend /= divisor;
unsigned long int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 31);
unsigned_int64 _dividend = sl(dividend, shift);
unsigned long int _divisor = divisor << shift;
unsigned short int _inverse = inverse_adj(inverse_lut(hi(_divisor)), _divisor);
unsigned short int _quotient = divrem((unsigned short int)(_dividend >> 32), (unsigned short int)(_dividend >> 16), (unsigned short int)(_dividend >> 0),
(unsigned short int)(_divisor >> 16), (unsigned short int)(_divisor >> 0), _inverse);
assert((unsigned short int)(dividend / divisor) == _quotient );
dividend = _quotient ;
}
forceinline
void divide(unsigned_int64 ÷ndA, unsigned_int64 ÷ndB, unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividendA >>= (32);
dividendB >>= (32); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividendA >>= (31 - zeroBits(divisor));
dividendB >>= (31 - zeroBits(divisor)); return; }
#endif
/* range-contraction:
*
* C = c * t / r; c <= r
*
* dividend == 48bit
* divisor == 32bit
*
* 3by2
*/
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividendA / divisor) <= /*0xFFFF*/ 0x10000);
assert((dividendB / divisor) <= /*0xFFFF*/ 0x10000);
// dividendA /= divisor;
// dividendB /= divisor;
unsigned long int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 31);
unsigned_int64 _dividendA = sl(dividendA, shift);
unsigned_int64 _dividendB = sl(dividendB, shift);
unsigned long int _divisor = divisor << shift;
unsigned short int _inverse = inverse_adj(inverse_lut(_divisor >> 16), _divisor);
unsigned short int _quotientA = divrem((unsigned short int)(_dividendA >> 32), (unsigned short int)(_dividendA >> 16), (unsigned short int)(_dividendA >> 0),
(unsigned short int)(_divisor >> 16), (unsigned short int)(_divisor >> 0), _inverse);
unsigned short int _quotientB = divrem((unsigned short int)(_dividendB >> 32), (unsigned short int)(_dividendB >> 16), (unsigned short int)(_dividendB >> 0),
(unsigned short int)(_divisor >> 16), (unsigned short int)(_divisor >> 0), _inverse);
assert((unsigned short int)(dividendA / divisor) == _quotientA);
assert((unsigned short int)(dividendB / divisor) == _quotientB);
dividendA = _quotientA;
dividendB = _quotientB;
}
};
/* =============================================================================
*/
template<>
class loIntervalArithmetic<unsigned long int> : protected IntervalArithmetic<char> {
friend class lxIntervalArithmetic<unsigned long int>;
public:
/* dtyp = destination datatype
* atyp = accumulator datatype
*
* reg = arithmetic range structure
* ivl = interval structure
*/
loIntervalArithmetic<unsigned long int>() {
for (int i = 0x8000; i <= 0xFFFF; i++) {
sLUT[i - 0x8000] = inverse_div((i << 16) | 0xFFFF);
// fprintf(stderr, " [0x%4x] = 0x%4x\n", i - 0x8000, sLUT[i - 0x8000]);
}
#ifdef REPORT_CODING_STATISTICS
memset(&stats, 0, sizeof(stats));
#endif
}
#ifdef REPORT_CODING_STATISTICS
~loIntervalArithmetic<unsigned long int>() {
fprintf(stderr, "hits %d %.5f%%\n", stats[0].hits , 100.0 * stats[0].hits / (stats[0].hits ));
fprintf(stderr, "divz %d %.5f%%\n", stats[0].divz , 100.0 * stats[0].divz / (stats[0].hits ));
fprintf(stderr, "divp %d %.5f%%\n", stats[0].divp , 100.0 * stats[0].divp / (stats[0].hits ));
fprintf(stderr, "div <= 8 %d %.5f%%\n", stats[0].div08 , 100.0 * stats[0].div08 / (stats[0].hits ));
fprintf(stderr, "div <= 16 %d %.5f%%\n", stats[0].div16 , 100.0 * stats[0].div16 / (stats[0].hits ));
fprintf(stderr, "div <= 32 %d %.5f%%\n", stats[0].div32 , 100.0 * stats[0].div32 / (stats[0].hits ));
fprintf(stderr, "div <= 64 %d %.5f%%\n", stats[0].div64 , 100.0 * stats[0].div64 / (stats[0].hits ));
fprintf(stderr, "counter %d %.5f%%\n", stats[0].counter , 100.0 * stats[0].counter / (stats[0].counter));
fprintf(stderr, "branch1 %d %.5f%%\n", stats[0].branch1 , 100.0 * stats[0].branch1 / (stats[0].counter));
fprintf(stderr, "branch2 %d %.5f%%\n", stats[0].branch2 , 100.0 * stats[0].branch2 / (stats[0].counter));
}
#endif
protected:
#ifdef REPORT_CODING_STATISTICS
struct {
unsigned long int hits;
unsigned long int divz;
unsigned long int divp;
unsigned long int div08;
unsigned long int div16;
unsigned long int div32;
unsigned long int div64;
unsigned long int counter;
unsigned long int branch1;
unsigned long int branch2;
} stats[2];
#endif
// (2) * 32768 => 65536 (L1 cache full)
static unsigned short int sLUT[0x8000];
static unsigned short int lLUT[0x0200];
private:
/* ------------------------------------------------------------------------ */
/* calculate the "unsigned short" inverse of a "unsigned long" divisor */
static forceinline
unsigned short int inverse_div(unsigned long int divisor) {
unsigned long int inv;
inv = lo(~up(divisor) / divisor);
return hi(inv);
}
/* calculate the "unsigned short" inverse of a "unsigned short" divisor */
static forceinline
unsigned long int inverse_nri(unsigned long int divisor) {
#if 0
__asm {
mov edi, divisor ; 0xfff80000 1.0
mov eax, edi ; 0xfff80000 0.5
mov ebx, edi ; 0xfff80000 0.333
shr eax, 22 ; 0x000003ff d9 1.0
movzx ecx, lLUT[eax*2-512*2] ; 0x00004000 v0 1.0
; v1 = (v0 << 4) - ((v0 * v0 * d_21) >> 32) - 1
shr ebx, 11 ; 0x001fff00 d21 0.5
mov eax, ecx ; 0x00004000 0.5
inc ebx ; 0x001fff01 d21 + 1 0.5
imul eax, eax ; 0x10000000 v0 * v0 3.0
mul ebx ; 0x0001fff0 v0 * v0 * (d21 + 1) 3.0
mov ebx, edi ; 0xfff80000 0.5
shl ecx, 4 ; 0x00040000 v0 << 4 0.333
shr ebx, 1 ; 0x7ffc0000 d31 0.333
sbb eax, eax ; 0x00000000 0.5
dec ecx ; 0x0003ffff v1 - 1 0.5
sub ebx, eax ; 0x7ffc0000 d31 + 1 0.5
sub ecx, edx ; 0x0002000f v1p 1.0
and eax, ecx ; 0x00000000 v1 0.5
; v_2 = (v1 << 15) + ((v1 *(2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33)
imul ebx, ecx ; 0x7fc40000 lo(v1 * (d31 + 1)) 3.0
shr eax, 1 ; 0x00000000 (v1 >> 1) 1.0
sub eax, ebx ; 0x803c0000 0 - lo(v1 * (d31 + 1)) + (v1 >> 1) 0.5
mul ecx ; 0x0001007f 3.0
shr edx, 1 ; 0x0000803f v2m >> 33 0.333
shl ecx, 15 ; 0x00078000 0.5
mov eax, edi ; 0xfff80000 0.5
add ecx, edx ; 0x0008003f 1.0
; v_3 = v_2 + ((v_2 * d32 + d32) >> 32) + d32
mul ecx ; 0x0007fffefe080000 3.0
add eax, edi ; 0xfe000000 1.0
mov eax, ecx ; 0x0008003f 0.5
adc edx, edi ; 0xffffffff 1.0
sub eax, edx ; 0x00080040 0.333
}
#else
unsigned long int inv;
// inv = (unsigned long int)(~up(divisor) / divisor) /*>> 16*/;
unsigned long int d9 = divisor >> 22; //0x000003ff
unsigned long int d21 = divisor >> 11; //0x001fff00
unsigned long int d31 = divisor >> 1; //0x7ffc0000
unsigned long int mask = (divisor & 1 ? 0xFFFFFFFF : 0);
unsigned long int v0 = lLUT[d9 - 512]; //0x00004000
unsigned long int v1v;
unsigned_int64 v1p;
v1v = v0; // 15bit
v1v *= v0; // 30bit //0x10000000
d21 += 1; //0x001fff01
v1p = v1v;
v1p *= d21; // 21bit //0x0001fff0
v1v = v1p >> 32;// 51bit - 32bit = 19bit
unsigned long int v1;
unsigned long int v1m;
v1 = v0 << 4; // 15bit + 4bit = 19bit //0x00040000
v1 -= 1; //0x0003ffff
v1 -= v1v; // 19bit precision //0x0002000f
// 21bit different divisors
// v1 = (v0 << 4) - ((v0 * v0 * d21) >> 32) - 1
//
// xk = xk + xk * (1 - xk * divisor21)
// xk = xk + xk - xk * xk * divisor21
// xk = xk * 2 - xk * xk * divisor21
d31 -= mask; //+=1? //0x0002000f
unsigned long int v2p;
unsigned_int64 v2m;
v2p = v1; // 19bit //0x0002000f
v2p *= d31; // 50bit, lower 32bit //0x7fc40000
v1m = v1 & mask; //0x00000000
v1m >>= 1; // 18bit //0x00000000
v1m -= v2p; // 32bit //0x803c0000
v2m = v1m; // 32bit //0x803c0000
v2m *= v1; // 51bit //0x0001007f83840000
// v2m >>= 32; // 19bit //0x0001007f
// v2m >>= 1; // 18bit //0x0000803f
unsigned long int v2;
v2 = v2m >> 33;// 32bit //0x0008003f
v2 += v1 << 15; // 34bit, lower 32bit //0x00078000
// v_2 = (v1 << 15) + ((v1 * (2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33)
unsigned_int64 v3p;
unsigned long int v3o;
unsigned long int nd = 0 - divisor;
v3p = v2; //0x000000000008003f
v3p *= divisor; //0x0007fffe fe080000
// v3p += divisor; //0x0007ffff fe000000
// v3p >>= 32; //0x0007ffff
// v3p += divisor; //0xffffffff
v3o = (v3p >> 32);
v3o -= ((unsigned long int)v3p >= nd ? -1 : 0);
v3o -= nd; //0xffffffff
// v_3 = v_2 + ((v_2 * d32 + d32) >> 32) + d32
unsigned long int v3;
v3 = v2;
v3 -= v3o;
inv = v3;
return inv;
#endif
}
/* calculate the "unsigned long" inverse of a "unsigned long" divisor */
static forceinline
unsigned long int inverse_lut(unsigned long int divisor) {
unsigned long int check = inverse_nri(divisor);
#if 0
__asm {
mov edi, divisor ; 1.0
mov ebx, edi ; 0.5
not edi ; 0.5 (~diva)
shr ebx, 16 ; 0.5
inc edi ; 0.5 (~diva) + 1
movzx ebx, lLUT[ebx*2-0x8000*2] ; 3.0
shl ebx, 16 ; 0.5
mov ecx, edi ; 0.5
mov eax, ebx ; 0.5 aa = x_k
sub ecx, ebx ; 0.0 - xx = dv << 32 - x_k << 32
mul edi ; 3.0 aa = x_k * dv
lea eax, [edx+ecx] ; 0.5 bb = x_k * dv + dv << 32 - x_k << 32
lea ecx, [edx+edi] ; 1.0 iv = (aa >> 32) + dv;
mul ebx ; 3.0 dd = x_k * cc
lea ebx, [edx+ecx] ; 0.5 iv = (dd >> 32) + iv;
mov ecx, edi ; 0.5
mov eax, ebx ; 0.5 aa = x_k
sub ecx, ebx ; 0.0 - xx = dv << 32 - x_k << 32
mul edi ; 3.0 aa = x_k * dv
add edx, ecx ; 0.5 bb = x_k * dv - x_k << 32 + dv << 32
shr eax, 4 ; 0.5
shl edx, 32 - 4 ; 1.0
lea ecx, [edx+eax] ; 0.5 cc = bb >> 4
lea eax, [edx+eax] ; 1.0 cc = bb >> 4
mul ebx ; 3.0 dd = x_k * cc
add edx, ecx ; 1.0 ee = x_k * cc + cc << 32
shr edx, 32 - 4 ; 1.0 ff = ee >> 64 - 4
add ebx, edx ; 1.0
}
#else
unsigned long int v0 = sLUT[(divisor >> 16) - 0x8000], v1, v2;
unsigned long int dv = (~divisor) + 1, sb;
v0 <<= 16;
{
unsigned_int64 aa, dd;
unsigned long int cc;
sb = dv - v0;
// aa = ((unsigned_int64)v0) * dv; // x_k 16bit, diva 32bit
aa = umul(v0, dv);
v1 = hi(aa) + dv;
cc = hi(aa) + sb; // 14bit correct, 48-14=34bit wrong, 34bit -> 32bit
// dd = ((unsigned_int64)v0) * cc; // x_k 16bit, c_ 32bit
dd = umul(v0, cc);
v1 = hi(dd) + v1;
/* exact on 28bit precision */
assert((abs<int>(v1 - check) >> 4) <= 0);
}
{
unsigned_int64 aa, bb, dd, ff;
unsigned long int cc, ee;
sb = dv - v1;
// aa = ((unsigned_int64)v1) * dv; // x_k 16bit, diva 32bit
aa = umul(v1, dv);
bb = (aa >> (0)) + up(sb); // 48bit product
cc = lo(bb >> ((4) + 0)); // 14bit correct, 48-14=34bit wrong, 34bit -> 32bit
// dd = ((unsigned_int64)v1) * cc; // x_k 16bit, c_ 32bit
dd = umul(v1, cc);
ff = (dd >> (0)) + up(cc); // 48bit product
ee = hi(ff) >> (32 - (4)); // 14bit precision doubling (16bit - 2bit cutoff above)
v2 = v1 + ee;
/* exact on 31bit precision */
assert((abs<int>(v2 - check) >> 1) <= 0);
}
// assert(v2 == check);
return v2;
#endif
}
/* ------------------------------------------------------------------------ */
/* calculate the division of a "unsigned long" dividend by a
* "unsigned short" inverse of a "unsigned short" divisor
*/
forceinline
unsigned long int divrem(
// unsigned long int &remainder,
unsigned long int dividendh,
unsigned long int dividendl,
unsigned long int divisor,
unsigned long int inverse
) /*const*/ {
unsigned long int div, rest, rem;
unsigned_int64 product, dividend;
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
unsigned long int dmask, dadj, dwraph, dwrapl; // dividendh 0x7FFC0000, dividendl 0x00000000, divisor 0xFFF80000, inverse 0x00080000
// dividendh 0x00040000, dividendl 0x00000000, divisor 0xFFF80000, inverse 0x00080000
// dividendh 0xA9000000, dividendl 0x00000000, divisor 0xC8000000, inverse 0x47ae147a
// dividendh 0x01000000, dividendl 0x00000000, divisor 0xC8000000, inverse 0x47ae147a
profile(stats[0].counter++);
dmask = sign(dividendl); // 0x00000000 0x00000000 0x00000000 0x00000000
dadj = (dmask & (divisor)); // 0x00000000 0x00000000 0x00000000 0x00000000
dwraph = dividendh - dmask; // 0x7FFC0000 0x00040000 0xA9000000 0x01000000
dwrapl = dividendl + dadj; // 0x00000000 0x00000000 0x00000000 0x00000000
// counter 181352129 100.00000%
// branch1 139754845 77.06270%
profile(stats[0].branch1 += !dmask);
dividend = up(dividendh) + dwrapl; // 0x7FFC000000000000 0x0004000000000000 0xa900000000000000 0x0100000000000000
product = umul(dwraph, inverse) + dividend; // 0x7fffffffff000000 0x0004002001000000 0xd851eb848a000000 0x0147ae147a000000
div = ~hi(product); // 0x80000000 0xfffbffdf 0x27ae147b 0xfeb851eb
dividend = up(dividendh) + dividendl; // 0x7FFC000000000000 0x0004000000000000 0xa900000000000000 0x0100000000000000
product = umul(div, divisor) + dividend; // 0xfff8000000000000 0xfff7ffff01080000 0xc800000018000000 0xc7ffffff98000000
rest = hi(product); // 0xfff80000 0xfff7ffff 0xc8000000 0xc7ffffff
rem = lo(product); // 0x00000000 0x01080000 0x18000000 0x98000000
rest -= divisor; /* xh = 0 or -1 */ // 0x00000000 0xffffffff 0x00000000 0xffffffff
rem += divisor & rest; // 0x00000000 0x01000000 0x18000000 0x60000000
div = rest - div; // 0x80000000 0x00040020 0xd851eb85 0x0147ae14
#else
dividend = up(dividendh + 1) + dividendl;
product = umul(dividendh, inverse) + dividend;
div = hi(product);
rest = lo(product);
rem = dividendl - umull(div, divisor);
profile(stats[0].counter++);
/* Conditionally adjust q and the remainders */
if (rem > rest) {
rem += divisor;
div--;
profile(stats[0].branch1++);
}
if (rem >= divisor) {
rem -= divisor;
div++;
profile(stats[0].branch2++);
}
#endif
// remainder = rem;
return div;
}
/* calculate the division of a "unsigned long" dividend by a
* "unsigned short" inverse of a "unsigned short" divisor
*/
forceinline
void divrem(
unsigned long int "ientA, unsigned long int "ientB,
// unsigned long int &remainderA, unsigned long int &remainderB,
unsigned long int dividendAh, unsigned long int dividendBh,
unsigned long int dividendAl, unsigned long int dividendBl,
unsigned long int divisor,
unsigned long int inverse
) /*const*/ {
unsigned long int divA, restA, remA;
unsigned long int divB, restB, remB;
unsigned_int64 productA;
unsigned_int64 productB;
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
unsigned long int dmaskA, dadjA, dwrapAh, dwrapAl;
unsigned long int dmaskB, dadjB, dwrapBh, dwrapBl;
unsigned_int64 dividendA;
unsigned_int64 dividendB;
profile(stats[0].counter += 2);
dmaskA = sign(dividendAl);
dmaskB = sign(dividendBl);
dadjA = (dmaskA & (divisor));
dadjB = (dmaskB & (divisor));
dwrapAh = dividendAh - dmaskA;
dwrapBh = dividendBh - dmaskB;
dwrapAl = dividendAl + dadjA;
dwrapBl = dividendBl + dadjB;
profile(stats[0].branch1 += !dmaskA + !dmaskB);
dividendA = up(dividendAh) + dwrapAl;
dividendB = up(dividendBh) + dwrapBl;
productA = umul(dwrapAh, inverse) + dividendA;
productB = umul(dwrapBh, inverse) + dividendB;
divA = ~hi(productA);
divB = ~hi(productB);
dividendA = up(dividendAh) + dividendAl;
dividendB = up(dividendBh) + dividendBl;
productA = umul(divA, divisor) + dividendA;
productB = umul(divB, divisor) + dividendB;
restA = hi(productA);
restB = hi(productB);
remA = lo(productA);
remB = lo(productB);
restA -= divisor; /* xh = 0 or -1 */
restB -= divisor; /* xh = 0 or -1 */
remA += divisor & restA;
remB += divisor & restB;
divA = restA - divA;
divB = restB - divB;
#else
productA = umul(dividendAh, inverse);
productB = umul(dividendBh, inverse);
productA += dividendAl;
productB += dividendBl;
divA = hi(productA) + dividendAh;
divB = hi(productB) + dividendBh;
divA += 1;
divB += 1;
remA = dividendAl;
remB = dividendBl;
remA -= umull(divA, divisor);
remB -= umull(divB, divisor);
profile(stats[0].counter += 2);
/* Conditionally adjust q and the remainders */
{
restA = lo(productA);
restB = lo(productB);
if (remA > restA) {
remA += divisor;
divA--;
profile(stats[0].branch1++);
}
if (remB > restB) {
remB += divisor;
divB--;
profile(stats[0].branch1++);
}
}
if (remA >= divisor) {
remA -= divisor;
divA++;
profile(stats[0].branch2++);
}
if (remB >= divisor) {
remB -= divisor;
divB++;
profile(stats[0].branch2++);
}
#endif
// remainderA = remA;
// remainderB = remB;
quotientA = divA;
quotientB = divB;
}
public:
forceinline
void divide(unsigned_int64 ÷nd , unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividend = hi(dividend); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividend = sr(dividend, 31 - zeroBits(divisor)); return; }
#endif
/* range-contraction:
*
* R = r * p / t; p <= t
*
* dividend == 64bit
* divisor == 32bit
*
* 2by1
*/
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividend / divisor) <= /*0xFFFFFFFF*/ 0x100000000ULL);
// dividend /= divisor;
unsigned long int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 31);
unsigned_int64 _dividend = sl(dividend, shift);
unsigned long int _divisor = divisor << shift;
unsigned long int _inverse = inverse_lut(_divisor);
unsigned long int _quotient = divrem(hi(_dividend), lo(_dividend), _divisor, _inverse);
assert((unsigned long int)(dividend / divisor) == _quotient );
dividend = _quotient ;
}
forceinline
void divide(unsigned_int64 ÷ndA, unsigned_int64 ÷ndB, unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividendA >>= (32);
dividendB >>= (32); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividendA >>= (31 - zeroBits(divisor));
dividendB >>= (31 - zeroBits(divisor)); return; }
#endif
/* range-contraction:
*
* R = r * p / t; p <= t
*
* dividend == 64bit
* divisor == 32bit
*
* 2by1
*/
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividendA / divisor) <= /*0xFFFF*/ 0x100000000ULL);
assert((dividendB / divisor) <= /*0xFFFF*/ 0x100000000ULL);
// dividendA /= divisor;
// dividendB /= divisor;
unsigned long int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 31);
unsigned_int64 _dividendA = sl(dividendA, shift);
unsigned_int64 _dividendB = sl(dividendB, shift);
unsigned long int _divisor = divisor << shift;
unsigned long int _inverse = inverse_lut(_divisor);
unsigned long int _quotientA;
unsigned long int _quotientB;
#if 1 //def INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
_quotientA = divrem(hi(_dividendA), lo(_dividendA), _divisor, _inverse);
_quotientB = divrem(hi(_dividendB), lo(_dividendB), _divisor, _inverse);
#else
divrem(_quotientA, hi(_dividendA), lo(_dividendA),
_quotientB, hi(_dividendB), lo(_dividendB), _divisor, _inverse);
#endif
assert((unsigned long int)(dividendA / divisor) == _quotientA);
assert((unsigned long int)(dividendB / divisor) == _quotientB);
dividendA = _quotientA;
dividendB = _quotientB;
}
};
/* =============================================================================
*/
template<>
class lxIntervalArithmetic<unsigned short int> : protected loIntervalArithmetic<unsigned short int> {
/* dtyp = destination datatype
* atyp = accumulator datatype
*
* reg = arithmetic range structure
* ivl = interval structure
*/
private:
/* ------------------------------------------------------------------------ */
/* calculate the division of a "unsigned long" dividend by a
* "unsigned short" inverse of a "unsigned short" divisor
*/
forceinline
unsigned short int divrem(
unsigned short int &remainder,
unsigned short int dividendh,
unsigned short int dividendl,
unsigned short int divisor,
unsigned short int inverse
) /*const*/ {
unsigned short int div, rest, rem;
unsigned long int product, dividend;
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
unsigned short int dmask, dadj, dwraph, dwrapl; // dividendh 0x7FFC, dividendl 0x0000, divisor 0xFFF8, inverse 0x0008
// dividendh 0x0004, dividendl 0x0000, divisor 0xFFF8, inverse 0x0008
// dividendh 0xA900, dividendl 0x0000, divisor 0xC800, inverse 0x47AE
// dividendh 0x0100, dividendl 0x0000, divisor 0xC800, inverse 0x47AE
profile(stats[0].counter++);
dmask = sign(dividendl); // 0x0000 0x0000 0x0000 0x0000
dadj = (dmask & (divisor)); // 0x0000 0x0000 0x0000 0x0000
dwraph = dividendh - dmask; // 0x7FFC 0x0004 0xA900 0x0100
dwrapl = dividendl + dadj; // 0x0000 0x0000 0x0000 0x0000
profile(stats[0].branch1 += !dmask);
dividend = up(dividendh) + dwrapl; // 0x7FFC0000 0x00040000 0xA9000000 0x01000000
product = umul(dwraph, inverse) + dividend; // 0x7FFFFFE0 0x00040020 0xD851DE00 0x0147AE00
div = ~hi(product); // 0x8000 0xFFFB 0x27AE 0xFEB8
dividend = up(dividendh) + dividendl; // 0x7FFC0000 0x00040000 0xA9000000 0x01000000
product = umul(div, divisor) + dividend; // 0xFFF80000 0xFFF70028 0xC7FFF000 0xC7FFC000
rest = hi(product); // 0xFFF8 0xFFF7 0xC7FF 0xC7FF
rem = lo(product); // 0x0000 0x0028 0xF000 0xC000
rest -= divisor; /* xh = 0 or -1 */ // 0x0000 0xFFFF 0xFFFF 0xFFFF
rem += divisor & rest; // 0x0000 0x0020 0xB800 0x8800
div = rest - div; // 0x8000 0x0004 0xD851 0x0147
#else
dividend = up((unsigned short int)(dividendh + 1)) + dividendl;
product = umul(dividendh, inverse) + dividend;
div = hi(product);
rest = lo(product);
rem = dividendl - umull(div, divisor);
profile(stats[0].counter++);
/* Conditionally adjust q and the remainders */
if (rem > rest) {
rem += divisor;
div--;
profile(stats[0].branch1++);
}
if (rem >= divisor) {
rem -= divisor;
div++;
profile(stats[0].branch2++);
}
#endif
remainder = rem;
return div;
}
public:
forceinline
void divide(unsigned_int64 ÷nd, unsigned short int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned short int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
// stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividend = sr(dividend, 16); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividend = sr(dividend, 15 - zeroBits(divisor)); return; }
#endif
/* range-contraction:
*
* R = r / t;
*
* dividend == 64bit
* divisor == 32bit
*
* 2by1
*/
// dividend /= divisor;
unsigned short int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 15);
unsigned short int _divisor = divisor << shift;
unsigned short int _inverse = loIntervalArithmetic<unsigned short int>::inverse_lut(_divisor);
unsigned short int _remainder;
/* 0x-------- 08000000 08000000 dividendB
* 0x00008000 divisor
* 0x----0800 00000800 0000---- dividendB
* 0x00000800 00000800 _dividendB1
* 0x80000000 _divisor
* 0x00001000 _quotientB1
* 0x00000800 _remainder
* 0x00000800 00000000 _dividendB2
* 0x80000000 _divisor
* 0x00001000 _quotientB2
* 0x00001000 00001000 _quotientB
*/
unsigned long int _dividend1 = (unsigned long int)(hi( dividend ) >> (16 - shift));
unsigned short int _quotient1 = divrem(
_remainder, hi(_dividend1), lo(_dividend1), _divisor, _inverse);
unsigned long int _dividend2 = up(_remainder) + (unsigned short int)(lo(sr(dividend, 16)) >> (16 - shift));
unsigned short int _quotient2 = divrem(
_remainder, hi(_dividend2), lo(_dividend2), _divisor, _inverse);
unsigned long int _dividend3 = up(_remainder) + (unsigned short int)(lo( dividend ) >> (16 - shift));
unsigned short int _quotient3 = divrem(
_remainder, hi(_dividend3), lo(_dividend3), _divisor, _inverse);
unsigned long int _dividend4 = up(_remainder) + (unsigned short int)(lo( dividend ) << ( shift));
unsigned short int _quotient4 = loIntervalArithmetic<unsigned short int>::divrem(
hi(_dividend4), lo(_dividend4), _divisor, _inverse);
unsigned_int64 _quotient =
up(up(_quotient1) +
dn(_quotient2)) +
dn(up(_quotient3) +
dn(_quotient4));
assert((dividend / divisor) == _quotient);
dividend = _quotient;
}
forceinline
void divide(unsigned_int64 ÷nd, unsigned short int &result, unsigned_int64 divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
result = 0; return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
result = (unsigned short int)(dividend >> (63 - zeroBits(divisor))); return; }
#endif
/* range-contraction:
*
* R = r / t;
*
* dividend == 64bit
* divisor == 64bit
*
* 2by2
*/
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividend / divisor) <= /*0xFFFF*/ 0x10000ULL);
// dividendA /= divisor;
unsigned long int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 63);
if (shift >= 48) {
shift = shift - 48;
assume(shift >= 0);
assume(shift <= 15);
unsigned short int _divisor = lo(lo(divisor)) << shift;
unsigned short int _inverse = inverse_lut(_divisor);
unsigned long int _dividend = lo(dividend) << shift;
unsigned short int _quotient = loIntervalArithmetic<unsigned short int>::divrem(
hi(_dividend), lo(_dividend), _divisor, _inverse);
assert((unsigned short int)(dividend / divisor) == _quotient );
result = _quotient ;
}
else {
assume(shift >= 0);
assume(shift <= 47);
unsigned short int _divisor = lo(lo(divisor >> (48 - shift)));
unsigned short int _inverse = inverse_lut(_divisor);
#if 1
unsigned long int _dividend = lo(dividend >> (48 - shift));
unsigned short int _quotient = loIntervalArithmetic<unsigned short int>::divrem(
hi(_dividend), lo(_dividend), _divisor, _inverse);
unsigned_int64 _divisorx = umull(_quotient, divisor);
if (_divisorx > dividend)
_quotient--;
#else
unsigned short int _remainder;
unsigned long int _dividend = dividend >> (48 - shift);
unsigned short int _quotient = divrem(
_remainder, hi(_dividend), lo(_dividend), _divisor, _inverse);
/* multiply the cut-off part of the dividend by the quotient and look if it's bigger */
unsigned_int64 _dividendx = (dividend << shift) & 0x0000FFFFFFFFFFFFULL;
unsigned_int64 _divisorx = (divisor << shift) & 0x0000FFFFFFFFFFFFULL;
unsigned_int64 _divisorr = umull(_quotient, _divisorx);
unsigned_int64 _remaindrr = up(up(_remainder)) + _dividendx;
if (_divisorr > _remaindrr)
_quotient--;
// quotient * aproxdivisor + remainder == partialdividend
// quotient * (aproxdivisor + truncateddivisor) + X == partialdividend + truncateddividend
//
// quotient * aproxdivisor + remainder == partialdividend
// quotient * (aproxdivisor + truncateddivisor) + X - truncateddividend == partialdividend
//
// + remainder == partialdividend - quotient * aproxdivisor
// quotient * truncateddivisor + X - truncateddividend == partialdividend - quotient * aproxdivisor
//
// quotient * truncateddivisor + X - truncateddividend == remainder
//
// remainder + truncateddividend - quotient * truncateddivisor == X
// remainder + truncateddividend - quotient * truncateddivisor < 0
#endif
assert((unsigned short int)(dividend / divisor) == (unsigned short int)_quotient);
result = _quotient ;
}
}
};
/* =============================================================================
*/
template<>
class lxIntervalArithmetic<unsigned long int> : protected loIntervalArithmetic<unsigned long int> {
/* dtyp = destination datatype
* atyp = accumulator datatype
*
* reg = arithmetic range structure
* ivl = interval structure
*/
private:
/* ------------------------------------------------------------------------ */
/* calculate the division of a "unsigned long" dividend by a
* "unsigned short" inverse of a "unsigned short" divisor
*/
forceinline
unsigned long int divrem(
unsigned long int &remainder,
unsigned long int dividendh,
unsigned long int dividendl,
unsigned long int divisor,
unsigned long int inverse
) /*const*/ {
unsigned long int div, rest, rem;
unsigned_int64 product, dividend;
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_NOBRANCH
unsigned long int dmask, dadj, dwraph, dwrapl; // dividendh 0x7FFC0000, dividendl 0x00000000, divisor 0xFFF80000, inverse 0x00080000
// dividendh 0x00040000, dividendl 0x00000000, divisor 0xFFF80000, inverse 0x00080000
// dividendh 0xA9000000, dividendl 0x00000000, divisor 0xC8000000, inverse 0x47ae147a
// dividendh 0x01000000, dividendl 0x00000000, divisor 0xC8000000, inverse 0x47ae147a
profile(stats[0].counter++);
dmask = sign(dividendl); // 0x00000000 0x00000000 0x00000000 0x00000000
dadj = (dmask & (divisor)); // 0x00000000 0x00000000 0x00000000 0x00000000
dwraph = dividendh - dmask; // 0x7FFC0000 0x00040000 0xA9000000 0x01000000
dwrapl = dividendl + dadj; // 0x00000000 0x00000000 0x00000000 0x00000000
// counter 181352129 100.00000%
// branch1 139754845 77.06270%
profile(stats[0].branch1 += !dmask);
dividend = up(dividendh) + dwrapl; // 0x7FFC000000000000 0x0004000000000000 0xa900000000000000 0x0100000000000000
product = umul(dwraph, inverse) + dividend; // 0x7fffffffff000000 0x0004002001000000 0xd851eb848a000000 0x0147ae147a000000
div = ~hi(product); // 0x80000000 0xfffbffdf 0x27ae147b 0xfeb851eb
dividend = up(dividendh) + dividendl; // 0x7FFC000000000000 0x0004000000000000 0xa900000000000000 0x0100000000000000
product = umul(div, divisor) + dividend; // 0xfff8000000000000 0xfff7ffff01080000 0xc800000018000000 0xc7ffffff98000000
rest = hi(product); // 0xfff80000 0xfff7ffff 0xc8000000 0xc7ffffff
rem = lo(product); // 0x00000000 0x01080000 0x18000000 0x98000000
rest -= divisor; /* xh = 0 or -1 */ // 0x00000000 0xffffffff 0x00000000 0xffffffff
rem += divisor & rest; // 0x00000000 0x01000000 0x18000000 0x60000000
div = rest - div; // 0x80000000 0x00040020 0xd851eb85 0x0147ae14
#else
dividend = up(dividendh + 1) + dividendl;
product = umul(dividendh, inverse) + dividend;
div = hi(product);
rest = lo(product);
rem = dividendl - umull(div, divisor);
profile(stats[0].counter++);
/* Conditionally adjust q and the remainders */
if (rem > rest) {
rem += divisor;
div--;
profile(stats[0].branch1++);
}
if (rem >= divisor) {
rem -= divisor;
div++;
profile(stats[0].branch2++);
}
#endif
remainder = rem;
return div;
}
public:
forceinline
void divide(unsigned_int64 ÷nd, unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividend = hi(dividend); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividend = sr(dividend, 31 - zeroBits(divisor)); return; }
#endif
/* range-contraction:
*
* R = r / t;
*
* dividend == 64bit
* divisor == 32bit
*
* 2by1
*/
// dividend /= divisor;
unsigned long int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 31);
unsigned long int _divisor = divisor << shift;
unsigned long int _inverse = inverse_lut(_divisor);
unsigned long int _remainder;
/* 0x-------- 08000000 08000000 dividendB
* 0x00008000 divisor
* 0x----0800 00000800 0000---- dividendB
* 0x00000800 00000800 _dividendB1
* 0x80000000 _divisor
* 0x00001000 _quotientB1
* 0x00000800 _remainder
* 0x00000800 00000000 _dividendB2
* 0x80000000 _divisor
* 0x00001000 _quotientB2
* 0x00001000 00001000 _quotientB
*/
unsigned_int64 _dividend1 = sr(dividend , 32 - shift);
unsigned long int _quotient1 = divrem(
_remainder, hi(_dividend1), lo(_dividend1), _divisor, _inverse);
unsigned_int64 _dividend2 = up(_remainder) + (lo(dividend) << shift);
unsigned long int _quotient2 = loIntervalArithmetic<unsigned long int>::divrem(
hi(_dividend2), lo(_dividend2), _divisor, _inverse);
unsigned_int64 _quotient =
up(_quotient1) +
dn(_quotient2);
assert((dividend / divisor) == _quotient);
dividend = _quotient;
}
forceinline
void divide(unsigned_int64 ÷nd, unsigned long int &result, unsigned_int64 divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
result = 0; return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
result = (unsigned long int)(dividend >> (63 - zeroBits(divisor))); return; }
#endif
/* range-contraction:
*
* R = r / t;
*
* dividend == 64bit
* divisor == 64bit
*
* 2by2
*/
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividend / divisor) <= /*0xFFFF*/ 0x100000000ULL);
// dividendA /= divisor;
unsigned long int shift = zeroBits(divisor);
assume(shift >= 0);
assume(shift <= 63);
if (shift >= 32) {
shift = shift - 32;
assume(shift >= 0);
assume(shift <= 31);
unsigned long int _divisor = lo(divisor) << shift;
unsigned long int _inverse = inverse_lut(_divisor);
unsigned_int64 _dividend = sl(dividend, shift);
unsigned long int _quotient = loIntervalArithmetic<unsigned long int>::divrem(
hi(_dividend), lo(_dividend), _divisor, _inverse);
assert((unsigned long int)(dividend / divisor) == _quotient );
result = _quotient ;
}
else {
assume(shift >= 0);
assume(shift <= 31);
unsigned long int _divisor = lo(sr(divisor, 32 - shift));
unsigned long int _inverse = inverse_lut(_divisor);
#if 0
unsigned_int64 _dividend = sr(dividend, 32 - shift);
unsigned long int _quotient = loIntervalArithmetic<unsigned long int>::divrem(
hi(_dividend), lo(_dividend), _divisor, _inverse);
unsigned_int64 _divisorx = umull(_quotient, divisor);
if (_divisorx > dividend)
_quotient--;
#else
unsigned long int _remainder;
unsigned_int64 _dividend = sr(dividend, 32 - shift);
unsigned long int _quotient = divrem(
_remainder, hi(_dividend), lo(_dividend), _divisor, _inverse);
/* multiply the cut-off part of the dividend by the quotient and look if it's bigger */
unsigned long int _dividendx = (lo(dividend) << shift);
unsigned long int _divisorx = (lo(divisor ) << shift);
unsigned_int64 _divisorr = umul(_quotient, _divisorx);
unsigned_int64 _remaindrr = up(_remainder) + _dividendx;
if (_divisorr > _remaindrr)
_quotient--;
// quotient * aproxdivisor + remainder == partialdividend
// quotient * (aproxdivisor + truncateddivisor) + X == partialdividend + truncateddividend
//
// quotient * aproxdivisor + remainder == partialdividend
// quotient * (aproxdivisor + truncateddivisor) + X - truncateddividend == partialdividend
//
// + remainder == partialdividend - quotient * aproxdivisor
// quotient * truncateddivisor + X - truncateddividend == partialdividend - quotient * aproxdivisor
//
// quotient * truncateddivisor + X - truncateddividend == remainder
//
// remainder + truncateddividend - quotient * truncateddivisor == X
// remainder + truncateddividend - quotient * truncateddivisor < 0
// remainder + truncateddividend < quotient * truncateddivisor
#endif
assert((unsigned long int)(dividend / divisor) == _quotient );
result = _quotient ;
}
}
};
/* =============================================================================
*/
template<typename ityp>
class hiIntervalArithmetic;
template<>
class hiIntervalArithmetic<unsigned short int> : protected IntervalArithmetic<char> {
#define LUT_BITS 16 //16
#define LUT_RANGE (1 << (LUT_BITS - 0)) // 0x10000
#define LUT_MIDDLE (1 << (LUT_BITS - 1)) // 0x8000
#define PRC_BITS 16 //15
#define PRC_MIDDLE (1 << PRC_BITS)
public:
/* dtyp = destination datatype
* atyp = accumulator datatype
*
* reg = arithmetic range structure
* ivl = interval structure
*/
hiIntervalArithmetic<unsigned short int>() {
zLUT[0x7FFF] = 0;
for (int i = 0x8000; i < 0xFFFF; i++) {
zLUT[i - 0x8000] = inverse_nri(i);
// fprintf(stderr, " [0x%4x] = 0x%4x\n", i, LUT[i]);
}
#ifdef REPORT_CODING_STATISTICS
memset(&stats, 0, sizeof(stats));
#endif
}
#ifdef REPORT_CODING_STATISTICS
~hiIntervalArithmetic<unsigned short int>() {
fprintf(stderr, "hits %d %.5f%%\n", stats[0].hits , 100.0 * stats[0].hits / (stats[0].hits ));
fprintf(stderr, "divz %d %.5f%%\n", stats[0].divz , 100.0 * stats[0].divz / (stats[0].hits ));
fprintf(stderr, "divp %d %.5f%%\n", stats[0].divp , 100.0 * stats[0].divp / (stats[0].hits ));
fprintf(stderr, "div <= 8 %d %.5f%%\n", stats[0].div08 , 100.0 * stats[0].div08 / (stats[0].hits ));
fprintf(stderr, "div <= 16 %d %.5f%%\n", stats[0].div16 , 100.0 * stats[0].div16 / (stats[0].hits ));
fprintf(stderr, "div <= 32 %d %.5f%%\n", stats[0].div32 , 100.0 * stats[0].div32 / (stats[0].hits ));
fprintf(stderr, "div <= 64 %d %.5f%%\n", stats[0].div64 , 100.0 * stats[0].div64 / (stats[0].hits ));
fprintf(stderr, "counter %d %.5f%%\n", stats[0].counter , 100.0 * stats[0].counter / (stats[0].counter));
fprintf(stderr, "branch1 %d %.5f%%\n", stats[0].branch1 , 100.0 * stats[0].branch1 / (stats[0].counter));
fprintf(stderr, "branch2 %d %.5f%%\n", stats[0].branch2 , 100.0 * stats[0].branch2 / (stats[0].counter));
}
#endif
protected:
#ifdef REPORT_CODING_STATISTICS
struct {
unsigned long int hits;
unsigned long int divz;
unsigned long int divp;
unsigned long int div08;
unsigned long int div16;
unsigned long int div32;
unsigned long int div64;
unsigned long int counter;
unsigned long int branch1;
unsigned long int branch2;
} stats[2];
#endif
// (2) * 32768 => 65536 (L1 cache full)
static unsigned short int zLUT [0x8000];
static unsigned long int zLUTs[16]; // zLUTs[k] + (0x10000 << k)
static unsigned long int zLUTl[32];
static unsigned_int64 zLUTq[64];
private:
/* ------------------------------------------------------------------------ */
/* calculate the "unsigned short" inverse of a "unsigned long" divisor */
static forceinline
unsigned short int inverse_nri(unsigned long int divisor) {
unsigned long int t;
t = divisor + 1;
t = inverse_find(t);
t = t - 0x10000;
return (unsigned short int)t;
}
/* calculate the "unsigned short" inverse of a "unsigned long" divisor */
static forceinline
unsigned long int inverse_find(unsigned long int divisor) {
// decent start
unsigned long int inverse = 1UL << zeroBits(divisor);
// z recurrence
unsigned long int ndivisor = 0UL - divisor;
// newton-raphson
for (;;) {
unsigned long int delta = umulh(inverse, umull(ndivisor, inverse));
if (delta == 0)
break;
inverse = inverse + delta;
}
// inch z upward if possible
while (umull(ndivisor, inverse) > divisor && 0 - inverse > 1) {
inverse = inverse + 1;
}
return inverse;
}
/* ------------------------------------------------------------------------ */
/* calculate the division of a "unsigned long" dividend by a
* "unsigned long" inverse of a "unsigned short" divisor
*/
forceinline
unsigned short int divrem(
// unsigned short int &remainder,
unsigned short int dividend,
unsigned short int divisor,
unsigned short int inverse
) /*const*/ {
/* brute-force check:
*
* counter 65535 100.00000%
* branch1 16361 24.96529%
* branch2 1 0.00153%
*
* branch2 is taken once for divisor "5"
*/
unsigned short int quotient, remainder;
// q estimate
quotient = umulh(dividend, inverse);
remainder = dividend - umull(divisor, quotient);
profile(stats[0].counter++);
// q refinement
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch1++);
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch2++);
}
}
return quotient;
}
/* calculate the division of a "unsigned long" dividend by a
* "unsigned long" inverse of a "unsigned short" divisor
*/
forceinline
unsigned long int divrem(
// unsigned long int &remainder,
unsigned long int dividend,
unsigned short int divisor,
unsigned long int inverse,
bool symmetric = false // dividend / divisor <= 0xFFFF
) /*const*/ {
/* brute-force check:
*
* counter 65535 100.00000%
* branch1 16248 24.79286%
* branch2 0 0.00000%
*
* branch2 is NEVER taken! if the newton-raphson
* step is not omitted, the nr-step can be omitted
* if the quotient would result in less than 0xFFFF
*
* of course the branch is cheaper
*
* example: 0xffd3fb00 / 0x0bfd
*/
unsigned long int quotient, remainder;
// newton-raphson
if (!symmetric) {
unsigned long int ndivisor = 0UL - divisor;
inverse += umulh(inverse, umull(ndivisor, inverse));
}
// q estimate
quotient = umulh(dividend, inverse);
remainder = dividend - umull(divisor, quotient);
profile(stats[0].counter++);
// q refinement
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch1++);
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch2++);
}
}
return quotient;
}
/* calculate the division of a "unsigned long" dividend by a
* "unsigned long" inverse of a "unsigned long" divisor
*/
forceinline
unsigned long int divrem(
// unsigned long int &remainder,
unsigned long int dividend,
unsigned long int divisor,
unsigned long int inverse,
bool symmetric = false // dividend / divisor <= 0xFFFF
) /*const*/ {
/* brute-force check:
*
* counter 65535 100.00000%
* branch1 16248 24.79286%
* branch2 1 0.00000%
*
* branch2 is taken once for divisor "0x17462CAE"
*/
unsigned long int quotient, remainder;
// newton-raphson
{
unsigned long int ndivisor = 0UL - divisor;
inverse += umulh(inverse, umull(ndivisor, inverse));
}
// q estimate
quotient = umulh(dividend, inverse);
remainder = dividend - umull(divisor, quotient);
profile(stats[0].counter++);
// q refinement
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch1++);
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch2++);
}
}
return quotient;
}
/* calculate the division of a "unsigned long" dividend by a
* "unsigned long" inverse of a "unsigned long" divisor
*/
forceinline
unsigned_int64 divrem(
// unsigned_int64 &remainder,
unsigned_int64 dividend,
unsigned long int divisor,
unsigned_int64 inverse,
bool symmetric = false // dividend / divisor <= 0xFFFFFFFF
) /*const*/ {
/* brute-force check:
*
* counter
* branch1
* branch2
*
*
*/
unsigned_int64 quotient, remainder;
// newton-raphson
{
unsigned_int64 ndivisor = 0ULL - divisor;
inverse += umulh(inverse, umull(ndivisor, inverse));
inverse += umulh(inverse, umull(ndivisor, inverse));
}
// q estimate
quotient = umulh(dividend, inverse);
remainder = dividend - umull(divisor, quotient);
profile(stats[0].counter++);
// q refinement
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch1++);
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch2++);
}
}
return quotient;
}
/* calculate the division of a "unsigned long" dividend by a
* "unsigned long" inverse of a "unsigned long" divisor
*/
forceinline
unsigned_int64 divrem(
// unsigned_int64 &remainder,
unsigned_int64 dividend,
unsigned_int64 divisor,
unsigned_int64 inverse,
bool symmetric = false // dividend / divisor <= 0xFFFFFFFF
) /*const*/ {
unsigned_int64 quotient, remainder;
// newton-raphson
{
unsigned_int64 ndivisor = 0ULL - divisor;
inverse += umulh(inverse, umull(ndivisor, inverse));
inverse += umulh(inverse, umull(ndivisor, inverse));
}
// q estimate
quotient = umulh(dividend, inverse);
remainder = dividend - umull(divisor, quotient);
profile(stats[0].counter++);
// q refinement
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch1++);
/* if it creates no borrow-bit: jnb, sub+adc0 */
if (remainder >= divisor) {
remainder = remainder - divisor;
quotient = quotient + 1;
profile(stats[0].branch2++);
}
}
return quotient;
}
/* ------------------------------------------------------------------------ */
static forceinline
unsigned long int inverse_lut(unsigned short int divisor) {
unsigned short int k;
unsigned short int ty;
unsigned long int t, inverse;
k = zeroBits(divisor);
assume(k >= 0);
assume(k <= 15);
// prescaling
ty = (divisor << k) >> 0;
// table lookup
// t = zLUT[ty - 0x8000] + 0x10000;
t = zLUT[ty - 0x8000];
// postscaling
// inverse = (t << 15) >> (15 - k);
inverse = (t << k);
// small divisor fix
inverse += zLUTs[k];
// inverse += zLUTs[k] + (0x10000 << k);
return inverse;
}
static forceinline
unsigned long int inverse_lut(unsigned long int divisor) {
unsigned long int k;
unsigned short int ty;
unsigned long int t, inverse;
k = zeroBits(divisor);
assume(k >= 0);
assume(k <= 31);
// prescaling
ty = (divisor << k) >> 16;
// table lookup
t = zLUT[ty - 0x8000] + 0x10000;
// postscaling
inverse = (t << 15) >> (31 - k);
// small divisor fix
inverse += zLUTl[k];
return inverse;
}
static forceinline
unsigned_int64 inverse_lut(unsigned_int64 divisor) {
unsigned long int k;
unsigned short int ty;
unsigned_int64 t, inverse;
k = zeroBits(divisor);
assume(k >= 0);
assume(k <= 63);
// prescaling
ty = (divisor << k) >> 48;
// table lookup
t = zLUT[ty - 0x8000] + 0x10000;
// postscaling
inverse = (t << 47) >> (63 - k);
// small divisor fix
inverse += zLUTq[k];
return inverse;
}
public:
forceinline
void divide(unsigned long int ÷nd , unsigned short int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned short int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
// stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividend >>= (16); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividend >>= (15 - zeroBits(divisor)); return; }
#endif
// dividend /= divisor;
unsigned long int _dividend = dividend;
unsigned short int _divisor = divisor;
unsigned long int _inverse = inverse_lut(_divisor);
unsigned long int _quotient = divrem(_dividend, _divisor, _inverse, false);
assert((unsigned long int)(dividend / divisor) == _quotient );
dividend = _quotient ;
}
forceinline
void divide(unsigned long int ÷ndA, unsigned long int ÷ndB, unsigned short int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned short int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
// stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividendA >>= (16);
dividendB >>= (16); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividendA >>= (15 - zeroBits(divisor));
dividendB >>= (15 - zeroBits(divisor)); return; }
#endif
// dividend /= divisor;
unsigned long int _dividendA = dividendA;
unsigned long int _dividendB = dividendB;
unsigned short int _divisor = divisor;
unsigned long int _inverse = inverse_lut(_divisor);
unsigned long int _quotientA = divrem(_dividendA, _divisor, _inverse, false);
unsigned long int _quotientB = divrem(_dividendB, _divisor, _inverse, false);
assert((unsigned long int)(dividendA / divisor) == _quotientA);
assert((unsigned long int)(dividendB / divisor) == _quotientB);
dividendA = _quotientA;
dividendB = _quotientB;
}
forceinline
void divide(unsigned long int ÷nd , unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividend >>= (32); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividend >>= (31 - zeroBits(divisor)); return; }
#endif
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividend / divisor) <= /*0xFFFF*/ 0x10000);
// dividend /= divisor;
unsigned long int _dividend = dividend;
unsigned long int _divisor = divisor;
unsigned long int _inverse = inverse_lut(_divisor);
unsigned long int _quotient = divrem(_dividend, _divisor, _inverse, true);
assert((unsigned long int)(dividend / divisor) == _quotient );
dividend = _quotient ;
}
forceinline
void divide(unsigned long int ÷ndA, unsigned long int ÷ndB, unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividendA >>= (32);
dividendB >>= (32); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividendA >>= (31 - zeroBits(divisor));
dividendB >>= (31 - zeroBits(divisor)); return; }
#endif
// some coders can overflow by 1, they can handle the 0 result as well
assert((dividendA / divisor) <= /*0xFFFF*/ 0x10000);
assert((dividendB / divisor) <= /*0xFFFF*/ 0x10000);
// dividend /= divisor;
unsigned long int _dividendA = dividendA;
unsigned long int _dividendB = dividendB;
unsigned long int _divisor = divisor;
unsigned long int _inverse = inverse_lut(_divisor);
unsigned long int _quotientA = divrem(_dividendA, _divisor, _inverse, true);
unsigned long int _quotientB = divrem(_dividendB, _divisor, _inverse, true);
assert((unsigned long int)(dividendA / divisor) == _quotientA);
assert((unsigned long int)(dividendB / divisor) == _quotientB);
dividendA = _quotientA;
dividendB = _quotientB;
}
forceinline
void divide(unsigned_int64 ÷nd , unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividend = hi(dividend); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividend = sr(dividend, 31 - zeroBits(divisor)); return; }
#endif
}
forceinline
void divide(unsigned_int64 ÷ndA, unsigned_int64 ÷ndB, unsigned long int divisor) /*const*/ {
#ifdef REPORT_CODING_STATISTICS
stats[0].hits++; unsigned long int pop = oneBits(divisor);
stats[0].divz += (pop == 0);
stats[0].divp += (pop == 1);
stats[0].div08 += (divisor > (1L << 0) && divisor <= (1L << 8));
stats[0].div16 += (divisor > (1L << 8) && divisor <= (1L << 16));
stats[0].div32 += (divisor > (1L << 16) && divisor <= (1LL << 32));
// stats[0].div64 += (divisor > (1LL << 32));
#endif
/* wrap around, 0 == max+1 */
if (!divisor) {
dividendA >>= (32);
dividendB >>= (32); return; }
#ifdef INTERVAL_ARITHMETIC_RECIPROCAL_SHIFT
/* convert to shift */
if (oneBits(divisor) == 1) {
dividendA >>= (31 - zeroBits(divisor));
dividendB >>= (31 - zeroBits(divisor)); return; }
#endif
}
};
#endif //INTERVAL_ARITHMETIC_RECIPROCAL
#endif //INTERVAL_ARITHMETIC_HPP
#pragma warning(disable : 4731)
#pragma warning(disable : 4244)
#include "Arithmetic/divide-then-multiply.hpp"
#include "Arithmetic/divide-then-multiply-carry.hpp"
#include "Arithmetic/divide-then-multiply-extra.hpp"
#include "Arithmetic/divide-then-multiply-extra-data.hpp"
#include "Arithmetic/divide-then-multiply-symetric.hpp"
#include "Arithmetic/divide-then-multiply-data.hpp"
#include "Arithmetic/divide-then-multiply-sub.hpp"
#include "Arithmetic/divide-then-multiply-sub-carry.hpp"
#include "Arithmetic/multiply-then-divide.hpp"
#include "Arithmetic/multiply-then-divide-carry.hpp"
#include "Arithmetic/multiply-then-divide-extra.hpp"
#include "Arithmetic/multiply-then-divide-symetric.hpp"
#include "Arithmetic/multiply-then-divide-inverse.hpp"
#include "Arithmetic/multiply-then-divide-reverse.hpp"
#include "Arithmetic/multiply-then-divide-reverse-mid.hpp"
#pragma warning(default : 4731)
#pragma warning(default : 4244)