From: <tr...@ff...> - 2008-10-26 22:01:08
|
Author: ppalmers Date: 2008-10-26 15:01:00 -0700 (Sun, 26 Oct 2008) New Revision: 1376 Modified: branches/libffado-2.0/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp Log: fix clipping and float->int conversion. optimize this a bit. fix SSE code. Modified: branches/libffado-2.0/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp =================================================================== --- branches/libffado-2.0/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp 2008-10-24 17:58:07 UTC (rev 1375) +++ branches/libffado-2.0/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp 2008-10-26 22:01:00 UTC (rev 1376) @@ -38,8 +38,7 @@ #include <assert.h> #include <cstring> -#define AMDTP_FLOAT_MULTIPLIER 2147483392.0 - +#define AMDTP_FLOAT_MULTIPLIER (1.0f * ((1<<23) - 1)) namespace Streaming { @@ -515,7 +514,6 @@ } #ifdef __SSE2__ -//#if 0 #include <emmintrin.h> #warning SSE2 build @@ -543,6 +541,7 @@ memset(m_scratch_buffer, 0, nevents * 4); const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000); + const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF); const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER); #if AMDTP_CLIP_FLOATS @@ -570,7 +569,6 @@ // the base event for this position target_event = (quadlet_t *)(data + i); - // process the events for (j=0;j < nevents; j += 1) { @@ -596,8 +594,8 @@ v_float = _mm_mul_ps(v_float, mult); // convert to signed integer v_int = _mm_cvttps_epi32( v_float ); - // shift right 8 bits - v_int = _mm_srli_epi32( v_int, 8 ); + // mask + v_int = _mm_and_si128( v_int, mask ); // label it v_int = _mm_or_si128( v_int, label ); @@ -606,7 +604,6 @@ v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) ); // do second swap v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) ); - // store the packed int // (target misalignment is assumed since we don't know the m_dimension) _mm_storeu_si128 (target, v_int); @@ -654,13 +651,12 @@ v_float = _mm_max_ps(v_float, v_min); v_float = _mm_min_ps(v_float, v_max); #endif - // multiply v_float = _mm_mul_ps(v_float, mult); // convert to signed integer v_int = _mm_cvttps_epi32( v_float ); - // shift right 8 bits - v_int = _mm_srli_epi32( v_int, 8 ); + // mask + v_int = _mm_and_si128( v_int, mask ); // label it v_int = _mm_or_si128( v_int, label ); @@ -688,13 +684,23 @@ for(;j < nevents; j += 1) { float *in = (float *)buffer; #if AMDTP_CLIP_FLOATS - if(*in > 1.0) *in=1.0; - if(*in < -1.0) *in=-1.0; -#endif + // clip directly to the value of a maxed event + if(*in > 1.0) { + *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF); + } else if(*in < -1.0) { + *target_event = CONDSWAPTOBUS32_CONST(0x40800001); + } else { + float v = (*in) * AMDTP_FLOAT_MULTIPLIER; + unsigned int tmp = ((int) v); + tmp = ( tmp & 0x00FFFFFF ) | 0x40000000; + *target_event = CondSwapToBus32((quadlet_t)tmp); + } +#else float v = (*in) * AMDTP_FLOAT_MULTIPLIER; unsigned int tmp = ((int) v); - tmp = ( tmp >> 8 ) | 0x40000000; + tmp = ( tmp & 0x00FFFFFF ) | 0x40000000; *target_event = CondSwapToBus32((quadlet_t)tmp); +#endif buffer++; target_event += m_dimension; } @@ -937,14 +943,23 @@ { float *in = (float *)buffer; #if AMDTP_CLIP_FLOATS - if(*in > 1.0) *in=1.0; - if(*in < -1.0) *in=-1.0; -#endif + // clip directly to the value of a maxed event + if(*in > 1.0) { + *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF); + } else if(*in < -1.0) { + *target_event = CONDSWAPTOBUS32_CONST(0x40800001); + } else { + float v = (*in) * AMDTP_FLOAT_MULTIPLIER; + unsigned int tmp = ((int) v); + tmp = ( tmp & 0x00FFFFFF ) | 0x40000000; + *target_event = CondSwapToBus32((quadlet_t)tmp); + } +#else float v = (*in) * AMDTP_FLOAT_MULTIPLIER; - unsigned int tmp = ((int) lrintf(v)); - - tmp = ( tmp >> 8 ) | 0x40000000; + unsigned int tmp = ((int) v); + tmp = ( tmp & 0x00FFFFFF ) | 0x40000000; *target_event = CondSwapToBus32((quadlet_t)tmp); +#endif buffer++; target_event += m_dimension; } |