From: <tr...@ff...> - 2007-12-26 21:59:33
|
Author: ppalmers Date: 2007-12-26 13:59:21 -0800 (Wed, 26 Dec 2007) New Revision: 777 Modified: trunk/libffado/SConstruct trunk/libffado/src/debugmodule/debugmodule.h trunk/libffado/src/devicemanager.cpp trunk/libffado/src/devicemanager.h trunk/libffado/src/libieee1394/IsoHandler.cpp trunk/libffado/src/libieee1394/ieee1394service.cpp trunk/libffado/src/libstreaming/amdtp/AmdtpReceiveStreamProcessor.cpp trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp Log: port freebob SSE optimized event encoding functions (untested) Modified: trunk/libffado/SConstruct =================================================================== --- trunk/libffado/SConstruct 2007-12-26 15:26:00 UTC (rev 776) +++ trunk/libffado/SConstruct 2007-12-26 21:59:21 UTC (rev 777) @@ -284,7 +284,8 @@ elif config[config_cpu] == "i686": opt_flags.append ("-march=i686") - if ((env['DIST_TARGET'] == 'i686') or (env['DIST_TARGET'] == 'x86_64')) and build_host_supports_sse: + if ((env['DIST_TARGET'] == 'i686') or (env['DIST_TARGET'] == 'x86_64')) \ + and build_host_supports_sse and env['ENABLE_OPTIMIZATIONS']: opt_flags.extend (["-msse", "-mfpmath=sse"]) env['USE_SSE'] = 1 Modified: trunk/libffado/src/debugmodule/debugmodule.h =================================================================== --- trunk/libffado/src/debugmodule/debugmodule.h 2007-12-26 15:26:00 UTC (rev 776) +++ trunk/libffado/src/debugmodule/debugmodule.h 2007-12-26 21:59:21 UTC (rev 777) @@ -52,7 +52,10 @@ #define MB_BUFFERSIZE DEBUG_MAX_MESSAGE_LENGTH -#define IMPLEMENT_BACKLOG +#ifdef DEBUG + #define IMPLEMENT_BACKLOG +#endif + #ifdef IMPLEMENT_BACKLOG // the backlog is a similar buffer as the message buffer #define BACKLOG_MB_BUFFERS (256) Modified: trunk/libffado/src/devicemanager.cpp =================================================================== --- trunk/libffado/src/devicemanager.cpp 2007-12-26 15:26:00 UTC (rev 776) +++ trunk/libffado/src/devicemanager.cpp 2007-12-26 21:59:21 UTC (rev 777) @@ -80,6 +80,7 @@ DeviceManager::DeviceManager() : Control::Container("devicemanager") + , m_processorManager( new Streaming::StreamProcessorManager() ) { addOption(Util::OptionContainer::Option("slaveMode",false)); addOption(Util::OptionContainer::Option("snoopMode",false)); @@ -87,6 +88,8 @@ DeviceManager::~DeviceManager() { + delete m_processorManager; + for ( FFADODeviceVectorIterator it = m_avDevices.begin(); it != m_avDevices.end(); ++it ) @@ -97,14 +100,15 @@ delete *it; } - for ( Ieee1394ServiceVectorIterator it = m_1394Services.begin(); - it != m_1394Services.end(); + for ( FunctorVectorIterator it = m_busreset_functors.begin(); + it != m_busreset_functors.end(); ++it ) { delete *it; } - for ( FunctorVectorIterator it = m_busreset_functors.begin(); - it != m_busreset_functors.end(); + + for ( Ieee1394ServiceVectorIterator it = m_1394Services.begin(); + it != m_1394Services.end(); ++it ) { delete *it; @@ -113,7 +117,7 @@ bool DeviceManager::setThreadParameters(bool rt, int priority) { - if (!m_processorManager.setThreadParameters(rt, priority)) { + if (!m_processorManager->setThreadParameters(rt, priority)) { debugError("Could not set processor manager thread parameters\n"); return false; } @@ -418,17 +422,17 @@ } debugOutput(DEBUG_LEVEL_VERBOSE, "Setting samplerate to %d for (%p)\n", - m_processorManager.getNominalRate(), device); + m_processorManager->getNominalRate(), device); // Set the device's sampling rate to that requested // FIXME: does this really belong here? If so we need to handle errors. - if (!device->setSamplingFrequency(m_processorManager.getNominalRate())) { + if (!device->setSamplingFrequency(m_processorManager->getNominalRate())) { debugOutput(DEBUG_LEVEL_VERBOSE, " => Retry setting samplerate to %d for (%p)\n", - m_processorManager.getNominalRate(), device); + m_processorManager->getNominalRate(), device); // try again: - if (!device->setSamplingFrequency(m_processorManager.getNominalRate())) { - debugFatal("Could not set sampling frequency to %d\n",m_processorManager.getNominalRate()); + if (!device->setSamplingFrequency(m_processorManager->getNominalRate())) { + debugFatal("Could not set sampling frequency to %d\n",m_processorManager->getNominalRate()); return false; } } @@ -437,7 +441,7 @@ } // set the sync source - if (!m_processorManager.setSyncSource(getSyncSource())) { + if (!m_processorManager->setSyncSource(getSyncSource())) { debugWarning("Could not set processorManager sync source (%p)\n", getSyncSource()); } @@ -447,7 +451,7 @@ bool DeviceManager::prepareStreaming() { - if (!m_processorManager.prepare()) { + if (!m_processorManager->prepare()) { debugFatal("Could not prepare streaming...\n"); return false; } @@ -499,7 +503,7 @@ } } - if(m_processorManager.start()) { + if(m_processorManager->start()) { return true; } else { stopStreaming(); @@ -516,7 +520,7 @@ DeviceManager::stopStreaming() { bool result = true; - m_processorManager.stop(); + m_processorManager->stop(); // create the connections for all devices // iterate over the found devices @@ -549,21 +553,21 @@ bool DeviceManager::waitForPeriod() { - if(m_processorManager.waitForPeriod()) { + if(m_processorManager->waitForPeriod()) { return true; } else { debugWarning("XRUN detected\n"); // do xrun recovery - m_processorManager.handleXrun(); + m_processorManager->handleXrun(); return false; } } bool DeviceManager::setStreamingParams(unsigned int period, unsigned int rate, unsigned int nb_buffers) { - m_processorManager.setPeriodSize(period); - m_processorManager.setNominalRate(rate); - m_processorManager.setNbBuffers(nb_buffers); + m_processorManager->setPeriodSize(period); + m_processorManager->setNominalRate(rate); + m_processorManager->setNbBuffers(nb_buffers); return true; } @@ -755,7 +759,7 @@ { setDebugLevel(l); Control::Element::setVerboseLevel(l); - m_processorManager.setVerboseLevel(l); + m_processorManager->setVerboseLevel(l); for ( FFADODeviceVectorIterator it = m_avDevices.begin(); it != m_avDevices.end(); ++it ) @@ -809,5 +813,5 @@ } void DeviceManager::showStreamingInfo() { - m_processorManager.dumpInfo(); + m_processorManager->dumpInfo(); } Modified: trunk/libffado/src/devicemanager.h =================================================================== --- trunk/libffado/src/devicemanager.h 2007-12-26 15:26:00 UTC (rev 776) +++ trunk/libffado/src/devicemanager.h 2007-12-26 21:59:21 UTC (rev 777) @@ -116,9 +116,9 @@ public: // FIXME: this should be better Streaming::StreamProcessorManager& getStreamProcessorManager() - {return m_processorManager;}; + {return *m_processorManager;}; private: - Streaming::StreamProcessorManager m_processorManager; + Streaming::StreamProcessorManager* m_processorManager; protected: std::vector<std::string> m_SpecStrings; Modified: trunk/libffado/src/libieee1394/IsoHandler.cpp =================================================================== --- trunk/libffado/src/libieee1394/IsoHandler.cpp 2007-12-26 15:26:00 UTC (rev 776) +++ trunk/libffado/src/libieee1394/IsoHandler.cpp 2007-12-26 21:59:21 UTC (rev 777) @@ -216,7 +216,7 @@ (this->getType()==eHT_Receive?'R':'X'), this, poll_exit-poll_enter, iter_exit-iter_enter); #else - // iterate itself blocks if nothing is available + // iterate blocks if no 1394 data is available // so poll'ing is not really necessary bool result = iterate(); usleep(125); Modified: trunk/libffado/src/libieee1394/ieee1394service.cpp =================================================================== --- trunk/libffado/src/libieee1394/ieee1394service.cpp 2007-12-26 15:26:00 UTC (rev 776) +++ trunk/libffado/src/libieee1394/ieee1394service.cpp 2007-12-26 21:59:21 UTC (rev 777) @@ -101,7 +101,6 @@ { delete m_pCTRHelper; delete m_pIsoManager; - delete m_pTimeSource; stopRHThread(); for ( arm_handler_vec_t::iterator it = m_armHandlers.begin(); it != m_armHandlers.end(); @@ -115,6 +114,7 @@ } } + delete m_pTimeSource; if ( m_handle ) { raw1394_destroy_handle( m_handle ); } @@ -351,8 +351,12 @@ uint64_t Ieee1394Service::getCurrentTimeAsUsecs() { - assert(m_pTimeSource); - return m_pTimeSource->getCurrentTimeAsUsecs(); + if(m_pTimeSource) { + return m_pTimeSource->getCurrentTimeAsUsecs(); + } else { + debugError("No timesource!\n"); + return 0; + } } bool Modified: trunk/libffado/src/libstreaming/amdtp/AmdtpReceiveStreamProcessor.cpp =================================================================== --- trunk/libffado/src/libstreaming/amdtp/AmdtpReceiveStreamProcessor.cpp 2007-12-26 15:26:00 UTC (rev 776) +++ trunk/libffado/src/libstreaming/amdtp/AmdtpReceiveStreamProcessor.cpp 2007-12-26 21:59:21 UTC (rev 777) @@ -20,6 +20,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ +#include "config.h" #include "AmdtpReceiveStreamProcessor.h" #include "AmdtpPort.h" @@ -249,6 +250,11 @@ return ok; } +#ifdef USE_SSE +typedef float v4sf __attribute__ ((vector_size (16))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef int v2si __attribute__ ((vector_size (8))); + int AmdtpReceiveStreamProcessor::decodeMBLAEventsToPort( AmdtpAudioPort *p, quadlet_t *data, @@ -259,6 +265,15 @@ target_event=(quadlet_t *)(data + p->getPosition()); + static const float multiplier = 1.0f / (float)(0x7FFFFF); + static const float sse_multiplier[4] __attribute__((aligned(16))) = { + 1.0f / (float)(0x7FFFFF), + 1.0f / (float)(0x7FFFFF), + 1.0f / (float)(0x7FFFFF), + 1.0f / (float)(0x7FFFFF) + }; + unsigned int tmp[4]; + switch(p->getDataType()) { default: case Port::E_Int24: @@ -278,6 +293,88 @@ break; case Port::E_Float: { + float *buffer=(float *)(p->getBufferAddress()); + + assert(nevents + offset <= p->getBufferSize()); + + buffer += offset; + j = 0; + if(nevents > 3) { + for(j = 0; j < nevents-3; j += 4) { + tmp[0] = ntohl(*target_event); + target_event += m_dimension; + tmp[1] = ntohl(*target_event); + target_event += m_dimension; + tmp[2] = ntohl(*target_event); + target_event += m_dimension; + tmp[3] = ntohl(*target_event); + target_event += m_dimension; + asm("pslld $8, %[in2]\n\t" // sign extend 24th bit + "pslld $8, %[in1]\n\t" + "psrad $8, %[in2]\n\t" + "psrad $8, %[in1]\n\t" + "cvtpi2ps %[in2], %%xmm0\n\t" + "movlhps %%xmm0, %%xmm0\n\t" + "cvtpi2ps %[in1], %%xmm0\n\t" + "mulps %[ssemult], %%xmm0\n\t" + "movups %%xmm0, %[floatbuff]" + : [floatbuff] "=m" (*(v4sf*)buffer) + : [in1] "y" (*(v2si*)tmp), + [in2] "y" (*(v2si*)(tmp+2)), + [ssemult] "x" (*(v4sf*)sse_multiplier) + : "xmm0"); + buffer += 4; + } + } + for(; j < nevents; ++j) { // decode max nsamples + unsigned int v = ntohl(*target_event) & 0x00FFFFFF; + // sign-extend highest bit of 24-bit int + int tmp = (int)(v << 8) / 256; + *buffer = tmp * multiplier; + + buffer++; + target_event += m_dimension; + } + asm volatile("emms"); + break; + } + break; + } + + return 0; +} + +#else + +int +AmdtpReceiveStreamProcessor::decodeMBLAEventsToPort( + AmdtpAudioPort *p, quadlet_t *data, + unsigned int offset, unsigned int nevents) +{ + unsigned int j=0; + quadlet_t *target_event; + + target_event=(quadlet_t *)(data + p->getPosition()); + + switch(p->getDataType()) { + default: + case Port::E_Int24: + { + quadlet_t *buffer=(quadlet_t *)(p->getBufferAddress()); + + assert(nevents + offset <= p->getBufferSize()); + + buffer+=offset; + + for(j = 0; j < nevents; j += 1) { // decode max nsamples + *(buffer)=(ntohl((*target_event) ) & 0x00FFFFFF); + buffer++; + target_event+=m_dimension; + } + } + break; + case Port::E_Float: + { const float multiplier = 1.0f / (float)(0x7FFFFF); float *buffer=(float *)(p->getBufferAddress()); @@ -302,4 +399,5 @@ return 0; } +#endif } // end of namespace Streaming Modified: trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp =================================================================== --- trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp 2007-12-26 15:26:00 UTC (rev 776) +++ trunk/libffado/src/libstreaming/amdtp/AmdtpTransmitStreamProcessor.cpp 2007-12-26 21:59:21 UTC (rev 777) @@ -21,6 +21,7 @@ * */ +#include "config.h" #include "AmdtpTransmitStreamProcessor.h" #include "AmdtpPort.h" #include "../StreamProcessorManager.h" @@ -98,7 +99,6 @@ // packets early if we want to. (not completely according to spec) const int max_cycles_to_transmit_early = 2; -try_block_of_frames: debugOutput ( DEBUG_LEVEL_ULTRA_VERBOSE, "Try for cycle %d\n", cycle ); // check whether the packet buffer has packets for us to send. // the base timestamp is the one of the next sample in the buffer @@ -590,11 +590,29 @@ return ok; } +#ifdef USE_SSE +typedef float v4sf __attribute__ ((vector_size (16))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef int v2si __attribute__ ((vector_size (8))); int AmdtpTransmitStreamProcessor::encodePortToMBLAEvents ( AmdtpAudioPort *p, quadlet_t *data, unsigned int offset, unsigned int nevents ) { + static const float sse_multiplier[4] __attribute__((aligned(16))) = { + (float)(0x7FFFFF00), + (float)(0x7FFFFF00), + (float)(0x7FFFFF00), + (float)(0x7FFFFF00) + }; + + static const int sse_mask[4] __attribute__((aligned(16))) = { + 0x40000000, 0x40000000, 0x40000000, 0x40000000 + }; + + unsigned int out[4]; + unsigned int j=0; + unsigned int read=0; quadlet_t *target_event; @@ -628,9 +646,96 @@ buffer+=offset; + j=0; + if(read>3) { + for (j = 0; j < read-3; j += 4) { + asm("movups %[floatbuff], %%xmm0\n\t" + "mulps %[ssemult], %%xmm0\n\t" + "cvttps2pi %%xmm0, %[out1]\n\t" + "movhlps %%xmm0, %%xmm0\n\t" + "psrld $8, %[out1]\n\t" + "cvttps2pi %%xmm0, %[out2]\n\t" + "por %[mmxmask], %[out1]\n\t" + "psrld $8, %[out2]\n\t" + "por %[mmxmask], %[out2]\n\t" + : [out1] "=&y" (*(v2si*)&out[0]), + [out2] "=&y" (*(v2si*)&out[2]) + : [floatbuff] "m" (*(v4sf*)buffer), + [ssemult] "x" (*(v4sf*)sse_multiplier), + [mmxmask] "y" (*(v2si*)sse_mask) + : "xmm0"); + buffer += 4; + *target_event = htonl(out[0]); + target_event += m_dimension; + *target_event = htonl(out[1]); + target_event += m_dimension; + *target_event = htonl(out[2]); + target_event += m_dimension; + *target_event = htonl(out[3]); + target_event += m_dimension; + } + } + for(; j < read; ++j) { + // don't care for overflow + float v = *buffer * multiplier; // v: -231 .. 231 + unsigned int tmp = (int)v; + *target_event = htonl((tmp >> 8) | 0x40000000); + + buffer++; + target_event += m_dimension; + } + + asm volatile("emms"); + break; + } + break; + } + + return 0; +} + +#else + +int AmdtpTransmitStreamProcessor::encodePortToMBLAEvents ( AmdtpAudioPort *p, quadlet_t *data, + unsigned int offset, unsigned int nevents ) +{ + unsigned int j=0; + + quadlet_t *target_event; + + target_event= ( quadlet_t * ) ( data + p->getPosition() ); + + switch ( p->getDataType() ) + { + default: + case Port::E_Int24: + { + quadlet_t *buffer= ( quadlet_t * ) ( p->getBufferAddress() ); + + assert ( nevents + offset <= p->getBufferSize() ); + + buffer+=offset; + for ( j = 0; j < nevents; j += 1 ) // decode max nsamples { + *target_event = htonl ( ( * ( buffer ) & 0x00FFFFFF ) | 0x40000000 ); + buffer++; + target_event += m_dimension; + } + } + break; + case Port::E_Float: + { + const float multiplier = ( float ) ( 0x7FFFFF00 ); + float *buffer= ( float * ) ( p->getBufferAddress() ); + assert ( nevents + offset <= p->getBufferSize() ); + + buffer+=offset; + + for ( j = 0; j < nevents; j += 1 ) // decode max nsamples + { + // don't care for overflow float v = *buffer * multiplier; // v: -231 .. 231 unsigned int tmp = ( ( int ) v ); @@ -645,6 +750,8 @@ return 0; } +#endif + int AmdtpTransmitStreamProcessor::encodeSilencePortToMBLAEvents ( AmdtpAudioPort *p, quadlet_t *data, unsigned int offset, unsigned int nevents ) { |