Thread: [Jahshaka-cvs] openlibraries/src/openmedialib/plugins/directshow directshow_plugin.cpp, 1.15, 1.16
From: jpn <non...@us...> - 2007-10-17 16:13:55
Update of /cvsroot/openlibraries/openlibraries/src/openmedialib/plugins/directshow
In directory sc8-pr-cvs3.sourceforge.net:/tmp/cvs-serv25980

Modified Files:
	directshow_plugin.cpp directshow_plugin.opl
Log Message:
Update due to being pulled off doing this for the time being. Currently trying
to figure out how best to accurately map audio MediaSample data to the
audio_type data required by the openlibraries. It appears the GetMediaTime
(on IMediaSample) and ConvertTimeFormat (on the IMediaSeeking interface)
functions do not return anything useful - I may just be doing something wrong.
The intention was to refactor in stages once the above had been solved, to
make the whole thing more efficient. This may include better seek handling,
changing the AV containers to deques (perhaps), and better handling of these
containers - currently they are flushed on every seek!

Index: directshow_plugin.opl
===================================================================
RCS file: /cvsroot/openlibraries/openlibraries/src/openmedialib/plugins/directshow/directshow_plugin.opl,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- directshow_plugin.opl	29 Jun 2007 11:42:05 -0000	1.1
+++ directshow_plugin.opl	17 Oct 2007 16:13:51 -0000	1.2
@@ -2,7 +2,9 @@
 <openlibraries version="1.0">
 	<!-- plugins go here -->
 	<openmedialib name="oml" version="0.4.1">
-		<plugin name="OpenLibraries directshow plugin" type="input" in_filter="*.wmv *.mpeg *.asf *.avi *.mov" extension='".*\.wmv", ".*\.mpeg", ".*\.asf", ".*\.avi", ".*\.mov"' merit="10" filename='"openmedialib_directshow-vc80-d-0_4_1.dll", "openmedialib_directshow-vc80-r-0_4_1.dll"'/>
+		<plugin name="OpenLibraries directshow plugin" type="input" in_filter="*.wmv *.mpeg *.asf *.avi" extension='".*\.wmv", ".*\.mpeg", ".*\.asf", ".*\.avi"' merit="10" filename='"openmedialib_directshow-vc80-d-0_4_1.dll", "openmedialib_directshow-vc80-r-0_4_1.dll"'/>
+		<!-- <plugin name="OpenLibraries directshow plugin" type="output" extension='"dshow:"' merit="0" filename='"openmedialib_directshow-vc80-d-0_4_1.dll", "openmedialib_directshow-vc80-r-0_4_1.dll"'/>
+		-->
 	</openmedialib>
 </openlibraries>

Index: directshow_plugin.cpp
===================================================================
RCS file: /cvsroot/openlibraries/openlibraries/src/openmedialib/plugins/directshow/directshow_plugin.cpp,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -d -r1.15 -r1.16
--- directshow_plugin.cpp	11 Oct 2007 15:24:57 -0000	1.15
+++ directshow_plugin.cpp	17 Oct 2007 16:13:51 -0000	1.16
@@ -485,7 +485,7 @@
 			if(mt.formattype != FORMAT_VideoInfo)
 				continue;
 
-			OutputDebugStringA((std::string("FOURCC: ") + fourcc_descriptor((*reinterpret_cast<VIDEOINFOHEADER*>(mt.pbFormat)).bmiHeader.biCompression).c_str() + std::string("\n")).c_str());
+			OutputDebugStringA((std::string("Native FOURCC format: ") + fourcc_descriptor((*reinterpret_cast<VIDEOINFOHEADER*>(mt.pbFormat)).bmiHeader.biCompression).c_str() + std::string("\n")).c_str());
 		}
 	}
 }
@@ -504,7 +504,7 @@
 	{
 		try
 		{
-			OutputDebugStringA((std::string("NUM FILTERS: ") + boost::lexical_cast<std::string>(num_got) + std::string("\n")).c_str());
+			OutputDebugStringA((std::string("Filters in filter graph: ") + boost::lexical_cast<std::string>(num_got) + std::string("\n")).c_str());
 		}
 		catch(const boost::bad_lexical_cast&)
 		{
@@ -619,7 +619,7 @@
 #if defined(_DEBUG)
 	try
 	{
-		std::string msg("Received video frame: SampleTime: ");
+		std::string msg("Received video callback: SampleTime: ");
 		msg += boost::lexical_cast<std::string>(SampleTime);
 		msg += " (frame: ";
 		msg += boost::lexical_cast<std::string>(static_cast<long long>((SampleTime * 10000000)/reinterpret_cast<VIDEOINFOHEADER*>(input_->video_mediatype_.pbFormat)->AvgTimePerFrame));
@@ -630,7 +630,7 @@
 	}
 	catch(const boost::bad_lexical_cast&)
 	{
-		OutputDebugStringA("Received video frame\n");
+		OutputDebugStringA("Received video callback\n");
 	}
 #endif
 	input_->video_container_[SampleTime] = pSample;
@@ -654,7 +654,7 @@
 #if defined(_DEBUG)
 	try
 	{
-		std::string msg("Received video frame: SampleTime: ");
+		std::string msg("Received video callback: SampleTime: ");
 		msg += boost::lexical_cast<std::string>(SampleTime);
 		msg += " (frame: ";
 		msg += boost::lexical_cast<std::string>(static_cast<long long>((SampleTime * 10000000)/reinterpret_cast<VIDEOINFOHEADER*>(input_->video_mediatype_.pbFormat)->AvgTimePerFrame));
@@ -711,20 +711,55 @@
 		return S_FALSE;
 	CAutoLock mutex(&input_->audio_container_cs_);
 #if defined(_DEBUG)
+	HRESULT hr = E_FAIL;
+//#define DEBUG_GET_MEDIA_TIME
+#ifdef DEBUG_GET_MEDIA_TIME
+	long long startTime = 0,
+		endTime = 0;
+	hr = pSample->GetMediaTime(&startTime, &endTime);
+	if(hr == VFW_E_MEDIA_TIME_NOT_SET)
+	{
+		startTime = -1;
+		endTime = -1;
+	}
+#endif
+	long long frame = static_cast<long long>((SampleTime * 10000000)/reinterpret_cast<VIDEOINFOHEADER*>(input_->video_mediatype_.pbFormat)->AvgTimePerFrame);
+//#define DEBUG_CONVERT_TIME_FORMAT
+#ifdef DEBUG_CONVERT_TIME_FORMAT
+	long long targetTime = 0;
+	//hr = input_->pMediaSeeking_->ConvertTimeFormat(&targetTime, &TIME_FORMAT_MEDIA_TIME, frame, NULL/*&TIME_FORMAT_FRAME*/);
+	//hr = input_->pMediaSeeking_->ConvertTimeFormat(&targetTime, &TIME_FORMAT_SAMPLE, long long(SampleTime*10000000), &TIME_FORMAT_MEDIA_TIME);
+	//hr = input_->pMediaSeeking_->ConvertTimeFormat(&targetTime, &TIME_FORMAT_SAMPLE, long long(SampleTime*10000000), &TIME_FORMAT_MEDIA_TIME);
+	hr = input_->pMediaSeeking_->ConvertTimeFormat(&targetTime, &TIME_FORMAT_SAMPLE, SampleTime, NULL/*&TIME_FORMAT_FRAME*/);
+#endif
 	try
 	{
-		std::string msg("Received audio frame: SampleTime: ");
+		std::string msg("Received audio callback: SampleTime: ");
 		msg += boost::lexical_cast<std::string>(SampleTime);
 		msg += " (frame: ";
-		msg += boost::lexical_cast<std::string>(static_cast<long long>((SampleTime * 10000000)/reinterpret_cast<VIDEOINFOHEADER*>(input_->video_mediatype_.pbFormat)->AvgTimePerFrame));
-		msg += ") pSample: 0x";
+		msg += boost::lexical_cast<std::string>(frame);
+		msg += ") ";
+#ifdef DEBUG_GET_MEDIA_TIME
+		msg += "startTime: ";
+		msg += boost::lexical_cast<std::string>(startTime);
+		msg += "; endTime: ";
+		msg += boost::lexical_cast<std::string>(endTime);
+		msg += "; ";
+#endif
+#ifdef DEBUG_CONVERT_TIME_FORMAT
+		msg += "targetTime: ";
+//		msg += boost::lexical_cast<std::string>(double(targetTime)/10000000);
+		msg += boost::lexical_cast<std::string>(targetTime);
+		msg += "; ";
+#endif
+		msg += "pSample: 0x";
 		msg += boost::lexical_cast<std::string>(pSample);
 		msg += "\n";
 		OutputDebugStringA(msg.c_str());
 	}
 	catch(const boost::bad_lexical_cast&)
 	{
-		OutputDebugStringA("Received audio frame\n");
+		OutputDebugStringA("Received audio callback\n");
 	}
 #endif
 	input_->audio_container_[SampleTime] = pSample;
@@ -748,7 +783,7 @@
 #if defined(_DEBUG)
 	try
 	{
-		std::string msg("Received audio frame: SampleTime: ");
+		std::string msg("Received audio callback: SampleTime: ");
 		msg += boost::lexical_cast<std::string>(SampleTime);
 		msg += " (frame: ";
 		msg += boost::lexical_cast<std::string>(static_cast<long long>((SampleTime * 10000000)/reinterpret_cast<VIDEOINFOHEADER*>(input_->video_mediatype_.pbFormat)->AvgTimePerFrame));
@@ -839,6 +874,23 @@
 	{
 		hr = pMediaSeeking_->SetTimeFormat(&TIME_FORMAT_FRAME);
 	}
+#if defined(_DEBUG)
+	if(SUCCEEDED(hr))
+	{
+		if(pMediaSeeking_->IsUsingTimeFormat(&TIME_FORMAT_NONE) == S_OK)
+			OutputDebugStringA("TimeFormat: No format\n");
+		else if(pMediaSeeking_->IsUsingTimeFormat(&TIME_FORMAT_FRAME) == S_OK)
+			OutputDebugStringA("TimeFormat: Video frames\n");
+		else if(pMediaSeeking_->IsUsingTimeFormat(&TIME_FORMAT_SAMPLE) == S_OK)
+			OutputDebugStringA("TimeFormat: Samples in the stream\n");
+		else if(pMediaSeeking_->IsUsingTimeFormat(&TIME_FORMAT_FIELD) == S_OK)
+			OutputDebugStringA("TimeFormat: Interlaced video fields\n");
+		else if(pMediaSeeking_->IsUsingTimeFormat(&TIME_FORMAT_BYTE) == S_OK)
+			OutputDebugStringA("TimeFormat: Byte offset within the stream\n");
+		else if(pMediaSeeking_->IsUsingTimeFormat(&TIME_FORMAT_MEDIA_TIME) == S_OK)
+			OutputDebugStringA("TimeFormat: Reference time (100-nanosecond units)\n");
+	}
+#endif
 	if(SUCCEEDED(hr))
 	{
 		LONGLONG duration;
@@ -1343,6 +1395,24 @@
 	}
 	{ // Clear the audio container
 		CAutoLock audio_mutex(&audio_container_cs_);
+#if 0
+		WAVEFORMATEX* wfex = reinterpret_cast<WAVEFORMATEX*>(audio_mediatype_.pbFormat);
+		for(MediaSampleContainer::const_iterator it = audio_container_.begin();
+			it != audio_container_.end();
+			++it)
+		{
+			long bufferSize = (*it).second->GetSize();
+			long samplesInBuffer = bufferSize / (2 * wfex->nChannels);
+
+			try
+			{
+				OutputDebugStringA((std::string("Audio sample time: ") + boost::lexical_cast<std::string>((*it).first) + std::string("; bufferSize: ") + boost::lexical_cast<std::string>(bufferSize) + std::string("; datasize: ") + boost::lexical_cast<std::string>((*it).second->GetActualDataLength()) + std::string("\n")).c_str());
+			}
+			catch(const boost::bad_lexical_cast&)
+			{
+			}
+		}
+#endif
 		audio_container_.clear();
 		if(hAudioReceivedEvent_)
 			ResetEvent(hAudioReceivedEvent_);
@@ -1399,31 +1469,34 @@
 	int currentPosition = get_position();
 
+#if defined(_DEBUG)
+	try
+	{
+		std::string msg("Fetching frame: ");
+		msg += boost::lexical_cast<std::string>(currentPosition);
+		msg += " [video container size=";
+		msg += boost::lexical_cast<std::string>(video_container_.size());
+		msg += " audio container size=";
+		msg += boost::lexical_cast<std::string>(audio_container_.size());
+		msg += "]\n";
+		OutputDebugStringA(msg.c_str());
+	}
+	catch(const boost::bad_lexical_cast&)
+	{
+		OutputDebugStringA("Fetch!\n");
+	}
+#endif
+
 	acquire_values( );
 
 	frame_type_ptr frame = frame_type_ptr(new frame_type( ));
 	frame->set_position( currentPosition );
 
 	int process_flags = get_process_flags( );
 
+	//*
 	if ( (process_flags & process_image) && (get_video_streams()) )
 	{
-#if defined(_DEBUG)
-		try
-		{
-			std::string msg("Fetching frame: ");
-			msg += boost::lexical_cast<std::string>(currentPosition);
-			msg += " [video container size=";
-			msg += boost::lexical_cast<std::string>(video_container_.size());
-			msg += "]\n";
-			OutputDebugStringA(msg.c_str());
-		}
-		catch(const boost::bad_lexical_cast&)
-		{
-			OutputDebugStringA("Fetch!\n");
-		}
-#endif
-
 #if 1
 		bool foundMediaSample = false;
@@ -1478,16 +1551,9 @@
 #endif
 		double frameDurationInSeconds = double(vih->AvgTimePerFrame) / 10000000;
-		try
-		{
-			OutputDebugStringA((std::string("pts: ") + boost::lexical_cast<std::string>(currentPosition * frameDurationInSeconds) + std::string("\n")).c_str());
-		}
-		catch(const boost::bad_lexical_cast&)
-		{
-		}
 		frame->set_duration( frameDurationInSeconds );
 		frame->set_sar( vih->bmiHeader.biXPelsPerMeter, vih->bmiHeader.biYPelsPerMeter );
-		frame->set_fps( 25, 1 );
+		frame->set_fps( 1/frameDurationInSeconds, 1 );
 		frame->set_pts( (*video_container_.begin()).first );
 
 		format_converter_ptr converter = get_format_converter(vih->bmiHeader.biCompression);
@@ -1521,12 +1587,17 @@
 			image->set_pts( frame->get_pts() );
 			image->set_position( currentPosition );
 			image->set_writable( false );
-			image->set_flipped(false);
+			image->set_flipped(true);
+
+			// This should be done in the player itself as last step before pushing to store -
+			// this will save any unnecessary processing, in case frames aren't actually used/skipped
+			image = oil::il::conform( image, oil::il::flipped | oil::il::cropped | oil::il::flopped );
 
 			// Assign the image to the frame
 			frame->set_image(image);
 		}
 	else
+	//*/
 	{
 		frame->set_duration( 1.0 / 25 );
 		frame->set_sar( 1, 1 );
@@ -1537,31 +1608,100 @@
 	if( (process_flags & process_audio) && (get_audio_streams()) )
 	{
+		//*
+		switch(WaitForSingleObject(hAudioReceivedEvent_, 400))
+		{
+		case WAIT_TIMEOUT:
+			OutputDebugStringA("timed out waiting for audio\n");
+			return frame_type_ptr();
+		case WAIT_OBJECT_0:
+			{
+				CAutoLock mutex(&audio_container_cs_);
+				if(audio_container_.empty())
+					return frame_type_ptr();
+			}
+			break;
+		}
+		//*/
+
 		WAVEFORMATEX* wfex = reinterpret_cast<WAVEFORMATEX*>(audio_mediatype_.pbFormat);
 		if(wfex->wBitsPerSample == 16)
 		{
-			CAutoLock mutex(&audio_container_cs_);
-#ifdef BUFFER_MEDIA_SAMPLES
-			typedef audio< unsigned char, pcm16 > pcm16_audio_type;
-			audio_type_ptr a = audio_type_ptr( new audio_type( pcm16_audio_type( wfex->nSamplesPerSec, wfex->nChannels, audio_container_[0].BufferLen/(2 * wfex->nChannels) ) ) );
-			a->set_position( currentPosition );
-			memcpy( a->data( ), audio_container_[0].pBuffer, a->size( ) );
-#else
+			int fps_numerator = 0,
+				fps_denominator = 0;
+			frame->get_fps(fps_numerator, fps_denominator);
+
+			int numSamplesForFrame = audio_samples_for_frame( currentPosition, wfex->nSamplesPerSec, fps_numerator, fps_denominator );
+			long long numSamplesToFrame = audio_samples_to_frame( currentPosition, wfex->nSamplesPerSec, fps_numerator, fps_denominator );
+
 			typedef audio< unsigned char, pcm16 > pcm16_audio_type;
-			audio_type_ptr a = audio_type_ptr( new audio_type( pcm16_audio_type( wfex->nSamplesPerSec, wfex->nChannels, (*audio_container_.begin()).second->GetActualDataLength()/(2 * wfex->nChannels) ) ) );
+			audio_type_ptr a = audio_type_ptr( new audio_type( pcm16_audio_type( wfex->nSamplesPerSec, wfex->nChannels, numSamplesForFrame ) ) );
 			a->set_position( currentPosition );
-			unsigned char* buffer;
-			HRESULT hr = (*audio_container_.begin()).second->GetPointer(&buffer);
-			if(FAILED(hr))
-				return frame_type_ptr();
-			memcpy( a->data( ), buffer, a->size( ) );
+
+			unsigned char* pAudioBuffer = a->data();
+			int samplesLeftToCopy = numSamplesForFrame;
+			bool first = true;
+			CAutoLock mutex(&audio_container_cs_);
+			for(MediaSampleContainer::const_iterator it = audio_container_.begin();
+				it != audio_container_.end();
+				++it)
+			{
+				long bufferSize = (*it).second->GetSize();
+				long samplesInBuffer = bufferSize / (2 * wfex->nChannels);
+				unsigned long dataSize = (*it).second->GetActualDataLength();
+
+				OutputDebugStringA((std::string("Audio sample time: ") + boost::lexical_cast<std::string>((*it).first) + std::string("; bufferSize: ") + boost::lexical_cast<std::string>(bufferSize) + std::string("; datasize: ") + boost::lexical_cast<std::string>((*it).second->GetActualDataLength()) + std::string("\n")).c_str());
+
+				int modulus = 0;
+				int remainder = 0;
+				if(first)
+				{
+					modulus = (!samplesInBuffer) ? 0 : numSamplesToFrame / samplesInBuffer;
+					remainder = (!samplesInBuffer) ? 0 : numSamplesToFrame % samplesInBuffer;
+				}
+
+				unsigned char* pSampleBuffer = NULL;
+				HRESULT hr = (*it).second->GetPointer(&pSampleBuffer);
+				if(FAILED(hr))
+					return frame_type_ptr();
+				pSampleBuffer += (2 * remainder * wfex->nChannels);
+
+				long remainingSamples = (samplesInBuffer - remainder);
+				if(remainingSamples >= samplesLeftToCopy)
+				{
+					memcpy( pAudioBuffer,
+						pSampleBuffer,
+						samplesLeftToCopy * wfex->nChannels * 2);
+					break;
+				}
+				else
+				{
+					memcpy( pAudioBuffer,
						pSampleBuffer,
						remainingSamples * wfex->nChannels * 2);
+				}
+
+				pAudioBuffer += remainingSamples * 2 * wfex->nChannels;
+				samplesLeftToCopy -= remainingSamples;
+
+/*
+			{
+				CAutoLock mutex(&audio_container_cs_);
+#if defined(BUFFER_MEDIA_SAMPLES)
+				memcpy( a->data( ), audio_container_[0].pBuffer, a->size( ) );
+#else
+				unsigned char* buffer;
+				HRESULT hr = (*it).second->GetPointer(&buffer);
+				if(FAILED(hr))
+					return frame_type_ptr();
+				memcpy( a->data( ), buffer, a->size( ) );
 #endif
-			frame->set_audio(a);
+			}
+*/
+			}
 
-			// need to cache media samples and buffer audio so that the number of audio sample per audio_type object is correct for the position
-			//int samples = audio_samples_for_frame( index, frequency, fps_num_, fps_den_ );
+			frame->set_audio(a);
 		}
 	}
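
A note on the GetMediaTime / ConvertTimeFormat puzzle described in the log
message - the following is a sketch, not part of the commit. MSDN documents a
NULL format pointer to IMediaSeeking::ConvertTimeFormat as meaning "the time
format currently in use"; since the code calls SetTimeFormat(&TIME_FORMAT_FRAME)
earlier, the debug call above passes SampleTime (a double, in seconds) where a
frame count is expected. Being explicit about both formats, and converting the
callback's seconds into 100-nanosecond reference time first, may be the missing
step (pMediaSeeking_ and SampleTime as in the code above):

	// Sketch only - assumes pMediaSeeking_ is the graph's IMediaSeeking and
	// SampleTime is the double (seconds) handed to the sample grabber callback.
	LONGLONG refTime = static_cast<LONGLONG>(SampleTime * 10000000); // 100-ns units
	LONGLONG frameNumber = 0;
	HRESULT hr = pMediaSeeking_->ConvertTimeFormat(
		&frameNumber, &TIME_FORMAT_FRAME,        // target format: frames
		refTime,      &TIME_FORMAT_MEDIA_TIME ); // source format: reference time

Even then the call can legitimately fail with E_NOTIMPL when no filter in the
graph supports TIME_FORMAT_FRAME for the loaded file type. Similarly,
IMediaSample::GetMediaTime only returns data when the upstream filter stamps
media times on its samples - many source filters don't, in which case
VFW_E_MEDIA_TIME_NOT_SET is the expected result rather than a bug in the
calling code.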
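
One aside on the frame->set_fps( 1/frameDurationInSeconds, 1 ) change: with an
integer numerator this truncates non-integer rates (NTSC's 29.97 fps becomes
29/1). AvgTimePerFrame is already a duration in 100-nanosecond units, so the
exact rational is available without going through floating point - a possible
alternative, assuming set_fps accepts an unreduced numerator/denominator pair:

	// fps = 10000000 / AvgTimePerFrame, kept as a rational:
	// AvgTimePerFrame == 400000 -> set_fps( 10000000, 400000 )  (25/1)
	// AvgTimePerFrame == 333667 -> set_fps( 10000000, 333667 )  (~29.97)
	frame->set_fps( 10000000, static_cast<int>( vih->AvgTimePerFrame ) );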
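
For anyone following the new audio path in fetch: the frame-to-sample mapping
is plain rational arithmetic. A minimal sketch of what the two openmedialib
helpers used above compute (signatures assumed here, not copied from the
library):

	// Total audio samples elapsed before this frame starts. Integer maths
	// keeps frame boundaries exact, so rounding never drifts over time.
	long long audio_samples_to_frame( int frame, int frequency, int fps_num, int fps_den )
	{
		return ( static_cast<long long>( frame ) * frequency * fps_den ) / fps_num;
	}

	// Samples owned by one frame = the difference between two boundaries.
	// e.g. 48000 Hz at 25/1 fps -> exactly 1920 samples for every frame;
	// 44100 Hz at 30000/1001 fps -> an alternating 1471/1472 pattern.
	int audio_samples_for_frame( int frame, int frequency, int fps_num, int fps_den )
	{
		return static_cast<int>( audio_samples_to_frame( frame + 1, frequency, fps_num, fps_den )
			- audio_samples_to_frame( frame, frequency, fps_num, fps_den ) );
	}

The byte count copied per frame then follows as samples * nChannels * 2 for
16-bit PCM, which is what the memcpy sizes in the loop above compute.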