From: <jpg...@us...> - 2008-07-01 15:35:17
|
Revision: 1436 http://iaxclient.svn.sourceforge.net/iaxclient/?rev=1436&view=rev Author: jpgrayson Date: 2008-07-01 08:35:07 -0700 (Tue, 01 Jul 2008) Log Message: ----------- Fix AAGC. - Previously it was very common for the AAGC code to drive the input mixer level (input volume) very low and potentially take a long time to recover. - The new AAGC heuristic takes into account the amount of time needed for the speex-provided "loudness" value to recover after a loud event. - Also fixup input_postprocess() so that speex_preprocess_run() is called with the audio_lock held. Modified Paths: -------------- trunk/lib/audio_encode.c Modified: trunk/lib/audio_encode.c =================================================================== --- trunk/lib/audio_encode.c 2008-07-01 15:21:20 UTC (rev 1435) +++ trunk/lib/audio_encode.c 2008-07-01 15:35:07 UTC (rev 1436) @@ -41,7 +41,7 @@ #include "codec_ilbc.h" #endif -float iaxci_silence_threshold = AUDIO_ENCODE_SILENCE_DB; +static float iaxci_silence_threshold = AUDIO_ENCODE_SILENCE_DB; static float input_level = 0.0f; static float output_level = 0.0f; @@ -51,7 +51,11 @@ static SpeexPreprocessState *st = NULL; static int speex_state_size = 0; static int speex_state_rate = 0; -int iaxci_filters = IAXC_FILTER_AGC|IAXC_FILTER_DENOISE|IAXC_FILTER_AAGC|IAXC_FILTER_CN; +static int iaxci_filters = + IAXC_FILTER_AGC | + IAXC_FILTER_DENOISE | + IAXC_FILTER_AAGC | + IAXC_FILTER_CN; static MUTEX audio_lock; @@ -72,16 +76,6 @@ static char outRingBuf[EC_RING_SIZE]; #endif -/* AAGC threshold */ -#define AAGC_VERY_HOT 16 -#define AAGC_HOT 8 -#define AAGC_COLD 4 - -/* AAGC increments */ -#define AAGC_RISE_SLOW 0.10f -#define AAGC_DROP_SLOW 0.15f -#define AAGC_DROP_FAST 0.20f - /* use to measure time since last audio was processed */ static struct timeval timeLastInput ; static struct timeval timeLastOutput ; @@ -178,11 +172,17 @@ static int input_postprocess(short * audio, int len, int rate) { - static float lowest_volume = 1.0f; - float volume; - 
int silent = 0; + static int aagc_frame_count = 0; + static int aagc_periods_to_skip = 0; + const int using_vad = iaxci_silence_threshold > 0.0f; + const int aagc_period = rate / len; /* 1 second */ + + int speaking = 1; + int loudness = 0; + MUTEXLOCK(&audio_lock); + if ( !st || speex_state_size != len || speex_state_rate != rate ) { if (st) @@ -199,75 +199,114 @@ i = ECHO_SUPPRESS_ACTIVE; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE, &i); } -#endif +#endif speex_state_size = len; speex_state_rate = rate; set_speex_filters(); } + + /* go through the motions only if we need at least one of the + * preprocessor filters */ + if ( using_vad || (iaxci_filters & + (IAXC_FILTER_DENOISE | + IAXC_FILTER_AGC | + IAXC_FILTER_DEREVERB | + IAXC_FILTER_ECHO)) ) + { + speaking = speex_preprocess_run(st, audio); + speex_preprocess_ctl(st, SPEEX_PREPROCESS_GET_AGC_LOUDNESS, + &loudness); + } + MUTEXUNLOCK(&audio_lock); - calculate_level(audio, len, &input_level); + /* If we are using the VAD test and if speex indicates non-speaking, + * ignore the computed input level and indicate to the user that the + * input level was zero. + */ + if ( using_vad && !speaking ) + input_level = 0.0f; + else + calculate_level(audio, len, &input_level); - /* go through the motions only if we need at least one of the preprocessor filters */ - if ( (iaxci_filters & (IAXC_FILTER_DENOISE | IAXC_FILTER_AGC | IAXC_FILTER_DEREVERB | IAXC_FILTER_ECHO)) || - iaxci_silence_threshold > 0.0f ) - silent = !speex_preprocess(st, (spx_int16_t *)audio, NULL); - - /* Analog AGC: Bring speex AGC gain out to mixer, with lots of hysteresis */ - /* use a higher continuation threshold for AAGC than for VAD itself */ - if ( !silent && - iaxci_silence_threshold != 0.0f && - (iaxci_filters & IAXC_FILTER_AGC) && - (iaxci_filters & IAXC_FILTER_AAGC) - ) + /* Analog Automatic Gain Control, AAGC. 
*/ + if ( speaking && iaxci_silence_threshold != 0.0f && + (iaxci_filters & IAXC_FILTER_AGC) && + (iaxci_filters & IAXC_FILTER_AAGC) && + ++aagc_frame_count % aagc_period == 0 && + !aagc_periods_to_skip-- ) { - static int i = 0; + /* This heuristic uses the loudness value from the speex + * preprocessor to determine a new mixer level. The loudness + * ranges from 0 up to over 80. When mixer level, speex AGC, + * and the actual speaker's level are in equilibrium, the + * loudness tends to be from 4 to 16. When the loudness goes + * above this comfortable range, there is a risk of the input + * signal being clipped. AAGC's primary purpose is to avoid + * clipping. + * + * After a loud event (think cough), the loudness level will + * spike and then decay over time (assuming the speaker + * is speaking at a relatively constant level). To avoid + * over-adjusting, we skip some number of aagc sampling periods + * before making any more adjustments. This gives the loudness + * value time to normalize after one-time spikes in the input + * level. 
+ */ - i++; + /* The mixer level is a percentage ranging from 0.00 to 1.00 */ + const float mixer_level = iaxc_input_level_get(); + float new_mixer_level = mixer_level; - if ( (i & 0x3f) == 0 ) + if ( loudness > 40 ) { - int loudness; - speex_preprocess_ctl(st, SPEEX_PREPROCESS_GET_AGC_LOUDNESS, &loudness); - if ( loudness > AAGC_HOT || loudness < AAGC_COLD ) - { - const float level = iaxc_input_level_get(); + new_mixer_level -= 0.20f; + aagc_periods_to_skip = 8; + } + else if ( loudness > 25 ) + { + new_mixer_level -= 0.15f; + aagc_periods_to_skip = 4; + } + else if ( loudness > 15 ) + { + new_mixer_level -= 0.10f; + aagc_periods_to_skip = 2; + } + else if ( loudness > 12 ) + { + new_mixer_level -= 0.05f; + aagc_periods_to_skip = 4; + } + else if ( loudness < 2 ) + { + new_mixer_level += 0.15f; + aagc_periods_to_skip = 4; + } + else if ( loudness < 4 ) + { + new_mixer_level += 0.10f; + aagc_periods_to_skip = 4; + } + else + { + aagc_periods_to_skip = 0; + } - if ( loudness > AAGC_VERY_HOT && level > 0.5f ) - { - /* lower quickly if we're really too hot */ - iaxc_input_level_set(level - AAGC_DROP_FAST); - } - else if ( loudness > AAGC_HOT && level >= 0.15f ) - { - /* lower less quickly if we're a bit too hot */ - iaxc_input_level_set(level - AAGC_DROP_SLOW); - } - else if ( loudness < AAGC_COLD && level <= 0.9f ) - { - /* raise slowly if we're cold */ - iaxc_input_level_set(level + AAGC_RISE_SLOW); - } - } - } + /* Normalize the proposed new mixer level */ + if ( new_mixer_level < 0.05f ) + new_mixer_level = 0.05f; + else if ( new_mixer_level > 1.00f ) + new_mixer_level = 1.00f; + + if ( new_mixer_level != mixer_level ) + iaxc_input_level_set(new_mixer_level); } - /* This is ugly. Basically just don't get volume level if speex thought - * we were silent. 
Just set it to 0 in that case */ - if ( iaxci_silence_threshold > 0.0f && silent ) - input_level = 0.0f; - do_level_callback(); - volume = vol_to_db(input_level); - - if ( volume < lowest_volume ) - lowest_volume = volume; - - if ( iaxci_silence_threshold > 0.0f ) - return silent; - else - return volume < iaxci_silence_threshold; + return using_vad ? !speaking : + vol_to_db(input_level) < iaxci_silence_threshold; } static int output_postprocess(const short * audio, int len) @@ -517,6 +556,36 @@ rb_ReadRingBuffer(&ecOutRing, delayedBuf, SAMPLES_PER_FRAME * 2); + /* TODO: speex_echo_cancellation() and speex_preprocess_run() operate + * on the same state and thus must be serialized. Because the audio + * lock is not held, this call has the potential to mess-up the + * preprocessor (which is serialized by the audio lock). I believe the + * net effect of this problem is to break residual echo cancellation + * when these calls overlap. Unfortunately, just serializing this + * speex_echo_cancellation() call with the audio lock may not be + * sufficient since the next call to speex_preprocess_run() is counting + * on operating on this cancelledBuffer -- since we buffer the input + * audio (cancelledBuffer), we are actually explicitly decoupling the + * calls to speex_echo_cancellation() and speex_preprocess_run(). Oops. 
+ * + * In other words, it should go like this: + * + * speex_echo_cancellation(A) + * speex_preprocess_run(A) + * speex_echo_cancellation(B) + * speex_preprocess_run(B) + * speex_echo_cancellation(C) + * speex_preprocess_run(C) + * + * but it actually may be going like this: + * + * speex_echo_cancellation(A) + * speex_echo_cancellation(B) + * speex_preprocess_run(A) -- bad, residual echo from B is applied to A + * speex_echo_cancellation(C) + * speex_preprocess_run(B) -- bad, residual echo from C is applied to B + * speex_preprocess_run(C) + */ speex_echo_cancellation(ec, inputBuffer, delayedBuf, cancelledBuffer); memcpy(inputBuffer, cancelledBuffer, samples * sizeof(short)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |