CMU Sphinx / Forums / Help: How to stop listening till the espeak ends speaking?(Please help!)

I am trying to write code that uses pocketsphinx to recognize what the user says and then passes the result to espeak, so that espeak says what pocketsphinx recognized!

I wrote this code:

#include <stdio.h>
#include <string.h>
#include <assert.h>


#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>

#include "pocketsphinx.h"


#include <malloc.h>
#include <espeak/speak_lib.h>
#include <string>
using namespace std;

/* Decoder handle: assigned in main() from ps_init() and used by recognize_from_microphone(). */
static ps_decoder_t *ps;
/* Decoder configuration: assigned in main() from cmd_ln_init(). */
static cmd_ln_t *config;
/* Not referenced by any function in this listing. */
static FILE *rawfd;

/* Never assigned in this listing, so espeak_Synth() receives its zero-initialized value. */
espeak_POSITION_TYPE position_type;
/* Set to AUDIO_OUTPUT_PLAYBACK in initFuncs() just before espeak_Initialize(). */
espeak_AUDIO_OUTPUT output;
/* Path argument handed to espeak_Initialize(); left NULL. */
char *path=NULL;
/* Buffer-length and options arguments handed to espeak_Initialize(). */
int Buflength = 1000, Options=0;
/* Forwarded to espeak_Synth(); never assigned in this listing. */
void* user_data;
/* Voice name handed to espeak_SetVoiceByName() in initFuncs(). */
char Voice[] = {"English"};
/* Sample text; not referenced by any function in this listing. */
char text2[30] = {"this is a english test"};
/*
 * Size is overwritten in speech() with strlen(hyp)+1. position, end_position
 * and flags are forwarded unchanged to espeak_Synth(). unique_identifier is
 * never assigned, so a null pointer is what espeak_Synth() receives.
 */
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
/* Neither of the following two is referenced by any function in this listing. */
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
//char* text;

static void initFuncs()
{

    output = AUDIO_OUTPUT_PLAYBACK;
    espeak_Initialize(output, Buflength, path, Options ); 
    espeak_SetVoiceByName(Voice);
    const char *langNativeString = "en";
    espeak_VOICE voice;
    memset(&voice, 0, sizeof(espeak_VOICE));
        voice.languages = langNativeString;
        voice.name = "US";
        voice.variant = 2;
        voice.gender = 1;
        espeak_SetVoiceByProperties(&voice);


}

/*
 * Pause the caller for about `ms` milliseconds by calling select() with no
 * file descriptors and a timeout.
 *
 * The delay is split into whole seconds plus the remaining microseconds so
 * that tv_usec always stays below one million (a larger value may be rejected
 * by select() with EINVAL) and so that the conversion to microseconds cannot
 * overflow. A zero or negative delay returns immediately.
 */
static void sleep_msec(int32 ms)
{
    struct timeval tmo;

    if (ms <= 0)
        return;

    tmo.tv_sec = ms / 1000;
    tmo.tv_usec = (ms % 1000) * 1000;

    select(0, NULL, NULL, NULL, &tmo);
}


/*
 * Hand the recognised text to eSpeak and then call espeak_Synchronize().
 *
 * hyp: NUL-terminated text to speak; must not be NULL because strlen() is
 *      applied to it.
 *
 * Side effect: the file-level Size is overwritten with the byte count passed
 * to espeak_Synth(). All other synthesis arguments come from file-level
 * variables. Return values of the eSpeak calls are not inspected.
 */
static void speech(char* hyp)
{
    /* Count the terminating NUL as part of the buffer given to eSpeak. */
    Size = strlen(hyp) + 1;

    espeak_Synth(hyp,
                 Size,
                 position,
                 position_type,
                 end_position,
                 flags,
                 unique_identifier,
                 user_data);

    /* Presumably blocks until the queued speech has been spoken - confirm. */
    espeak_Synchronize();
}


static void recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char  *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),(int) cmd_ln_float32_r(config,"-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");


    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {

            ps_end_utt(ps);
            hyp = (char*)ps_get_hyp(ps, NULL );
            if (hyp != NULL) {

     speech(hyp);

                printf("%s\n", hyp); 
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }

    }
    ad_close(ad);
}

/*
 * Program entry point: initialise eSpeak, build the decoder configuration,
 * create the decoder and run the microphone recognition loop.
 *
 * argc/argv are accepted but not used; the model paths are taken from the
 * MODELDIR compile-time macro.
 *
 * Returns 0 after the recognition loop returns, or -1 when the configuration
 * or the decoder could not be created (instead of passing a NULL handle on
 * to the recognition loop).
 */
int main(int argc, char *argv[])
{
    initFuncs();

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                         "-hmm", MODELDIR "/en-us/en-us",
                         "-lm", MODELDIR "/en-us/en-us.lm.bin",
                         "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                         NULL);
    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }

    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        cmd_ln_free_r(config);
        return -1;
    }

    recognize_from_microphone();

    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}

But the problem is that pocketsphinx only recognizes the first sentence; espeak then says it, pocketsphinx recognizes what espeak said, and this repeats forever!

So I am looking for a way to stop pocketsphinx from listening while espeak is speaking the recognized text, but I don't know the best way to do it.

Nickolay V. Shmyrev - 2018-04-24

Call ad_stop_rec before starting espeak.

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

I edited the code as shown below, but that didn't solve the problem!

#include <stdio.h>
#include <string.h>
#include <assert.h>

#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>

#include "pocketsphinx.h"

#include <malloc.h>
#include <espeak/speak_lib.h>
#include <string>
using namespace std;

/* Decoder handle: assigned in main() from ps_init() and used by recognize_from_microphone(). */
static ps_decoder_t *ps;
/* Decoder configuration: assigned in main() from cmd_ln_init(). */
static cmd_ln_t *config;
/* Not referenced by any function in this listing. */
static FILE *rawfd;

/* Never assigned in this listing, so espeak_Synth() receives its zero-initialized value. */
espeak_POSITION_TYPE position_type;
/* Set to AUDIO_OUTPUT_PLAYBACK in initFuncs() just before espeak_Initialize(). */
espeak_AUDIO_OUTPUT output;
/* Path argument handed to espeak_Initialize(); left NULL. */
char *path=NULL;
/* Buffer-length and options arguments handed to espeak_Initialize(). */
int Buflength = 1000, Options=0;
/* Forwarded to espeak_Synth(); never assigned in this listing. */
void* user_data;
/* Voice name handed to espeak_SetVoiceByName() in initFuncs(). */
char Voice[] = {"English"};
/* Sample text; not referenced by any function in this listing. */
char text2[30] = {"this is a english test"};
/*
 * Size is overwritten in speech() with strlen(hyp)+1. position, end_position
 * and flags are forwarded unchanged to espeak_Synth(). unique_identifier is
 * never assigned, so a null pointer is what espeak_Synth() receives.
 */
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
/* Neither of the following two is referenced by any function in this listing. */
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
//char* text;

static void initFuncs()
{

    output = AUDIO_OUTPUT_PLAYBACK;
    espeak_Initialize(output, Buflength, path, Options ); 
    espeak_SetVoiceByName(Voice);
    const char *langNativeString = "en";
    espeak_VOICE voice;
    memset(&voice, 0, sizeof(espeak_VOICE));
        voice.languages = langNativeString;
        voice.name = "US";
        voice.variant = 2;
        voice.gender = 1;
        espeak_SetVoiceByProperties(&voice);

}

/*
 * Pause the caller for about `ms` milliseconds by calling select() with no
 * file descriptors and a timeout.
 *
 * The delay is split into whole seconds plus the remaining microseconds so
 * that tv_usec always stays below one million (a larger value may be rejected
 * by select() with EINVAL) and so that the conversion to microseconds cannot
 * overflow. A zero or negative delay returns immediately.
 */
static void sleep_msec(int32 ms)
{
    struct timeval tmo;

    if (ms <= 0)
        return;

    tmo.tv_sec = ms / 1000;
    tmo.tv_usec = (ms % 1000) * 1000;

    select(0, NULL, NULL, NULL, &tmo);
}

/*
 * Hand the recognised text to eSpeak and then call espeak_Synchronize().
 *
 * hyp: NUL-terminated text to speak; must not be NULL because strlen() is
 *      applied to it.
 *
 * Side effect: the file-level Size is overwritten with the byte count passed
 * to espeak_Synth(). All other synthesis arguments come from file-level
 * variables. Return values of the eSpeak calls are not inspected.
 */
static void speech(char* hyp)
{
    /* Count the terminating NUL as part of the buffer given to eSpeak. */
    Size = strlen(hyp) + 1;

    espeak_Synth(hyp,
                 Size,
                 position,
                 position_type,
                 end_position,
                 flags,
                 unique_identifier,
                 user_data);

    /* Presumably blocks until the queued speech has been spoken - confirm. */
    espeak_Synchronize();
}

/*
 * Capture audio from the configured device, decode it utterance by utterance
 * and pass every non-NULL hypothesis to speech() before printing it.
 *
 * Recording is stopped before the hypothesis is spoken and restarted right
 * afterwards, so that stop and start are always issued as a pair (only when
 * there is something to speak) and a failure of either call is reported
 * instead of being ignored.
 *
 * An utterance is considered finished when the decoder reports no speech
 * after having reported speech at least once. The loop never exits, so the
 * ad_close() call after it is not reached.
 *
 * NOTE(review): whether audio captured or buffered while recording is stopped
 * is actually discarded depends on the sphinxbase audio backend in use -
 * confirm for the target platform.
 */
static void recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");

    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* Speech followed by silence: close the utterance and report it. */
            ps_end_utt(ps);
            hyp = (char *) ps_get_hyp(ps, NULL);
            if (hyp != NULL) {
                /* Pause capture so the synthesized speech is not recorded. */
                if (ad_stop_rec(ad) < 0)
                    E_FATAL("Failed to stop recording\n");

                speech(hyp);
                printf("%s\n", hyp);
                fflush(stdout);

                /* Resume capture only if it was paused above. */
                if (ad_start_rec(ad) < 0)
                    E_FATAL("Failed to start recording\n");
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
    }
    ad_close(ad);
}

/*
 * Program entry point: initialise eSpeak, build the decoder configuration,
 * create the decoder and run the microphone recognition loop.
 *
 * argc/argv are accepted but not used; the model paths are taken from the
 * MODELDIR compile-time macro.
 *
 * Returns 0 after the recognition loop returns, or -1 when the configuration
 * or the decoder could not be created (instead of passing a NULL handle on
 * to the recognition loop).
 */
int main(int argc, char *argv[])
{
    initFuncs();

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                         "-hmm", MODELDIR "/en-us/en-us",
                         "-lm", MODELDIR "/en-us/en-us.lm.bin",
                         "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                         NULL);
    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }

    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        cmd_ln_free_r(config);
        return -1;
    }

    recognize_from_microphone();

    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}

Last edit: rezaee 2018-04-26

Nickolay V. Shmyrev - 2018-05-11

What do you mean by "didn't solve"? The behavior should have changed, but it is hard to guess what changed without more details.

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Temirlan Tashbolotov - 2018-05-05

Hello, I have the same issue, but I'm using Python on a Raspberry Pi 3. I'm calling pocketsphinx_continuous with subprocess.Popen and want to pause it until espeak is done. I'd be thankful for any way to solve it, such as making pocketsphinx run only once instead of listening continuously. Thank you.

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
- Nickolay V. Shmyrev - 2018-05-11
  
  I'm calling pocketsphinx_continuous with subprocess.Popen
  
  This is a bad idea usually
  
  and want to pause it until espeak is done.
  
  You can kill child process and restart it later.
  
  If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

rezaee - 2018-05-11

I have tried a lot of things but haven't succeeded so far!

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

How to stop listening till the espeak ends speaking?(Please help!)

Speech Recognition Toolkit

Forums

Help

How to stop listening till the espeak ends speaking?(Please help!)

How to stop listening till the espeak ends speaking?(Please help!)