Menu

How to stop listening until espeak finishes speaking? (Please help!)

Help
rezaee
2018-04-22
2018-05-11
  • rezaee

    rezaee - 2018-04-22

    I am trying to write code that uses pocketsphinx to recognize what the user says and then passes the result to espeak, so that espeak speaks what pocketsphinx recognized!

    I wrote this code:

    #include <stdio.h>
    #include <string.h>
    #include <assert.h>
    
    #include <sphinxbase/err.h>
    #include <sphinxbase/ad.h>
    
    #include "pocketsphinx.h"
    
    #include <malloc.h>
    #include <espeak/speak_lib.h>
    #include <string>
    using namespace std;
    
    // --- pocketsphinx state (file-local) ---
    static ps_decoder_t *ps;      // decoder, created in main()
    static cmd_ln_t *config;      // decoder configuration, created in main()
    static FILE *rawfd;           // not referenced by the code shown here

    // --- eSpeak state ---
    // Objects without an initializer are zero-initialized because they
    // have static storage duration.
    espeak_POSITION_TYPE position_type;   // forwarded to espeak_Synth()
    espeak_AUDIO_OUTPUT output;           // set in initFuncs()
    char *path = NULL;                    // data path argument of espeak_Initialize()
    int Buflength = 1000;                 // buffer length argument of espeak_Initialize()
    int Options = 0;                      // options argument of espeak_Initialize()
    void *user_data;                      // forwarded to espeak_Synth()
    char Voice[] = "English";             // name given to espeak_SetVoiceByName()
    char text2[30] = "this is a english test";   // not referenced by the code shown here
    unsigned int Size;                    // byte count handed to espeak_Synth(), set in speech()
    unsigned int position = 0;            // start position for espeak_Synth()
    unsigned int end_position = 0;        // end position for espeak_Synth()
    unsigned int flags = espeakCHARS_AUTO;
    unsigned int *unique_identifier;      // forwarded to espeak_Synth()
    t_espeak_callback *SynthCallback;     // not referenced by the code shown here
    espeak_PARAMETER Parm;                // not referenced by the code shown here
    //char* text;
    
    static void initFuncs()
    {
    
        output = AUDIO_OUTPUT_PLAYBACK;
        espeak_Initialize(output, Buflength, path, Options ); 
        espeak_SetVoiceByName(Voice);
        const char *langNativeString = "en";
        espeak_VOICE voice;
        memset(&voice, 0, sizeof(espeak_VOICE));
            voice.languages = langNativeString;
            voice.name = "US";
            voice.variant = 2;
            voice.gender = 1;
            espeak_SetVoiceByProperties(&voice);
    
    }
    
    // Suspend the caller for about `ms` milliseconds by calling select()
    // with no descriptors and only a timeout.  Negative values are treated
    // as zero.
    static void sleep_msec(int32 ms)
    {
        struct timeval tmo;

        if (ms < 0)
            ms = 0;

        // Split into whole seconds and the remainder: tv_usec has to stay
        // below 1,000,000, and ms * 1000 could overflow for large ms.
        tmo.tv_sec = ms / 1000;
        tmo.tv_usec = (ms % 1000) * 1000;

        select(0, NULL, NULL, NULL, &tmo);
    }
    
    static void speech(char* hyp)
    {
    
        Size = strlen(hyp)+1;
        espeak_Synth( hyp, Size, position, position_type, end_position, flags,unique_identifier, user_data );
        espeak_Synchronize( );
    
    }
    
    static void recognize_from_microphone()
    {
        ad_rec_t *ad;
        int16 adbuf[2048];
        uint8 utt_started, in_speech;
        int32 k;
        char  *hyp;
    
        if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),(int) cmd_ln_float32_r(config,"-samprate"))) == NULL)
            E_FATAL("Failed to open audio device\n");
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");
    
        if (ps_start_utt(ps) < 0)
            E_FATAL("Failed to start utterance\n");
    
        utt_started = FALSE;
        E_INFO("Ready....\n");
    
        for (;;) {
            if ((k = ad_read(ad, adbuf, 2048)) < 0)
                E_FATAL("Failed to read audio\n");
            ps_process_raw(ps, adbuf, k, FALSE, FALSE);
            in_speech = ps_get_in_speech(ps);
            if (in_speech && !utt_started) {
                utt_started = TRUE;
                E_INFO("Listening...\n");
            }
            if (!in_speech && utt_started) {
    
                ps_end_utt(ps);
                hyp = (char*)ps_get_hyp(ps, NULL );
                if (hyp != NULL) {
    
         speech(hyp);
    
                    printf("%s\n", hyp); 
                    fflush(stdout);
                }
    
                if (ps_start_utt(ps) < 0)
                    E_FATAL("Failed to start utterance\n");
                utt_started = FALSE;
                E_INFO("Ready....\n");
            }
    
        }
        ad_close(ad);
    }
    
    // Program entry point: initialize eSpeak, build the pocketsphinx
    // configuration for the en-us models under MODELDIR, create the decoder
    // and run the microphone loop.  Returns 1 when the configuration or the
    // decoder cannot be created, 0 otherwise.  Command-line arguments are
    // not used.
    int main(int argc, char *argv[])
    {
        (void) argc;
        (void) argv;

        initFuncs();

        config = cmd_ln_init(NULL, ps_args(), TRUE,
                     "-hmm", MODELDIR "/en-us/en-us",
                         "-lm", MODELDIR "/en-us/en-us.lm.bin",
                         "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                         NULL);
        if (config == NULL) {
            fprintf(stderr, "Failed to create the decoder configuration\n");
            return 1;
        }

        // Without this check a failed ps_init() would hand a NULL decoder
        // to ps_start_utt() inside recognize_from_microphone().
        ps = ps_init(config);
        if (ps == NULL) {
            fprintf(stderr, "Failed to create the recognizer\n");
            cmd_ln_free_r(config);
            return 1;
        }

        recognize_from_microphone();

        ps_free(ps);
        cmd_ln_free_r(config);

        return 0;
    }
    

    But the problem is that pocketsphinx only recognizes the first sentence; espeak then says it, pocketsphinx recognizes what espeak said, and this repeats forever!

    So I am looking for a way to stop pocketsphinx from listening while espeak is speaking the recognized text, but I don't know the best way to do it.

     
    • Nickolay V. Shmyrev

      Call ad_stop_rec before starting espeak.

       
  • rezaee

    rezaee - 2018-04-26

    I edited the code as shown below, but the problem was not solved!

    #include <stdio.h>
    #include <string.h>
    #include <assert.h>
    
    #include <sphinxbase/err.h>
    #include <sphinxbase/ad.h>
    
    #include "pocketsphinx.h"
    
    #include <malloc.h>
    #include <espeak/speak_lib.h>
    #include <string>
    using namespace std;
    
    // --- pocketsphinx state (file-local) ---
    static ps_decoder_t *ps;      // decoder, created in main()
    static cmd_ln_t *config;      // decoder configuration, created in main()
    static FILE *rawfd;           // not referenced by the code shown here

    // --- eSpeak state ---
    // Objects without an initializer are zero-initialized because they
    // have static storage duration.
    espeak_POSITION_TYPE position_type;   // forwarded to espeak_Synth()
    espeak_AUDIO_OUTPUT output;           // set in initFuncs()
    char *path = NULL;                    // data path argument of espeak_Initialize()
    int Buflength = 1000;                 // buffer length argument of espeak_Initialize()
    int Options = 0;                      // options argument of espeak_Initialize()
    void *user_data;                      // forwarded to espeak_Synth()
    char Voice[] = "English";             // name given to espeak_SetVoiceByName()
    char text2[30] = "this is a english test";   // not referenced by the code shown here
    unsigned int Size;                    // byte count handed to espeak_Synth(), set in speech()
    unsigned int position = 0;            // start position for espeak_Synth()
    unsigned int end_position = 0;        // end position for espeak_Synth()
    unsigned int flags = espeakCHARS_AUTO;
    unsigned int *unique_identifier;      // forwarded to espeak_Synth()
    t_espeak_callback *SynthCallback;     // not referenced by the code shown here
    espeak_PARAMETER Parm;                // not referenced by the code shown here
    //char* text;
    
    static void initFuncs()
    {
    
        output = AUDIO_OUTPUT_PLAYBACK;
        espeak_Initialize(output, Buflength, path, Options ); 
        espeak_SetVoiceByName(Voice);
        const char *langNativeString = "en";
        espeak_VOICE voice;
        memset(&voice, 0, sizeof(espeak_VOICE));
            voice.languages = langNativeString;
            voice.name = "US";
            voice.variant = 2;
            voice.gender = 1;
            espeak_SetVoiceByProperties(&voice);
    
    }
    
    // Suspend the caller for about `ms` milliseconds by calling select()
    // with no descriptors and only a timeout.  Negative values are treated
    // as zero.
    static void sleep_msec(int32 ms)
    {
        struct timeval tmo;

        if (ms < 0)
            ms = 0;

        // Split into whole seconds and the remainder: tv_usec has to stay
        // below 1,000,000, and ms * 1000 could overflow for large ms.
        tmo.tv_sec = ms / 1000;
        tmo.tv_usec = (ms % 1000) * 1000;

        select(0, NULL, NULL, NULL, &tmo);
    }
    
    static void speech(char* hyp)
    {
    
        Size = strlen(hyp)+1;
        espeak_Synth( hyp, Size, position, position_type, end_position, flags,unique_identifier, user_data );
        espeak_Synchronize( );
    
    }
    
    // Continuously decode speech from the audio device named by the
    // "-adcdev" option.  Each time the decoder sees a speech -> silence
    // transition the utterance is closed, its hypothesis is spoken through
    // speech() and printed, and a new utterance is started.
    //
    // Capture is paused while eSpeak is talking so that the microphone does
    // not pick up the synthesized voice.  The stop and the restart are kept
    // in the same branch so that they always happen as a pair, and both
    // results are checked.
    //
    // The loop has no exit condition, so the function does not return
    // normally; every audio or decoder failure ends the program via E_FATAL.
    static void recognize_from_microphone()
    {
        ad_rec_t *ad;
        int16 adbuf[2048];
        uint8 utt_started, in_speech;
        int32 k;
        char  *hyp;

        if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),(int) cmd_ln_float32_r(config,"-samprate"))) == NULL)
            E_FATAL("Failed to open audio device\n");
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");

        if (ps_start_utt(ps) < 0)
            E_FATAL("Failed to start utterance\n");

        utt_started = FALSE;
        E_INFO("Ready....\n");

        for (;;) {
            if ((k = ad_read(ad, adbuf, 2048)) < 0)
                E_FATAL("Failed to read audio\n");
            ps_process_raw(ps, adbuf, k, FALSE, FALSE);
            in_speech = ps_get_in_speech(ps);
            if (in_speech && !utt_started) {
                utt_started = TRUE;
                E_INFO("Listening...\n");
            }
            if (!in_speech && utt_started) {
                // Speech -> silence transition: close the utterance and
                // fetch the hypothesis.
                ps_end_utt(ps);
                hyp = (char*)ps_get_hyp(ps, NULL );
                if (hyp != NULL) {
                    // Stop capturing before anything is played back ...
                    if (ad_stop_rec(ad) < 0)
                        E_FATAL("Failed to stop recording\n");

                    speech(hyp);
                    printf("%s\n", hyp);
                    fflush(stdout);

                    // ... and resume only after speech() has returned.
                    // Restarting here, rather than unconditionally below,
                    // avoids starting a device that was never stopped.
                    if (ad_start_rec(ad) < 0)
                        E_FATAL("Failed to start recording\n");
                }

                if (ps_start_utt(ps) < 0)
                    E_FATAL("Failed to start utterance\n");
                utt_started = FALSE;
                E_INFO("Ready....\n");
            }

        }
        // Not reached while the loop above has no break.
        ad_close(ad);
    }
    
    // Program entry point: initialize eSpeak, build the pocketsphinx
    // configuration for the en-us models under MODELDIR, create the decoder
    // and run the microphone loop.  Returns 1 when the configuration or the
    // decoder cannot be created, 0 otherwise.  Command-line arguments are
    // not used.
    int main(int argc, char *argv[])
    {
        (void) argc;
        (void) argv;

        initFuncs();

        config = cmd_ln_init(NULL, ps_args(), TRUE,
                     "-hmm", MODELDIR "/en-us/en-us",
                         "-lm", MODELDIR "/en-us/en-us.lm.bin",
                         "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                         NULL);
        if (config == NULL) {
            fprintf(stderr, "Failed to create the decoder configuration\n");
            return 1;
        }

        // Without this check a failed ps_init() would hand a NULL decoder
        // to ps_start_utt() inside recognize_from_microphone().
        ps = ps_init(config);
        if (ps == NULL) {
            fprintf(stderr, "Failed to create the recognizer\n");
            cmd_ln_free_r(config);
            return 1;
        }

        recognize_from_microphone();

        ps_free(ps);
        cmd_ln_free_r(config);

        return 0;
    }
    
     

    Last edit: rezaee 2018-04-26
    • Nickolay V. Shmyrev

      What do you mean by "didn't solve"? The behavior should change. It is hard to guess what changed, though.

       
  • Temirlan Tashbolotov

    Hello, I have the same issue, but I'm using Python on a Raspberry Pi 3. I'm calling pocketsphinx_continuous with subprocess.Popen and want to pause it until espeak is done. I'd be thankful for any way to solve it, such as making pocketsphinx run only once instead of listening continuously. Thank you.

     
    • Nickolay V. Shmyrev

      I'm calling pocketsphinx_continuous with subprocess.Popen

      This is a bad idea usually

      and want to pause it until espeak is done.

      You can kill child process and restart it later.

       
  • rezaee

    rezaee - 2018-05-11

    I tried a lot of things but have not succeeded so far!

     

Log in to post a comment.

Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.