Menu

PocketSphinx Search

Help
2016-05-14
2016-05-18
  • ElMokhtar Ahmed

    ElMokhtar Ahmed - 2016-05-14

    Hello,

    I am working on a C++ PocketSphinx application and realised that I need to create two search objects.
    The first one is a keyphrase-based search that activates the second search, which is based on a language model that I have created.

    I want to know how to do this with continuous.c.

    This is my program, which currently works with the second search only; I want to add a keyword search mode to the same program:

    #ifndef RECONNAISSANCE_VOCALE_H_INCLUDED
    #define RECONNAISSANCE_VOCALE_H_INCLUDED
    
    #include <stdio.h>
    #include <string.h>
    #include <assert.h>
    
    #if defined(_WIN32) && !defined(__CYGWIN__)
    #include <windows.h>
    #else
    #include <sys/select.h>
    #endif
    
    #include <sphinxbase/err.h>
    #include <sphinxbase/ad.h>
    
    #include <pocketsphinx.h>
    
    /*
     * Option table handed to the command-line parser: whatever
     * POCKETSPHINX_OPTIONS expands to, followed by the input-selection
     * switches declared here. Each entry is {name, type, default, help text}.
     */
    static const arg_t cont_args_def[] = {
        POCKETSPHINX_OPTIONS,
        /* File holding additional arguments. */
        { "-argfile", ARG_STRING, NULL, "Argument file giving extra arguments." },
        /* Audio input selection. */
        { "-adcdev", ARG_STRING, NULL, "Name of audio device to use for input." },
        { "-infile", ARG_STRING, NULL, "Audio file to transcribe." },
        { "-inmic", ARG_BOOLEAN, "no", "Transcribe audio from microphone." },
        /* Output formatting. */
        { "-time", ARG_BOOLEAN, "no", "Print word times in file transcription." },
        CMDLN_EMPTY_OPTION
    };
    
    /* Decoder handle; assigned from ps_init() in reconnaissance_vocale(). */
    static ps_decoder_t *ps;
    /* Parsed configuration; assigned from cmd_ln_parse_r() in reconnaissance_vocale(). */
    static cmd_ln_t *config;
    /* NOTE(review): not referenced by any code visible in this file. */
    static FILE *rawfd;
    
    /*
     * Walk the segmentation of the decoder's current result and print one line
     * per segment: the word, its start and end frame divided by the "-frate"
     * setting, and logmath_exp() of the value returned by ps_seg_prob().
     */
    static void
    print_word_times()
    {
        const int frames_per_sec = cmd_ln_int32_r(config, "-frate");
        ps_seg_t *seg;

        for (seg = ps_seg_iter(ps); seg != NULL; seg = ps_seg_next(seg)) {
            int32 first_frame, last_frame;
            int32 log_prob;
            float confidence;

            ps_seg_frames(seg, &first_frame, &last_frame);
            log_prob = ps_seg_prob(seg, NULL, NULL, NULL);
            confidence = logmath_exp(ps_get_logmath(ps), log_prob);

            printf("%s %.3f %.3f %f\n",
                   ps_seg_word(seg),
                   ((float) first_frame / frames_per_sec),
                   ((float) last_frame / frames_per_sec),
                   confidence);
        }
    }
    
    /* Decode the unsigned 16-bit little-endian value stored in bytes[0..1]. */
    static int
    wav_le16(const unsigned char *bytes)
    {
        return (int) ((unsigned int) bytes[0] | ((unsigned int) bytes[1] << 8));
    }

    /* Decode the unsigned 32-bit little-endian value stored in bytes[0..3]. */
    static unsigned long
    wav_le32(const unsigned char *bytes)
    {
        return (unsigned long) bytes[0]
            | ((unsigned long) bytes[1] << 8)
            | ((unsigned long) bytes[2] << 16)
            | ((unsigned long) bytes[3] << 24);
    }

    /*
     * Check that a WAV header describes 16-bit, PCM, mono audio at the
     * expected sample rate.
     *
     * header      - raw header bytes; offsets 20..27 and 34..35 are read, so the
     *               buffer must hold at least 36 bytes (NOTE(review): the caller
     *               is not visible here - confirm it passes the full header).
     * expected_sr - sample rate the decoder is configured for.
     *
     * Returns 1 when every check passes; otherwise logs the first mismatch with
     * E_ERROR and returns 0.
     */
    static int
    check_wav_header(char *header, int expected_sr)
    {
        /* Work on unsigned bytes: plain char may be signed, which would turn
         * bytes >= 0x80 negative before they are combined. */
        const unsigned char *hdr = (const unsigned char *) header;
        int bits_per_sample;
        int audio_format;
        int num_channels;
        unsigned long sr;

        /* Every field below is compared at its full 16-bit width so that a
         * non-zero high byte is no longer silently ignored. */
        bits_per_sample = wav_le16(hdr + 34);
        if (bits_per_sample != 16) {
            E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", bits_per_sample);
            return 0;
        }
        audio_format = wav_le16(hdr + 20);
        if (audio_format != 1) {
            E_ERROR("Input audio file has compression [%d] and not required PCM\n", audio_format);
            return 0;
        }
        num_channels = wav_le16(hdr + 22);
        if (num_channels != 1) {
            E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", num_channels);
            return 0;
        }
        /* Assembled in unsigned arithmetic: shifting a byte >= 0x80 left by 24
         * in a signed int is undefined behaviour. */
        sr = wav_le32(hdr + 24);
        if (expected_sr < 0 || sr != (unsigned long) expected_sr) {
            E_ERROR("Input audio file has sample rate [%lu], but decoder expects [%d]\n", sr, expected_sr);
            return 0;
        }
        return 1;
    }
    
    
    /*
     * Sleep for the specified number of milliseconds.
     *
     * On the Windows branch this calls Sleep(ms); otherwise it calls select()
     * with no descriptor sets so that only the timeout takes effect.
     */
    static void
    sleep_msec(int32 ms)
    {
    #if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
        Sleep(ms);
    #else
        /* ------------------- Unix ------------------ */
        struct timeval tmo;

        /* Split into whole seconds and a sub-second remainder: tv_usec must
         * stay below 1000000, otherwise select() may fail with EINVAL for
         * delays of one second or more instead of sleeping. */
        tmo.tv_sec = ms / 1000;
        tmo.tv_usec = (ms % 1000) * 1000;

        select(0, NULL, NULL, NULL, &tmo);
    #endif
    }
    
    /*
     * Main utterance processing loop:
     *     for (;;) {
     *        start utterance and wait for speech to process
     *        decoding till end-of-utterance silence will be detected
     *        print utterance result;
     *     }
     */
    static void
    recognize_from_microphone()
    {
        ad_rec_t *ad;
        int16 adbuf[2048];
        uint8 utt_started, in_speech;
        int32 k;
        char const *hyp;
    
        if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                              (int) cmd_ln_float32_r(config,
                                                     "-samprate"))) == NULL)
            E_FATAL("Failed to open audio device\n");
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");
    
        if (ps_start_utt(ps) < 0)
            E_FATAL("Failed to start utterance\n");
        utt_started = FALSE;
        printf("READY....\n");
    
        for (;;) {
            if ((k = ad_read(ad, adbuf, 2048)) < 0)
                E_FATAL("Failed to read audio\n");
            ps_process_raw(ps, adbuf, k, FALSE, FALSE);
            in_speech = ps_get_in_speech(ps);
            if (in_speech && !utt_started) {
                utt_started = TRUE;
                printf("Donner un ordre vocale...\n");
            }
            if (!in_speech && utt_started) {
                /* speech -> silence transition, time to start new utterance  */
                ps_end_utt(ps);
                hyp = ps_get_hyp(ps, NULL );
                if (hyp != NULL){
                    printf("#=> %s\n", hyp);
                    cout << "classifcation : "<< classification(hyp)<<endl;
                    int classe =  classification(hyp) ;
    
                    if(classe){
                        doActionBySpeech(hyp).wait();
                    }
    
                }
    
                if (ps_start_utt(ps) < 0)
                    E_FATAL("Failed to start utterance\n");
                utt_started = FALSE;
                printf("READY....\n");
            }
            sleep_msec(100);
        }
        ad_close(ad);
    }
    
    /*
     * Entry point of the speech-recognition module.
     *
     * Builds a fixed argument list (microphone input, acoustic model, language
     * model, dictionary, log file), parses it into the file-scope `config`,
     * initialises the file-scope decoder `ps`, and then runs
     * recognize_from_microphone() when "-inmic" is set. The "#1".."#8" markers
     * printed along the way trace progress through the set-up steps.
     *
     * arg - unused.
     *
     * Returns NULL in every case. If configuration or decoder initialisation
     * fails, the configuration is released and the function returns at once
     * instead of continuing with freed or NULL handles.
     */
    void * reconnaissance_vocale (void * arg) {
        (void) arg;

        printf("*   Le module de reconnaissance vocale a été démarrée avec succès\n");

        char const *cfg;

        char* res[] = { "-inmic", "yes", "-hmm", "/home/rami/Bureau/workspace::code::blocks/SpeechRecognition/language/cmusphinx-fr-5.2",
                            "-lm","/home/rami/Bureau/workspace::code::blocks/SpeechRecognition/language/last.lm",
                            "-dict","/home/rami/Bureau/workspace::code::blocks/SpeechRecognition/language/cmudict-en-fr.dict",
                            "-logfn","/dev/null",NULL };

        /* Number of real arguments; the trailing NULL sentinel is not counted. */
        int res_size = sizeof(res)/sizeof(res[0]) - 1;

        printf("#1");
        config = cmd_ln_parse_r(NULL, cont_args_def, res_size, res, TRUE);
        printf("#2");

        /* Handle argument file as -argfile. */
        if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
            config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
        }
        printf("#3");

        if (config == NULL || (cmd_ln_str_r(config, "-infile") == NULL && cmd_ln_boolean_r(config, "-inmic") == FALSE)) {
            E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.\n");
            cmd_ln_free_r(config);
            config = NULL;
            /* Nothing usable to decode with: stop here rather than passing a
             * freed or NULL configuration to the calls below. */
            return NULL;
        }
        printf("#4");

        ps_default_search_args(config);
        printf("#5");


        ps = ps_init(config);
        printf("#6");

        if (ps == NULL) {
            cmd_ln_free_r(config);
            config = NULL;
            /* Without a decoder the recognition loop cannot run, and falling
             * through would free the configuration a second time. */
            return NULL;
        }

        E_INFO("%s COMPILED ON: %s, AT: %s\n\n", "smarthomeClient", __DATE__, __TIME__);
        printf("#7");

        if (cmd_ln_str_r(config, "-infile") != NULL) {
        //    recognize_from_file();
        } else if (cmd_ln_boolean_r(config, "-inmic")) {
            recognize_from_microphone();
        }
        printf("#8");

        ps_free(ps);
        ps = NULL;
        cmd_ln_free_r(config);
        config = NULL;

        return NULL;
    }
    
    
    #endif // RECONNAISSANCE_VOCALE_H_INCLUDED
    
     
    • Nickolay V. Shmyrev

      So what is the problem? Searches are added with ps_set_kws and ps_set_lm and switched with ps_set_search. You can grep the sources for examples.

       

Log in to post a comment.

Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.