CMU Sphinx / Forums / Help: PocketSphinx Search

Hello,

I am working on a c++ pocketsphinx application and realised that i should create two search objects.
The first one is a keyphrase based search that activate the second search based on a language model that i have created.

I want to know how to do this with the continuous.c

This is my program working with the second search, i want to add a keyword search mode in the same program :

#ifndef RECONNAISSANCE_VOCALE_H_INCLUDED
#define RECONNAISSANCE_VOCALE_H_INCLUDED

#include <stdio.h>
#include <string.h>
#include <assert.h>

#if defined(_WIN32) && !defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/select.h>
#endif

#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>

#include <pocketsphinx.h>

static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,
    /* Argument file. */
    {"-argfile",
     ARG_STRING,
     NULL,
     "Argument file giving extra arguments."},
    {"-adcdev",
     ARG_STRING,
     NULL,
     "Name of audio device to use for input."},
    {"-infile",
     ARG_STRING,
     NULL,
     "Audio file to transcribe."},
    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},
    {"-time",
     ARG_BOOLEAN,
     "no",
     "Print word times in file transcription."},
    CMDLN_EMPTY_OPTION
};

static ps_decoder_t *ps;
static cmd_ln_t *config;
static FILE *rawfd;

static void
print_word_times()
{
    int frame_rate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *iter = ps_seg_iter(ps);
    while (iter != NULL) {
        int32 sf, ef, pprob;
        float conf;

        ps_seg_frames(iter, &sf, &ef);
        pprob = ps_seg_prob(iter, NULL, NULL, NULL);
        conf = logmath_exp(ps_get_logmath(ps), pprob);
        printf("%s %.3f %.3f %f\n", ps_seg_word(iter), ((float)sf / frame_rate),
               ((float) ef / frame_rate), conf);
        iter = ps_seg_next(iter);
    }
}

static int
check_wav_header(char *header, int expected_sr)
{
    int sr;

    if (header[34] != 0x10) {
        E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", header[34]);
        return 0;
    }
    if (header[20] != 0x1) {
        E_ERROR("Input audio file has compression [%d] and not required PCM\n", header[20]);
        return 0;
    }
    if (header[22] != 0x1) {
        E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", header[22]);
        return 0;
    }
    sr = ((header[24] & 0xFF) | ((header[25] & 0xFF) << 8) | ((header[26] & 0xFF) << 16) | ((header[27] & 0xFF) << 24));
    if (sr != expected_sr) {
        E_ERROR("Input audio file has sample rate [%d], but decoder expects [%d]\n", sr, expected_sr);
        return 0;
    }
    return 1;
}


/* Sleep for specified msec */
static void
sleep_msec(int32 ms)
{
#if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
    Sleep(ms);
#else
    /* ------------------- Unix ------------------ */
    struct timeval tmo;

    tmo.tv_sec = 0;
    tmo.tv_usec = ms * 1000;

    select(0, NULL, NULL, NULL, &tmo);
#endif
}

/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    printf("READY....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            printf("Donner un ordre vocale...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL){
                printf("#=> %s\n", hyp);
                cout << "classifcation : "<< classification(hyp)<<endl;
                int classe =  classification(hyp) ;

                if(classe){
                    doActionBySpeech(hyp).wait();
                }

            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            printf("READY....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}

void * reconnaissance_vocale (void * arg) {
    printf("*   Le module de reconnaissance vocale a été démarrée avec succès\n");

    char const *cfg;

    char* res[] = { "-inmic", "yes", "-hmm", "/home/rami/Bureau/workspace::code::blocks/SpeechRecognition/language/cmusphinx-fr-5.2",
                        "-lm","/home/rami/Bureau/workspace::code::blocks/SpeechRecognition/language/last.lm",
                        "-dict","/home/rami/Bureau/workspace::code::blocks/SpeechRecognition/language/cmudict-en-fr.dict",
                        "-logfn","/dev/null",NULL };

    int res_size = sizeof(res)/sizeof(res[0]) - 1;

    printf("#1");
    config = cmd_ln_parse_r(NULL, cont_args_def, res_size, res, TRUE);
    printf("#2");

    /* Handle argument file as -argfile. */
    if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
    }
    printf("#3");

    if (config == NULL || (cmd_ln_str_r(config, "-infile") == NULL && cmd_ln_boolean_r(config, "-inmic") == FALSE)) {
    E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.\n");
        cmd_ln_free_r(config);
    }
    printf("#4");

    ps_default_search_args(config);
    printf("#5");


    ps = ps_init(config);
    printf("#6");

    if (ps == NULL) {
        cmd_ln_free_r(config);

    }

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", "smarthomeClient", __DATE__, __TIME__);
    printf("#7");

    if (cmd_ln_str_r(config, "-infile") != NULL) {
    //    recognize_from_file();
    } else if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
    }
    printf("#8");

    ps_free(ps);
    cmd_ln_free_r(config);

}


#endif // RECONNAISSANCE_VOCALE_H_INCLUDED

PocketSphinx Search

Speech Recognition Toolkit

Forums

Help

PocketSphinx Search document.SUBSCRIPTION_OPTIONS = { "thing": "topic", "subscribed": false, "url": "subscribe", "icon": { "css": "fa fa-envelope-o" } };

PocketSphinx Search