Qhwang - 2007-05-04

Hi all,

I modified the example program tty-ptt.c from sphinx2-0.6, but it cannot
recognize anything, and I don't know why. My platform is Linux 2.6.9-42.0.8.
I don't think the problem lies in the installation or configuration, because
the sphinx2-0.6 library functions that I call from my own project all work
very well in Force Alignment Mode.
tty-ptt.c, however, runs in normal mode. I created the language model and
dictionary for just one word, "law", using
http://www.speech.cs.cmu.edu/tools/lmtool-adv.html.
In tty-ptt.c I commented out the calls to ad_open_sps,
ad_read, ad_start_rec, etc., and instead let the program read a raw audio
file that contains a recording of the word "law". All the function calls
returned normally.

But in the loop,
for (;;) {
if ((k = uttproc_result (&fr, &hyp, 0)) == 0) {
printf("\n FINAL RESULT @frm %d: %s", fr, hyp);
break;
}
.....
}

uttproc_result was called many times, and the value of k decreased with each
call. From uttproc.h, I understand that this is normal. But on the last call,
after k had reached 1, a failure message was printed:

...........................
begin uttproc_result
end of uttproc_result, k: 3, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s

begin uttproc_result
end of uttproc_result, k: 2, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s

begin uttproc_result
end of uttproc_result, k: 1, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s

begin uttproc_result

WARNING: "search.c", line 2609: Failed to terminate in final state

INFO: search.c(2646): FWDTREE: (00000001 -4262821 (A=-4262821 L=0))
INFO: search.c(2558): 50 words recognized (1/fr)
INFO: search.c(2563): 5698 senones evaluated (63/fr)
INFO: search.c(2565): 2776 channels searched (30/fr), 172 1st, 2604 last
INFO: search.c(2569): 280 words for which last channels evaluated (3/fr)
INFO: search.c(2571): 168 candidate words for entering last phone (1/fr)
INFO: uttproc.c(510): 0.89 SoS, 0.03 sec elapsed, 0.03 xRT, 0.03 sec CPU, 0.03 xRT

uttproc_result return 0, FINAL RESULT @frm: 89, string content:

SO I GOT NO RECOGNIZED STRING.

----- part of my program ------

static void utterance_loop()
{
FILE fp;
int32 fr;
char
hyp;
char line[1024];
int16 adbuf[4096];
int32 k;
int32 ns; / #Samples read from audio in this utterance /
int32 hwm; / High Water Mark: to know when to report partial result /
int32 recording;
string adFileName="law.raw";
int32 retVal;
int totalEles=0, index=1;

fp = fopen(adFileName.c_str(), "rb");    
if(fp==NULL)
  {
     printf("Error: Open audio file %s for recognition failed", adFileName.c_str());
     return;
  }

for (;;) {      /* Loop for each new utterance */
ui_ready ();

fgets (line, sizeof(line), stdin);
if ((line[0] == 'q') || (line[0] == 'Q'))
    return;

// ad_start_rec(ad); / Start A/D recording for this utterance /
recording = 1;

ns = 0;
hwm = 4000; /* Next partial result reported after 4000 samples */
last_fr = -1;   /* Frame count at last partial result reported */

/* Begin utterance */
if (uttproc_begin_utt (NULL) < 0)
    E_FATAL("uttproc_begin_utt() failed\n");

/* Send audio data to decoder until end of utterance */
for (;;) {
    /*
     * Read audio data (NON-BLOCKING).  Use your favourite substitute here.
     * NOTE: In our implementation, ad_read returns -1 upon end of utterance.
     */
//  if ((k = ad_read (ad, adbuf, 4096)) < 0)
//     break;

    printf("start to read audio file\n");
    if((k = fread(adbuf, sizeof(int16), 4096, fp)) <= 0){
       printf("read end of file\n");
       fclose(fp);
       break;
    }
    totalEles+=k;
    printf("%d, total read: %d elements\n", index, totalEles);
    index++;

    retVal = uttproc_rawdata (adbuf, k, 0);
    printf("uttproc_rawdata return: %d\n", retVal);
    ns += k;
    /* Time to report partial result? (every 4000 samples or 1/4 sec) */

    if (ns > hwm) {
        printf("update result \n");
    update_result ();
    hwm = ns+4000;
    }

    if (recording && (! speaking(ns))) {
//  ad_stop_rec(ad);
    E_INFO("A/D Stopped\n");
    recording = 0;
    break;
    }
}

retVal = uttproc_end_utt ();
printf("uttproc_end_utt return: %d\n", retVal);

printf ("PLEASE WAIT...\n");
fflush (stdout);

for (;;) {
    printf("begin uttproc_result\n");
    if ((k = uttproc_result (&fr, &hyp, 0)) == 0) {
    printf ("\n uttproc_result return 0, FINAL RESULT @frm: %d, string content: %s\n", fr, hyp);
    break;
    }
    printf("end of uttproc_result, k: %d, fr: %d  hyp: %s\n", k, fr, hyp);
    if (k < 0) {
    E_INFO("uttproc_result_noblock() failed\n");
    break;
    }
    if (! (k & 0x1f)){
        printf("begin update_result\n");
    update_result ();
    printf("end of update_result\n");
    }   
}
}

}

/*
 * Program entry point: initialise the decoder from the command-line
 * arguments, log a build banner, run the utterance loop, then shut the
 * decoder down.
 *
 * The A/D device is deliberately neither opened nor closed here
 * (the ad_open_sps / ad_close calls are disabled); utterance_loop()
 * is the only consumer of audio in this variant.
 */
int
main (int32 argc, char *argv[])
{
    fbs_init (argc, argv);

    /* Identify the binary and when it was built. */
    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    utterance_loop ();

    fbs_end ();

    return 0;
}

---------- Arguments file for fbs_init --------------
-ctloffset 0
-ctlcount 100000000
-langwt 6.5
-fwdflatlw 8.5
-rescorelw 9.5
-ugwt 0.5
-fillpen le-10
-silpen 0.005
-inspen 0.65
-top 1
-topsenfrm 3
-topsenthresh -70000
-fwdflatbeam 1e-08
-fwdflatnwbeam 0.0003
-normmean TRUE
-compress TRUE
-agcmax TRUE
-matchscore TRUE
-fwdflat FALSE
-bestpath TRUE
-usecitrans TRUE
-top 4
-compallsen FALSE
-beam 2e-6
-npbeam 2e-6
-lpbeam 2e-5
-lponlybeam 5e-4
-nwbeam 5e-4
-dictfn ./law.dict
-phnfn /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/phone
-mapfn /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/map
-hmmdir /home/qhwang/mulworkspace/AudioAnalyzer/Oedipus/Adult/hmm/6k
-hmmdirlist /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/
-8bsen TRUE
-sendumpfn /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/sendump
-cbdir /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/
-ccbfn cep.256
-dcbfn d2cep.256
-pcbfn p3cep.256
-xcbfn xcep.256
-hmmext chmm
-code1ext ccode
-code2ext d2code
-code3ext p3code
-code4ext xcode
-hmmsm 0.0000001
-transsm 0.0001
-cepfloor 0.0001
-dcepfloor 0.0001
-xcepfloor 0.0001
-latsize 50
-backtrace FALSE
-taphone FALSE
-tastate FALSE

I would appreciate it if anyone can help!!!!