I modified tty-ptt.c, one of the example programs in sphinx2-0.6, and the
modified program cannot recognize anything. I don't know why. My platform is
Linux 2.6.9-42.0.8. I don't think the problem lies in the installation or
configuration, because the sphinx2-0.6 library functions that I call from my
own project all work very well in Force Alignment Mode.
tty-ptt.c, however, runs in normal (recognition) mode. I created the language
model and dictionary for just one word, "law", using
http://www.speech.cs.cmu.edu/tools/lmtool-adv.html.
In tty-ptt.c I commented out the calls to ad_open_sps, ad_read, ad_start_rec,
etc., and instead let the program read a raw audio file containing a recording
of the spoken word "law". All the function calls returned normally.
But in the loop,
for (;;) {
if ((k = uttproc_result (&fr, &hyp, 0)) == 0) {
printf("\n FINAL RESULT @frm %d: %s", fr, hyp);
break;
}
.....
}
uttproc_result was called many times, and the value of k decreased with each
call. From uttproc.h I know that this is normal. But on the last call, after
k had reached 1, a failure message was printed:
...........................
begin uttproc_result
end of uttproc_result, k: 3, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s
begin uttproc_result
end of uttproc_result, k: 2, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s
begin uttproc_result
end of uttproc_result, k: 1, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s
begin uttproc_result
WARNING: "search.c", line 2609: Failed to terminate in final state
INFO: search.c(2646): FWDTREE: (00000001 -4262821 (A=-4262821 L=0))
INFO: search.c(2558): 50 words recognized (1/fr)
INFO: search.c(2563): 5698 senones evaluated (63/fr)
INFO: search.c(2565): 2776 channels searched (30/fr), 172 1st, 2604 last
INFO: search.c(2569): 280 words for which last channels evaluated (3/fr)
INFO: search.c(2571): 168 candidate words for entering last phone (1/fr)
INFO: uttproc.c(510): 0.89 SoS, 0.03 sec elapsed, 0.03 xRT, 0.03 sec CPU, 0.03 xRT
uttproc_result return 0, FINAL RESULT @frm: 89, string content:
SO I GOT NO RECOGNIZED STRING.
----- part of my program ------
static void utterance_loop()
{
FILE fp;
int32 fr;
char hyp;
char line[1024];
int16 adbuf[4096];
int32 k;
int32 ns; / #Samples read from audio in this utterance /
int32 hwm; / High Water Mark: to know when to report partial result /
int32 recording;
string adFileName="law.raw";
int32 retVal;
int totalEles=0, index=1;
fp=fopen(adFileName.c_str(),"rb");if(fp==NULL){printf("Error:Openaudiofile%sforrecognitionfailed",adFileName.c_str());return;}for(;;){/* Loop for each new utterance */ui_ready();fgets(line,sizeof(line),stdin);if((line[0]=='q')||(line[0]=='Q'))return;
// ad_start_rec(ad); / Start A/D recording for this utterance /
recording = 1;
ns=0;hwm=4000;/* Next partial result reported after 4000 samples */last_fr=-1;/* Frame count at last partial result reported *//* Begin utterance */if(uttproc_begin_utt(NULL)<0)E_FATAL("uttproc_begin_utt()failed\n");/* Send audio data to decoder until end of utterance */for(;;){/* * Read audio data (NON-BLOCKING). Use your favourite substitute here. * NOTE: In our implementation, ad_read returns -1 upon end of utterance. *///if((k=ad_read(ad,adbuf,4096))<0)//break;printf("starttoreadaudiofile\n");if((k=fread(adbuf,sizeof(int16),4096,fp))<=0){printf("readendoffile\n");fclose(fp);break;}totalEles+=k;printf("%d,totalread:%delements\n",index,totalEles);index++;retVal=uttproc_rawdata(adbuf,k,0);printf("uttproc_rawdatareturn:%d\n",retVal);ns+=k;/* Time to report partial result? (every 4000 samples or 1/4 sec) */if(ns>hwm){printf("updateresult\n");update_result();hwm=ns+4000;}if(recording&&(!speaking(ns))){//ad_stop_rec(ad);E_INFO("A/DStopped\n");recording=0;break;}}retVal=uttproc_end_utt();printf("uttproc_end_uttreturn:%d\n",retVal);printf("PLEASEWAIT...\n");fflush(stdout);for(;;){printf("beginuttproc_result\n");if((k=uttproc_result(&fr,&hyp,0))==0){printf("\nuttproc_resultreturn0,FINALRESULT@frm:%d,stringcontent:%s\n",fr,hyp);break;}printf("endofuttproc_result,k:%d,fr:%dhyp:%s\n",k,fr,hyp);if(k<0){E_INFO("uttproc_result_noblock()failed\n");break;}if(!(k&0x1f)){printf("beginupdate_result\n");update_result();printf("endofupdate_result\n");}}}
}
int
main (int32 argc, char *argv[])
{
fbs_init (argc, argv);
// if ((ad = ad_open_sps(SAMPLE_RATE)) == NULL)
// E_FATAL("ad_open_sps failed\n");
Hi all,
I modified tty-ptt.c, one of the example programs in sphinx2-0.6, and the
modified program cannot recognize anything. I don't know why. My platform is
Linux 2.6.9-42.0.8. I don't think the problem lies in the installation or
configuration, because the sphinx2-0.6 library functions that I call from my
own project all work very well in Force Alignment Mode.
tty-ptt.c, however, runs in normal (recognition) mode. I created the language
model and dictionary for just one word, "law", using
http://www.speech.cs.cmu.edu/tools/lmtool-adv.html.
In tty-ptt.c I commented out the calls to ad_open_sps, ad_read, ad_start_rec,
etc., and instead let the program read a raw audio file containing a recording
of the spoken word "law". All the function calls returned normally.
But in the loop,
for (;;) {
if ((k = uttproc_result (&fr, &hyp, 0)) == 0) {
printf("\n FINAL RESULT @frm %d: %s", fr, hyp);
break;
}
.....
}
uttproc_result was called many times, and the value of k decreased with each
call. From uttproc.h I know that this is normal. But on the last call, after
k had reached 1, a failure message was printed:
...........................
begin uttproc_result
end of uttproc_result, k: 3, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s
begin uttproc_result
end of uttproc_result, k: 2, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s
begin uttproc_result
end of uttproc_result, k: 1, fr: -10012 hyp: %s COMPILED ON: %s, AT: %s
begin uttproc_result
WARNING: "search.c", line 2609: Failed to terminate in final state
INFO: search.c(2646): FWDTREE: (00000001 -4262821 (A=-4262821 L=0))
INFO: search.c(2558): 50 words recognized (1/fr)
INFO: search.c(2563): 5698 senones evaluated (63/fr)
INFO: search.c(2565): 2776 channels searched (30/fr), 172 1st, 2604 last
INFO: search.c(2569): 280 words for which last channels evaluated (3/fr)
INFO: search.c(2571): 168 candidate words for entering last phone (1/fr)
INFO: uttproc.c(510): 0.89 SoS, 0.03 sec elapsed, 0.03 xRT, 0.03 sec CPU, 0.03 xRT
uttproc_result return 0, FINAL RESULT @frm: 89, string content:
SO I GOT NO RECOGNIZED STRING.
----- part of my program ------
/*
 * Abbreviated listing of utterance_loop(): only the local declarations and
 * the start-of-utterance flag appear in this copy; no audio is read or
 * decoded by the statements shown here.
 */
static void utterance_loop()
{
    FILE *fp;               /* stream handle (pointer, as fopen() returns) */
    int32 fr;
    char *hyp;              /* passed by address to uttproc_result(), which takes a char ** */
    char line[1024];
    int16 adbuf[4096];
    int32 k;
    int32 ns;   /* #Samples read from audio in this utterance */
    int32 hwm;  /* High Water Mark: to know when to report partial result */
    int32 recording;
    const char *adFileName = "law.raw";     /* plain C string; `string` is C++ only */
    int32 retVal;
    int totalEles = 0, index = 1;

    // ad_start_rec(ad);    /* Start A/D recording for this utterance */
    recording = 1;
}
/*
 * Program entry point.
 *
 * Passes the command-line arguments to fbs_init(), runs utterance_loop(),
 * then calls fbs_end() and returns 0. The A/D device open/close calls stay
 * commented out because the audio is read from a file rather than from the
 * sound device.
 */
int
main (int32 argc, char *argv[])
{
    fbs_init (argc, argv);

    // if ((ad = ad_open_sps(SAMPLE_RATE)) == NULL)
    //     E_FATAL("ad_open_sps failed\n");

    /* Without this call the static utterance_loop() is never executed and
     * the program would only start up and shut down. */
    utterance_loop ();

    // ad_close (ad);
    fbs_end ();
    return 0;
}
---------- Arguments file for fbs_init --------------
-ctloffset 0
-ctlcount 100000000
-langwt 6.5
-fwdflatlw 8.5
-rescorelw 9.5
-ugwt 0.5
-fillpen 1e-10
-silpen 0.005
-inspen 0.65
-top 1
-topsenfrm 3
-topsenthresh -70000
-fwdflatbeam 1e-08
-fwdflatnwbeam 0.0003
-normmean TRUE
-compress TRUE
-agcmax TRUE
-matchscore TRUE
-fwdflat FALSE
-bestpath TRUE
-usecitrans TRUE
-top 4
-compallsen FALSE
-beam 2e-6
-npbeam 2e-6
-lpbeam 2e-5
-lponlybeam 5e-4
-nwbeam 5e-4
-dictfn ./law.dict
-phnfn /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/phone
-mapfn /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/map
-hmmdir /home/qhwang/mulworkspace/AudioAnalyzer/Oedipus/Adult/hmm/6k
-hmmdirlist /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/
-8bsen TRUE
-sendumpfn /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/sendump
-cbdir /home/qhwang/mulworkspace/extra/AudioAnalyzer/Oedipus/Adult/hmm/6k/
-ccbfn cep.256
-dcbfn d2cep.256
-pcbfn p3cep.256
-xcbfn xcep.256
-hmmext chmm
-code1ext ccode
-code2ext d2code
-code3ext p3code
-code4ext xcode
-hmmsm 0.0000001
-transsm 0.0001
-cepfloor 0.0001
-dcepfloor 0.0001
-xcepfloor 0.0001
-latsize 50
-backtrace FALSE
-taphone FALSE
-tastate FALSE
I would appreciate it if anyone can help!