Hi,
I am using CMUSphinx for speech recognition, but I am facing a problem: it recognizes words even when no word has been spoken.
the complete code is
packageedu.cmu.pocketsphinx.demo;importandroid.app.Activity;importandroid.os.AsyncTask;importandroid.os.Bundle;importandroid.util.Log;importandroid.widget.TextView;importandroid.widget.Toast;importjava.io.File;importjava.io.IOException;importjava.util.HashMap;importedu.cmu.pocketsphinx.Assets;importedu.cmu.pocketsphinx.Hypothesis;importedu.cmu.pocketsphinx.RecognitionListener;importedu.cmu.pocketsphinx.SpeechRecognizer;importstaticandroid.widget.Toast.makeText;importstaticedu.cmu.pocketsphinx.SpeechRecognizerSetup.defaultSetup;publicclassPocketSphinxActivityextendsActivityimplementsRecognitionListener{/*Namedsearchesallowtoquicklyreconfigurethedecoder*///privatestaticfinalStringKWS_SEARCH="wakeup";//privatestaticfinalStringFORECAST_SEARCH="forecast";privatestaticfinalStringDIGITS_SEARCH="digits";//privatestaticfinalStringPHONE_SEARCH="phones";//privatestaticfinalStringMENU_SEARCH="menu";/*Keywordwearelookingfortoactivatemenu*///privatestaticfinalStringKEYPHRASE="hello";privateSpeechRecognizerrecognizer;privateHashMap<String,Integer>captions;@OverridepublicvoidonCreate(Bundlestate){super.onCreate(state);//PreparethedataforUI//captions=newHashMap<String,Integer>();//captions.put(KWS_SEARCH,R.string.kws_caption);//captions.put(MENU_SEARCH,R.string.menu_caption);//captions.put(DIGITS_SEARCH,R.string.digits_caption);//captions.put(PHONE_SEARCH,R.string.phone_caption);//captions.put(FORECAST_SEARCH,R.string.forecast_caption);setContentView(R.layout.main);((TextView)findViewById(R.id.caption_text)).setText("Preparing the 
recognizer");//Recognizerinitializationisatime-consuminganditinvolvesIO,//soweexecuteitinasynctasknewAsyncTask<Void,Void,Exception>(){@OverrideprotectedExceptiondoInBackground(Void...params){try{Assetsassets=newAssets(PocketSphinxActivity.this);FileassetDir=assets.syncAssets();setupRecognizer(assetDir);}catch(IOExceptione){returne;}returnnull;}@OverrideprotectedvoidonPostExecute(Exceptionresult){if(result!=null){((TextView)findViewById(R.id.caption_text)).setText("Failed to init recognizer "+result);}else{reset();((TextView)findViewById(R.id.caption_text)).setText("say something");//switchSearch(KWS_SEARCH);}}}.execute();}@OverridepublicvoidonDestroy(){super.onDestroy();recognizer.cancel();recognizer.shutdown();}/***Inpartialresultwegetquickupdatesaboutcurrenthypothesis.In*keywordspottingmodewecanreacthere,inothermodesweneedtowait*forfinalresultinonResult.*/@OverridepublicvoidonPartialResult(Hypothesishypothesis){//Log.e("inside","onPartialResult\t");/*if(hypothesis==null)return;Stringtext=hypothesis.getHypstr();Log.e("hbypothesis in","partial results"+text);if(text.equals(KEYPHRASE))switchSearch(MENU_SEARCH);elseif(text.equals(DIGITS_SEARCH))switchSearch(DIGITS_SEARCH);elseif(text.equals(PHONE_SEARCH))switchSearch(PHONE_SEARCH);elseif(text.equals(FORECAST_SEARCH))switchSearch(FORECAST_SEARCH);else((TextView)findViewById(R.id.result_text)).setText(text);*/}/***Thiscallbackiscalledwhenwestoptherecognizer.*/@OverridepublicvoidonResult(Hypothesishypothesis){Log.e("inside","onResult");((TextView)findViewById(R.id.result_text)).setText("");if(hypothesis!=null){Stringtext=hypothesis.getHypstr();Log.e("hypothesis ","text\t"+text);makeText(getApplicationContext(),text,Toast.LENGTH_SHORT).show();}}@OverridepublicvoidonBeginningOfSpeech(){Log.e("user to about to "," say something");}/***Westoprecognizerheretogetafinalresult*/@OverridepublicvoidonEndOfSpeech(){Log.e("user has 
stopped","speaking");reset();/*if(!recognizer.getSearchName().equals(KWS_SEARCH))switchSearch(KWS_SEARCH);*/}privatevoidswitchSearch(StringsearchName){recognizer.startListening("hello");/*recognizer.stop();//Ifwearenotspotting,startlisteningwithtimeout(10000msor10seconds).if(searchName.equals(KWS_SEARCH))recognizer.startListening(searchName);elserecognizer.startListening(searchName,10000);Stringcaption=getResources().getString(captions.get(searchName));((TextView)findViewById(R.id.caption_text)).setText(caption);*/}privatevoidsetupRecognizer(FileassetsDir)throwsIOException{//Therecognizercanbeconfiguredtoperformmultiplesearches//ofdifferentkindandswitchbetweenthemrecognizer=defaultSetup().setAcousticModel(newFile(assetsDir,"en-us-ptm")).setDictionary(newFile(assetsDir,"cmudict-en-us.dict"))//Todisableloggingofrawaudiocommentoutthiscall(takesalotofspaceonthedevice).setRawLogDir(assetsDir)//Thresholdtotuneforkeyphrasetobalancebetweenfalsealarmsandmisses.setKeywordThreshold(1e-45f)//Usecontext-independentphoneticsearch,context-dependentistooslowformobile.setBoolean("-allphone_ci",true).getRecognizer();recognizer.addListener(this);//recognizer.startListening("hello");/**Inyourapplicationyoumightnotneedtoaddallthosesearches.*Theyareaddedherefordemonstration.Youcanleavejustone.*///Createkeyword-activationsearch.//recognizer.addKeyphraseSearch(KWS_SEARCH,KEYPHRASE);//Creategrammar-basedsearchforselectionbetweendemos//FilemenuGrammar=newFile(assetsDir,"menu.gram");//recognizer.addGrammarSearch(MENU_SEARCH,menuGrammar);//Creategrammar-basedsearchfordigitrecognitionFiledigitsGrammar=newFile(assetsDir,"digits.gram");recognizer.addKeywordSearch(DIGITS_SEARCH,digitsGrammar);//recognizer.startListening("HELLO");//recognizer.startListening(DIGITS_SEARCH);//Createlanguagemodelsearch//FilelanguageModel=newFile(assetsDir,"weather.dmp");//recognizer.addNgramSearch(FORECAST_SEARCH,languageModel);//Phoneticsearch//FilephoneticModel=newFile(assetsDir,"en-phone.dmp");//recognizer.addAllph
oneSearch(PHONE_SEARCH,phoneticModel);}@OverridepublicvoidonError(Exceptionerror){((TextView)findViewById(R.id.caption_text)).setText(error.getMessage());}@OverridepublicvoidonTimeout(){/*switchSearch(KWS_SEARCH);*/}privatevoidreset(){Log.e("inside","reset()");recognizer.stop();recognizer.startListening(DIGITS_SEARCH);}}
my digits.gram file is
HELLO /1e-1/
HIMANSHU /1e-1/
UP /1e-1/
DOWN /1e-1/
LEFT /1e-1/
RIGHT /1e-1/
and dictionary contains
HELLO HH AH L OW
HELLO(2) HH EH L OW
HELP HH EH L P
HIMANSHU HH AH M AE N SH UW
SHUT SH AH T
UP AH P
BACKWARDS B AE K W ER D Z
DOWN D AW N
FORWARDS F AO R W ER D Z
LEFT L EH F T
RIGHT R AY T
any help would be appreciated.
Last edit: Nickolay V. Shmyrev 2016-03-28
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
Overall, we recommend using 3-4 syllables for keyword spotting phrases; you can use longer keyphrases, or an activation keyphrase + grammar search like in the demo, for more accurate recognition.
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
I tried changing the threshold several times, but that did not resolve my problem. One more thing: I am using addKeywordSearch() — is it okay to use this method?
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
I tried changing the threshold several times, but that did not resolve my problem. One more thing: I am using addKeywordSearch() — is it okay to use this method?
keyword search is fine. The process of tuning the threshold is covered here:
Nickolay, I am not using any kind of .mp3 file for speech recognition. I speak a word for it to recognize, but on my device raw files are being created for every onResult() call, whether it is a false alarm or not.
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
Hi Nickolay,
Does pocketsphinx only recognize a US accent? I am sending you my recorded file with occurrences of two words, "HELP" and "BACHAO". Please tell me how to train my acoustic model to recognize an Indian accent.
ERROR: "continuous.c", line 136: Input audio file has sample rate [44100], but decoder expects [16000]
Pocketsphinx requires 16khz 16bit mono file as input, your file has wrong format. With proper format pocketsphinx should properly detect your words. Also, your file is too noisy, you need to find a way to reduce noise in recording.
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
and please tell me where to place this recorded file in my android project?
You do not need to place this file in android project. You need to use it on desktop with pocketsphinx_continuous to find proper threshold for the keywords. Once threshold is found you can use this threshold in android project. For more details on how to do this read the tutorial
Hi,
I am using CMUSphinx for speech recognition, but I am facing a problem: it recognizes words even when no word has been spoken.
the complete code is
my digits.gram file is
and dictionary contains
any help would be appreciated.
Last edit: Nickolay V. Shmyrev 2016-03-28
You need to tune activation thresholds in keyword list to avoid false alarms as covered in tutorial
http://cmusphinx.sourceforge.net/wiki/tutoriallm
Overall, we recommend using 3-4 syllables for keyword spotting phrases; you can use longer keyphrases, or an activation keyphrase + grammar search like in the demo, for more accurate recognition.
I tried changing the threshold several times, but that did not resolve my problem. One more thing: I am using addKeywordSearch() — is it okay to use this method?
in demo can we edit en_phone.dmp?
keyword search is fine. The process of tuning the threshold is covered here:
http://cmusphinx.sourceforge.net/wiki/tutoriallm#keyword_lists
In order to get help on threshold tuning you need to provide the audio file you are trying with.
en-phone.dmp has no relation to keyword spotting. The process of creation of en-phone.dmp is covered here: http://cmusphinx.sourceforge.net/wiki/phonemerecognition#training_phonetic_language_model_for_decoding
actually ,my requirement is
when i say HELP particular event should occur.
can we use local language words for speech recognition?
Yes, you can, though in the dictionary you have to use the phoneset of the existing model.
thanx nikolay
Tuning the threshold reduced the number of false alarms, but now it has stopped listening.
Last edit: Himanshu Srivastava 2016-03-30
In order to get help on threshold tuning you need to provide the audio file you are trying with.
Nickolay, I am not using any kind of .mp3 file for speech recognition. I speak a word for it to recognize, but on my device raw files are being created for every onResult() call, whether it is a false alarm or not.
You need to record a file first and try with a file as described in tutorial.
Hi Nickolay,
Does pocketsphinx only recognize a US accent? I am sending you my recorded file with occurrences of two words, "HELP" and "BACHAO". Please tell me how to train my acoustic model to recognize an Indian accent.
This file can not be processed
ERROR: "continuous.c", line 136: Input audio file has sample rate [44100], but decoder expects [16000]
Pocketsphinx requires 16khz 16bit mono file as input, your file has wrong format. With proper format pocketsphinx should properly detect your words. Also, your file is too noisy, you need to find a way to reduce noise in recording.
Please tell me how I can check its sample rate with the .c file that you mentioned.
What should the format be? Is .mp3 OK?
Mp3 is not ok, format must be 16khz 16bit mono PCM WAV file.
and please tell me where to place this recorded file in my android project?
You can check the sample rate with the `file` command. You do not need to place this file in the Android project. You need to use it on the desktop with pocketsphinx_continuous to find the proper threshold for the keywords. Once the threshold is found, you can use it in the Android project. For more details on how to do this, read the tutorial:
http://cmusphinx.sourceforge.net/wiki/tutoriallm#keyword_lists
But what I am saying is that I think it does not recognize an Indian accent; it recognizes only an American accent. So, is that due to the threshold?
Last edit: Himanshu Srivastava 2016-04-01
I don't think it's the case, it is just a matter of proper threshold. You can try different values and you'll see it recognizes fine.
I am unable to install pocketsphinx in order to use the pocketsphinx_continuous command from cmd on Windows. Could you please guide me?
Download prebuilt binary here and run, there is no need to install it:
https://sourceforge.net/projects/cmusphinx/files/pocketsphinx/5prealpha/pocketsphinx-5prealpha-win32.zip/download
I am reading the README inside pocketsphinx, but I can't work out what to do after building sphinxbase.sln and pocketsphinx.sln.