Menu

PocketSphinx recognizes words without saying anything

Help
2016-03-28
2016-04-02
1 2 > >> (Page 1 of 2)
  • Himanshu Srivastava

    Hi,
    I am using CMUSphinx for speech recognition, but I am facing a problem: it recognizes words even when I have not said anything.
    the complete code is

    package edu.cmu.pocketsphinx.demo;
    
    import android.app.Activity;
    import android.os.AsyncTask;
    import android.os.Bundle;
    import android.util.Log;
    import android.widget.TextView;
    import android.widget.Toast;
    
    import java.io.File;
    import java.io.IOException;
    import java.util.HashMap;
    
    import edu.cmu.pocketsphinx.Assets;
    import edu.cmu.pocketsphinx.Hypothesis;
    import edu.cmu.pocketsphinx.RecognitionListener;
    import edu.cmu.pocketsphinx.SpeechRecognizer;
    
    import static android.widget.Toast.makeText;
    import static edu.cmu.pocketsphinx.SpeechRecognizerSetup.defaultSetup;
    
    /**
     * Activity that keeps listening for the keywords listed in the
     * {@code digits.gram} asset and shows every reported hypothesis in a toast.
     *
     * <p>The recognizer is created asynchronously from {@link #onCreate}; until
     * that task has finished (or if it fails) the {@code recognizer} field is
     * {@code null}.
     */
    public class PocketSphinxActivity extends Activity implements
            RecognitionListener {

        /* Named searches allow to quickly reconfigure the decoder */
        private static final String DIGITS_SEARCH = "digits";

        /** Set by {@link #setupRecognizer(File)}; {@code null} until setup has run. */
        private SpeechRecognizer recognizer;

        /**
         * Shows the "preparing" caption and starts recognizer initialization in
         * an async task. On success listening starts immediately; on failure the
         * exception is shown in the caption.
         */
        @Override
        public void onCreate(Bundle state) {
            super.onCreate(state);

            setContentView(R.layout.main);
            ((TextView) findViewById(R.id.caption_text))
                    .setText("Preparing the recognizer");

            // Recognizer initialization is time-consuming and involves IO,
            // so we execute it in an async task
            new AsyncTask<Void, Void, Exception>() {
                @Override
                protected Exception doInBackground(Void... params) {
                    try {
                        Assets assets = new Assets(PocketSphinxActivity.this);
                        File assetDir = assets.syncAssets();
                        setupRecognizer(assetDir);
                    } catch (IOException e) {
                        // Hand the failure over to onPostExecute for display.
                        return e;
                    }
                    return null;
                }

                @Override
                protected void onPostExecute(Exception result) {
                    if (result != null) {
                        ((TextView) findViewById(R.id.caption_text))
                                .setText("Failed to init recognizer " + result);
                    } else {
                        reset();
                        ((TextView) findViewById(R.id.caption_text))
                                .setText("say something");
                    }
                }
            }.execute();
        }

        /**
         * Releases the recognizer. The recognizer is {@code null} when
         * initialization failed or has not completed yet, so it is checked
         * before use to avoid a NullPointerException.
         */
        @Override
        public void onDestroy() {
            super.onDestroy();
            if (recognizer != null) {
                recognizer.cancel();
                recognizer.shutdown();
            }
        }

        /**
         * In partial result we get quick updates about current hypothesis. In
         * keyword spotting mode we can react here, in other modes we need to wait
         * for final result in onResult.
         *
         * <p>This activity deliberately ignores partial results and reports the
         * hypothesis only from {@link #onResult(Hypothesis)}.
         */
        @Override
        public void onPartialResult(Hypothesis hypothesis) {
            // Intentionally empty: results are handled in onResult.
        }

        /**
         * This callback is called when we stop the recognizer. Clears the result
         * view and, if there is a hypothesis, logs it and shows it in a toast.
         */
        @Override
        public void onResult(Hypothesis hypothesis) {
            Log.e("inside","onResult");
            ((TextView) findViewById(R.id.result_text)).setText("");
            if (hypothesis != null) {
                String text = hypothesis.getHypstr();
                Log.e("hypothesis ","text\t"+text);
                makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
            }
        }

        /** Only logs that speech has started. */
        @Override
        public void onBeginningOfSpeech() {
            Log.e("user to about to "," say something");
        }

        /**
         * We stop recognizer here to get a final result, then immediately start
         * listening again (see {@link #reset()}).
         */
        @Override
        public void onEndOfSpeech() {
            Log.e("user has stopped","speaking");
            reset();
        }

        /**
         * Stops the current recognition session and starts listening with the
         * given search.
         *
         * @param searchName name of a search previously registered on the
         *                   recognizer in {@link #setupRecognizer(File)}
         */
        private void switchSearch(String searchName) {
            recognizer.stop();
            recognizer.startListening(searchName);
        }

        /**
         * Builds the recognizer from the synced assets and registers the keyword
         * search used by this activity.
         *
         * @param assetsDir directory the assets were synced to; it must contain
         *                  the acoustic model, the dictionary and {@code digits.gram}
         * @throws IOException if the recognizer cannot be created from the assets
         */
        private void setupRecognizer(File assetsDir) throws IOException {
            // The recognizer can be configured to perform multiple searches
            // of different kind and switch between them
            recognizer = defaultSetup()
                    .setAcousticModel(new File(assetsDir, "en-us-ptm"))
                    .setDictionary(new File(assetsDir, "cmudict-en-us.dict"))

                    // To disable logging of raw audio comment out this call (takes a lot of space on the device)
                    .setRawLogDir(assetsDir)

                    // Threshold to tune for keyphrase to balance between false alarms and misses
                    .setKeywordThreshold(1e-45f)

                    // Use context-independent phonetic search, context-dependent is too slow for mobile
                    .setBoolean("-allphone_ci", true)

                    .getRecognizer();
            recognizer.addListener(this);

            // Create keyword-spotting search from the keyword list file. Despite its
            // name, digits.gram is a keyword list (one phrase per line), not a grammar.
            File keywordList = new File(assetsDir, "digits.gram");
            recognizer.addKeywordSearch(DIGITS_SEARCH, keywordList);
        }

        /** Shows the error message in the caption view. */
        @Override
        public void onError(Exception error) {
            ((TextView)findViewById(R.id.caption_text)).setText(error.getMessage());
        }

        /** No timeout is passed to startListening, so there is nothing to do here. */
        @Override
        public void onTimeout() {
        }

        /**
         * Stops the current session (which delivers the final result to
         * {@link #onResult(Hypothesis)}) and restarts the keyword search.
         */
        private void reset(){
            Log.e("inside","reset()");
            switchSearch(DIGITS_SEARCH);
        }
    }
    

    my digits.gram file is

    HELLO /1e-1/
    HIMANSHU /1e-1/
    UP /1e-1/
    DOWN /1e-1/
    LEFT /1e-1/
    RIGHT /1e-1/
    

    and dictionary contains

    HELLO   HH AH L OW
    HELLO(2)    HH EH L OW
    HELP    HH EH L P
    HIMANSHU    HH AH M AE N SH UW
    SHUT    SH AH T
    UP  AH P
    BACKWARDS   B AE K W ER D Z
    DOWN    D AW N
    FORWARDS    F AO R W ER D Z
    LEFT    L EH F T
    RIGHT   R AY T
    

    any help would be appreciated.

     

    Last edit: Nickolay V. Shmyrev 2016-03-28
    • Nickolay V. Shmyrev

      You need to tune activation thresholds in keyword list to avoid false alarms as covered in tutorial

      http://cmusphinx.sourceforge.net/wiki/tutoriallm#keyword_lists

      Overall we recommend to use 3-4 syllables for keyword spotting phrases, you can use longer keyphrases or an activation keyphrase + grammar search like in demo for more accurate recognition.

       
  • Himanshu Srivastava

    i checked changing the threshold several times but it is unable to resolve my problem.one thing i am using addKeywordSearch() is that okey using this method???

     
  • Himanshu Srivastava

    in demo can we edit en_phone.dmp?

     
    • Nickolay V. Shmyrev

      i checked changing the threshold several times but it is unable to resolve my problem.one thing i am using addKeywordSearch() is that okey using this method???

      keyword search is fine. The process of tuning the threshold is covered here:

      http://cmusphinx.sourceforge.net/wiki/tutoriallm#keyword_lists

      In order to get help on threshold tuning you need to provide the audio file you are trying with.

      in demo can we edit en_phone.dmp?

      en-phone.dmp has no relation to keyword spotting. The process of creation of en-phone.dmp is covered here: http://cmusphinx.sourceforge.net/wiki/phonemerecognition#training_phonetic_language_model_for_decoding

       
      • Himanshu Srivastava

        actually ,my requirement is
        when i say HELP particular event should occur.

         
      • Himanshu Srivastava

        can we use local language words for speech recognition?

         
        • Nickolay V. Shmyrev

          Yes you can, you have to use existing model phoneset though in a dictionary.

           
          • Himanshu Srivastava

            thanx nikolay

             
      • Himanshu Srivastava

        Tuning the threshold reduced the number of false alarms, but now it has stopped listening.

         

        Last edit: Himanshu Srivastava 2016-03-30
        • Nickolay V. Shmyrev

          In order to get help on threshold tuning you need to provide the audio file you are trying with.

           
          • Himanshu Srivastava

            Nickolay, I am not using any kind of .mp3 file for speech recognition. For speech recognition I speak a word to be recognized, but on my device raw files are being created for every onResult() call, whether it is a false alarm or not.

             
            • Nickolay V. Shmyrev

              You need to record a file first and try with a file as described in tutorial.

               
  • Himanshu Srivastava

    Hi Nickolay,
    Does PocketSphinx only recognize a US accent? I am sending you my recorded file with occurrences of two words, "HELP" and "BACHAO". Please tell me how to train my acoustic model to recognize an Indian accent.

     
    • Nickolay V. Shmyrev

      This file can not be processed

      ERROR: "continuous.c", line 136: Input audio file has sample rate [44100], but decoder expects [16000]

      Pocketsphinx requires 16khz 16bit mono file as input, your file has wrong format. With proper format pocketsphinx should properly detect your words. Also, your file is too noisy, you need to find a way to reduce noise in recording.

       
      • Himanshu Srivastava

        please tell me how can i check it's sample rate with .c file what you have mentioned?

         
  • Himanshu Srivastava

    what should be the format .mp3 is ok?

     
    • Nickolay V. Shmyrev

      Mp3 is not ok, format must be 16khz 16bit mono PCM WAV file.

       
  • Himanshu Srivastava

    and please tell me where to place this recorded file in my android project?

     
  • Nickolay V. Shmyrev

    please tell me how can i check it's sample rate with .c file what you have mentioned?

    You can check sample rate with file command.

    file acoustic.wav 
    acoustic.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 44100 Hz
    

    and please tell me where to place this recorded file in my android project?

    You do not need to place this file in android project. You need to use it on desktop with pocketsphinx_continuous to find proper threshold for the keywords. Once threshold is found you can use this threshold in android project. For more details on how to do this read the tutorial

    http://cmusphinx.sourceforge.net/wiki/tutoriallm#keyword_lists

     
    • Himanshu Srivastava

      but what i am saying is ,i think it does not recognize indian accent .It recognizes only american accent .so,is it due to threshold??

       

      Last edit: Himanshu Srivastava 2016-04-01
      • Nickolay V. Shmyrev

        I don't think it's the case, it is just a matter of proper threshold. You can try different values and you'll see it recognizes fine.

         
        • Himanshu Srivastava

          i am unable to install pocketsphinx in order to use pocketsphinx_continuous command from cmd on windows,could you please guide me.

           
          • Nickolay V. Shmyrev

             
            • Himanshu Srivastava

              i am reading doc README inside pocketsphinx,but can't get what to do after building sphinxbase.sln and pocketsphinx.sln

               
1 2 > >> (Page 1 of 2)

Log in to post a comment.

Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.