CMU Sphinx / Forums / Help: PocketSphinx recognizes words without saying anything

Hi,
I am using CMUSphinx for speech recognition, but I am facing a problem: it recognizes words even when nothing is said.
The complete code is:

package edu.cmu.pocketsphinx.demo;

import android.app.Activity;
import android.os.AsyncTask;
import android.os.Bundle;
import android.util.Log;
import android.widget.TextView;
import android.widget.Toast;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;

import edu.cmu.pocketsphinx.Assets;
import edu.cmu.pocketsphinx.Hypothesis;
import edu.cmu.pocketsphinx.RecognitionListener;
import edu.cmu.pocketsphinx.SpeechRecognizer;

import static android.widget.Toast.makeText;
import static edu.cmu.pocketsphinx.SpeechRecognizerSetup.defaultSetup;

/**
 * Activity that continuously listens for the keywords listed in
 * {@code digits.gram} and shows every final hypothesis as a toast.
 *
 * <p>The recognizer is created on a background task because it involves
 * file IO. Until that task finishes (or if it fails) {@link #recognizer}
 * is {@code null}, so every use of it is guarded.
 */
public class PocketSphinxActivity extends Activity implements
        RecognitionListener {

    /* Name of the only search registered with the decoder */
    private static final String DIGITS_SEARCH = "digits";

    /** Null until setupRecognizer() succeeds and again after release. */
    private SpeechRecognizer recognizer;

    /** Set in onDestroy() so a late-finishing init task does not start listening. */
    private boolean destroyed;

    @Override
    public void onCreate(Bundle state) {
        super.onCreate(state);

        setContentView(R.layout.main);
        setCaption("Preparing the recognizer");

        // Recognizer initialization is time-consuming and it involves IO,
        // so we execute it in an async task
        new AsyncTask<Void, Void, Exception>() {
            @Override
            protected Exception doInBackground(Void... params) {
                try {
                    Assets assets = new Assets(PocketSphinxActivity.this);
                    File assetDir = assets.syncAssets();
                    setupRecognizer(assetDir);
                } catch (IOException e) {
                    return e;
                }
                return null;
            }

            @Override
            protected void onPostExecute(Exception result) {
                if (destroyed) {
                    // The activity went away while we were initializing:
                    // release whatever was created instead of listening.
                    releaseRecognizer();
                    return;
                }
                if (result != null) {
                    setCaption("Failed to init recognizer " + result);
                } else {
                    reset();
                    setCaption("say something");
                }
            }
        }.execute();
    }

    @Override
    public void onDestroy() {
        super.onDestroy();
        destroyed = true;
        // recognizer is null when initialization failed or is still running
        releaseRecognizer();
    }

    /**
     * In partial result we get quick updates about current hypothesis. In
     * keyword spotting mode we can react here, in other modes we need to wait
     * for final result in onResult.
     *
     * <p>Intentionally a no-op: this activity only reports final results.
     */
    @Override
    public void onPartialResult(Hypothesis hypothesis) {
    }

    /**
     * This callback is called when we stop the recognizer. Clears the result
     * view and, when there is a hypothesis, logs it and shows it as a toast.
     */
    @Override
    public void onResult(Hypothesis hypothesis) {
        Log.e("inside","onResult");
        ((TextView) findViewById(R.id.result_text)).setText("");
        if (hypothesis != null) {
            String text = hypothesis.getHypstr();
            Log.e("hypothesis ","text\t"+text);
            makeText(getApplicationContext(), text, Toast.LENGTH_SHORT).show();
        }
    }

    @Override
    public void onBeginningOfSpeech() {
        Log.e("user to about to "," say something");
    }

    /**
     * We stop recognizer here to get a final result, then start listening
     * again on the same search.
     */
    @Override
    public void onEndOfSpeech() {
        Log.e("user has stopped","speaking");
        reset();
    }

    /**
     * Stops the current utterance and starts listening on the given search.
     * Does nothing when the recognizer is not available.
     *
     * @param searchName name of a search previously added to the recognizer
     */
    private void switchSearch(String searchName) {
        if (recognizer == null) {
            return;
        }
        recognizer.stop();
        recognizer.startListening(searchName);
    }

    /**
     * Creates the recognizer and registers the keyword search.
     *
     * @param assetsDir directory holding the acoustic model, the dictionary
     *                  and {@code digits.gram}
     * @throws IOException propagated from the recognizer setup calls
     */
    private void setupRecognizer(File assetsDir) throws IOException {
        recognizer = defaultSetup()
                .setAcousticModel(new File(assetsDir, "en-us-ptm"))
                .setDictionary(new File(assetsDir, "cmudict-en-us.dict"))

                // To disable logging of raw audio comment out this call (takes a lot of space on the device)
                .setRawLogDir(assetsDir)

                // Threshold to tune for keyphrase to balance between false alarms and misses.
                // NOTE(review): the keyword list file carries its own per-keyword
                // thresholds; presumably those are the ones to tune for this search - confirm.
                .setKeywordThreshold(1e-45f)

                // Use context-independent phonetic search, context-dependent is too slow for mobile
                .setBoolean("-allphone_ci", true)

                .getRecognizer();
        recognizer.addListener(this);

        // Keyword-list search: one keyword (with its threshold) per line
        File digitsGrammar = new File(assetsDir, "digits.gram");
        recognizer.addKeywordSearch(DIGITS_SEARCH, digitsGrammar);
    }

    @Override
    public void onError(Exception error) {
        // getMessage() may be null; fall back to toString() so the caption is never blank
        String message = error.getMessage();
        setCaption(message != null ? message : error.toString());
    }

    /** Intentionally a no-op: listening is started without a timeout. */
    @Override
    public void onTimeout() {
    }

    /** Restarts listening on the keyword search. */
    private void reset(){
        Log.e("inside","reset()");
        switchSearch(DIGITS_SEARCH);
    }

    /** Cancels and shuts down the recognizer if one exists, then forgets it. */
    private void releaseRecognizer() {
        if (recognizer != null) {
            recognizer.cancel();
            recognizer.shutdown();
            recognizer = null;
        }
    }

    /** Shows the given text in the caption view. */
    private void setCaption(String text) {
        ((TextView) findViewById(R.id.caption_text)).setText(text);
    }
}

my digits.gram file is

HELLO /1e-1/
HIMANSHU /1e-1/
UP /1e-1/
DOWN /1e-1/
LEFT /1e-1/
RIGHT /1e-1/

and dictionary contains

HELLO   HH AH L OW
HELLO(2)    HH EH L OW
HELP    HH EH L P
HIMANSHU    HH AH M AE N SH UW
SHUT    SH AH T
UP  AH P
BACKWARDS   B AE K W ER D Z
DOWN    D AW N
FORWARDS    F AO R W ER D Z
LEFT    L EH F T
RIGHT   R AY T

any help would be appreciated.

Last edit: Nickolay V. Shmyrev 2016-03-28

Nickolay V. Shmyrev - 2016-03-28

You need to tune activation thresholds in keyword list to avoid false alarms as covered in tutorial

http://cmusphinx.sourceforge.net/wiki/tutoriallm#keyword_lists

Overall we recommend to use 3-4 syllables for keyword spotting phrases, you can use longer keyphrases or an activation keyphrase + grammar search like in demo for more accurate recognition.

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Himanshu Srivastava - 2016-03-29

i checked changing the threshold several times but it is unable to resolve my problem.one thing i am using addKeywordSearch() is that okey using this method???

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Himanshu Srivastava - 2016-03-29

in demo can we edit en_phone.dmp?

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
- Nickolay V. Shmyrev - 2016-03-29
  
  > i checked changing the threshold several times but it is unable to resolve my problem.one thing i am using addKeywordSearch() is that okey using this method???
  
  keyword search is fine. The process of tuning the threshold is covered here:
  
  http://cmusphinx.sourceforge.net/wiki/tutoriallm#keyword_lists
  
  In order to get help on threshold tuning you need to provide the audio file you are trying with.
  
  > in demo can we edit en_phone.dmp?
  
  en-phone.dmp has no relation to keyword spotting. The process of creation of en-phone.dmp is covered here: http://cmusphinx.sourceforge.net/wiki/phonemerecognition#training_phonetic_language_model_for_decoding
  
  If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
  - Himanshu Srivastava - 2016-03-29
    
    actually ,my requirement is
    when i say HELP particular event should occur.
    
    If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
  - Himanshu Srivastava - 2016-03-30
    
    can we use local language words for speech recognition?
    
    If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
    - Nickolay V. Shmyrev - 2016-03-30
      
      Yes you can, you have to use existing model phoneset though in a dictionary.
      
      If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
      - Himanshu Srivastava - 2016-03-30
        
        thanx nikolay
        
        If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
  - Himanshu Srivastava - 2016-03-30
    
    Tuning the threshold reduced the number of false alarms, but it stopped listening.
    
    Last edit: Himanshu Srivastava 2016-03-30
    
    If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
    - Nickolay V. Shmyrev - 2016-03-30
      
      In order to get help on threshold tuning you need to provide the audio file you are trying with.
      
      If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
      - Himanshu Srivastava - 2016-03-31
        
        nikolay i am not using any kind of .mp3 file for speech recognition.for speech recognition i speak a word to recognize,but on my device raw files are being created for every onResult() call whether it is false alarm or not
        
        If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
        
        Nickolay V. Shmyrev - 2016-03-31
        
        You need to record a file first and try with a file as described in tutorial.
        
        If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Himanshu Srivastava - 2016-04-01

Hi Nickolay,
Does PocketSphinx only recognize a US accent? I am sending you my recorded file with occurrences of two words, "HELP" and "BACHAO". Please tell me how to train my acoustic model to recognize an Indian accent.

acoustic.wav

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
- Nickolay V. Shmyrev - 2016-04-01
  
  This file can not be processed
  
  ERROR: "continuous.c", line 136: Input audio file has sample rate [44100], but decoder expects [16000]
  
  Pocketsphinx requires 16khz 16bit mono file as input, your file has wrong format. With proper format pocketsphinx should properly detect your words. Also, your file is too noisy, you need to find a way to reduce noise in recording.
  
  If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
  - Himanshu Srivastava - 2016-04-01
    
    please tell me how can i check it's sample rate with .c file what you have mentioned?
    
    If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Himanshu Srivastava - 2016-04-01

what should be the format .mp3 is ok?

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
- Nickolay V. Shmyrev - 2016-04-01
  
  Mp3 is not ok, format must be 16khz 16bit mono PCM WAV file.
  
  If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Himanshu Srivastava - 2016-04-01

and please tell me where to place this recorded file in my android project?

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Nickolay V. Shmyrev - 2016-04-01

> please tell me how can i check it's sample rate with .c file what you have mentioned?

You can check sample rate with file command.

file acoustic.wav
acoustic.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 44100 Hz

> and please tell me where to place this recorded file in my android project?

You do not need to place this file in android project. You need to use it on desktop with pocketsphinx_continuous to find proper threshold for the keywords. Once threshold is found you can use this threshold in android project. For more details on how to do this read the tutorial

http://cmusphinx.sourceforge.net/wiki/tutoriallm#keyword_lists
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
- Himanshu Srivastava - 2016-04-01
  
  but what i am saying is ,i think it does not recognize indian accent .It recognizes only american accent .so,is it due to threshold??
  
  Last edit: Himanshu Srivastava 2016-04-01
  
  If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
  - Nickolay V. Shmyrev - 2016-04-01
    
    I don't think it's the case, it is just a matter of proper threshold. You can try different values and you'll see it recognizes fine.
    
    If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
    - Himanshu Srivastava - 2016-04-02
      
      i am unable to install pocketsphinx in order to use pocketsphinx_continuous command from cmd on windows,could you please guide me.
      
      If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
      - Nickolay V. Shmyrev - 2016-04-02
        
        Download prebuilt binary here and run, there is no need to install it:
        
        https://sourceforge.net/projects/cmusphinx/files/pocketsphinx/5prealpha/pocketsphinx-5prealpha-win32.zip/download
        
        If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
        
        Himanshu Srivastava - 2016-04-02
        
        i am reading doc README inside pocketsphinx,but can't get what to do after building sphinxbase.sln and pocketsphinx.sln
        
        If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

PocketSphinx recognizes words without saying anything

Speech Recognition Toolkit

Forums

Help

PocketSphinx recognizes words without saying anything

PocketSphinx recognizes words without saying anything