CMU Sphinx / Forums / Help: How to improve WAV file recognition accuracy?

Hello all,
I need to transcribe an audio WAV file. I am using the WSJ acoustic and language models instead of a grammar file. I am not sure about my config file. Can anyone please tell me what's wrong with the config file below? Do I need to modify anything in order to improve the recognition, since the recognition is batch and not live?

If I have my own language model, for example "test.lm", instead of using wsj5k.DMP, will it work? What do I need to modify?

Thank you..

<?xml version="1.0" encoding="UTF-8"?>

<!-- Root element of the configuration; the matching closing tag is at the end of the file. -->
<config>

<!-- ******************************************************** -->
<!-- frequently tuned properties                              -->
<!-- ******************************************************** -->

<!-- Global properties. Components below pull them in with the ${name} syntax. -->
<property name="logLevel" value="OFF"/>
<property name="absoluteWordBeamWidth" value="18"/>
<property name="relativeWordBeamWidth" value="1E-30"/>
<property name="wordInsertionProbability" value=".2"/>
<property name="languageWeight" value="10.5"/>
<property name="silenceInsertionProbability" value=".1"/>
<property name="acousticLookahead" value="1.7"/>

<property name="absoluteBeamWidth" value="-1"/>
<!-- relativeBeamWidth was declared twice (1E-60, then 1E-90); a single
     declaration is kept. NOTE(review): 1E-90 is the later value; confirm it
     is the intended one. -->
<property name="relativeBeamWidth" value="1E-90"/>
<property name="frontend" value="epFrontEnd"/>
<property name="recognizer" value="recognizer"/>
<property name="showCreations" value="false"/>


<!-- ******************************************************** -->
<!-- word recognizer configuration                            -->
<!-- ******************************************************** -->

<!-- Top-level recognizer: points at the "decoder" component and attaches
     the three tracker monitors defined in the monitors section. -->
<component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
    <property name="decoder" value="decoder"/>
    <propertylist name="monitors">
        <item>accuracyTracker</item>
        <item>speedTracker</item>
        <item>memoryTracker</item>
    </propertylist>
</component>

<!-- Linguist used by wordPruningSearchManager. It references the "wsj"
     acoustic model, the "trigramModel" language model and the shared
     dictionary, logMath and unitManager components.
     NOTE(review): languageWeight is hard-coded to 14 here, while the global
     languageWeight property is 10.5; confirm which value is intended. -->
<component name="lexTreeLinguist" type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
    <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
    <property name="wantUnigramSmear" value="true"/>
    <property name="fillerInsertionProbability" value=".02"/>
    <property name="addFillerWords" value="true"/>
    <property name="acousticModel" value="wsj"/>
    <property name="languageModel" value="trigramModel"/>
    <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
    <property name="languageWeight" value="14"/>
    <property name="logMath" value="logMath"/>
    <property name="dictionary" value="dictionary"/>
    <property name="unigramSmearWeight" value="1"/>
    <property name="cacheSize" value="0"/>
    <property name="generateUnitStates" value="false"/>
    <property name="unitManager" value="unitManager"/>
</component>

<!-- ******************************************************** -->
<!-- The Decoder   configuration                              -->
<!-- ******************************************************** -->

<!-- Decoder referenced by the recognizer; delegates the search to
     wordPruningSearchManager. -->
<component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
    <property name="searchManager" value="wordPruningSearchManager"/>
</component>

<!--  ************************************************** -->
<!--  wordPruningSearchManager                           -->
<!--  ************************************************** -->

<!-- Search manager referenced by the decoder. It ties together the scorer,
     pruner, active list manager and linguist components.
     NOTE(review): relativeBeamWidth (1E-60) and acousticLookaheadFrames (2.0)
     are hard-coded here rather than taken from the global relativeBeamWidth
     and acousticLookahead properties; confirm that is intended. -->
<component name="wordPruningSearchManager" type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
    <property name="scorer" value="threadedScorer"/>
    <property name="pruner" value="trivialPruner"/>
    <property name="acousticLookaheadFrames" value="2.0"/>
    <property name="logMath" value="logMath"/>
    <property name="activeListManager" value="activeListManager"/>
    <property name="buildWordLattice" value="true"/>
    <property name="maxLatticeEdges" value="50"/>
    <property name="relativeBeamWidth" value="1E-60"/>
    <property name="growSkipInterval" value="8"/>
    <property name="linguist" value="lexTreeLinguist"/>
    <property name="checkStateOrder" value="false"/>
    <property name="keepAllTokens" value="true"/>
</component>

<!-- Active list factory listed three times in activeListManager; both beam
     widths come from the global properties. -->
<component name="activeList"
           type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
    <property name="logMath" value="logMath"/>
    <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
    <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
</component>

<!-- Pruner referenced by wordPruningSearchManager; no properties are set. -->
<component name="trivialPruner"
           type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

<!-- Acoustic scorer referenced by wordPruningSearchManager; it reads
     features from the front end named by the global "frontend" property. -->
<component name="threadedScorer"
           type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
    <property name="frontend" value="${frontend}"/>
    <property name="isCpuRelative" value="true"/>
    <property name="numThreads" value="0"/>
    <property name="minScoreablesPerThread" value="10"/>
    <property name="scoreablesKeepFeature" value="true"/>
</component>





<!--  ************************************************** -->
<!--  acoustic model                                     -->
<!--  ************************************************** -->

<!-- Acoustic model referenced by lexTreeLinguist; its data is supplied by
     the wsjLoader component. -->
<component name="wsj" type="edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.Model">
    <property name="loader" value="wsjLoader"/>
    <property name="unitManager" value="unitManager"/>
</component>

<!-- Loader referenced by the "wsj" acoustic model component. -->
<component
    name="wsjLoader"
    type="edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.ModelLoader">
    <property name="logMath" value="logMath"/>
    <property name="unitManager" value="unitManager"/>
</component>



<!-- Language model referenced by lexTreeLinguist as "trigramModel".
     NOTE(review): "location" is an absolute, machine-specific path to
     wsj5k.DMP; it must be changed to use a different model file. -->
<component
    name="trigramModel"
    type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
    <property name="unigramWeight" value=".7"/>
    <property name="maxDepth" value="3"/>
    <property name="logMath" value="logMath"/>
    <property name="dictionary" value="dictionary"/>
    <property
        name="location"
        value="C:/Documents and Settings/Mohammad Reaaz/My Documents/Mobile_Translator/models/language/wsj5k.DMP"/>
</component>

<!-- ******************************************************** -->
<!-- The Dictionary configuration                             -->
<!-- ******************************************************** -->

<!-- Pronunciation dictionary shared by the linguist and the language model.
     Both paths point at resources bundled with the WSJ acoustic model. -->
<component name="dictionary"
           type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
    <property name="dictionaryPath"
              value="resource:/edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.Model!/edu/cmu/sphinx/model/acoustic/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/cmudict.0.6d"/>
    <property name="fillerPath"
              value="resource:/edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.Model!/edu/cmu/sphinx/model/acoustic/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/fillerdict"/>
    <property name="addSilEndingPronunciation" value="false"/>
    <!-- The value is the literal text <sil>, written with entities because
         a raw "<" is not allowed inside an attribute value. -->
    <property name="wordReplacement" value="&lt;sil&gt;"/>
    <property name="allowMissingWords" value="false"/>
    <property name="unitManager" value="unitManager"/>
</component>



<!-- ******************************************************** -->
<!-- The unit manager configuration                           -->
<!-- ******************************************************** -->

<!-- Shared unit manager referenced by the linguist, acoustic model, loader
     and dictionary components; no properties are set. -->
<component name="unitManager"
           type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

<!-- ******************************************************** -->
<!-- The frontend configuration (stream input, batch CMN)     -->
<!-- ******************************************************** -->

<!-- Front end selected by the global "frontend" property. The pipeline items
     are applied in the order listed; each item names a component defined in
     the frontend pipelines section. -->
<component name="epFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
    <propertylist name="pipeline">
        <item>streamDataSource</item>
        <item>speechClassifier</item>
        <item>speechMarker</item>
        <item>nonSpeechDataFilter</item>
        <item>premphasizer</item>
        <item>windower</item>
        <item>fft</item>
        <item>melFilterBank</item>
        <item>dct</item>
        <item>BatchCMN</item>
        <item>featureExtraction</item>
    </propertylist>
</component>

<!-- ******************************************************** -->
<!-- The frontend pipelines                                   -->
<!-- ******************************************************** -->

<!-- First pipeline stage: the audio input. The properties declare the
     expected audio format (16000 Hz, 16-bit, signed, little-endian).
     NOTE(review): the WAV file presumably has to match this format;
     verify against the input audio. -->
<component name="streamDataSource"
           type="edu.cmu.sphinx.frontend.util.StreamDataSource">
    <property name="sampleRate" value="16000"/>
    <property name="bitsPerSample" value="16"/>
    <property name="bigEndianData" value="false"/>
    <property name="signedData" value="true"/>
    <property name="bytesPerRead" value="320"/>
</component>

<!-- Endpointing stage listed after streamDataSource in the epFrontEnd pipeline. -->
<component name="speechClassifier"
           type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
    <property name="threshold" value="13"/>
</component>

<!-- Endpointing stage listed after speechMarker in the epFrontEnd pipeline;
     no properties are set. -->
<component name="nonSpeechDataFilter"
           type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

<!-- Endpointing stage listed after speechClassifier in the epFrontEnd pipeline. -->
<component name="speechMarker"
           type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker">
    <property name="speechTrailer" value="50"/>
</component>

<!-- Pre-emphasis stage. The component name "premphasizer" is spelled the
     same way in the epFrontEnd pipeline list, so the two must stay in sync. -->
<component name="premphasizer"
           type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

<!-- Windowing stage of the epFrontEnd pipeline; no properties are set. -->
<component name="windower"
           type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

<!-- Fourier transform stage of the epFrontEnd pipeline; no properties are set. -->
<component name="fft"
           type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

<!-- Mel filter bank stage of the epFrontEnd pipeline; no properties are set. -->
<component name="melFilterBank"
           type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>

<!-- Cosine transform stage of the epFrontEnd pipeline; no properties are set. -->
<component name="dct"
           type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

<!-- Batch CMN stage of the epFrontEnd pipeline; no properties are set. -->
<component name="BatchCMN"
           type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>

<!-- Last stage of the epFrontEnd pipeline; no properties are set. -->
<component name="featureExtraction"
           type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>


<!-- ******************************************************* -->
<!--  monitors                                               -->
<!-- ******************************************************* -->

<!-- Monitor listed in the recognizer's "monitors" list; both result
     displays are switched off. -->
<component name="accuracyTracker"
           type="edu.cmu.sphinx.instrumentation.AccuracyTracker">
    <property name="recognizer" value="${recognizer}"/>
    <property name="showAlignedResults" value="false"/>
    <property name="showRawResults" value="false"/>
</component>

<!-- Monitor listed in the recognizer's "monitors" list; summary and detail
     output are both switched off. -->
<component name="memoryTracker"
           type="edu.cmu.sphinx.instrumentation.MemoryTracker">
    <property name="recognizer" value="${recognizer}"/>
    <property name="showSummary" value="false"/>
    <property name="showDetails" value="false"/>
</component>

<!-- Monitor listed in the recognizer's "monitors" list; only the summary
     output is switched on. -->
<component name="speedTracker"
           type="edu.cmu.sphinx.instrumentation.SpeedTracker">
    <property name="recognizer" value="${recognizer}"/>
    <property name="frontend" value="${frontend}"/>
    <property name="showSummary" value="true"/>
    <property name="showDetails" value="false"/>
</component>

<!--  ************************************************** -->
<!--  activeListManager                                  -->
<!--  ************************************************** -->

<!-- Active list manager referenced by wordPruningSearchManager. The list
     order is kept exactly as posted.
     NOTE(review): the position of each factory presumably selects which
     search-state type it serves; verify before reordering. -->
<component name="activeListManager" type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
    <propertylist name="activeListFactories">
        <item>unitExitActiveList</item>
        <item>wordActiveList</item>
        <item>wordActiveList</item>
        <item>activeList</item>
        <item>activeList</item>
        <item>activeList</item>
    </propertylist>
</component>

<!--  ************************************************** -->
<!--  unitExitActiveList                                 -->
<!--  ************************************************** -->

<!-- First factory in activeListManager. absoluteBeamWidth is fixed at -1
     here; relativeBeamWidth comes from the global property. -->
<component name="unitExitActiveList" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
    <property name="absoluteBeamWidth" value="-1"/>
    <property name="logMath" value="logMath"/>
    <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
</component>
<!--  ************************************************** -->
<!--  wordActiveList                                     -->
<!--  ************************************************** -->

<!-- Factory listed twice in activeListManager.
     NOTE(review): both beam widths are hard-coded (21 and 1E-25) and differ
     from the global absoluteWordBeamWidth (18) and relativeWordBeamWidth
     (1E-30) properties, which nothing in the visible config references;
     confirm which values are intended. -->
<component name="wordActiveList" type="edu.cmu.sphinx.decoder.search.WordActiveListFactory">
    <property name="absoluteBeamWidth" value="21"/>
    <property name="logMath" value="logMath"/>
    <property name="relativeBeamWidth" value="1E-25"/>
</component>

<!--  ************************************************** -->
<!--  recognizerMonitor                                  -->
<!--  ************************************************** -->

<!-- NOTE(review): this component is not listed in the recognizer's
     "monitors" list, and the "configMonitor" item it references is not
     defined anywhere in the visible configuration. Either define
     configMonitor or remove this component; confirm which is intended. -->
<component name="recognizerMonitor" type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
    <property name="recognizer" value="${recognizer}"/>
    <propertylist name="allocatedMonitors">
        <item>configMonitor</item>
    </propertylist>
</component>


<!-- ******************************************************* -->
<!--  Miscellaneous components                               -->
<!-- ******************************************************* -->

<!-- Shared log-math component referenced by the linguist, search manager,
     active list factories, acoustic model loader and language model. -->
<component name="logMath" type="edu.cmu.sphinx.util.LogMath">
    <property name="logBase" value="1.0001"/>
    <property name="useAddTable" value="true"/>
</component>

    <!-- Confidence scorer; takes its languageWeight from the global property.
         No other component in the visible configuration references it. -->
    <component name="confidenceScorer"
               type="edu.cmu.sphinx.result.MAPConfidenceScorer">
        <property name="languageWeight" value="${languageWeight}"/>
    </component>

</config>

How to improve WAV file recognition accuracy?

Speech Recognition Toolkit

Forums

Help

How to improve WAV file recognition accuracy? document.SUBSCRIPTION_OPTIONS = { "thing": "topic", "subscribed": false, "url": "subscribe", "icon": { "css": "fa fa-envelope-o" } };

How to improve WAV file recognition accuracy?