Menu

Sphinx4 Transcriber Config File

Help
Adiilah
2008-02-10
2012-09-22
  • Adiilah

    Adiilah - 2008-02-10

    Hi all,
    I have to build a Sphinx4 application that converts a WAV file to text. I started from the Transcriber demo and edited it to recognize words. I am using the language model wsj5k.DMP rather than a grammar file.

    The problem I am facing is that it takes a lot of time to decode the WAV file, and the recognition is very poor.

    For example: I have tested the wav file with content "Hello Rita" ,the result that i obtained is:

    NonSpeechDataFilter: ALERT: getting a SpeechStartSignal while in speech, removing it.
    a in or the go

    Can anyone please tell me what's wrong with the following config file?
    It's really urgent.

    Thank you all in advance...

    <?xml version="1.0" encoding="UTF-8"?>

    <!--
    Sphinx-4 Configuration file
    -->

    <!-- ******** -->
    <!-- WSJ transcriber configuration file -->
    <!-- ******** -->

    <config>

    <!-- ******** -->
    <!-- frequently tuned properties -->
    <!-- ******** -->

    <!-- Global settings. Components pull several of these in with ${name}. -->
    <property name="logLevel" value="OFF"/>
    <property name="showCreations" value="false"/>

    <!-- Beam widths. relativeBeamWidth used to appear twice in this list
         (1E-60 and 1E-90), leaving it ambiguous which value the active
         lists received. It is now defined once; 1E-60 is kept because it is
         the narrower beam and equals the value hard-coded on
         wordPruningSearchManager. -->
    <property name="absoluteBeamWidth" value="-1"/>
    <property name="relativeBeamWidth" value="1E-60"/>

    <!-- NOTE(review): no component in this file references the next three
         values through ${...}; confirm whether they are still needed. -->
    <property name="absoluteWordBeamWidth" value="18"/>
    <property name="relativeWordBeamWidth" value="1E-30"/>
    <property name="acousticLookahead" value="1.7"/>

    <!-- Language weight and insertion probabilities. -->
    <property name="languageWeight" value="10.5"/>
    <property name="wordInsertionProbability" value=".2"/>
    <property name="silenceInsertionProbability" value=".1"/>

    <!-- Names of the components used as front end and recognizer. -->
    <property name="frontend" value="epFrontEnd"/>
    <property name="recognizer" value="recognizer"/>

    <!-- ******** -->
    <!-- word recognizer configuration -->
    <!-- ******** -->

    <!-- Top-level recognizer: decoding is delegated to the "decoder"
         component, and three instrumentation monitors are attached. -->
    <component name="recognizer"
               type="edu.cmu.sphinx.recognizer.Recognizer">
        <propertylist name="monitors">
            <item>accuracyTracker </item>
            <item>speedTracker </item>
            <item>memoryTracker </item>
        </propertylist>
        <property name="decoder" value="decoder"/>
    </component>

    <!-- Lex-tree linguist: ties together the acoustic model, the trigram
         language model, the dictionary and the unit manager. -->
    <component name="lexTreeLinguist"
               type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
        <!-- referenced components -->
        <property name="acousticModel" value="wsj"/>
        <property name="languageModel" value="trigramModel"/>
        <property name="dictionary" value="dictionary"/>
        <property name="unitManager" value="unitManager"/>
        <property name="logMath" value="logMath"/>

        <!-- insertion probabilities -->
        <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
        <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
        <property name="fillerInsertionProbability" value=".02"/>

        <!-- NOTE(review): fixed at 14 here, while the global languageWeight
             property holds a different value; confirm which is intended. -->
        <property name="languageWeight" value="14"/>

        <!-- unigram smearing -->
        <property name="wantUnigramSmear" value="true"/>
        <property name="unigramSmearWeight" value="1"/>

        <!-- remaining switches -->
        <property name="addFillerWords" value="true"/>
        <property name="generateUnitStates" value="false"/>
        <property name="cacheSize" value="0"/>
    </component>

    <!-- ******** -->
    <!-- The Decoder configuration -->
    <!-- ******** -->

    <!-- Decoder: uses wordPruningSearchManager as its search manager. -->
    <component name="decoder"
               type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="wordPruningSearchManager" />
    </component>

    <!-- ******** -->
    <!-- wordPruningSearchManager -->
    <!-- ******** -->

    <!-- Search manager used by the decoder. It references the scorer,
         pruner, active list manager and linguist defined elsewhere in this
         file. The stray list bullets that preceded the markup were removed
         so that only comments and elements remain. -->
    <component name="wordPruningSearchManager"
               type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
        <property name="scorer" value="threadedScorer"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="acousticLookaheadFrames" value="2.0"/>
        <property name="logMath" value="logMath"/>
        <property name="activeListManager" value="activeListManager"/>
        <property name="buildWordLattice" value="true"/>
        <property name="maxLatticeEdges" value="50"/>
        <!-- NOTE(review): hard-coded rather than ${relativeBeamWidth};
             confirm this is deliberate. -->
        <property name="relativeBeamWidth" value="1E-60"/>
        <property name="growSkipInterval" value="8"/>
        <property name="linguist" value="lexTreeLinguist"/>
        <property name="checkStateOrder" value="false"/>
        <property name="keepAllTokens" value="true"/>
    </component>

    <!-- Default active list factory; both beam widths come from the global
         properties of the same name. -->
    <component name="activeList" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}" />
        <property name="relativeBeamWidth" value="${relativeBeamWidth}" />
        <property name="logMath" value="logMath" />
    </component>

    <!-- Pruner referenced by wordPruningSearchManager; takes no properties. -->
    <component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner" />

    <!-- Acoustic scorer; it is fed by the front end named in the global
         "frontend" property. -->
    <component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}" />
        <!-- threading settings -->
        <property name="numThreads" value="0" />
        <property name="isCpuRelative" value="true" />
        <property name="minScoreablesPerThread" value="10" />
        <property name="scoreablesKeepFeature" value="true" />
    </component>

    <!-- ******* -->
    <!-- acoustic model -->
    <!-- ******* -->

    <!-- WSJ acoustic model; it is loaded through wsjLoader. -->
    <component name="wsj"
               type="edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.Model">
        <property name="unitManager" value="unitManager"/>
        <property name="loader" value="wsjLoader"/>
    </component>
    <!-- ******* -->
    <!-- sphinx3Loader -->
    <!-- ******* -->

    <!-- Loader referenced by the "wsj" acoustic model component. -->
    <component name="wsjLoader"
               type="edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.ModelLoader">
        <property name="unitManager" value="unitManager"/>
        <property name="logMath" value="logMath"/>
    </component>
    <!-- ******* -->
    <!-- trigramModel -->
    <!-- ******* -->

    <!-- Trigram language model read from the wsj5k.DMP file.
         NOTE(review): "location" is an absolute path on one machine; it
         must be adjusted wherever this configuration is deployed. -->
    <component name="trigramModel"
               type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
        <property name="location" value="C:/Documents and Settings/Mohammad Reaaz/My Documents/Mobile_Translator/models/language/wsj5k.DMP"/>
        <property name="dictionary" value="dictionary"/>
        <property name="logMath" value="logMath"/>
        <property name="maxDepth" value="3"/>
        <property name="unigramWeight" value=".5"/>
    </component>

    <!-- ******** -->
    <!-- The Dictionary configuration -->
    <!-- ******** -->

    <!-- Pronunciation dictionary; both the word list and the filler list
         are read from resources bundled with the WSJ acoustic model. -->
    <component name="dictionary" type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property
            name="dictionaryPath"
            value="resource:/edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.Model!/edu/cmu/sphinx/model/acoustic/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/cmudict.0.6d" />
        <property
            name="fillerPath"
            value="resource:/edu.cmu.sphinx.model.acoustic.WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.Model!/edu/cmu/sphinx/model/acoustic/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/fillerdict" />
        <property name="unitManager" value="unitManager" />
        <property name="wordReplacement" value="&lt;sil&gt;" />
        <property name="allowMissingWords" value="false" />
        <property name="addSilEndingPronunciation" value="false" />
    </component>

    <!-- ******** -->
    <!-- The unit manager configuration -->
    <!-- ******** -->

    <!-- Unit manager shared by the linguist, dictionary and acoustic model. -->
    <component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager" />

    <!-- ******** -->
    <!-- The live frontend configuration -->
    <!-- ******** -->
    <!-- Front end selected by the global "frontend" property. The stages
         listed in "pipeline" appear in the order given; each item names a
         component declared further down. -->
    <component name="epFrontEnd"
               type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <!-- audio input -->
            <item>streamDataSource </item>
            <!-- endpointing -->
            <item>speechClassifier </item>
            <item>speechMarker </item>
            <item>nonSpeechDataFilter </item>
            <!-- feature computation -->
            <item>premphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>liveCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <!-- ******** -->
    <!-- The frontend pipelines -->
    <!-- ******** -->

    <!-- First pipeline stage. The properties describe the expected audio:
         16000 sample rate, 16 bits per sample, signed, not big-endian. -->
    <component name="streamDataSource" type="edu.cmu.sphinx.frontend.util.StreamDataSource">
        <property name="sampleRate" value="16000" />
        <property name="bitsPerSample" value="16" />
        <property name="signedData" value="true" />
        <property name="bigEndianData" value="false" />
        <property name="bytesPerRead" value="320" />
    </component>

    <!-- Endpointing stages of the pipeline: classifier, marker and filter. -->
    <component name="speechClassifier" type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
        <property name="threshold" value="13" />
    </component>

    <component name="nonSpeechDataFilter" type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter" />

    <component name="speechMarker" type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker">
        <property name="speechTrailer" value="50" />
    </component>

    <!-- Signal-processing stages named in the epFrontEnd pipeline. None of
         them sets a property here. -->
    <component name="premphasizer" type="edu.cmu.sphinx.frontend.filter.Preemphasizer" />

    <component name="windower" type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower" />

    <component name="fft" type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform" />

    <component name="melFilterBank" type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank" />

    <component name="dct" type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform" />

    <!-- Cepstral mean normalisation stage. This configuration transcribes a
         pre-recorded WAV file, so the batch implementation is used in place
         of the live one. The component keeps the name "liveCMN" only so
         that the existing pipeline entry of that name still resolves. -->
    <component name="liveCMN"
               type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>

    <!-- Last stage of the epFrontEnd pipeline. -->
    <component name="featureExtraction" type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor" />

    <!-- ********** -->
    <!-- monitors -->
    <!-- ********** -->

    <!-- Accuracy monitor attached to the recognizer; both result displays
         are switched off. -->
    <component name="accuracyTracker" type="edu.cmu.sphinx.instrumentation.AccuracyTracker">
        <property name="recognizer" value="${recognizer}" />
        <property name="showRawResults" value="false" />
        <property name="showAlignedResults" value="false" />
    </component>

    <!-- Memory monitor attached to the recognizer; summary and details are
         both switched off. -->
    <component name="memoryTracker" type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer" value="${recognizer}" />
        <property name="showDetails" value="false" />
        <property name="showSummary" value="false" />
    </component>

    <!-- Speed monitor attached to the recognizer and the front end; only
         the summary is enabled. -->
    <component name="speedTracker" type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="frontend" value="${frontend}" />
        <property name="recognizer" value="${recognizer}" />
        <property name="showDetails" value="false" />
        <property name="showSummary" value="true" />
    </component>

    <!-- ******** -->
    <!-- activeListManager -->
    <!-- ******** -->

    <!-- Active list manager used by wordPruningSearchManager. The order of
         the factories is kept exactly as it was. The stray list bullets that
         had crept into the element content were removed. -->
    <component name="activeListManager"
               type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
        <propertylist name="activeListFactories">
            <item>unitExitActiveList</item>
            <item>wordActiveList</item>
            <item>wordActiveList</item>
            <item>activeList</item>
            <item>activeList</item>
            <item>activeList</item>
        </propertylist>
    </component>

    <!-- ******* -->
    <!-- unitExitActiveList -->
    <!-- ******* -->

    <!-- Active list factory named first in activeListManager. The stray
         "-" markers that preceded the markup were removed. -->
    <component name="unitExitActiveList"
               type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="absoluteBeamWidth" value="-1"/>
        <property name="logMath" value="logMath"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>
    <!-- ******* -->
    <!-- wordActiveList -->
    <!-- ******* -->

    <!-- Word-level active list factory named in activeListManager. The
         stray "-" markers that preceded the markup were removed.
         NOTE(review): both beam widths are hard-coded here, while the
         global absoluteWordBeamWidth / relativeWordBeamWidth properties
         hold different values and are never referenced; confirm which set
         is intended. -->
    <component name="wordActiveList"
               type="edu.cmu.sphinx.decoder.search.WordActiveListFactory">
        <property name="absoluteBeamWidth" value="21"/>
        <property name="logMath" value="logMath"/>
        <property name="relativeBeamWidth" value="1E-25"/>
    </component>

    <!-- ******** -->
    <!-- recognizerMonitor -->
    <!-- ******** -->

    <!-- Monitor bound to the recognizer; its allocatedMonitors list names
         configMonitor. The stray list bullets in the element content were
         removed.
         NOTE(review): recognizerMonitor is not listed in the recognizer's
         "monitors" property list; confirm whether it is meant to be. -->
    <component name="recognizerMonitor"
               type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
        <property name="recognizer" value="${recognizer}"/>
        <propertylist name="allocatedMonitors">
            <item>configMonitor</item>
        </propertylist>
    </component>

    <!-- configMonitor was referenced above but never declared in this file;
         it is declared here with default settings so the reference can be
         resolved. -->
    <component name="configMonitor"
               type="edu.cmu.sphinx.instrumentation.ConfigMonitor"/>

    <!-- ********** -->
    <!-- Miscellaneous components -->
    <!-- ********** -->

    <!-- Log-math helper referenced by most components in this file. -->
    <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="useAddTable" value="true" />
        <property name="logBase" value="1.0001" />
    </component>

    <!-- Confidence scorer; its language weight comes from the global
         languageWeight property. -->
    <component name="confidenceScorer" type="edu.cmu.sphinx.result.MAPConfidenceScorer">
        <property name="languageWeight" value="${languageWeight}" />
    </component>

    </config>

     
    • Adiilah

      Adiilah - 2008-02-13

      Hi Nickolay V. Shmyrev,
      Thank you for the reply. I'll try it, and if it still doesn't work I'll upload the audio files.
      Thank you again..

       
    • Adiilah

      Adiilah - 2008-02-13

      Hi,
      Can you please explain what the beam size actually is and what effect it has on the recognizer?
      How can I decrease the beam size, and by how much? I have modified the config file and tried BatchCMN instead of LiveCMN, but it still takes a lot of time to produce an output.

      Thank you in advance...

       
      • Nickolay V. Shmyrev

        You can read about beams in Programmers Manual.

        http://cmusphinx.sourceforge.net/sphinx4/doc/ProgrammersGuide.html

        They are used for pruning: not every word is kept in the search, only the ones that sound close to the best-scoring word. A wider beam improves accuracy but makes decoding slower. You can also use the partition pruner and others; everything is described in the Javadoc.

        Some book on speech recognition is a good answer for such kind of questions.

         
    • Nickolay V. Shmyrev

      Never ever start the same topic on three forums at once

       
    • Adiilah

      Adiilah - 2008-02-10

      Hi,
      I am sorry, I will not do that again. Can you please advise me on the config file? Is anything wrong? The result I obtained is really very bad!
      It's really very urgent.

      Thank you in advance.

       
    • Nickolay V. Shmyrev

      Beams should be smaller, and you must use BatchCMN instead of LiveCMN. Also make sure your WAV file has the proper sample rate. Otherwise it looks OK. If you still have problems, upload your recording somewhere and give us a link.

      And be patient please.

       

Log in to post a comment.

Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.