Menu

Transcribing Words using WSJ

Jher
2010-09-27
2012-09-22
  • Jher

    Jher - 2010-09-27

    Can anyone look at my configuration? I'm having a hard time with my thesis
    because of it. I will gratefully acknowledge anyone who helps me. Please
    help.

    I'm currently doing my thesis on using Sphinx 4 to transcribe words in .WAV
    files.

    <!-- Sphinx-4 configuration as posted with the question. Global properties
         come first; each component that follows is a sibling under <config>. -->
    <config>

        <!-- Global properties. Components below read them as ${name}. -->
        <property name="absoluteBeamWidth" value="500"/>
        <property name="relativeBeamWidth" value="1E-80"/>
        <property name="absoluteWordBeamWidth" value="20"/>
        <property name="relativeWordBeamWidth" value="1E-60"/>
        <property name="wordInsertionProbability" value="1E-16"/>
        <property name="languageWeight" value="7.0"/>
        <property name="silenceInsertionProbability" value=".1"/>
        <!-- NOTE(review): no component named "epFrontEnd" is defined in this
             configuration; the only front end defined here is "wavFrontEnd".
             Confirm which one is intended. -->
        <property name="frontend" value="epFrontEnd"/>
        <property name="recognizer" value="recognizer"/>
        <property name="showCreations" value="false"/>

        <!-- Batch-mode recognizer. -->
        <component name="batch" type="edu.cmu.sphinx.tools.batch.BatchModeRecognizer">
            <!-- NOTE(review): "streamDataSource" is not defined anywhere in this
                 configuration. Confirm it exists or replace it. -->
            <propertylist name="inputDataProcessors">
                <item>streamDataSource</item>
            </propertylist>
            <property name="skip" value="0"/>
            <property name="recognizer" value="${recognizer}"/>
        </component>

        <!-- Recognizer and its monitors. -->
        <component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
            <!-- NOTE(review): no component named "decoder" is defined in this
                 configuration. Confirm that one is supplied. -->
            <property name="decoder" value="decoder"/>
            <propertylist name="monitors">
                <item>accuracyTracker</item>
                <item>speedTracker</item>
                <item>memoryTracker</item>
                <item>recognizerMonitor</item>
            </propertylist>
        </component>

        <!-- Search managers. -->
        <component name="wordPruningSearchManager"
                   type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
            <property name="logMath" value="logMath"/>
            <property name="linguist" value="lexTreeLinguist"/>
            <property name="pruner" value="trivialPruner"/>
            <property name="scorer" value="threadedScorer"/>
            <property name="activeListManager" value="activeListManager"/>
            <property name="growSkipInterval" value="0"/>
            <property name="checkStateOrder" value="false"/>
            <property name="buildWordLattice" value="false"/>
            <property name="acousticLookaheadFrames" value="1.7"/>
            <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
        </component>

        <component name="searchManager" type="edu.cmu.sphinx.decoder.search.SearchManager">
            <property name="logMath" value="logMath"/>
            <property name="linguist" value="lexTreeLinguist"/>
            <property name="pruner" value="trivialPruner"/>
            <property name="scorer" value="threadedScorer"/>
            <property name="activeListManager" value="activeListManager"/>
            <property name="growSkipInterval" value="0"/>
            <property name="checkStateOrder" value="false"/>
            <property name="buildWordLattice" value="false"/>
            <property name="maxLatticeEdges" value="3"/>
            <property name="acousticLookaheadFrames" value="1.7"/>
            <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
        </component>

        <!-- Active lists. The two factories differ only in which global beam
             widths they read. -->
        <component name="activeListManager"
                   type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
            <propertylist name="activeListFactories">
                <item>standardActiveListFactory</item>
                <item>wordActiveListFactory</item>
                <item>wordActiveListFactory</item>
                <item>standardActiveListFactory</item>
                <item>standardActiveListFactory</item>
                <item>standardActiveListFactory</item>
            </propertylist>
        </component>

        <component name="standardActiveListFactory"
                   type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
            <property name="logMath" value="logMath"/>
            <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
            <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
        </component>

        <component name="wordActiveListFactory"
                   type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
            <property name="logMath" value="logMath"/>
            <property name="absoluteBeamWidth" value="${absoluteWordBeamWidth}"/>
            <property name="relativeBeamWidth" value="${relativeWordBeamWidth}"/>
        </component>

        <!-- Pruner. -->
        <component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

        <!-- Acoustic scorer; takes its front end from the global "frontend" property. -->
        <component name="threadedScorer"
                   type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
            <property name="frontend" value="${frontend}"/>
            <property name="isCpuRelative" value="false"/>
            <property name="numThreads" value="0"/>
            <property name="minScoreablesPerThread" value="10"/>
            <property name="scoreablesKeepFeature" value="false"/>
        </component>

        <!-- Linguist: ties together the acoustic model, language model and dictionary. -->
        <component name="lexTreeLinguist"
                   type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
            <property name="logMath" value="logMath"/>
            <property name="acousticModel" value="wsj"/>
            <property name="languageModel" value="trigramModel"/>
            <property name="dictionary" value="dictionary"/>
            <property name="addFillerWords" value="false"/>
            <property name="fillerInsertionProbability" value="1E-10"/>
            <property name="generateUnitStates" value="false"/>
            <property name="wantUnigramSmear" value="true"/>
            <property name="unigramSmearWeight" value="1"/>
            <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
            <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
            <property name="languageWeight" value="${languageWeight}"/>
        </component>

        <!-- Dictionary. -->
        <component name="dictionary"
                   type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
            <property name="dictionaryPath"
                      value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/cmudict.0.6d"/>
            <property name="fillerPath"
                      value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/fillerdict"/>
            <property name="addSilEndingPronunciation" value="false"/>
            <property name="wordReplacement" value="sil"/>
        </component>

        <!-- Language model. -->
        <component name="trigramModel"
                   type="edu.cmu.sphinx.linguist.language.ngram.SimpleNGramModel">
            <property name="location"
                      value="resource:/edu/cmu/sphinx/demo/hellongram/hellongram.trigram.lm"/>
            <property name="logMath" value="logMath"/>
            <property name="dictionary" value="dictionary"/>
            <property name="maxDepth" value="3"/>
            <property name="unigramWeight" value=".7"/>
        </component>

        <!-- Acoustic model and its loader. -->
        <component name="wsj"
                   type="edu.cmu.sphinx.linguist.acoustic.tiedstate.trainer.TrainerAcousticModel">
            <property name="loader" value="wsjLoader"/>
            <property name="unitManager" value="unitManager"/>
        </component>

        <component name="wsjLoader"
                   type="edu.cmu.sphinx.linguist.acoustic.tiedstate.trainer.Sphinx4Loader">
            <property name="logMath" value="logMath"/>
            <property name="unitManager" value="unitManager"/>
            <property name="location" value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz"/>
            <property name="modelDefinition"
                      value="etc/WSJ_clean_13dCep_16k_40mel_130Hz_6800Hz.4000.mdef"/>
            <property name="dataLocation" value="cd_continuous_8gau/"/>
        </component>

        <!-- Unit manager. -->
        <component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

        <!-- Front end for audio files: the pipeline lists its stages in order,
             and each stage is defined as a component below. -->
        <component name="wavFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
            <propertylist name="pipeline">
                <item>audioFileDataSource</item>
                <item>speechClassifier</item>
                <item>speechMarker</item>
                <item>nonSpeechDataFilter</item>
                <item>premphasizer</item>
                <item>windower</item>
                <item>fft</item>
                <item>melFilterBank</item>
                <item>dct</item>
                <item>liveCMN</item>
                <item>featureExtraction</item>
            </propertylist>
        </component>

        <component name="audioFileDataSource"
                   type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>
        <component name="speechClassifier"
                   type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier"/>
        <component name="nonSpeechDataFilter"
                   type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>
        <component name="speechMarker"
                   type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker"/>
        <component name="premphasizer"
                   type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>
        <component name="windower"
                   type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>
        <component name="fft"
                   type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>
        <component name="melFilterBank"
                   type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>
        <component name="dct"
                   type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>
        <component name="liveCMN" type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>
        <!-- "batchCMN" is defined but is not listed in the wavFrontEnd pipeline above. -->
        <component name="batchCMN" type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>
        <component name="featureExtraction"
                   type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

        <!-- Monitors attached to the recognizer. -->
        <component name="accuracyTracker"
                   type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
            <property name="recognizer" value="${recognizer}"/>
            <property name="showAlignedResults" value="false"/>
            <property name="showRawResults" value="false"/>
        </component>

        <component name="memoryTracker" type="edu.cmu.sphinx.instrumentation.MemoryTracker">
            <property name="recognizer" value="${recognizer}"/>
            <property name="showSummary" value="false"/>
            <property name="showDetails" value="false"/>
        </component>

        <component name="speedTracker" type="edu.cmu.sphinx.instrumentation.SpeedTracker">
            <property name="recognizer" value="${recognizer}"/>
            <property name="frontend" value="${frontend}"/>
            <property name="showSummary" value="true"/>
            <property name="showDetails" value="false"/>
        </component>

        <component name="recognizerMonitor"
                   type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
            <property name="recognizer" value="${recognizer}"/>
            <propertylist name="allocatedMonitors">
                <item>configMonitor</item>
            </propertylist>
        </component>

        <component name="configMonitor" type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
            <property name="showConfig" value="true"/>
        </component>

        <!-- Shared log-math component referenced by name from the components above. -->
        <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
            <property name="logBase" value="1.0001"/>
            <property name="useAddTable" value="true"/>
        </component>

    </config>

     
  • Nickolay V. Shmyrev

    Hello

    The beams in your configuration file, as well as the language model, aren't
    optimal for maximum decoding accuracy. We have several carefully optimized
    configurations for standard decoding tasks. For example, tests/performance/wsj20k
    is a configuration for a decoding task with a vocabulary of 20,000 words.

     
  • Jher

    Jher - 2010-09-28

    Can you send me a file, or post here how I should do it? I really, really
    need it for my thesis.
    Thanks.

     

Log in to post a comment.