Can anyone look at my code? I'm having a hard time with my thesis because of
this. I will gratefully acknowledge anyone who helps me. Please help.
I'm currently writing a thesis about using Sphinx 4 to transcribe words in .WAV
files.
The beams in your configuration file, as well as the language model, aren't
optimal for maximum decoding accuracy. We have several carefully optimized
configurations for standard decoding tasks. For example, tests/performance/wsj20k
is a configuration for a decoding task with a vocabulary of 20000 words.
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
Can anyone look at my code? I'm having a hard time with my thesis because of
this. I will gratefully acknowledge anyone who helps me. Please help.
I'm currently writing a thesis about using Sphinx 4 to transcribe words in .WAV
files.
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Sphinx-4 configuration for transcribing .WAV files.

    Layout: global properties first (referenced elsewhere as ${name}),
    then one <component> per configurable object. Every <property> is an
    empty, self-closed element and every <component> is a direct child of
    <config>; components refer to each other by name only, never by nesting.
-->
<config>

    <!-- ============================================================ -->
    <!-- Global properties                                            -->
    <!-- ============================================================ -->
    <property name="absoluteBeamWidth" value="500"/>
    <property name="relativeBeamWidth" value="1E-80"/>
    <property name="absoluteWordBeamWidth" value="20"/>
    <property name="relativeWordBeamWidth" value="1E-60"/>
    <property name="wordInsertionProbability" value="1E-16"/>
    <property name="languageWeight" value="7.0"/>
    <property name="silenceInsertionProbability" value=".1"/>
    <!-- NOTE(review): no component named "epFrontEnd" is defined in this
         file; the only front end defined below is "wavFrontEnd". Confirm
         which one is intended. -->
    <property name="frontend" value="epFrontEnd"/>
    <property name="recognizer" value="recognizer"/>
    <property name="showCreations" value="false"/>

    <!-- ============================================================ -->
    <!-- Batch mode                                                   -->
    <!-- ============================================================ -->
    <component name="batch" type="edu.cmu.sphinx.tools.batch.BatchModeRecognizer">
        <!-- NOTE(review): "streamDataSource" is not defined in this file. -->
        <propertylist name="inputDataProcessors">
            <item>streamDataSource</item>
        </propertylist>
        <property name="skip" value="0"/>
        <property name="recognizer" value="${recognizer}"/>
    </component>

    <!-- ============================================================ -->
    <!-- Recognizer                                                   -->
    <!-- ============================================================ -->
    <component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
        <!-- NOTE(review): no component named "decoder" is defined in this
             file, so nothing here links the recognizer to either search
             manager below. Confirm whether a decoder component was lost. -->
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
            <item>accuracyTracker</item>
            <item>speedTracker</item>
            <item>memoryTracker</item>
            <item>recognizerMonitor</item>
        </propertylist>
    </component>

    <!-- ============================================================ -->
    <!-- Search managers                                              -->
    <!-- ============================================================ -->
    <component name="wordPruningSearchManager" type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="lexTreeLinguist"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListManager" value="activeListManager"/>
        <property name="growSkipInterval" value="0"/>
        <property name="checkStateOrder" value="false"/>
        <property name="buildWordLattice" value="false"/>
        <property name="acousticLookaheadFrames" value="1.7"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <!-- NOTE(review): the type below looks like the general SearchManager
         type rather than a concrete implementation; confirm it can be
         instantiated, or whether this component is needed at all. -->
    <component name="searchManager" type="edu.cmu.sphinx.decoder.search.SearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="lexTreeLinguist"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListManager" value="activeListManager"/>
        <property name="growSkipInterval" value="0"/>
        <property name="checkStateOrder" value="false"/>
        <property name="buildWordLattice" value="false"/>
        <property name="maxLatticeEdges" value="3"/>
        <property name="acousticLookaheadFrames" value="1.7"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <!-- ============================================================ -->
    <!-- Active lists                                                 -->
    <!-- ============================================================ -->
    <component name="activeListManager" type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
        <propertylist name="activeListFactories">
            <item>standardActiveListFactory</item>
            <item>wordActiveListFactory</item>
            <item>wordActiveListFactory</item>
            <item>standardActiveListFactory</item>
            <item>standardActiveListFactory</item>
            <item>standardActiveListFactory</item>
        </propertylist>
    </component>

    <component name="standardActiveListFactory" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <component name="wordActiveListFactory" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteWordBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeWordBeamWidth}"/>
    </component>

    <!-- ============================================================ -->
    <!-- Pruner                                                       -->
    <!-- ============================================================ -->
    <component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <!-- ============================================================ -->
    <!-- Scorer                                                       -->
    <!-- ============================================================ -->
    <component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
        <property name="isCpuRelative" value="false"/>
        <property name="numThreads" value="0"/>
        <property name="minScoreablesPerThread" value="10"/>
        <property name="scoreablesKeepFeature" value="false"/>
    </component>

    <!-- ============================================================ -->
    <!-- Linguist                                                     -->
    <!-- ============================================================ -->
    <component name="lexTreeLinguist" type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
        <property name="logMath" value="logMath"/>
        <property name="acousticModel" value="wsj"/>
        <property name="languageModel" value="trigramModel"/>
        <property name="dictionary" value="dictionary"/>
        <property name="addFillerWords" value="false"/>
        <property name="fillerInsertionProbability" value="1E-10"/>
        <property name="generateUnitStates" value="false"/>
        <property name="wantUnigramSmear" value="true"/>
        <property name="unigramSmearWeight" value="1"/>
        <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
        <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
        <property name="languageWeight" value="${languageWeight}"/>
    </component>

    <!-- ============================================================ -->
    <!-- Dictionary                                                   -->
    <!-- ============================================================ -->
    <component name="dictionary" type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath" value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/cmudict.0.6d"/>
        <property name="fillerPath" value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/fillerdict"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="wordReplacement" value="sil"/>
    </component>

    <!-- ============================================================ -->
    <!-- Language model                                               -->
    <!-- ============================================================ -->
    <!-- The location below points at the "hellongram" demo model. -->
    <component name="trigramModel" type="edu.cmu.sphinx.linguist.language.ngram.SimpleNGramModel">
        <property name="location" value="resource:/edu/cmu/sphinx/demo/hellongram/hellongram.trigram.lm"/>
        <property name="logMath" value="logMath"/>
        <property name="dictionary" value="dictionary"/>
        <property name="maxDepth" value="3"/>
        <property name="unigramWeight" value=".7"/>
    </component>

    <!-- ============================================================ -->
    <!-- Acoustic model                                               -->
    <!-- ============================================================ -->
    <!-- NOTE(review): both types below live in a "trainer" package;
         confirm these are the classes intended for recognition. -->
    <component name="wsj" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.trainer.TrainerAcousticModel">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.trainer.Sphinx4Loader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="location" value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz"/>
        <property name="modelDefinition" value="etc/WSJ_clean_13dCep_16k_40mel_130Hz_6800Hz.4000.mdef"/>
        <property name="dataLocation" value="cd_continuous_8gau/"/>
    </component>

    <!-- ============================================================ -->
    <!-- Unit manager                                                 -->
    <!-- ============================================================ -->
    <component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

    <!-- ============================================================ -->
    <!-- Front end                                                    -->
    <!-- ============================================================ -->
    <component name="wavFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <item>audioFileDataSource</item>
            <item>speechClassifier</item>
            <item>speechMarker</item>
            <item>nonSpeechDataFilter</item>
            <item>premphasizer</item>
            <item>windower</item>
            <item>fft</item>
            <item>melFilterBank</item>
            <item>dct</item>
            <item>liveCMN</item>
            <item>featureExtraction</item>
        </propertylist>
    </component>

    <!-- Pipeline stages: each is declared without properties. -->
    <component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>
    <component name="speechClassifier" type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier"/>
    <component name="nonSpeechDataFilter" type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>
    <component name="speechMarker" type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker"/>
    <component name="premphasizer" type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>
    <component name="windower" type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>
    <component name="fft" type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>
    <component name="melFilterBank" type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>
    <component name="dct" type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>
    <component name="liveCMN" type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>
    <!-- batchCMN is defined but not listed in the wavFrontEnd pipeline. -->
    <component name="batchCMN" type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>
    <component name="featureExtraction" type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <!-- ============================================================ -->
    <!-- Monitors                                                     -->
    <!-- ============================================================ -->
    <component name="accuracyTracker" type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showAlignedResults" value="false"/>
        <property name="showRawResults" value="false"/>
    </component>

    <component name="memoryTracker" type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showSummary" value="false"/>
        <property name="showDetails" value="false"/>
    </component>

    <component name="speedTracker" type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="frontend" value="${frontend}"/>
        <property name="showSummary" value="true"/>
        <property name="showDetails" value="false"/>
    </component>

    <component name="recognizerMonitor" type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
        <property name="recognizer" value="${recognizer}"/>
        <propertylist name="allocatedMonitors">
            <item>configMonitor</item>
        </propertylist>
    </component>

    <component name="configMonitor" type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
        <property name="showConfig" value="true"/>
    </component>

    <!-- ============================================================ -->
    <!-- Miscellaneous                                                -->
    <!-- ============================================================ -->
    <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>

</config>
Hello
The beams in your configuration file, as well as the language model, aren't
optimal for maximum decoding accuracy. We have several carefully optimized
configurations for standard decoding tasks. For example, tests/performance/wsj20k
is a configuration for a decoding task with a vocabulary of 20000 words.
Can you send me the file, or post here how I should do it? I really, really
need it for my thesis.
Thanks.
The file is a part of sphinx4 sources:
http://cmusphinx.svn.sourceforge.net/viewvc/cmusphinx/trunk/sphinx4/tests/performance/wsj20k/wsj20k.config.xml?revision=9692