CMU Sphinx / Forums / Help: Audioformat issues

All-

As my previous post mentioned I have been having problems converting the transcribe config files to work with text data.

I decided to run some tests using the an4 dataset, specifically the:

an4_clstk/fbbh/cen8-fbbh-b.raw

audio file.

I ran this through the following config.xml:

<?xml version="1.0" encoding="UTF-8"?>
<config>

&lt;!-- ******************************************************** --&gt;
&lt;!-- word recognizer configuration                            --&gt;
&lt;!-- ******************************************************** --&gt;

&lt;component name=&quot;recognizer&quot; type=&quot;edu.cmu.sphinx.recognizer.Recognizer&quot;&gt;
    &lt;property name=&quot;decoder&quot; value=&quot;decoder&quot;/&gt;
    &lt;propertylist name=&quot;monitors&quot;&gt;
        &lt;item&gt;accuracyTracker &lt;/item&gt;
        &lt;item&gt;speedTracker &lt;/item&gt;
        &lt;item&gt;memoryTracker &lt;/item&gt;
        &lt;item&gt;recognizerMonitor &lt;/item&gt;
    &lt;/propertylist&gt;
&lt;/component&gt;

&lt;!-- ******************************************************** --&gt;
&lt;!-- The Decoder   configuration                              --&gt;
&lt;!-- ******************************************************** --&gt;

&lt;component name=&quot;decoder&quot; type=&quot;edu.cmu.sphinx.decoder.Decoder&quot;&gt;
    &lt;property name=&quot;searchManager&quot; value=&quot;wordPruningSearchManager&quot;/&gt;
   &lt;!-- &lt;property name=&quot;featureBlockSize&quot; value=&quot;50&quot;/&gt; --&gt;
&lt;/component&gt;


&lt;!-- ******************************************************** --&gt;
&lt;!-- The Search Manager                                       --&gt;
&lt;!-- ******************************************************** --&gt;

&lt;component name=&quot;searchManager&quot; 
    type=&quot;edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager&quot;&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;linguist&quot; value=&quot;lexTreeLinguist&quot;/&gt;
    &lt;property name=&quot;pruner&quot; value=&quot;trivialPruner&quot;/&gt;
    &lt;property name=&quot;scorer&quot; value=&quot;threadedScorer&quot;/&gt;
    &lt;property name=&quot;activeListFactory&quot; value=&quot;activeList&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;wordPruningSearchManager&quot; 
type=&quot;edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager&quot;&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;linguist&quot; value=&quot;lexTreeLinguist&quot;/&gt;
    &lt;property name=&quot;pruner&quot; value=&quot;trivialPruner&quot;/&gt;
    &lt;property name=&quot;scorer&quot; value=&quot;threadedScorer&quot;/&gt;
    &lt;property name=&quot;activeListManager&quot; value=&quot;activeListManager&quot;/&gt;
    &lt;property name=&quot;growSkipInterval&quot; value=&quot;0&quot;/&gt;
    &lt;property name=&quot;checkStateOrder&quot; value=&quot;false&quot;/&gt;
    &lt;property name=&quot;buildWordLattice&quot; value=&quot;true&quot;/&gt;
    &lt;property name=&quot;acousticLookaheadFrames&quot; value=&quot;1.7&quot;/&gt;
    &lt;property name=&quot;relativeBeamWidth&quot; value=&quot;${relativeBeamWidth}&quot;/&gt;
&lt;property name=&quot;keepAllTokens&quot; value=&quot;false&quot;/&gt;
&lt;/component&gt;




&lt;!-- ******************************************************** --&gt;
&lt;!-- The Active Lists                                         --&gt;
&lt;!-- ******************************************************** --&gt;

 &lt;component name=&quot;activeList&quot; 
         type=&quot;edu.cmu.sphinx.decoder.search.PartitionActiveListFactory&quot;&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;absoluteBeamWidth&quot; value=&quot;${absoluteBeamWidth}&quot;/&gt;
    &lt;property name=&quot;relativeBeamWidth&quot; value=&quot;${relativeBeamWidth}&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;activeListManager&quot; 
         type=&quot;edu.cmu.sphinx.decoder.search.SimpleActiveListManager&quot;&gt;
    &lt;propertylist name=&quot;activeListFactories&quot;&gt;
    &lt;item&gt;standardActiveListFactory&lt;/item&gt;
    &lt;item&gt;wordActiveListFactory&lt;/item&gt;
    &lt;item&gt;wordActiveListFactory&lt;/item&gt;
    &lt;item&gt;standardActiveListFactory&lt;/item&gt;
    &lt;item&gt;standardActiveListFactory&lt;/item&gt;
    &lt;item&gt;standardActiveListFactory&lt;/item&gt;
&lt;/propertylist&gt;
&lt;/component&gt;

&lt;component name=&quot;standardActiveListFactory&quot; 
           type=&quot;edu.cmu.sphinx.decoder.search.PartitionActiveListFactory&quot;&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;absoluteBeamWidth&quot; value=&quot;${absoluteBeamWidth}&quot;/&gt;
    &lt;property name=&quot;relativeBeamWidth&quot; value=&quot;${relativeBeamWidth}&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;wordActiveListFactory&quot; 
           type=&quot;edu.cmu.sphinx.decoder.search.PartitionActiveListFactory&quot;&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;absoluteBeamWidth&quot; value=&quot;${absoluteWordBeamWidth}&quot;/&gt;
    &lt;property name=&quot;relativeBeamWidth&quot; value=&quot;${relativeWordBeamWidth}&quot;/&gt;
&lt;/component&gt;

&lt;!-- ******************************************************** --&gt;
&lt;!-- The Pruner                                               --&gt;
&lt;!-- ******************************************************** --&gt; 
&lt;component name=&quot;trivialPruner&quot; 
           type=&quot;edu.cmu.sphinx.decoder.pruner.SimplePruner&quot;/&gt;

&lt;!-- ******************************************************** --&gt;
&lt;!-- TheScorer                                                --&gt;
&lt;!-- ******************************************************** --&gt; 
&lt;component name=&quot;threadedScorer&quot; 
           type=&quot;edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer&quot;&gt;
    &lt;property name=&quot;frontend&quot; value=&quot;${frontend}&quot;/&gt;
    &lt;property name=&quot;isCpuRelative&quot; value=&quot;true&quot;/&gt;
    &lt;property name=&quot;numThreads&quot; value=&quot;0&quot;/&gt;
    &lt;property name=&quot;minScoreablesPerThread&quot; value=&quot;10&quot;/&gt;
    &lt;property name=&quot;scoreablesKeepFeature&quot; value=&quot;true&quot;/&gt;
&lt;/component&gt;

&lt;!-- ******************************************************** --&gt;
&lt;!-- The linguist  configuration                              --&gt;
&lt;!-- ******************************************************** --&gt;

&lt;component name=&quot;lexTreeLinguist&quot; 
           type=&quot;edu.cmu.sphinx.linguist.lextree.LexTreeLinguist&quot;&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;acousticModel&quot; value=&quot;hub4&quot;/&gt;
    &lt;property name=&quot;languageModel&quot; value=&quot;trigramModel&quot;/&gt;
    &lt;property name=&quot;dictionary&quot; value=&quot;dictionary&quot;/&gt;
    &lt;property name=&quot;addFillerWords&quot; value=&quot;false&quot;/&gt;
    &lt;property name=&quot;fillerInsertionProbability&quot; value=&quot;1E-10&quot;/&gt;
    &lt;property name=&quot;generateUnitStates&quot; value=&quot;false&quot;/&gt;
    &lt;property name=&quot;wantUnigramSmear&quot; value=&quot;true&quot;/&gt;
    &lt;property name=&quot;unigramSmearWeight&quot; value=&quot;1&quot;/&gt;
    &lt;property name=&quot;wordInsertionProbability&quot; 
            value=&quot;${wordInsertionProbability}&quot;/&gt;
    &lt;property name=&quot;silenceInsertionProbability&quot; 
            value=&quot;${silenceInsertionProbability}&quot;/&gt;
    &lt;property name=&quot;languageWeight&quot; value=&quot;${languageWeight}&quot;/&gt;
    &lt;property name=&quot;unitManager&quot; value=&quot;unitManager&quot;/&gt;
&lt;/component&gt;

&lt;!-- ******************************************************** --&gt;
&lt;!-- The Dictionary configuration                            --&gt;
&lt;!-- ******************************************************** --&gt;
&lt;component name=&quot;dictionary&quot;
      type=&quot;edu.cmu.sphinx.linguist.dictionary.FastDictionary&quot;&gt;
    &lt;property name=&quot;dictionaryPath&quot; value=&quot;resource:/edu.cmu.sphinx.model.acoustic.HUB4_8gau_13dCep_16k_40mel_133Hz_6855Hz.Model!/edu/cmu/sphinx/model/acoustic/HUB4_8gau_13dCep_16k_40mel_133Hz_6855Hz/cmudict.06d&quot;/&gt;
    &lt;property name=&quot;fillerPath&quot; value=&quot;resource:/edu.cmu.sphinx.model.acoustic.HUB4_8gau_13dCep_16k_40mel_133Hz_6855Hz.Model!/edu/cmu/sphinx/model/acoustic/HUB4_8gau_13dCep_16k_40mel_133Hz_6855Hz/fillerdict&quot;/&gt;
    &lt;property name=&quot;addSilEndingPronunciation&quot; value=&quot;false&quot;/&gt;
    &lt;property name=&quot;allowMissingWords&quot; value=&quot;false&quot;/&gt;
    &lt;property name=&quot;unitManager&quot; value=&quot;unitManager&quot;/&gt;
&lt;/component&gt;

 &lt;!-- ************************************************** --&gt;
&lt;!-- trigramModel                                       --&gt;
&lt;!-- ************************************************** --&gt;

&lt;component name=&quot;trigramModel&quot;
      type=&quot;edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel&quot;&gt;
    &lt;property name=&quot;unigramWeight&quot; value=&quot;.7&quot;/&gt;
    &lt;property name=&quot;maxDepth&quot; value=&quot;3&quot;/&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;dictionary&quot; value=&quot;dictionary&quot;/&gt;
    &lt;property name=&quot;location&quot;
              value=&quot;C:/eclipse/workspace/PodCastSearch/lab/speech/sphinx4/data/hub4_model/language_model.arpaformat.DMP&quot;/&gt;
&lt;/component&gt;

&lt;!-- ************************************************** --&gt;
&lt;!-- flatUnigramModel                                   --&gt;
&lt;!-- ************************************************** --&gt;
&lt;component name=&quot;flatUnigramModel&quot; 
            type=&quot;edu.cmu.sphinx.linguist.language.ngram.SimpleNGramModel&quot;&gt;
    &lt;property name=&quot;location&quot; 
               value=&quot;hub4.flat_unigram.lm&quot;/&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;dictionary&quot; value=&quot;dictionary&quot;/&gt;
    &lt;property name=&quot;maxDepth&quot; value=&quot;1&quot;/&gt;
    &lt;property name=&quot;unigramWeight&quot; value=&quot;.7&quot;/&gt;
&lt;/component&gt;


&lt;!-- ******************************************************** --&gt;
&lt;!-- The acoustic model configuration                         --&gt;
&lt;!-- ******************************************************** --&gt;

&lt;component name=&quot;hub4&quot;
    type=&quot;edu.cmu.sphinx.model.acoustic.HUB4_8gau_13dCep_16k_40mel_133Hz_6855Hz.Model&quot;&gt;
    &lt;property name=&quot;loader&quot; value=&quot;sphinx3Loader&quot;/&gt;
    &lt;property name=&quot;unitManager&quot; value=&quot;unitManager&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;sphinx3Loader&quot;
           type=&quot;edu.cmu.sphinx.model.acoustic.HUB4_8gau_13dCep_16k_40mel_133Hz_6855Hz.ModelLoader&quot;&gt;
    &lt;property name=&quot;logMath&quot; value=&quot;logMath&quot;/&gt;
    &lt;property name=&quot;unitManager&quot; value=&quot;unitManager&quot;/&gt;
&lt;/component&gt;


&lt;!-- ******************************************************** --&gt;
&lt;!-- The unit manager configuration                           --&gt;
&lt;!-- ******************************************************** --&gt;

&lt;component name=&quot;unitManager&quot; 
           type=&quot;edu.cmu.sphinx.linguist.acoustic.UnitManager&quot;/&gt;


&lt;!-- ******************************************************** --&gt;
&lt;!-- The frontend configuration                               --&gt;
&lt;!-- ******************************************************** --&gt;

&lt;component name=&quot;mfcFrontEnd&quot; type=&quot;edu.cmu.sphinx.frontend.FrontEnd&quot;&gt;
    &lt;propertylist name=&quot;pipeline&quot;&gt;
     &lt;item&gt;streamDataSource &lt;/item&gt;
        &lt;item&gt;speechClassifier &lt;/item&gt;
        &lt;item&gt;speechMarker &lt;/item&gt;
        &lt;item&gt;nonSpeechDataFilter &lt;/item&gt;
        &lt;item&gt;premphasizer &lt;/item&gt;
        &lt;item&gt;windower &lt;/item&gt;
        &lt;item&gt;fft &lt;/item&gt;
        &lt;item&gt;melFilterBank &lt;/item&gt;
        &lt;item&gt;dct &lt;/item&gt;
        &lt;item&gt;liveCMN &lt;/item&gt;
        &lt;item&gt;featureExtraction &lt;/item&gt;

      &lt;!--  &lt;item&gt;streamDataSource &lt;/item&gt;
        &lt;item&gt;premphasizer &lt;/item&gt;
        &lt;item&gt;windower &lt;/item&gt;
        &lt;item&gt;fft &lt;/item&gt;
        &lt;item&gt;melFilterBank &lt;/item&gt;
        &lt;item&gt;dct &lt;/item&gt;
        &lt;item&gt;batchCMN &lt;/item&gt;
        &lt;item&gt;featureExtraction &lt;/item&gt; --&gt;
    &lt;/propertylist&gt;
&lt;/component&gt;



&lt;component name=&quot;streamDataSource&quot;
           type=&quot;edu.cmu.sphinx.frontend.util.StreamDataSource&quot;&gt;
&lt;property name=&quot;sampleRate&quot; value=&quot;16000&quot;/&gt;
    &lt;property name=&quot;bitsPerSample&quot; value=&quot;16&quot;/&gt;
    &lt;property name=&quot;bigEndianData&quot; value=&quot;false&quot;/&gt;
    &lt;property name=&quot;signedData&quot; value=&quot;true&quot;/&gt;
    &lt;property name=&quot;bytesPerRead&quot; value=&quot;320&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;microphone&quot; 
            type=&quot;edu.cmu.sphinx.frontend.util.Microphone&quot;&gt;
    &lt;property name=&quot;closeBetweenUtterances&quot; value=&quot;false&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;speechClassifier&quot; 
            type=&quot;edu.cmu.sphinx.frontend.endpoint.SpeechClassifier&quot;&gt;
    &lt;property name=&quot;threshold&quot; value=&quot;13&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;nonSpeechDataFilter&quot; 
            type=&quot;edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter&quot;/&gt;

&lt;component name=&quot;speechMarker&quot; 
            type=&quot;edu.cmu.sphinx.frontend.endpoint.SpeechMarker&quot;&gt;
    &lt;property name=&quot;speechTrailer&quot; value=&quot;50&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;premphasizer&quot; 
    type=&quot;edu.cmu.sphinx.frontend.filter.Preemphasizer&quot;/&gt;

&lt;component name=&quot;windower&quot; 
type=&quot;edu.cmu.sphinx.frontend.window.RaisedCosineWindower&quot;/&gt;

&lt;component name=&quot;fft&quot; 
    type=&quot;edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform&quot;/&gt;

&lt;component name=&quot;melFilterBank&quot; 
    type=&quot;edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank&quot;/&gt;

&lt;component name=&quot;dct&quot; 
        type=&quot;edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform&quot;/&gt;

&lt;component name=&quot;liveCMN&quot; 
            type=&quot;edu.cmu.sphinx.frontend.feature.LiveCMN&quot;/&gt;

&lt;component name=&quot;batchCMN&quot; 
            type=&quot;edu.cmu.sphinx.frontend.feature.BatchCMN&quot;/&gt;

&lt;component name=&quot;featureExtraction&quot; 
    type=&quot;edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor&quot;/&gt;

&lt;!-- ******************************************************* --&gt;
&lt;!--  monitors                                               --&gt;
&lt;!-- ******************************************************* --&gt;

&lt;component name=&quot;accuracyTracker&quot; 
           type=&quot;edu.cmu.sphinx.instrumentation.AccuracyTracker&quot;&gt;
    &lt;property name=&quot;recognizer&quot; value=&quot;${recognizer}&quot;/&gt;
    &lt;property name=&quot;showRawResults&quot; value=&quot;false&quot;/&gt;
    &lt;property name=&quot;showAlignedResults&quot; value=&quot;false&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;memoryTracker&quot; 
           type=&quot;edu.cmu.sphinx.instrumentation.MemoryTracker&quot;&gt;
    &lt;property name=&quot;recognizer&quot; value=&quot;${recognizer}&quot;/&gt;
&lt;property name=&quot;showDetails&quot; value=&quot;false&quot;/&gt;
&lt;property name=&quot;showSummary&quot; value=&quot;false&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;speedTracker&quot; 
           type=&quot;edu.cmu.sphinx.instrumentation.SpeedTracker&quot;&gt;
    &lt;property name=&quot;recognizer&quot; value=&quot;${recognizer}&quot;/&gt;
    &lt;property name=&quot;frontend&quot; value=&quot;${frontend}&quot;/&gt;
&lt;property name=&quot;showDetails&quot; value=&quot;false&quot;/&gt;
&lt;/component&gt;

&lt;component name=&quot;recognizerMonitor&quot; 
           type=&quot;edu.cmu.sphinx.instrumentation.RecognizerMonitor&quot;&gt;
    &lt;property name=&quot;recognizer&quot; value=&quot;${recognizer}&quot;/&gt;
    &lt;propertylist name=&quot;allocatedMonitors&quot;&gt;
        &lt;item&gt;configMonitor &lt;/item&gt;
    &lt;/propertylist&gt;
&lt;/component&gt;

&lt;component name=&quot;configMonitor&quot; 
           type=&quot;edu.cmu.sphinx.instrumentation.ConfigMonitor&quot;&gt;
    &lt;property name=&quot;showConfig&quot; value=&quot;false&quot;/&gt;
&lt;/component&gt;


&lt;!-- ******************************************************* --&gt;
&lt;!--  Miscellaneous components                               --&gt;
&lt;!-- ******************************************************* --&gt;

&lt;component name=&quot;logMath&quot; type=&quot;edu.cmu.sphinx.util.LogMath&quot;&gt;
    &lt;property name=&quot;logBase&quot; value=&quot;1.0001&quot;/&gt;
    &lt;property name=&quot;useAddTable&quot; value=&quot;true&quot;/&gt;
&lt;/component&gt;


&lt;component name=&quot;confidenceScorer&quot;
           type=&quot;edu.cmu.sphinx.result.MAPConfidenceScorer&quot;&gt;
    &lt;property name=&quot;languageWeight&quot; value=&quot;${languageWeight}&quot;/&gt;
&lt;property name=&quot;dumpLattice&quot; value=&quot;true&quot;/&gt;
&lt;property name=&quot;dumpSausage&quot; value=&quot;true&quot;/&gt;
&lt;/component&gt;

</config>

And I got a perfect result. However, when I run the file included with the WavFile demo, 12345.wav, through this config I get "what you eat or five", which is not so good. I am partially worried about the audio formats I am using, so I used some software to convert the an4 data to a WAV file, and when I run that through my Java code (see the routines below), I get the following:

File is in :PCM_SIGNED 44100.0 Hz, 16 bit, stereo, 4 bytes/frame, little-endian

for the audio format. This file STILL PRODUCES PERFECT RESULTS despite the fact that the format is all wrong. However, when I attempt to convert it to the right format using (see Java code below):

AudioFormat.Encoding.PCM_SIGNED,
16000,
16,
1,
2,
2,
16000,
false );

which should down convert it to match my config.xml file, I get no results whatsoever.

So, any ideas as to what is going on here? Why am I able to work with a 44.1 kHz file and not with a 16 kHz file, when I specified a 16 kHz file in the config.xml file? Do all Sphinx inputs need to be mono, and how do I set the bytesPerRead property (how is this calculated)?

Very confused,

James

Here is all of my java code:

private AudioInputStream convertFormat(AudioInputStream audioInputStream) {
AudioFormat audioFormat = audioInputStream.getFormat();
System.out.println( "Play input audio format=" + audioFormat );

      // Convert compressed audio data to uncompressed PCM format.
     // if ( audioFormat.getEncoding() != AudioFormat.Encoding.PCM_SIGNED ) {
      // if ((audioFormat.getEncoding() != AudioFormat.Encoding.PCM) ||
      //     (audioFormat.getEncoding() == AudioFormat.Encoding.ALAW) || 
      //     (audioFormat.getEncoding() == AudioFormat.Encoding.MP3)) {
         AudioFormat newFormat = new AudioFormat(
            AudioFormat.Encoding.PCM_SIGNED, 
            16000,
            16,
            1,
            //audioFormat.getChannels(),
            //audioFormat.getChannels() * 2,
            2,
            16000,
            false );
         System.out.println( &quot;Converting audio format to &quot; + newFormat );
         AudioInputStream newStream = AudioSystem.getAudioInputStream( newFormat, audioInputStream );
         audioFormat = newFormat;
         audioInputStream = newStream;
      //}
      return audioInputStream;
}

public void parse(URL audioFileURL) {
    try {

        //URL configURL = PodCastParser.class.getResource(&quot;hub4.config.xml&quot;);
        //URL configURL = PodCastParser.class.getResource(&quot;myWSJconfig.xml&quot;);
       //URL configURL = PodCastParser.class.getResource(&quot;myhub4config.xml&quot;);
       URL configURL = PodCastParser.class.getResource(&quot;transcriberconfig.xml&quot;);
       // URL configURL = PodCastParser.class.getResource(&quot;hellongram.config.xml&quot;);

        System.out.println(&quot;Loading Recognizer...\n&quot;);

        ConfigurationManager cm = new ConfigurationManager(configURL);

    //Recognizer recognizer = (Recognizer) cm.lookup(&quot;wordRecognizer&quot;);
    Recognizer recognizer = (Recognizer) cm.lookup(&quot;recognizer&quot;);

        /* allocate the resource necessary for the recognizer */
        recognizer.allocate();

        System.out.println(&quot;Decoding &quot; + audioFileURL.getFile());
        //System.out.println(AudioSystem.getAudioFileFormat(audioFileURL));

    StreamDataSource reader
            = (StreamDataSource) cm.lookup(&quot;streamDataSource&quot;);
        InputStream is;
        try {
        is 
            = AudioSystem.getAudioInputStream(audioFileURL);
        System.out.println(&quot;File is in :&quot;+((AudioInputStream) is).getFormat());
        is =  convertFormat((AudioInputStream)is);

        }
        catch (UnsupportedAudioFileException uafe) {
            System.out.println
                (&quot;Reading &quot; + audioFileURL.toExternalForm() + &quot; as raw audio file.&quot;);
            is = new FileInputStream(audioFileURL.getFile());
        }


        /* set the stream data source to read from the audio file */
        reader.setInputStream(is, audioFileURL.getFile());

        /* decode the audio file */

        boolean done = false;
        while (!done) {
        /*
         * This while loop will terminate after the last utterance
         * in the audio file has been decoded, in which case the
         * recognizer will return null.
         */ 
        Result result = recognizer.recognize();
        if (result != null) {
            String resultText = result.getBestResultNoFiller();
            System.out.println(result.getTimedBestResult(true, true));
            System.out.println(resultText);
        } else {
            done = true;
        }
        }

// Result result = recognizer.recognize();
// String resultText = result.getBestResultNoFiller();
// System.out.println("You said: " + resultText + "\n");
//

    } catch (IOException e) {
        System.err.println(&quot;Problem when loading WavFile: &quot; + e);
        e.printStackTrace();
    } catch (PropertyException e) {
        System.err.println(&quot;Problem configuring WavFile: &quot; + e);
        e.printStackTrace();
    } catch (InstantiationException e) {
        System.err.println(&quot;Problem creating WavFile: &quot; + e);
        e.printStackTrace();
    } 
}

Audioformat issues

Speech Recognition Toolkit

Forums

Help

Audioformat issues document.SUBSCRIPTION_OPTIONS = { "thing": "topic", "subscribed": false, "url": "subscribe", "icon": { "css": "fa fa-envelope-o" } };

Audioformat issues