Menu

small Ques. about .mfc

Help
Muneer
2012-03-13
2012-09-22
  • Muneer

    Muneer - 2012-03-13

    hi ,

    Is it permissible to use Sphinx4 to produce MFC files and then use them for
    recognition with pocketsphinx?

     
  • Nickolay V. Shmyrev

    Yes

     
  • Muneer

    Muneer - 2012-03-26

    thanks nshmyrev,

    but there is a problem when MFC files produced by Sphinx4 are used in
    pocketsphinx.

    configuration file for sphinx4 :

    <config>

    <component name="cepstraFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
    <propertylist name="pipeline">
    <item>streamDataSource</item>
    <item>preemphasizer</item>
    <item>windower</item>
    <item>fft</item>
    <item>melFilterBank</item>
    <item>dct</item>
    <item>dataDumper</item>
    </propertylist>
    </component>

    <component name="spectraFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
    <propertylist name="pipeline">
    <item>streamDataSource</item>
    <item>preemphasizer</item>
    <item>windower</item>
    <item>fft</item>
    </propertylist>
    </component>

    <component name="plpFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
    <propertylist name="pipeline">
    <item>streamDataSource</item>
    <item>preemphasizer</item>
    <item>windower</item>
    <item>fft</item>
    <item>plpFrequencyFilterBank</item>
    <item>plpCepstrumProducer</item>
    </propertylist>
    </component>

    <component name="dataDumper" type="edu.cmu.sphinx.frontend.util.DataDumper"> </component>

    <component name="preemphasizer" type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower" type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower">
    </component>

    <component name="fft" type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

    <component name="melFilterBank" type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">
    </component>

    <component name="dct" type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="cmn" type="edu.cmu.sphinx.frontend.feature.BatchCMN"> </component>

    <component name="featureExtraction" type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <component name="streamDataSource" type="edu.cmu.sphinx.frontend.util.StreamDataSource">
    <property name="sampleRate" value="16000"/>
    </component>

    <component name="plpFrequencyFilterBank" type="edu.cmu.sphinx.frontend.frequencywarp.PLPFrequencyFilterBank"/>

    <component name="plpCepstrumProducer" type="edu.cmu.sphinx.frontend.frequencywarp.PLPCepstrumProducer"/>

    </config>

    and the class that we use to generate mfc is :
    /*
    Copyright 1999-2002 Carnegie Mellon University.
    Portions Copyright 2002 Sun Microsystems, Inc.
    Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
    All Rights Reserved. Use is subject to license terms.

    See the file "license.terms" for information on usage and
    redistribution of this file, and for a DISCLAIMER OF ALL
    WARRANTIES.

    */

    package edu.cmu.sphinx.tools.feature;

    import java.io.DataOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.PrintStream;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.Scanner;
    import java.util.logging.Logger;

    import edu.cmu.sphinx.frontend.Data;
    import edu.cmu.sphinx.frontend.DataEndSignal;
    import edu.cmu.sphinx.frontend.DoubleData;
    import edu.cmu.sphinx.frontend.FloatData;
    import edu.cmu.sphinx.frontend.FrontEnd;
    import edu.cmu.sphinx.frontend.util.StreamDataSource;
    import edu.cmu.sphinx.util.props.ConfigurationManager;
    import edu.cmu.sphinx.util.props.PropertyException;

    /**
    This program takes in an audio file, does frontend signal processing to it,
    and then dumps the resulting Feature into a separate file. Also it can
    process a list of files at once.


    Available options:


    • -config configFile - the XML configuration file

    • -name frontendName - the name of the feature extractor inside the
      configuration file

    • -i audioFile - the name of the audio file

    • -ctl controlFile - the name of the input file for batch processing

    • -o outputFile - the name of the output file or output folder

    • -format binary/ascii - output file format


    */
    public class FeatureFileDumper {

    private FrontEnd frontEnd;
    private StreamDataSource audioSource;
    private List<float> allFeatures;
    private int featureLength = -1; </float>

    /* The logger for this class /
    private static final Logger logger = Logger
    .getLogger("edu.cmu.sphinx.tools.feature.FeatureFileDumper");

    /
    Constructs a FeatureFileDumper.

    @param cm
    the configuration manager
    @param frontEndName
    the name for the frontend
    */
    public FeatureFileDumper(ConfigurationManager cm, String frontEndName)
    throws IOException {
    try {
    frontEnd = (FrontEnd) cm.lookup(frontEndName);
    audioSource = (StreamDataSource) cm.lookup("streamDataSource");
    } catch (Exception e) {
    e.printStackTrace();
    }
    }

    /
    Process the file and store the features

    @param inputAudioFile
    the input audio file
    @throws FileNotFoundException
    /
    public void processFile(String inputAudioFile) throws FileNotFoundException {
    audioSource
    .setInputStream(new FileInputStream(inputAudioFile), "audio");
    allFeatures = new LinkedList<float>();
    getAllFeatures();
    logger.info("Frames: " + allFeatures.size());
    } </float>

    /
    Retrieve all Features from the frontend, and cache all those with actual
    feature data.
    /
    private void getAllFeatures() {
    /

    Run through all the data and produce feature.
    /
    try {
    assert (allFeatures != null);
    Data feature = frontEnd.getData();
    while (!(feature instanceof DataEndSignal)) {
    if (feature instanceof DoubleData) {
    double featureData = ((DoubleData) feature).getValues();
    if (featureLength < 0) {
    featureLength = featureData.length;
    logger.info("Feature length: " + featureLength);
    }
    float convertedData = new float;
    for (int i = 0; i < featureData.length; i++) {
    convertedData_ = (float) featureData_;
    }
    allFeatures.add(convertedData);
    } else if (feature instanceof FloatData) {
    float featureData = ((FloatData) feature).getValues();
    if (featureLength < 0) {
    featureLength = featureData.length;
    logger.info("Feature length: " + featureLength);
    }
    allFeatures.add(featureData);
    }
    feature = frontEnd.getData();
    }
    } catch (Exception e) {
    e.printStackTrace();
    }
    }

    /
    Returns the total number of data points that should be written to the
    output file.

    @return the total number of data points that should be written
    */
    private int getNumberDataPoints() {
    return (allFeatures.size() * featureLength);
    }

    /
    Dumps the feature to the given binary output.

    @param outputFile
    the binary output file
    */
    public void dumpBinary(String outputFile) throws IOException {
    DataOutputStream outStream = new DataOutputStream(new FileOutputStream(
    outputFile));
    outStream.writeInt(getNumberDataPoints());

    for (float feature : allFeatures) {
    for (float val : feature) {
    outStream.writeFloat(val);
    }
    }

    outStream.close();
    }

    /
    Dumps the feature to the given ASCII output file.

    @param outputFile
    the ASCII output file
    */
    public void dumpAscii(String outputFile) throws IOException {
    PrintStream ps = new PrintStream(new FileOutputStream(outputFile), true);
    ps.print(getNumberDataPoints());
    ps.print(' ');

    for (float feature : allFeatures) {
    for (float val : feature) {
    ps.print(val);
    ps.print(' ');
    }
    }

    ps.close();
    }

    /
    Main program for this dumper.
    /
    public static void main(String argv) {

    String configFile = "frontend.config.xml";
    String frontEndName = "cepstraFrontEnd";
    String inputFile = "d://4.wav";
    String inputCtl = null;
    String outputFile = "d://4.mfc";
    String format = "binary";

    for (int i = 0; i < argv.length; i++) {
    if (argv_.equals("-c")) {
    configFile = argv;
    }
    if (argv_.equals("-name")) {
    frontEndName = argv;
    }
    if (argv_.equals("-i")) {
    inputFile = argv;
    }
    if (argv_.equals("-ctl")) {
    inputCtl = argv;
    }
    if (argv_.equals("-o")) {
    outputFile = argv;
    }
    if (argv_.equals("-format")) {
    format = argv;
    }
    }

    if (frontEndName == null || (inputFile == null && inputCtl == null)
    || outputFile == null || format == null) {
    System.out
    .println("Usage: FeatureFileDumper "
    + " -name frontendName "
    + "< -i input File -o outputFile | -ctl inputFile -i inputFolder -o outputFolder >");
    System.exit(1);
    }

    logger.info("Input file: " + inputFile);
    logger.info("Output file: " + outputFile);
    logger.info("Format: " + format);

    try {
    URL url;
    if (configFile != null) {
    url = new File(configFile).toURI().toURL();
    } else {
    url = FeatureFileDumper.class
    .getResource("frontend.config.xml");
    }
    ConfigurationManager cm = new ConfigurationManager(url);
    FeatureFileDumper dumper = new FeatureFileDumper(cm, frontEndName);

    if (inputCtl == null)
    dumper.processFile(inputFile, outputFile, format);
    else
    dumper.processCtl(inputCtl, inputFile, outputFile, format);
    } catch (IOException ioe) {
    System.err.println("I/O Error " + ioe);
    } catch (PropertyException p) {
    System.err.println("Bad configuration " + p);
    }
    }

    private void processFile(String inputFile, String outputFile, String format)
    throws MalformedURLException, IOException {
    processFile(inputFile);
    if (format.equals("binary")) {
    dumpBinary(outputFile);
    } else if (format.equals("ascii")) {
    dumpAscii(outputFile);
    } else {
    System.out.println("ERROR: unknown output format: " + format);
    }
    }

    private void processCtl(String inputCtl, String inputFolder,
    String outputFolder, String format) throws MalformedURLException,
    IOException {

    Scanner scanner = new Scanner(new File(inputCtl));
    while (scanner.hasNext()) {
    String fileName = scanner.next();
    String inputFile = inputFolder + "/" + fileName + ".wav";
    String outputFile = outputFolder + "/" + fileName + ".mfc";

    processFile(inputFile);
    if (format.equals("binary")) {
    dumpBinary(outputFile);
    } else if (format.equals("ascii")) {
    dumpAscii(outputFile);
    } else {
    System.out.println("ERROR: unknown output format: " + format);
    }
    }
    }
    }

    When we decode the Sphinx4 features (MFC) with pocketsphinx, the result (the
    full log) is:

    pocketsphinx_batch \
    -cepdir feat \
    -cepext .mfc \
    -samprate 16000 \
    -ctl an4_test.fileids \
    -dict an4.dic \
    -feat 1s_c_d_dd \
    -featparams feat.params \
    -hmm an4.cd_cont_1000/ \
    -lm an4.ug.lm \
    -mdef an4.cd_cont_1000/mdef \
    -mean an4.cd_cont_1000//means \
    -mixw an4.cd_cont_1000/mixture_weights \
    -var an4.cd_cont_1000/variances \
    -hyp result.txt.txt \
    -dither yes \
    -sendump an4.cd_cont_1000/sendump \
    -hypseg 7878.txt

    Current configuration:

    -adchdr 0 0
    -adcin no no
    -agc none none
    -agcthresh 2.0 2.000000e+00
    -alpha 0.97 9.700000e-01
    -argfile
    -ascale 20.0 2.000000e+01
    -aw 1 1
    -backtrace no no
    -beam 1e-48 1.000000e-48
    -bestpath yes yes
    -bestpathlw 9.5 9.500000e+00
    -bghist no no
    -build_outdirs yes yes
    -cepdir feat
    -cepext .mfc .mfc
    -ceplen 13 13
    -cmn current current
    -cmninit 8.0 8.0
    -compallsen no no
    -ctl an4_test.fileids
    -ctlcount -1 -1
    -ctlincr 1 1
    -ctloffset 0 0
    -ctm
    -debug 0
    -dict an4.dic
    -dictcase no no
    -dither no yes
    -doublebw no no
    -ds 1 1
    -fdict
    -feat 1s_c_d_dd 1s_c_d_dd
    -featparams feat.params
    -fillprob 1e-8 1.000000e-08
    -frate 100 100
    -fsg
    -fsgctl
    -fsgdir
    -fsgext
    -fsgusealtpron yes yes
    -fsgusefiller yes yes
    -fwdflat yes yes
    -fwdflatbeam 1e-64 1.000000e-64
    -fwdflatefwid 4 4
    -fwdflatlw 8.5 8.500000e+00
    -fwdflatsfwin 25 25
    -fwdflatwbeam 7e-29 7.000000e-29
    -fwdtree yes yes
    -hmm an4.cd_cont_1000/
    -hyp result.txt.txt
    -hypseg 7878.txt
    -input_endian little little
    -jsgf
    -kdmaxbbi -1 -1
    -kdmaxdepth 0 0
    -kdtree
    -latsize 5000 5000
    -lda
    -ldadim 0 0
    -lextreedump 0 0
    -lifter 0 0
    -lm an4.ug.lm
    -lmctl
    -lmname default default
    -lmnamectl
    -logbase 1.0001 1.000100e+00
    -logfn
    -logspec no no
    -lowerf 133.33334 1.333333e+02
    -lpbeam 1e-40 1.000000e-40
    -lponlybeam 7e-29 7.000000e-29
    -lw 6.5 6.500000e+00
    -maxhmmpf -1 -1
    -maxnewoov 20 20
    -maxwpf -1 -1
    -mdef an4.cd_cont_1000/mdef
    -mean an4.cd_cont_1000//means
    -mfclogdir
    -min_endfr 0 0
    -mixw an4.cd_cont_1000/mixture_weights
    -mixwfloor 0.0000001 1.000000e-07
    -mllr
    -mllrctl
    -mllrdir
    -mllrext
    -mmap yes yes
    -nbest 0 0
    -nbestdir
    -nbestext .hyp .hyp
    -ncep 13 13
    -nfft 512 512
    -nfilt 40 40
    -nwpen 1.0 1.000000e+00
    -outlatbeam 1e-5 1.000000e-05
    -outlatdir
    -outlatext .lat .lat
    -outlatfmt s3 s3
    -pbeam 1e-48 1.000000e-48
    -pip 1.0 1.000000e+00
    -pl_beam 1e-10 1.000000e-10
    -pl_pbeam 1e-5 1.000000e-05
    -pl_window 0 0
    -rawlogdir
    -remove_dc no no
    -round_filters yes yes
    -samprate 16000 1.600000e+04
    -seed -1 -1
    -sendump an4.cd_cont_1000/sendump
    -senin no no
    -senlogdir
    -senmgau
    -silprob 0.005 5.000000e-03
    -smoothspec no no
    -svspec
    -tmat
    -tmatfloor 0.0001 1.000000e-04
    -topn 4 4
    -topn_beam 0 0
    -toprule
    -transform legacy legacy
    -unit_area yes yes
    -upperf 6855.4976 6.855498e+03
    -usewdphones no no
    -uw 1.0 1.000000e+00
    -var an4.cd_cont_1000/variances
    -varfloor 0.0001 1.000000e-04
    -varnorm no no
    -verbose no no
    -warp_params
    -warp_type inverse_linear inverse_linear
    -wbeam 7e-29 7.000000e-29
    -wip 0.65 6.500000e-01
    -wlen 0.025625 2.562500e-02

    INFO: cmd_ln.c(691): Parsing command line:
    \
    -alpha 0.97 \
    -doublebw no \
    -nfilt 40 \
    -ncep 13 \
    -lowerf 133.33334 \
    -upperf 6855.4976

    Current configuration:

    -agc none none
    -agcthresh 2.0 2.000000e+00
    -alpha 0.97 9.700000e-01
    -ceplen 13 13
    -cmn current current
    -cmninit 8.0 8.0
    -dither no yes
    -doublebw no no
    -feat 1s_c_d_dd 1s_c_d_dd
    -frate 100 100
    -input_endian little little
    -lda
    -ldadim 0 0
    -lifter 0 0
    -logspec no no
    -lowerf 133.33334 1.333333e+02
    -ncep 13 13
    -nfft 512 512
    -nfilt 40 40
    -remove_dc no no
    -round_filters yes yes
    -samprate 16000 1.600000e+04
    -seed -1 -1
    -smoothspec no no
    -svspec
    -transform legacy legacy
    -unit_area yes yes
    -upperf 6855.4976 6.855498e+03
    -varnorm no no
    -verbose no no
    -warp_params
    -warp_type inverse_linear inverse_linear
    -wlen 0.025625 2.562500e-02

    INFO: acmod.c(246): Parsed model-specific feature parameters from feat.params
    INFO: fe_interface.c(289): You are using the internal mechanism to generate
    the seed.
    INFO: feat.c(713): Initializing feature stream to type: '1s_c_d_dd',
    ceplen=13, CMN='current', VARNORM='no', AGC='none'
    INFO: cmn.c(142): mean= 12.00, mean= 0.0
    INFO: mdef.c(517): Reading model definition: an4.cd_cont_1000/mdef
    INFO: bin_mdef.c(179): Allocating 17583 * 8 bytes (137 KiB) for CD tree
    INFO: tmat.c(205): Reading HMM transition probability matrices:
    an4.cd_cont_1000//transition_matrices
    INFO: acmod.c(121): Attempting to use SCHMM computation module
    INFO: ms_gauden.c(198): Reading mixture gaussian parameter:
    an4.cd_cont_1000//means
    INFO: ms_gauden.c(292): 1150 codebook, 1 feature, size:
    INFO: ms_gauden.c(294): 22x39
    INFO: ms_gauden.c(198): Reading mixture gaussian parameter:
    an4.cd_cont_1000/variances
    INFO: ms_gauden.c(292): 1150 codebook, 1 feature, size:
    INFO: ms_gauden.c(294): 22x39
    INFO: ms_gauden.c(354): 20885 variance values floored
    INFO: acmod.c(123): Attempting to use PTHMM computation module
    INFO: ms_gauden.c(198): Reading mixture gaussian parameter:
    an4.cd_cont_1000//means
    INFO: ms_gauden.c(292): 1150 codebook, 1 feature, size:
    INFO: ms_gauden.c(294): 22x39
    INFO: ms_gauden.c(198): Reading mixture gaussian parameter:
    an4.cd_cont_1000/variances
    INFO: ms_gauden.c(292): 1150 codebook, 1 feature, size:
    INFO: ms_gauden.c(294): 22x39
    INFO: ms_gauden.c(354): 20885 variance values floored
    INFO: ptm_mgau.c(800): Number of codebooks exceeds 256: 1150
    INFO: acmod.c(125): Falling back to general multi-stream GMM computation
    INFO: ms_gauden.c(198): Reading mixture gaussian parameter:
    an4.cd_cont_1000//means
    INFO: ms_gauden.c(292): 1150 codebook, 1 feature, size:
    INFO: ms_gauden.c(294): 22x39
    INFO: ms_gauden.c(198): Reading mixture gaussian parameter:
    an4.cd_cont_1000/variances
    INFO: ms_gauden.c(292): 1150 codebook, 1 feature, size:
    INFO: ms_gauden.c(294): 22x39
    INFO: ms_gauden.c(354): 20885 variance values floored
    INFO: ms_senone.c(160): Reading senone mixture weights:
    an4.cd_cont_1000/mixture_weights
    INFO: ms_senone.c(211): Truncating senone logs3(pdf) values by 10 bits
    INFO: ms_senone.c(218): Not transposing mixture weights in memory
    INFO: ms_senone.c(277): Read mixture weights for 1150 senones: 1 features x 22
    codewords
    INFO: ms_senone.c(331): Mapping senones to individual codebooks
    INFO: ms_mgau.c(141): The value of topn: 4
    INFO: dict.c(317): Allocating 6003 * 20 bytes (117 KiB) for word entries
    INFO: dict.c(332): Reading main dictionary: an4.dic
    INFO: dict.c(211): Allocated 34 KiB for strings, 29 KiB for phones
    INFO: dict.c(335): 1904 words read
    INFO: dict.c(341): Reading filler dictionary: an4.cd_cont_1000//noisedict
    INFO: dict.c(211): Allocated 0 KiB for strings, 0 KiB for phones
    INFO: dict.c(344): 3 words read
    INFO: dict2pid.c(396): Building PID tables for dictionary
    INFO: dict2pid.c(404): Allocating 50^3 * 2 bytes (244 KiB) for word-initial
    triphones
    INFO: dict2pid.c(131): Allocated 30200 bytes (29 KiB) for word-final triphones
    INFO: dict2pid.c(195): Allocated 30200 bytes (29 KiB) for single-phone word
    triphones
    INFO: ngram_model_arpa.c(477): ngrams 1=1430, 2=2411, 3=3011
    INFO: ngram_model_arpa.c(135): Reading unigrams
    INFO: ngram_model_arpa.c(516): 1430 = #unigrams created
    INFO: ngram_model_arpa.c(195): Reading bigrams
    INFO: ngram_model_arpa.c(533): 2411 = #bigrams created
    INFO: ngram_model_arpa.c(534): 80 = #prob2 entries
    INFO: ngram_model_arpa.c(542): 122 = #bo_wt2 entries
    INFO: ngram_model_arpa.c(292): Reading trigrams
    INFO: ngram_model_arpa.c(555): 3011 = #trigrams created
    INFO: ngram_model_arpa.c(556): 55 = #prob3 entries
    INFO: ngram_search_fwdtree.c(99): 150 unique initial diphones
    INFO: ngram_search_fwdtree.c(147): 0 root, 0 non-root channels, 5 single-phone
    words
    INFO: ngram_search_fwdtree.c(186): Creating search tree
    INFO: ngram_search_fwdtree.c(191): before: 0 root, 0 non-root channels, 5
    single-phone words
    INFO: ngram_search_fwdtree.c(326): after: max nonroot chan increased to 6986
    INFO: ngram_search_fwdtree.c(338): after: 147 root, 6858 non-root channels, 4
    single-phone words
    INFO: ngram_search_fwdflat.c(156): fwdflat: min_ef_width = 4, max_sf_win = 25
    INFO: cmn.c(175): CMN: -646.31 19.12 18.96 18.47 17.96 17.39 16.81 16.18 15.54
    14.87 14.18 13.46 12.72
    INFO: ngram_search.c(474): Resized backpointer table to 10000 entries
    INFO: ngram_search.c(482): Resized score stack to 200000 entries
    INFO: ngram_search.c(474): Resized backpointer table to 20000 entries
    INFO: ngram_search.c(482): Resized score stack to 400000 entries
    INFO: ngram_search.c(474): Resized backpointer table to 40000 entries
    INFO: ngram_search.c(482): Resized score stack to 800000 entries
    INFO: ngram_search.c(474): Resized backpointer table to 80000 entries
    INFO: ngram_search.c(482): Resized score stack to 1600000 entries
    INFO: ngram_search.c(474): Resized backpointer table to 160000 entries
    INFO: ngram_search.c(482): Resized score stack to 3200000 entries
    INFO: ngram_search.c(474): Resized backpointer table to 320000 entries
    INFO: ngram_search.c(482): Resized score stack to 6400000 entries
    INFO: ngram_search_fwdtree.c(1549): 240562 words recognized (147/fr)
    INFO: ngram_search_fwdtree.c(1551): 1577829 senones evaluated (964/fr)
    INFO: ngram_search_fwdtree.c(1553): 16447226 channels searched (10053/fr),
    239904 1st, 5167368 last
    INFO: ngram_search_fwdtree.c(1557): 302411 words for which last channels
    evaluated (184/fr)
    INFO: ngram_search_fwdtree.c(1560): 2225812 candidate words for entering last
    phone (1360/fr)
    INFO: ngram_search_fwdtree.c(1562): fwdtree 37.32 CPU 2.281 xRT
    INFO: ngram_search_fwdtree.c(1565): fwdtree 37.38 wall 2.285 xRT
    INFO: ngram_search_fwdflat.c(305): Utterance vocabulary contains 166 words
    INFO: ngram_search_fwdflat.c(940): 178116 words recognized (109/fr)
    INFO: ngram_search_fwdflat.c(942): 1264025 senones evaluated (773/fr)
    INFO: ngram_search_fwdflat.c(944): 5920015 channels searched (3618/fr)
    INFO: ngram_search_fwdflat.c(946): 269973 words searched (165/fr)
    INFO: ngram_search_fwdflat.c(948): 4811 word transitions (2/fr)
    INFO: ngram_search_fwdflat.c(951): fwdflat 3.64 CPU 0.222 xRT
    INFO: ngram_search_fwdflat.c(954): fwdflat 3.64 wall 0.223 xRT
    INFO: ngram_search.c(1214): not found in last frame, using مِن.1634
    instead
    INFO: ngram_search.c(1266): lattice start node .0 end node مِن(5).2
    INFO: ngram_search.c(1294): Eliminated 1115 nodes before end node
    INFO: ngram_search.c(1399): Lattice has 1134 nodes, 1 links
    INFO: ps_lattice.c(1365): Normalizer P(O) = alpha(مِن(5):2:1634) = -68814
    INFO: ps_lattice.c(1403): Joint P(O,S) = -68814 P(S|O) = 0
    INFO: ngram_search.c(888): bestpath 0.23 CPU 0.014 xRT
    INFO: ngram_search.c(891): bestpath 0.23 wall 0.014 xRT
    INFO: ngram_search.c(1043): bestpath -0.00 CPU -0.000 xRT
    INFO: ngram_search.c(1046): bestpath 0.00 wall 0.000 xRT
    INFO: ngram_search.c(1043): bestpath -0.00 CPU -0.000 xRT
    INFO: ngram_search.c(1046): bestpath 0.00 wall 0.000 xRT
    INFO: batch.c(792): 8: 16.35 seconds speech, 41.18 seconds CPU, 41.25 seconds
    wall
    INFO: batch.c(794): 8: 2.52 xRT (CPU), 2.52 xRT (elapsed)
    INFO: batch.c(806): TOTAL 16.35 seconds speech, 41.18 seconds CPU, 41.25
    seconds wall
    INFO: batch.c(808): AVERAGE 2.52 xRT (CPU), 2.52 xRT (elapsed)
    INFO: ngram_search_fwdtree.c(430): TOTAL fwdtree 37.32 CPU 2.282 xRT
    INFO: ngram_search_fwdtree.c(433): TOTAL fwdtree 37.38 wall 2.286 xRT
    INFO: ngram_search_fwdflat.c(174): TOTAL fwdflat 3.64 CPU 0.222 xRT
    INFO: ngram_search_fwdflat.c(177): TOTAL fwdflat 3.64 wall 0.223 xRT
    INFO: ngram_search.c(317): TOTAL bestpath 0.23 CPU 0.014 xRT
    INFO: ngram_search.c(320): TOTAL bestpath 0.23 wall 0.014 xRT

    Why is the result not as accurate as with sphinxbase features?


     
  • Nickolay V. Shmyrev

    > why the result is not accurate such sphinxbase feat ?

    Most likely you have zero energy regions in your audio. You need to apply
    dither both in sphinx4 and in pocketsphinx.
    CMN values must match. You need to provide more information (file, logs,
    results, versions) in order to get a more definite answer.

     
  • Anonymous

    Anonymous - 2012-06-23

    Hello;
    I have this Java class to produce MFCC:
    package transcriber;
    import edu.cmu.sphinx.frontend.*;
    import edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor;
    import edu.cmu.sphinx.frontend.util.StreamDataSource;
    import edu.cmu.sphinx.frontend.util.DataDumper;
    import edu.cmu.sphinx.tools.feature.FeatureFileDumper;
    import edu.cmu.sphinx.util.props.ConfigurationManager;
    import java.io.FileInputStream;
    //import src.sphinx.edu.cmu;

    // Loads a Sphinx4 configuration, attaches one audio file to the
    // "streamDataSource" component and pulls data from the "dataDumper"
    // component until it returns null.
    // NOTE(review): array brackets appear to have been lost when this was
    // posted (String args, Object mat, new Object) and two string literals are
    // wrapped across lines, so the text as shown does not compile — confirm
    // against the poster's original file.
    public class feature {
    public static void main(String args) {

    try {
    String configFile = "C:/Users/azer/Desktop/Nouveau dossier
    (4)src/transcriber/co.xml";
    String audioFile = "C:/u/1.wav";
    String audioFile1 = "C:/Users/azer/Desktop/Nouveau dossier
    (4)/src/transcriber/2.wav";// Put your file name here

    ConfigurationManager cm = new ConfigurationManager(configFile);

    // Looked up here but not referenced again in this method.
    FrontEnd frontend = (FrontEnd) cm.lookup ("mfcFrontEnd");

    // Only audioFile is fed to the source; audioFile1 is used solely in the
    // commented-out line below.
    StreamDataSource source = (StreamDataSource) cm.lookup ("streamDataSource");
    source.setInputStream(new FileInputStream(audioFile), audioFile);
    ///FileInputStream dis=new FileInputStream(audioFile);
    ///source.setInputStream(new FileInputStream(audioFile1), audioFile1);
    // ffd1 and fff are created/looked up but not referenced again below.
    FeatureFileDumper ffd1=new FeatureFileDumper(cm,"mfcFrontEnd");
    DeltasFeatureExtractor fff= (DeltasFeatureExtractor) cm.lookup("dfe");
    DataDumper dumper = (DataDumper)cm.lookup("dataDumper");
    Object mat;

    Data data = null;
    mat =new Object ;
    System.out.println("-----------------resultats
    obtenus----------------------");
    // Pulls every item from the dumper but stores none of them; when the loop
    // ends, data is null.
    do {
    data = dumper.getData();

    } while (data != null);
    //double spectrumData = ((DoubleData) data).getValues();
    //int a=data.length;
    // mat is never filled from data, so this prints only the placeholder object.
    System.out.println(mat);
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
    }
    I want to transform the MFCC data into a matrix for use in DTW recognition.
    If someone has an idea, please share it.
    Thank you very much.

     
  • Nickolay V. Shmyrev

    Something like

    // Copy each feature frame into one row of a fixed-size matrix.
    // 100 rows x 39 columns are the sizes used in this sketch; adjust them to
    // the real number of frames and the feature dimension.
    double[][] mat = new double[100][39];
    int frameid = 0;

    // Test for null BEFORE casting: the last getData() call returns null, and
    // dereferencing it would throw. Stop as well once the matrix is full.
    Data data = dumper.getData();
    while (data != null && frameid < mat.length) {
        // Only DoubleData carries a double[] payload; anything else is skipped
        // instead of failing the cast.
        // NOTE(review): if the front end emits FloatData, add a matching branch.
        if (data instanceof DoubleData) {
            double[] spectrumData = ((DoubleData) data).getValues();
            int cols = Math.min(mat[frameid].length, spectrumData.length);
            for (int idx = 0; idx < cols; idx++)
                mat[frameid][idx] = spectrumData[idx];
            frameid++;
        }
        data = dumper.getData();
    }
    
     

Log in to post a comment.