From: <ku...@us...> - 2010-02-14 14:21:34
|
Revision: 2036
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2036&view=rev
Author:   kurzum
Date:     2010-02-14 14:21:27 +0000 (Sun, 14 Feb 2010)

Log Message:
-----------
iterative learning script

Modified Paths:
--------------
    trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java
    trunk/src/dl-learner/org/dllearner/scripts/tiger/LogHelper.java
    trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java
    trunk/src/dl-learner/org/dllearner/utilities/JamonMonitorLogger.java
    trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerFixedSize.java
    trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerRandomizer.java
    trunk/src/dl-learner/org/dllearner/utilities/examples/Examples.java

Added Paths:
-----------
    trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerCrossFolds.java
    trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerHelper.java
    trunk/src/dl-learner/org/dllearner/utilities/examples/ExperimentCollector.java

Removed Paths:
-------------
    trunk/src/dl-learner/org/dllearner/scripts/tiger/GlobalTest.java

Property Changed:
----------------
    trunk/

Property changes on: trunk
___________________________________________________________________
Modified: svn:ignore
   - .lastUsedExample .settings .project .classpath classes log cache cachePersistant reports results local rdbtoonto the_log.txt tmp fragmentOntology.owl output ling osmdata matching stanley dllearner.jar father.inp lgd.nt files
   + .lastUsedExample .settings .project .classpath classes log cache cachePersistant reports results local rdbtoonto the_log.txt tmp fragmentOntology.owl output ling osmdata matching stanley dllearner.jar father.inp lgd.nt files errorDescription

Modified: trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java 2010-02-13 19:55:34 UTC (rev 2035)
+++ 
trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -1,27 +1,154 @@ package org.dllearner.scripts.tiger; +import java.util.SortedSet; + +import org.apache.log4j.Logger; +import org.dllearner.utilities.JamonMonitorLogger; import org.dllearner.utilities.examples.Examples; +import com.jamonapi.Monitor; + public class ExperimentConfig { + private static final Logger logger = Logger.getLogger(ExperimentConfig.class); + public String label = "unset"; public int resultLimit = -1; public int splits = 5; - public int initialsplits = 30; - public int iteration = 1; + public int initialsplits = 5; + private int iteration; public boolean useStartClass = true; public boolean searchTree = false; - public int noise = 0; + public int noise = 10; //sets ValueFrequency treshold and maxExecution time - public boolean adaptive = true; - public int maxExecutionTime = 40; - public int valueFrequencyThreshold = 3; + public boolean adaptMaxRuntime = true; + public int maxExecutionTime = 20; + public int factor = 2; + public boolean useDataHasValue = true; +// public int valueFrequencyThreshold = 3; + public final Monitor[] iterationPrecision; + public final Monitor[] iterationRecall; + public final Monitor[] iterationFmeasure; + public final Monitor[] iterationLearningTime; + public final Monitor[] iterationTotalTime; + private String highestPrecision = ""; + private String highestRecall = ""; + private String highestFMeasure = ""; - public boolean stopCondition(int iteration, Examples learn){ - return (iteration<this.iteration); + public ExperimentConfig(int iteration, String label){ + this.iteration = iteration; + this.label = label; + + iterationPrecision = new Monitor[this.iteration]; + iterationRecall = new Monitor[this.iteration]; + iterationFmeasure = new Monitor[this.iteration]; + iterationLearningTime = new Monitor[this.iteration]; + iterationTotalTime = new Monitor[this.iteration]; + for (int i = 0; i < 
iterationPrecision.length; i++) { + iterationPrecision[i] = JamonMonitorLogger.getStatisticMonitor(this.getClass(), label+"_prec_i"+i); + iterationRecall[i] = JamonMonitorLogger.getStatisticMonitor(this.getClass(), label+"_rec_i"+i); + iterationFmeasure[i] = JamonMonitorLogger.getStatisticMonitor(this.getClass(), label+"_fme_i"+i); + iterationLearningTime[i] = JamonMonitorLogger.getStatisticMonitor(this.getClass(), label+"_learning_i"+i); + iterationTotalTime[i] = JamonMonitorLogger.getStatisticMonitor(this.getClass(), label+"_total_i"+i); + } } + //reached iterations + //reached 100% + + public boolean stopCondition(int iteration, Examples learn, SortedSet<String> posAsPos, SortedSet<String> retrieved, Examples allExamples, String concept){ + if(iteration == 0){ + //skip first; + return true; + } + Monitor iterationTime = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "iterationTime"); + iterationTotalTime[iteration-1].add(iterationTime.getLastValue()); + Monitor learningTime = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "learningTime"); + iterationLearningTime[iteration-1].add(learningTime.getLastValue()); + logger.info("Testing stop condition (iter: "+iteration+" ) " ); + + double precision = TestIterativeLearning.precision(posAsPos.size(), retrieved.size()); + double recall = TestIterativeLearning.recall(posAsPos.size(),allExamples.getPosTest().size()); + double fmeasure = fmeasure( precision, recall); + iterationPrecision[iteration-1].add(precision); + iterationRecall[iteration-1].add(recall); + iterationFmeasure[iteration-1].add(fmeasure); + + if(higher(iterationPrecision, precision)){highestPrecision=concept;} + if(higher(iterationRecall, recall)){highestRecall=concept;} + if(higher(iterationFmeasure, fmeasure)){highestFMeasure=concept;} + + logger.info("F-Measure: "+TestIterativeLearning.df.format( fmeasure )); + + boolean condIter = (iteration<this.iteration); + boolean condPrec = fmeasure <=1.0d; + if(!condIter){ + 
logger.info("iterations reached, stopping"); + return false; + }else if(!condPrec){ + logger.info("fmeasure reached, stopping"); + return false; + }else{ + return true; + } + } + + public static double fmeasure(double precision, double recall){ + return (precision+recall == 0)?0.0d: (2*precision*recall)/(precision+recall); + } + + public boolean higher(Monitor[] a, double current){ + for (int i = 0; i < a.length; i++) { + if(current>a[i].getMax()){ + return true; + } + } + return false; + } + + @Override + public String toString(){ + + String pre = "\n*********\n"+label+"\n"; + pre +="highestPrecision: "+highestPrecision+"\n"; + pre +="highestRecall: "+highestRecall+"\n"; + pre +="highestFMeasure: "+highestFMeasure+"\n"; + + String precision = "Precision:\n"; + String hits = "hits:\n"; + String recall = "Recall:\n"; + String fmeasure = "F-Measure:\n"; + String learningtime = "learningtime:\n"; + String totaltime = "Totaltime:\n"; + + for (int i = 0; i < iterationPrecision.length; i++) { + precision+=iterationPrecision[i].getAvg()+"\n"; + hits+=iterationPrecision[i].getHits()+" | "; + recall+=iterationRecall[i].getAvg()+"\n"; + fmeasure+=iterationFmeasure[i].getAvg()+"\n"; + learningtime+=iterationLearningTime[i].getAvg()+"\n"; + totaltime+=iterationTotalTime[i].getAvg()+"\n"; + } + + return pre+precision+recall+fmeasure+hits+"\n"+learningtime+totaltime; + } +// public static double precision( int posAsPos, int retrieved){ +// double precision = ((double)posAsPos)/((double)retrieved); +// logger.info("Precision: "+df.format(precision)); +// return precision; +// } +// public static double recall( int posAsPos, int allPositives){ +// double recall = ((double)posAsPos)/((double)allPositives); +// +// logger.info("Recall: "+df.format(recall)); +// return recall; +// +// } + + + + } Deleted: trunk/src/dl-learner/org/dllearner/scripts/tiger/GlobalTest.java =================================================================== --- 
trunk/src/dl-learner/org/dllearner/scripts/tiger/GlobalTest.java 2010-02-13 19:55:34 UTC (rev 2035) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/GlobalTest.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -1,407 +0,0 @@ -package org.dllearner.scripts.tiger; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URL; -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; - -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.dllearner.algorithms.refinement2.ROLComponent2; -import org.dllearner.algorithms.refinement2.ROLearner2; -import org.dllearner.core.ComponentPool; -import org.dllearner.core.EvaluatedDescription; -import org.dllearner.core.KnowledgeSource; -import org.dllearner.core.LearningAlgorithm; -import org.dllearner.core.LearningProblem; -import org.dllearner.core.ReasonerComponent; -import org.dllearner.core.configurators.ComponentFactory; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.Individual; -import org.dllearner.kb.OWLFile; -import org.dllearner.kb.sparql.Cache; -import org.dllearner.kb.sparql.SPARQLTasks; -import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlKnowledgeSource; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; -import org.dllearner.learningproblems.PosNegLPStandard; -import org.dllearner.reasoning.FastInstanceChecker; -import org.dllearner.refinementoperators.RhoDRDown; -import org.dllearner.utilities.Helper; -import org.dllearner.utilities.JamonMonitorLogger; -import org.dllearner.utilities.examples.ExMakerFixedSize; -import org.dllearner.utilities.examples.ExampleDataCollector; -import 
org.dllearner.utilities.examples.Examples; - -import com.jamonapi.Monitor; - -public class GlobalTest { - private static final Logger logger = Logger.getLogger(GlobalTest.class); - - static DecimalFormat df = new DecimalFormat("00.###%"); - - static String backgroundXML = "files/tiger.noSchema.noImports.rdf"; - static String propertiesXML = "files/propertiesOnly.rdf"; - static String sentenceXMLFolder = "files/tiger/"; - static String sentenceprefix = "http://nlp2rdf.org/ontology/s"; - static String prefix = "http://nlp2rdf.org/ontology/"; - - static String active = "files/active_all_sentenceNumbers.txt"; - static String passiveNoZU = "files/passive_noZuInf_sentenceNumbers.txt"; - static String passiveWithZu = "files/passive_zuInf_sentenceNumbers.txt"; - static String test_has_pos = "files/test_has_pos.txt"; - static String test_has_neg = "files/test_has_neg.txt"; - - static SparqlEndpoint sparqlEndpoint; - static SPARQLTasks sparqlTasks; - - static String sparqlEndpointURL = "http://db0.aksw.org:8893/sparql"; - static String graph = "http://nlp2rdf.org/tiger"; - static String rulegraph = "http://nlp2rdf.org/schema/rules1"; - - - - - - final static boolean debug = false; - //no randomization in examples - final static boolean randomizedebug = !debug; - - public static void main(String[] args) { - LogHelper.initLoggers(); - Logger.getLogger(Cache.class).setLevel(Level.INFO); - Logger.getLogger(ComponentPool.class).setLevel(Level.INFO); - Logger.getLogger(ROLearner2.class).setLevel(Level.TRACE); - Logger.getLogger(RhoDRDown.class).setLevel(Level.TRACE); - Logger.getLogger(SparqlQuery.class).setLevel(Level.INFO); - - try { - sparqlEndpoint = new SparqlEndpoint(new URL(sparqlEndpointURL), new ArrayList<String>(Arrays - .asList(new String[] { graph })), new ArrayList<String>()); - sparqlTasks = new SPARQLTasks(Cache.getDefaultCache(), sparqlEndpoint); - } catch (Exception e) { - e.printStackTrace(); - } - - Examples allExamples = new Examples(); - SortedSet<String> 
positives; - SortedSet<String> negatives; - -// positives = read(passiveWithZu); - positives = read(passiveNoZU); - negatives = read(active); - - //removing overlap - positives.removeAll(negatives); - negatives.removeAll(positives); - -// System.out.println(Helper.intersection(passiveZuInfSentences, activeSentences)); -// System.out.println(Helper.intersection(passiveZuInfSentences, passiveNoZuSentences)); -// System.out.println(Helper.intersection(activeSentences, passiveNoZuSentences)); - allExamples.addPosTrain(positives); - allExamples.addNegTrain(negatives); - - logger.debug("All examples \n"+allExamples); - - ExperimentConfig config = new ExperimentConfig(); - firstContact( allExamples, config); - JamonMonitorLogger.writeHTMLReport("log/tiger.html"); - //retrieved wird neues Example, als schnittmenge mit all - //und den bisher gewaehlten - //dann splits auswählen und - //pos und neg wieder hinzufuegen - - } - - public static void firstContact(Examples allExamples, ExperimentConfig config){ - ExMakerFixedSize fs = new ExMakerFixedSize(allExamples, randomizedebug); - Examples learn = fs.select(config.initialsplits, config.initialsplits); - logger.debug("Intial training set \n"+learn); -// System.out.println(learn.getPosTrain()); -// System.out.println(learn.getNegTrain()); -// if (true) { -// System.exit(0); -// } -// int size = 0; - for(int i = 0 ; config.stopCondition(i, learn) ;i++ ) { - /*LEARNING*/ - EvaluatedDescription ed = learn(learn, config); - - /*RETRIEVING*/ - SortedSet<String> retrieved = getSentences(ed, config.resultLimit); - logger.debug("Retrieved "+retrieved.size()+" sentences"); - - - /*MASHING*/ - //Menge aller positiven geschn. 
mit den gefundenen - SortedSet<String> posAsPos = Helper.intersection(retrieved, allExamples.getPosTrain()); - logger.debug("Number of retrieved positives: "+posAsPos.size()); - logger.debug("Number of total positives: "+allExamples.getPosTrain().size()); - results(posAsPos, retrieved, allExamples); - - //Menge aller positiven geschn. mit den gefundenen - SortedSet<String> negAsPos = Helper.intersection(retrieved, allExamples.getNegTrain()); - logger.debug("Number of retrieved negatives: "+negAsPos.size()); - logger.debug("Total: "+posAsPos.size()+" + "+negAsPos.size() +" = "+retrieved.size()); - -// if(retrieved.size()!=(posAsPos.size()+negAsPos.size())){ -// logger.warn("sets are wrong"); -// System.exit(0); -// } - - Examples newlyFound = new Examples(); - newlyFound.addPosTrain(Helper.intersection(retrieved, learn.getPosTest())); - newlyFound.addNegTrain(Helper.intersection(retrieved, learn.getNegTest())); - //validate here - - fs = new ExMakerFixedSize(newlyFound, randomizedebug); - newlyFound = fs.select(config.splits, config.splits); - - learn.addPosTrain(newlyFound.getPosTrain()); - learn.addNegTrain(newlyFound.getNegTrain()); - logger.debug("Next training set \n"+learn); -// size = learn.getPosTrain().size() + learn.getNegTrain().size(); - - } - - - - - - } - - private static void results(SortedSet<String> posAsPos, SortedSet<String> retrieved, Examples allExamples) { - double precision = precision( posAsPos.size(), retrieved.size()); - double recall = recall( posAsPos.size(),allExamples.getPosTrain().size()); - logger.info("F-Measure: "+df.format( (2*precision*recall)/(precision+recall)) ); - - } - - public static double precision( int posAsPos, int retrieved){ - double precision = ((double)posAsPos)/((double)retrieved); - logger.info("Precision: "+df.format(precision)); - return precision; - } - public static double recall( int posAsPos, int allPositives){ - double recall = ((double)posAsPos)/((double)allPositives); - - logger.info("Recall: 
"+df.format(recall)); - return recall; - - } - - private static Set<KnowledgeSource> _getOWL(Examples ex) throws Exception{ - Set<KnowledgeSource> tmp = new HashSet<KnowledgeSource>(); - List<URL> urls = new ArrayList<URL>(); - urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getPosTrain())); - urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getNegTrain())); - urls.add(new File(backgroundXML).toURI().toURL()); - - for (URL u : urls) { - OWLFile ks = ComponentFactory.getOWLFile(u); - tmp.add(ks); - } - return tmp; - } - @SuppressWarnings("unused") - private static Set<KnowledgeSource> _getSPARQL(Examples ex) throws Exception{ - Set<KnowledgeSource> tmp = new HashSet<KnowledgeSource>(); - - Set<String> examples = new TreeSet<String>(); - examples.addAll(ex.getPosTrain()); - examples.addAll(ex.getNegTrain()); - SparqlKnowledgeSource ks = ComponentFactory.getSparqlKnowledgeSource(new URL(sparqlEndpointURL), examples); - ks.getConfigurator().setUrl(new URL(sparqlEndpointURL)); - ks.getConfigurator().setDefaultGraphURIs(new HashSet<String>(Arrays.asList(new String[]{graph}))); - ks.getConfigurator().setInstances(examples); - ks.getConfigurator().setDissolveBlankNodes(false); - ks.getConfigurator().setRecursionDepth(2); - ks.getConfigurator().setDissolveBlankNodes(false); - ks.getConfigurator().setCloseAfterRecursion(true); - ks.getConfigurator().setGetAllSuperClasses(true); - ks.getConfigurator().setGetPropertyInformation(false); - ks.getConfigurator().setUseLits(true); -// ks.getConfigurator(). 
- OWLFile ks2 = ComponentFactory.getOWLFile(new File(propertiesXML).toURI().toURL()); - tmp.add(ks); - tmp.add(ks2); - - return tmp; - } - - //test if virtuoso is correct - public static void validate(Description d, Examples newlyFound){ - try { - ExMakerFixedSize fs = new ExMakerFixedSize(newlyFound); - Examples tmp = fs.select(100, 100); - FastInstanceChecker fc = _getFastInstanceChecker(tmp); - SortedSet<Individual> inds = fc.getIndividuals(d); - } catch (Exception e) { - e.printStackTrace(); - } - } - - public static FastInstanceChecker _getFastInstanceChecker(Examples ex)throws Exception{ - Set<KnowledgeSource> tmp = _getOWL(ex); -// Set<KnowledgeSource> tmp = _getSPARQL(ex); - - - FastInstanceChecker rc = ComponentFactory.getFastInstanceChecker(tmp); - for (KnowledgeSource ks : tmp) { - ks.init(); - } - rc.init(); - return rc; - } - - public static EvaluatedDescription learn(Examples ex, ExperimentConfig config) { - Monitor init = JamonMonitorLogger.getTimeMonitor(GlobalTest.class, "init").start(); - - EvaluatedDescription result = null; - - try { - FastInstanceChecker rc = _getFastInstanceChecker(ex); - PosNegLPStandard lp = ComponentFactory - .getPosNegLPStandard(rc, ex.getPosTrain(), ex.getNegTrain()); - LearningAlgorithm la = _getROLLearner(lp, rc, config, ex); - lp.init(); - la.init(); - init.stop(); - Monitor learning = JamonMonitorLogger.getTimeMonitor(GlobalTest.class, "learning") - .start(); - la.start(); - learning.stop(); - - result = la.getCurrentlyBestEvaluatedDescription(); - logger.debug(PrefixMap.toKBSyntaxString(result.getDescription())); - logger.debug(PrefixMap.toManchesterSyntaxString(result.getDescription())); - - } catch (Exception e) { - e.printStackTrace(); - System.exit(0); - } - return result; - } - - public static SortedSet<String> getSentences(EvaluatedDescription ed, int resultLimit) { - SortedSet<String> result = new TreeSet<String>(); - SparqlQueryDescriptionConvertVisitor visit = new SparqlQueryDescriptionConvertVisitor(); - 
visit.setDistinct(true); - visit.setLabels(false); - visit.setLimit(resultLimit); - String sparqlQuery = ""; - try { - sparqlQuery = visit.getSparqlQuery(ed.getDescription()); - } catch (Exception e1) { - e1.printStackTrace(); - } - logger.debug(PrefixMap.toKBSyntaxString(ed.getDescription())); - sparqlQuery = " \n define input:inference \"" + rulegraph + "\" \n" + "" + sparqlQuery; - logger.debug(sparqlQuery); - - Monitor m = JamonMonitorLogger.getTimeMonitor(GlobalTest.class, "sparqlquery").start(); - result.addAll(sparqlTasks.queryAsSet(sparqlQuery, "subject")); - m.stop(); - logger.debug("query avg: " + ((double)m.getAvg() / (double)1000)+ " seconds (last: "+((double)m.getLastValue() / (double)1000)+")"); - if(result.isEmpty()){ - - logger.error("sparql query returned no results "); - logger.error(sparqlQuery); - System.exit(0); - } - return result; - } - - private static LearningAlgorithm _getROLLearner(LearningProblem lp, ReasonerComponent rc, ExperimentConfig config, Examples ex) - throws Exception { - - int maxExecutionTime = config.maxExecutionTime; - int valueFrequencyThreshold = config.valueFrequencyThreshold; - if(config.adaptive){ - maxExecutionTime = 2 * ex.sizeOfTrainingSets(); - valueFrequencyThreshold = ex.getPosTrain().size(); -// valueFrequencyThreshold = (int) Math.floor(0.8d*((double)ex.getPosTrain().size())); - - } - - ROLComponent2 la = ComponentFactory.getROLComponent2(lp, rc); - la.getConfigurator().setUseExistsConstructor(true); - - la.getConfigurator().setUseAllConstructor(false); - la.getConfigurator().setUseCardinalityRestrictions(false); - la.getConfigurator().setUseNegation(false); - la.getConfigurator().setUseHasValueConstructor(false); - la.getConfigurator().setUseDataHasValueConstructor(true); - la.getConfigurator().setValueFrequencyThreshold(valueFrequencyThreshold); - - la.getConfigurator().setIgnoredConcepts(new HashSet<String>(Arrays.asList(new String[]{ - "http://nlp2rdf.org/ontology/sentencefinalpunctuation_tag", - 
"http://nlp2rdf.org/ontology/comma_tag", - "http://nachhalt.sfb632.uni-potsdam.de/owl/stts.owl#SentenceFinalPunctuation" - }))); - - - la.getConfigurator().setNoisePercentage(config.noise); - la.getConfigurator().setTerminateOnNoiseReached(true); - la.getConfigurator().setMaxExecutionTimeInSeconds(maxExecutionTime); - - if(config.useStartClass){ - la.getConfigurator().setStartClass(prefix + "Sentence"); - } - - la.getConfigurator().setWriteSearchTree(config.searchTree); - la.getConfigurator().setSearchTreeFile("log/searchTreeTiger.txt"); - la.getConfigurator().setReplaceSearchTree(true); - return la; - } - - public static SortedSet<String> read(String f) { - SortedSet<String> result = new TreeSet<String>(); - BufferedReader in = null; - try { - in = new BufferedReader(new InputStreamReader(new FileInputStream(f))); - - String line; - while ((line = in.readLine()) != null) { - try { - line = line.trim(); - Integer.parseInt(line); - if (!result.add(sentenceprefix + line)) { - logger.error("reading failed"); - System.exit(0); - } - } catch (Exception e) { - e.printStackTrace(); - System.exit(0); - } - } - - } catch (Exception e) { - e.printStackTrace(); - logger.error("Could not read examples from: " + f); - System.exit(0); - - } finally { - try { - in.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - logger.info("read " + result.size() + " lines from " + f); - - return result; - } - -} Modified: trunk/src/dl-learner/org/dllearner/scripts/tiger/LogHelper.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/tiger/LogHelper.java 2010-02-13 19:55:34 UTC (rev 2035) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/LogHelper.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -26,7 +26,7 @@ Layout layout = new PatternLayout(); layout = new PatternLayout("%-5p [%C{1}]: %m%n"); ConsoleAppender consoleAppender = new ConsoleAppender(layout); -// consoleAppender.setThreshold(Level.DEBUG); + 
consoleAppender.setThreshold(Level.WARN); Layout layout2 = null; FileAppender fileAppenderNormal = null; Modified: trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java 2010-02-13 19:55:34 UTC (rev 2035) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -26,8 +26,6 @@ import org.dllearner.core.LearningProblem; import org.dllearner.core.ReasonerComponent; import org.dllearner.core.configurators.ComponentFactory; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.Individual; import org.dllearner.kb.OWLFile; import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SPARQLTasks; @@ -41,9 +39,12 @@ import org.dllearner.utilities.Files; import org.dllearner.utilities.Helper; import org.dllearner.utilities.JamonMonitorLogger; +import org.dllearner.utilities.examples.ExMakerCrossFolds; import org.dllearner.utilities.examples.ExMakerFixedSize; +import org.dllearner.utilities.examples.ExMakerRandomizer; import org.dllearner.utilities.examples.ExampleDataCollector; import org.dllearner.utilities.examples.Examples; +import org.dllearner.utilities.examples.ExperimentCollector; import com.jamonapi.Monitor; @@ -72,20 +73,22 @@ static String graph = "http://nlp2rdf.org/tiger"; static String rulegraph = "http://nlp2rdf.org/schema/rules1"; + public static DecimalFormat dftime = new DecimalFormat("#####.#"); - final static boolean debug = false; + static int iterations = 5; + static int folds = 6; + static int printSentences = 3; //no randomization in examples - final static boolean randomizedebug = !debug; public static void main(String[] args) { LogHelper.initLoggers(); Logger.getLogger(Cache.class).setLevel(Level.INFO); Logger.getLogger(ComponentPool.class).setLevel(Level.INFO); - 
Logger.getLogger(ROLearner2.class).setLevel(Level.TRACE); - Logger.getLogger(RhoDRDown.class).setLevel(Level.TRACE); + Logger.getLogger(ROLearner2.class).setLevel(Level.INFO); + Logger.getLogger(RhoDRDown.class).setLevel(Level.INFO); Logger.getLogger(SparqlQuery.class).setLevel(Level.INFO); try { @@ -96,96 +99,193 @@ e.printStackTrace(); } - Examples allExamples = new Examples(); - SortedSet<String> positives; - SortedSet<String> negatives; - -// positives = read(passiveWithZu); - positives = read(passiveNoZU); - negatives = read(active); +// boolean debug = true; +// if(debug){ +// folds = 1; +// iterations = 1; +// } - //removing overlap - positives.removeAll(negatives); - negatives.removeAll(positives); + passiveNoZU(); +// passiveWithZu(); -// System.out.println(Helper.intersection(passiveZuInfSentences, activeSentences)); -// System.out.println(Helper.intersection(passiveZuInfSentences, passiveNoZuSentences)); -// System.out.println(Helper.intersection(activeSentences, passiveNoZuSentences)); - allExamples.addPosTrain(positives); - allExamples.addNegTrain(negatives); + logger.warn("finished"); + JamonMonitorLogger.writeHTMLReport("log/tiger.html"); - logger.debug("All examples \n"+allExamples); + } + + public static void passiveNoZU(){ + ExperimentCollector eColl_passiveNoZU = new ExperimentCollector("passiveNoZU"); - ExperimentConfig config = new ExperimentConfig(); - firstContact( allExamples, config); - JamonMonitorLogger.writeHTMLReport("log/tiger.html"); - //retrieved wird neues Example, als schnittmenge mit all - //und den bisher gewaehlten - //dann splits auswählen und - //pos und neg wieder hinzufuegen + SortedSet<String> positives = read(passiveNoZU); + SortedSet<String> negatives = read(active); + + //removing overlap + positives.removeAll(negatives); + negatives.removeAll(positives); + + Examples allExamples = new Examples(); + allExamples.addPosTrain(positives); + allExamples.addNegTrain(negatives); + + logger.debug("All examples \n"+allExamples); 
+ + List<Examples> folds = new ExMakerCrossFolds(allExamples).split(TestIterativeLearning.folds, 0.1d); +// ExMakerCrossFolds.printFolds(folds); + List<ExperimentConfig> configs = getConfigs(); + for (ExperimentConfig experimentConfig : configs) { + logger.warn("next: passiveNoZU."+experimentConfig.label); + int i = 1; + for (Examples examples : folds) { + + logger.warn("beginning fold: "+(i++)); + conductExperiment( examples, experimentConfig); + + } + eColl_passiveNoZU.addExperimentConfig(experimentConfig); + logger.info(experimentConfig); + } + eColl_passiveNoZU.write(iterations); + + } + + public static void passiveWithZu(){ + ExperimentCollector eColl_passiveWithZu = new ExperimentCollector("passiveWithZu"); + SortedSet<String> positives = read(passiveWithZu); + SortedSet<String> negatives = read(active); + + //removing overlap + positives.removeAll(negatives); + negatives.removeAll(positives); + + Examples allExamples = new Examples(); + allExamples.addPosTrain(positives); + allExamples.addNegTrain(negatives); + + logger.debug("All examples \n"+allExamples); + + List<Examples> runs = new ArrayList<Examples>(); + runs.add(new ExMakerRandomizer(allExamples).split(0.7d)); + runs.add(new ExMakerRandomizer(allExamples).split(0.7d)); + runs.add(new ExMakerRandomizer(allExamples).split(0.7d)); + runs.add(new ExMakerRandomizer(allExamples).split(0.7d)); + runs.add(new ExMakerRandomizer(allExamples).split(0.7d)); + List<ExperimentConfig> configs = getConfigs(); + for (ExperimentConfig experimentConfig : configs) { + logger.warn("next: passiveWithZu."+experimentConfig.label); + int i=1; + for (Examples examples : runs) { + logger.warn("beginning run: "+(i++)); + conductExperiment( examples, experimentConfig); + + } + eColl_passiveWithZu.addExperimentConfig(experimentConfig); + + logger.info(experimentConfig); + } + eColl_passiveWithZu.write(iterations); + } - public static void firstContact(Examples allExamples, ExperimentConfig config){ - ExMakerFixedSize fs = new 
ExMakerFixedSize(allExamples, randomizedebug); + public static List<ExperimentConfig> getConfigs(){ + + List<ExperimentConfig> l = new ArrayList<ExperimentConfig>(); + ExperimentConfig baseline = new ExperimentConfig(iterations, "baseline_5_5"); + + + ExperimentConfig reducedExamples = new ExperimentConfig(iterations, "reducedExamples_2_2"); + reducedExamples.initialsplits = 2; + reducedExamples.splits = 2; + + + ExperimentConfig fixRuntime = new ExperimentConfig(iterations, "fixRuntime_20s"); + fixRuntime.adaptMaxRuntime=false; + fixRuntime.maxExecutionTime = 20; + + + ExperimentConfig useLemma = new ExperimentConfig(iterations, "useLemma_false"); + useLemma.useDataHasValue=false; + + + l.add(baseline); +// l.add(reducedExamples); +// l.add(fixRuntime); +// l.add(useLemma); + + + return l; + } + + public static void conductExperiment(Examples allExamples, ExperimentConfig config){ + Examples tmp = new Examples(); + tmp.addPosTrain(allExamples.getPosTrain()); + tmp.addNegTrain(allExamples.getNegTrain()); + + ExMakerFixedSize fs = new ExMakerFixedSize(tmp); Examples learn = fs.select(config.initialsplits, config.initialsplits); - logger.debug("Intial training set \n"+learn); -// System.out.println(learn.getPosTrain()); -// System.out.println(learn.getNegTrain()); -// if (true) { -// System.exit(0); -// } -// int size = 0; - for(int i = 0 ; config.stopCondition(i, learn) ;i++ ) { + logger.debug("Total set \n"+allExamples); + logger.debug("Initial training set \n"+learn); + + SortedSet<String> posAsPos = new TreeSet<String>(); + SortedSet<String> retrieved = new TreeSet<String>(); + + String lastConcept=""; + + for(int i = 0 ; config.stopCondition(i, learn, posAsPos, retrieved, allExamples, lastConcept) ;i++ ) { + Monitor iterationTime = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "iterationTime").start(); /*LEARNING*/ EvaluatedDescription ed = learn(learn, config); + lastConcept = PrefixMap.toKBSyntaxString(ed.getDescription()); + 
logger.debug("USING CONCEPT: "+lastConcept); /*RETRIEVING*/ - SortedSet<String> retrieved = getSentences(ed, config.resultLimit, learn); + retrieved = getSentences(ed, config.resultLimit, learn); + //remove all that are not to be tested + retrieved = Helper.intersection(allExamples.getTestExamples(), retrieved ); logger.debug("Retrieved "+retrieved.size()+" sentences"); /*MASHING*/ //Menge aller positiven geschn. mit den gefundenen - SortedSet<String> posAsPos = Helper.intersection(retrieved, allExamples.getPosTrain()); + posAsPos = Helper.intersection(retrieved, allExamples.getPosTest()); logger.debug("Number of retrieved positives: "+posAsPos.size()); - logger.debug("Number of total positives: "+allExamples.getPosTrain().size()); + logger.debug("Number of total positives: "+allExamples.getPosTest().size()); results(posAsPos, retrieved, allExamples); //Menge aller positiven geschn. mit den gefundenen - SortedSet<String> negAsPos = Helper.intersection(retrieved, allExamples.getNegTrain()); + SortedSet<String> negAsPos = Helper.intersection(retrieved, allExamples.getNegTest()); logger.debug("Number of retrieved negatives: "+negAsPos.size()); + logger.debug("Number of total negatives: "+allExamples.getNegTest().size()); logger.debug("Total: "+posAsPos.size()+" + "+negAsPos.size() +" = "+retrieved.size()); - //not covered - -// if(retrieved.size()!=(posAsPos.size()+negAsPos.size())){ -// logger.warn("sets are wrong"); -// System.exit(0); -// } - Examples newlyFound = new Examples(); - SortedSet<String> discoveredPosInStore = Helper.intersection(retrieved, learn.getPosTest()); - SortedSet<String> misclassifiedNegInStore = Helper.intersection(retrieved, learn.getNegTest()); + SortedSet<String> discoveredPosInStore = Helper.intersection(retrieved, allExamples.getPosTest()); + SortedSet<String> misclassifiedNegInStore = Helper.intersection(retrieved, allExamples.getNegTest()); newlyFound.addPosTrain(discoveredPosInStore); newlyFound.addNegTrain(misclassifiedNegInStore); - 
int print = 5; - logger.info("Discovered "+discoveredPosInStore.size()+" positive sentences in store (printing "+print+"):"); - _getLabels(discoveredPosInStore, print); - logger.info("Misclassified "+misclassifiedNegInStore.size()+" negative sentences in store (printing "+print+"):"); - _getLabels(misclassifiedNegInStore, print); + + SortedSet<String> posAsNeg = Helper.difference(allExamples.getPositiveExamples(), retrieved); + logger.info("Discovered: "+discoveredPosInStore.size()+" positive sentences in store (printing "+printSentences+"):"); + _getLabels(discoveredPosInStore, printSentences); + logger.info("Misclassified: "+misclassifiedNegInStore.size()+" negative sentences in store (printing "+printSentences+"):"); + _getLabels(misclassifiedNegInStore, printSentences); + logger.info("Not found positives: "+posAsNeg.size()+" positive sentences in store (printing "+printSentences+"):"); + _getLabels(posAsNeg, printSentences); - fs = new ExMakerFixedSize(newlyFound, randomizedebug); + + fs = new ExMakerFixedSize(newlyFound); newlyFound = fs.select(config.splits, config.splits); learn.addPosTrain(newlyFound.getPosTrain()); learn.addNegTrain(newlyFound.getNegTrain()); logger.debug("Next training set \n"+learn); -// size = learn.getPosTrain().size() + learn.getNegTrain().size(); - + iterationTime.stop(); + Monitor learningTime = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "learningTime"); + logger.warn("finished iteration "+(i+1)+" needed on avg: "+dftime.format(iterationTime.getAvg())); + logger.warn("for learning: "+dftime.format(learningTime.getLastValue())+" avg: "+dftime.format(learningTime.getAvg())); } @@ -196,19 +296,19 @@ private static void results(SortedSet<String> posAsPos, SortedSet<String> retrieved, Examples allExamples) { double precision = precision( posAsPos.size(), retrieved.size()); - double recall = recall( posAsPos.size(),allExamples.getPosTrain().size()); - logger.info("F-Measure: "+df.format( 
(2*precision*recall)/(precision+recall)) ); + double recall = recall( posAsPos.size(),allExamples.getPosTest().size()); + double fmeasure = (2*precision*recall)/(precision+recall); + logger.info("F-Measure: "+df.format( fmeasure )); } public static double precision( int posAsPos, int retrieved){ - double precision = ((double)posAsPos)/((double)retrieved); + double precision = (retrieved==0)?0.0d:((double)posAsPos)/((double)retrieved); logger.info("Precision: "+df.format(precision)); return precision; } public static double recall( int posAsPos, int allPositives){ double recall = ((double)posAsPos)/((double)allPositives); - logger.info("Recall: "+df.format(recall)); return recall; @@ -221,7 +321,6 @@ urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getPosTrain())); urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getNegTrain())); - for (URL u : urls) { OWLFile ks = ComponentFactory.getOWLFile(u); tmp.add(ks); @@ -255,17 +354,17 @@ } //test if virtuoso is correct - public static void validate(Description d, Examples newlyFound){ - try { - ExMakerFixedSize fs = new ExMakerFixedSize(newlyFound); - Examples tmp = fs.select(100, 100); - FastInstanceChecker fc = _getFastInstanceChecker(tmp); - @SuppressWarnings("unused") - SortedSet<Individual> inds = fc.getIndividuals(d); - } catch (Exception e) { - e.printStackTrace(); - } - } +// public static void validate(Description d, Examples newlyFound){ +// try { +// ExMakerFixedSize fs = new ExMakerFixedSize(newlyFound); +// Examples tmp = fs.select(100, 100); +// FastInstanceChecker fc = _getFastInstanceChecker(tmp); +// @SuppressWarnings("unused") +// SortedSet<Individual> inds = fc.getIndividuals(d); +// } catch (Exception e) { +// e.printStackTrace(); +// } +// } public static FastInstanceChecker _getFastInstanceChecker(Examples ex)throws Exception{ Set<KnowledgeSource> tmp = _getOWL(ex); @@ -281,7 +380,8 @@ } public static EvaluatedDescription learn(Examples ex, ExperimentConfig config) { - 
Monitor init = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "init").start(); + Monitor initTimeKBandReasoner = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "initTimeKBandReasoner").start(); + EvaluatedDescription result = null; @@ -292,15 +392,14 @@ LearningAlgorithm la = _getROLLearner(lp, rc, config, ex); lp.init(); la.init(); - init.stop(); - Monitor learning = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "learning") - .start(); + initTimeKBandReasoner.stop(); + Monitor learningTime = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "learningTime").start(); la.start(); - learning.stop(); - + learningTime.stop(); + result = la.getCurrentlyBestEvaluatedDescription(); - logger.debug(PrefixMap.toKBSyntaxString(result.getDescription())); - logger.debug(PrefixMap.toManchesterSyntaxString(result.getDescription())); + logger.trace(PrefixMap.toKBSyntaxString(result.getDescription())); + logger.trace(PrefixMap.toManchesterSyntaxString(result.getDescription())); } catch (Exception e) { e.printStackTrace(); @@ -310,6 +409,7 @@ } public static SortedSet<String> getSentences(EvaluatedDescription ed, int resultLimit, Examples justforFindingTheBug) { + Monitor m = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "getSentences").start(); SortedSet<String> result = new TreeSet<String>(); SparqlQueryDescriptionConvertVisitor visit = new SparqlQueryDescriptionConvertVisitor(); visit.setDistinct(true); @@ -329,11 +429,11 @@ } catch (Exception e1) { e1.printStackTrace(); } - logger.debug("USING CONCEPT: "+PrefixMap.toKBSyntaxString(ed.getDescription())); + sparqlQueryGood = " \n define input:inference \"" + rulegraph + "\" \n" + "" + sparqlQueryGood; - logger.debug(sparqlQueryGood); + logger.trace(sparqlQueryGood); - Monitor m = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "sparqlquery").start(); + result.addAll(sparqlTasks.queryAsSet(sparqlQueryGood, "subject")); m.stop(); 
logger.debug("query avg: " + ((double)m.getAvg() / (double)1000)+ " seconds (last: "+((double)m.getLastValue() / (double)1000)+")"); @@ -347,6 +447,7 @@ } private static void _getLabels(SortedSet<String> sentenceURIs, int limit){ + Monitor m = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "_getLabels").start(); int i = 0; for (String sentenceURI : sentenceURIs) { if(i>=limit){ @@ -355,6 +456,7 @@ i++; _getLabel(sentenceURI); } + m.stop(); } private static void _getLabel(String sentenceURI){ @@ -372,12 +474,10 @@ throws Exception { int maxExecutionTime = config.maxExecutionTime; - int valueFrequencyThreshold = config.valueFrequencyThreshold; - if(config.adaptive){ - maxExecutionTime = 2 * ex.sizeOfTrainingSets(); - valueFrequencyThreshold = ex.getPosTrain().size(); + int valueFrequencyThreshold = ex.getPosTrain().size(); + if(config.adaptMaxRuntime){ + maxExecutionTime = config.factor * ex.sizeOfTrainingSets(); // valueFrequencyThreshold = (int) Math.floor(0.8d*((double)ex.getPosTrain().size())); - } ROLComponent2 la = ComponentFactory.getROLComponent2(lp, rc); @@ -387,13 +487,14 @@ la.getConfigurator().setUseCardinalityRestrictions(false); la.getConfigurator().setUseNegation(false); la.getConfigurator().setUseHasValueConstructor(false); - la.getConfigurator().setUseDataHasValueConstructor(true); + la.getConfigurator().setUseDataHasValueConstructor(config.useDataHasValue); la.getConfigurator().setValueFrequencyThreshold(valueFrequencyThreshold); la.getConfigurator().setIgnoredConcepts(new HashSet<String>(Arrays.asList(new String[]{ "http://nlp2rdf.org/ontology/sentencefinalpunctuation_tag", "http://nlp2rdf.org/ontology/comma_tag", - "http://nachhalt.sfb632.uni-potsdam.de/owl/stts.owl#SentenceFinalPunctuation" + "http://nachhalt.sfb632.uni-potsdam.de/owl/stts.owl#SentenceFinalPunctuation", + "http://nlp2rdf.org/ontology/generalsentenceinternalpunctuation_tag" }))); Modified: trunk/src/dl-learner/org/dllearner/utilities/JamonMonitorLogger.java 
=================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/JamonMonitorLogger.java 2010-02-13 19:55:34 UTC (rev 2035) +++ trunk/src/dl-learner/org/dllearner/utilities/JamonMonitorLogger.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -57,12 +57,6 @@ l.add(monitor); } - - - /*for (String label : retMon) { - l.add(MonitorFactory.getTimeMonitor(label)); - }*/ - return l; } @@ -141,13 +135,19 @@ @SuppressWarnings("all") public static Monitor getTimeMonitor(Class clazz, String label) { - String labeltmp = getMonitorPrefix(clazz)+label; return MonitorFactory.getTimeMonitor(labeltmp); } @SuppressWarnings("all") + public static Monitor getStatisticMonitor(Class clazz, String label) { + String labeltmp = getMonitorPrefix(clazz)+label; + return MonitorFactory.getMonitor(label, "double"); + + } + + @SuppressWarnings("all") public static void increaseCount (Class clazz, String label) { // MonitorFactory.getMonitor(getMonitorPrefix(clazz)+label, "#").add(1.0); Monitor m = MonitorFactory.getMonitor(getMonitorPrefix(clazz)+label, "count"); Added: trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerCrossFolds.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerCrossFolds.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerCrossFolds.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -0,0 +1,106 @@ +package org.dllearner.utilities.examples; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; + +public class ExMakerCrossFolds { + @SuppressWarnings("unused") + private static Logger logger = Logger.getLogger(ExMakerCrossFolds.class); + + private final Examples examples; + + public static int minElementsPerFold = 6; + + public ExMakerCrossFolds(Examples examples){ + this.examples = examples; + } + + 
public static void main(String[] args) { + Examples ex = new Examples(); + + for (int i = 0; i < 30000; i++) { + ex.addPosTrain("p"+i); + ex.addNegTrain("n"+i); + } + long n = System.currentTimeMillis(); + System.out.println("initial size: "+ex.size()); + ExMakerCrossFolds r = new ExMakerCrossFolds(ex); + List<Examples> l = r.split(10, 0.9d); + printFolds(l ); + System.out.println(System.currentTimeMillis()-n); + + + } + public static void printFolds(List<Examples> l ){ + int i = 1; + int totalsize = 0; + StringBuffer b = new StringBuffer(); + b.append("Number of folds "+l.size()+"\n"); + for (Examples examples : l) { + b.append("Fold: "+(i++)+"\n"); + b.append(examples.toString()); + b.append("\n"); + + totalsize+=examples.size(); + } + b.append("total size: "+totalsize); + logger.info(b.toString()); + } + + + public List<Examples> split(int folds, double percentageOfTrainingSet){ + if( folds*minElementsPerFold > examples.sizeTotalOfPositives() + || folds*minElementsPerFold > examples.sizeTotalOfNegatives() + ){ + logger.error("Too many folds for, too few data. 
cant spread: "); + logger.error(examples.sizeTotalOfPositives()+" examples over "+folds+" folds OR"); + logger.error(examples.sizeTotalOfNegatives()+" examples over "+folds+" folds"); + logger.error("each fold must have more than "+minElementsPerFold+" elements"); + return null; + } + + List<Examples> ret = new ArrayList<Examples>(); + double foldPercentage = 1.0d/((double)folds); + int tenPercentPos = (int)Math.floor(((double)examples.sizeTotalOfPositives())*foldPercentage); + int tenPercentNeg = (int)Math.floor(((double)examples.sizeTotalOfNegatives())*foldPercentage); + + List<String> posRemaining = new ArrayList<String>(examples.getPositiveExamples()); + List<String> negRemaining = new ArrayList<String>(examples.getNegativeExamples()); + Collections.shuffle(posRemaining); + Collections.shuffle(negRemaining); + + + Examples tmp; + Examples oneFold; + for(int i = 0; i<folds;i++){ +// logger.trace("Foldprogess: "+i+" of "+folds); + SortedSet<String> newPos = new TreeSet<String>(); + SortedSet<String> newNeg = new TreeSet<String>(); + String one = ""; + + for(int a =0; a<tenPercentPos&& !posRemaining.isEmpty();a++){ + one = posRemaining.remove(posRemaining.size()-1); + newPos.add(one); + } + for(int a =0; a <tenPercentNeg&& !negRemaining.isEmpty() ; a++){ + one = negRemaining.remove(negRemaining.size()-1); + newNeg.add(one); + } + + tmp = new Examples(); + tmp.addPosTrain(newPos); + tmp.addNegTrain(newNeg); + + oneFold = new ExMakerRandomizer(tmp).split(percentageOfTrainingSet); + ret.add(oneFold); + + } + return ret; + } + +} Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerFixedSize.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerFixedSize.java 2010-02-13 19:55:34 UTC (rev 2035) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerFixedSize.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -27,6 +27,7 @@ /** * used to randomize examples and split 
them into training and test sets + * gets a fixed number of examples * @author Sebastian Hellmann <hel...@in...> * */ @@ -73,7 +74,8 @@ /** * returns a new example object based on all Examples in the old set * picks a fixed number of examples, puts them into - * training sets rest to test set + * training sets, rest to test set + * based on all examples found in examples object * @param nrOfPos * @param nrOfNeg * @return Added: trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerHelper.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerHelper.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerHelper.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -0,0 +1,32 @@ +package org.dllearner.utilities.examples; + +import java.util.Collection; +import java.util.Random; + +public class ExMakerHelper { + + + /** + * bad performance don't use for large sets + * use collections.shuffle and remove last + * @param from + * @return + */ + public static String pickOneRandomly(Collection<String> from){ +// Monitor m = JamonMonitorLogger.getTimeMonitor(ExMakerHelper.class, "bad_performance").start(); + + if(from.isEmpty()){ + return null; + } + Random r = new Random(); + String[] array = from.toArray(new String[] {}); + + int index = Math.round((float)(array.length*r.nextFloat())); +// m.stop(); + try{ + return array[index]; + }catch (Exception e) { + return pickOneRandomly(from); + } + } +} Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerRandomizer.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerRandomizer.java 2010-02-13 19:55:34 UTC (rev 2035) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/ExMakerRandomizer.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -19,14 +19,15 @@ */ package org.dllearner.utilities.examples; -import java.util.Random; -import 
java.util.SortedSet; -import java.util.TreeSet; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import org.apache.log4j.Logger; /** * used to randomize examples and split them into training and test sets + * gets a percentage of the examples * @author Sebastian Hellmann <hel...@in...> * */ @@ -42,7 +43,7 @@ public static void main(String[] args) { Examples ex = new Examples(); - for (int i = 0; i < 20; i++) { + for (int i = 0; i < 1000; i++) { ex.addPosTrain("p"+i); ex.addNegTrain("n"+i); } @@ -53,56 +54,49 @@ } + public Examples split(double percentageOfTrainingSet){ -// System.out.println(GlobalConfig.trainingDataPercentage+""); - SortedSet<String> posTrain = new TreeSet<String>(); - SortedSet<String> negTrain = new TreeSet<String>(); + int sizeOfPosTrainingSet = (int)Math.floor(((double)examples.sizeTotalOfPositives())*percentageOfTrainingSet); + int sizeOfNegTrainingSet = (int)Math.floor(((double)examples.sizeTotalOfNegatives())*percentageOfTrainingSet); - SortedSet<String> posTest = new TreeSet<String>(); - SortedSet<String> negTest = new TreeSet<String>(); + List<String> posRemaining = new ArrayList<String>(examples.getPositiveExamples()); + List<String> negRemaining = new ArrayList<String>(examples.getNegativeExamples()); + Collections.shuffle(posRemaining); + Collections.shuffle(negRemaining); - SortedSet<String> posOld = new TreeSet<String>(); - SortedSet<String> negOld = new TreeSet<String>(); - posOld.addAll(examples.getPositiveExamples()); - negOld.addAll(examples.getNegativeExamples()); + List<String> newPos = new ArrayList<String>(); + List<String> newNeg = new ArrayList<String>(); - int posOldSize = posOld.size(); - int negOldSize = negOld.size(); - - while (!posOld.isEmpty() && (((double)posOld.size()/(double)posOldSize)) > percentageOfTrainingSet) { - String one = pickOneRandomly(posOld.toArray(new String[] {})); - posOld.remove(one); - posTest.add(one); + Examples ret = new Examples(); + String one; + while 
(posRemaining.size()>sizeOfPosTrainingSet){ + one = posRemaining.remove(posRemaining.size()-1); + newPos.add(one); + } - posTrain.addAll(posOld); - while (!negOld.isEmpty() && (((double)negOld.size()/(double)negOldSize)) > percentageOfTrainingSet) { - String one = pickOneRandomly(negOld.toArray(new String[] {})); - negOld.remove(one); - negTest.add(one); + ret.addPosTest(newPos); + ret.addPosTrain(posRemaining); + + while (negRemaining.size()>sizeOfNegTrainingSet){ + one = negRemaining.remove(negRemaining.size()-1); + newNeg.add(one); } - negTrain.addAll(negOld); + ret.addNegTest(newNeg); + ret.addNegTrain(negRemaining); - double posPercent = posTrain.size()/(double)posOldSize; - double negPercent = negTrain.size()/(double)negOldSize; + double posPercent = ret.getPosTrain().size()/(double)examples.getPositiveExamples().size(); + double negPercent = ret.getNegTrain().size()/(double)examples.getNegativeExamples().size(); // if there is more than a 10% error if(Math.abs(posPercent - percentageOfTrainingSet)>0.1d || Math.abs(negPercent - percentageOfTrainingSet)>0.1d ){ logger.info("repeating, unevenly matched"); return split(percentageOfTrainingSet); } - return new Examples(posTrain, negTrain, posTest, negTest); + return ret; } - public static String pickOneRandomly(String[] from){ - Random r = new Random(); - int index = Math.round((float)(from.length*r.nextFloat())); - try{ - return from[index]; - }catch (Exception e) { - return pickOneRandomly(from); - } - } + } Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/Examples.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/Examples.java 2010-02-13 19:55:34 UTC (rev 2035) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/Examples.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -36,6 +36,10 @@ */ public class Examples { private static final Logger logger = Logger.getLogger(Examples.class); + public static DecimalFormat df1 = 
new DecimalFormat("00.#%"); + public static DecimalFormat df2 = new DecimalFormat("00.##%"); + public static DecimalFormat df3 = new DecimalFormat("00.###%"); + private DecimalFormat myDf = df2; // private final SortedSet<String> positiveExamples = new TreeSet<String>(); // private final SortedSet<String> negativeExamples = new TreeSet<String>(); @@ -124,9 +128,9 @@ double posPercent = posTrain.size() / (double) sizeTotalOfPositives(); double negPercent = negTrain.size() / (double) sizeTotalOfNegatives(); ret += "\nPositive: " + posTrain.size() + " | " + posTest.size() + " (" - + DecimalFormat.getPercentInstance().format(posPercent) + ")"; + + myDf.format(posPercent) + ")"; ret += "\nNegative: " + negTrain.size() + " | " + negTest.size() + " (" - + DecimalFormat.getPercentInstance().format(negPercent) + ")"; + + myDf.format(negPercent) + ")"; return ret; } @@ -189,7 +193,11 @@ return posTrain.size()+negTrain.size(); } + public int sizeOfTestSets(){ + return posTest.size()+negTest.size(); + } + public SortedSet<String> getPositiveExamples() { SortedSet<String> total = new TreeSet<String>(); total.addAll(posTrain); @@ -203,6 +211,13 @@ total.addAll(negTest); return total; } + + public SortedSet<String> getTestExamples() { + SortedSet<String> total = new TreeSet<String>(); + total.addAll(posTest); + total.addAll(negTest); + return total; + } public SortedSet<String> getPosTrain() { return posTrain; Added: trunk/src/dl-learner/org/dllearner/utilities/examples/ExperimentCollector.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/ExperimentCollector.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/ExperimentCollector.java 2010-02-14 14:21:27 UTC (rev 2036) @@ -0,0 +1,103 @@ +package org.dllearner.utilities.examples; + +import java.io.File; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.List; + +import 
org.dllearner.scripts.tiger.ExperimentConfig; +import org.dllearner.utilities.Files; + +public class ExperimentCollector { + + public static String dir = "results/"; + public String details ; + public String totalGNU ; + public String totalLatex ; + public String timeGNU ; + public String timeLatex ; + + public static DecimalFormat df = new DecimalFormat(".####"); + public static DecimalFormat dfhuman = new DecimalFormat("##.##%"); + public static DecimalFormat dfRuntime = new DecimalFormat("####."); + List<ExperimentConfig> experimentConfigs = new ArrayList<ExperimentConfig>(); + + public ExperimentCollector(String filePrefix) { + details = dir + filePrefix + "_" + "details"; + totalGNU = dir + filePrefix + "_" + "totalGNU"; + totalLatex = dir + filePrefix + "_" + "totalLatex"; + timeGNU = dir + filePrefix + "_" + "timeGNU"; + timeLatex = dir + filePrefix + "_" + "timeLatex"; + } + + public void addExperimentConfig(ExperimentConfig experimentConfig) { + experimentConfigs.add(experimentConfig); + } + + public void write(int iterations) { + Files.appendFile(new File(details), ""); + Files.appendFile(new File(timeGNU), "\n***********\n\n"); + Files.appendFile(new File(timeLatex), "\n***********\n\n"); + Files.appendFile(new File(totalLatex), "\n***********\n\n"); + Files.appendFile(new File(totalGNU), "\n***********\n\n"); + String headerGNU = "\t"; + String headerLatex = "\t&\t"; + for (ExperimentConfig ec : experimentConfigs) { + headerGNU += ec.label + "\t"; + Files.appendFile(new File(details), ec.toString()); + } + for (int i = 0; i < iterations; i++) { + headerLatex += (i+1) + "\t&\t"; + } + + Files.appendFile(new File(totalGNU), headerGNU + "\n"); + Files.appendFile(new File(totalLatex), headerLatex + "\n"); + + for (int i = 0; i < iterations; i++) { + String fmeasureGNU = i + "\t"; + String learningTimeGNU = i + "\t"; + String totalTimeGNU = i + "\t"; + for (ExperimentConfig ec : experimentConfigs) { + fmeasureGNU += 
df.format(ec.iterationFmeasure[i].getAvg()) + "\t"; + learningTimeGNU+= df.format(ec.iterationLearningTime[i].getAvg())+"\t"; + totalTimeGNU+= df.format(ec.iterationTotalTime[i].getAvg())+"\t"; + } + Files.appendFile(new File(totalGNU), fmeasureGNU + "\n"); + Files.appendFile(new File(timeGNU), learningTimeGNU + "\n"); + Files.appendFile(new File(timeGNU), totalTimeGNU + "\n"); + } + + for (ExperimentConfig ec : experimentConfigs) { + String label = ec.label + "\t&\t"; + String learningTimeLatex = label+"learn"; + String totalTimeLatex = label+"total"; + String fmeasureLatex = label; + for (int i = 0; i < iterations; i++) { + learningTimeLatex += dfRuntime.format(ec.iterationLearningTime[i].getAvg()) + "\t&\t"; + totalTimeLatex += dfRuntime.format(ec.iterationTotalTime[i].getAvg()) + "\t&\t"; + fmeasureLatex += dfhuman.format(ec.iterationFmeasure[i].getAvg()) + "\t&\t"; + } + Files.appendFile(new File(timeLatex), learningTimeLatex + "\n"); + Files.appendFile(new File(timeLatex), totalTimeLatex + "\n"); + Files.appendFile(new File(timeLatex), "\n\n\n"); + Files.appendFile(new File(totalLatex), fmeasureLatex + "\n"); + + } + for (ExperimentConfig ec : experimentConfigs) { + String label = ec.label + "\t&\t"; + String learningTimeHuman = label+"learn"; + String totalTimeHuman = label+"total"; + String fmeasureHuman = label; + for (int i = 0; i < iterations; i++) { + learningTimeHuman += dfRuntime.format(ec.iterationLearningTime[i].getAvg()) +" ("+dfRuntime.format(ec.iterationLearningTime[i].getStdDev()) + ")\t&\t"; + totalTimeHuman += dfRuntime.format(ec.iterationTotalTime[i].getAvg()) +" ("+ dfRuntime.format(ec.iterationTotalTime[i].getStdDev())+ ")\t&\t"; + fmeasureHuman += dfhuman.format(ec.iterationFmeasure[i].getAvg()) +" ("+ dfhuman.format(ec.iterationFmeasure[i].getStdDev())+ ")\t&\t"; + } + Files.appendFile(new File(timeLatex), learningTimeHuman + "\n"); + Files.appendFile(new File(timeLatex), totalTimeHuman + "\n"); + Files.appendFile(new File(totalLatex), 
fmeasureHuman + "\n"); + } + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |