From: <lor...@us...> - 2014-05-09 18:13:07
|
Revision: 4267 http://sourceforge.net/p/dl-learner/code/4267 Author: lorenz_b Date: 2014-05-09 18:13:04 +0000 (Fri, 09 May 2014) Log Message: ----------- Modified CV script. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QueryTreeHeuristic.java trunk/components-core/src/main/java/org/dllearner/core/owl/DoubleMinMaxRange.java trunk/scripts/src/main/java/org/dllearner/scripts/NestedCrossValidation.java trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/QTLEvaluation.java trunk/test/qtl/breasttissue/train1.conf trunk/test/qtl/carcinogenesis/train.conf trunk/test/qtl/mutagenesis/train1.conf trunk/test/qtl/parkinsons/train.conf trunk/test/qtl/suramin/train.conf Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java 2014-05-09 18:13:04 UTC (rev 4267) @@ -102,19 +102,18 @@ @ConfigOption(name = "noisePercentage", defaultValue="0.0", description="the (approximated) percentage of noise within the examples") private double noisePercentage = 0.0; @ConfigOption(defaultValue = "10", name = "maxExecutionTimeInSeconds", description = "maximum execution of the algorithm in seconds") - private int maxExecutionTimeInSeconds = -1; + private int maxExecutionTimeInSeconds = 60; private double coverageWeight = 0.8; private double specifityWeight = 0.1; - private double coverageBeta = 0.5; private double minCoveredPosExamplesFraction = 0.2; // maximum execution time to compute a part of the solution private double maxTreeComputationTimeInSeconds = 10; // how important not to cover negatives - private double posWeight = 2; + private double beta = 1; // minimum score a query tree must have to be part of the solution - private double minimumTreeScore = 0.2; + private double minimumTreeScore = 0.3; //If yes, then the algorithm tries to cover all positive examples. Note that while this improves accuracy on the testing set, //it may lead to overfitting private boolean tryFullCoverage; @@ -161,10 +160,10 @@ lggGenerator = new LGGGeneratorImpl<String>(); - if(heuristic == null){ +// if(heuristic == null){ heuristic = new QueryTreeHeuristic(); - heuristic.setPosExamplesWeight(posWeight); - } + heuristic.setPosExamplesWeight(beta); +// } logger.info("Initializing..."); treeCache = new QueryTreeCache(model); @@ -222,7 +221,8 @@ String setup = "Setup:"; setup += "\n#Pos. examples:" + currentPosExamples.size(); setup += "\n#Neg. examples:" + currentNegExamples.size(); - setup += "\nPos. weight(beta):" + posWeight; + setup += "Heuristic:" + heuristic.getHeuristicType().name(); + setup += "\nbeta=" + beta; logger.info(setup); logger.info("Running..."); startTime = System.currentTimeMillis(); @@ -236,7 +236,7 @@ logger.info("#Remaining neg. examples:" + currentNegExampleTrees.size()); //compute a (partial) solution - computeNextPartialSolution(); + computeBestPartialSolution(); //pick best (partial) solution computed so far EvaluatedQueryTree<String> bestPartialSolution = currentPartialSolutions.first(); @@ -279,29 +279,31 @@ long endTime = System.currentTimeMillis(); logger.info("Finished in " + (endTime-startTime) + "ms."); - logger.info("Combined solution:\n" + OWLAPIConverter.getOWLAPIDescription(currentBestSolution.getDescription())); + logger.info("Combined solution:" + OWLAPIConverter.getOWLAPIDescription(currentBestSolution.getDescription()).toString().replace("\n", "")); logger.info(currentBestSolution.getScore()); } - private void computeNextPartialSolution(){ + private void computeBestPartialSolution(){ logger.info("Computing best partial solution..."); bestCurrentScore = Double.NEGATIVE_INFINITY; partialSolutionStartTime = System.currentTimeMillis(); initTodoList(currentPosExampleTrees, currentNegExampleTrees); EvaluatedQueryTree<String> currentElement; + QueryTree<String> currentTree; while(!partialSolutionTerminationCriteriaSatisfied()){ logger.trace("TODO list size: " + todoList.size()); //pick best element from todo list currentElement = todoList.poll(); + currentTree = currentElement.getTree(); //generate the LGG between the chosen tree and each uncovered positive example - for (QueryTree<String> example : currentElement.getFalseNegatives()) { - QueryTree<String> tree = currentElement.getTree(); - + Iterator<QueryTree<String>> it = currentElement.getFalseNegatives().iterator(); + while (it.hasNext() && !isPartialSolutionTimeExpired() && !isTimeExpired()) { + QueryTree<String> uncoveredTree = it.next(); //compute the LGG lggMon.start(); - QueryTree<String> lgg = lggGenerator.getLGG(tree, example); + QueryTree<String> lgg = lggGenerator.getLGG(currentTree, uncoveredTree); lggMon.stop(); //evaluate the LGG @@ -312,8 +314,8 @@ if(score >= bestCurrentScore){ //add to todo list, if not already contained in todo list or solution list todo(solution); - if(solution.getScore() > bestCurrentScore){ - logger.info("Got better solution:" + solution.getTreeScore()); + if(score > bestCurrentScore){ + logger.info("\tGot better solution:" + solution.getTreeScore()); } bestCurrentScore = solution.getScore(); } else if(mas < bestCurrentScore){ @@ -330,7 +332,7 @@ logger.info("...finished in " + (endTime-partialSolutionStartTime) + "ms."); EvaluatedDescription bestPartialSolution = currentPartialSolutions.first().asEvaluatedDescription(); - logger.info("Best partial solution:\n" + OWLAPIConverter.getOWLAPIDescription(bestPartialSolution.getDescription()) + "\n(" + bestPartialSolution.getScore() + ")"); + logger.info("Best partial solution: " + OWLAPIConverter.getOWLAPIDescription(bestPartialSolution.getDescription()).toString().replace("\n", "") + "\n(" + bestPartialSolution.getScore() + ")"); logger.trace("LGG time: " + lggMon.getTotal() + "ms"); logger.trace("Avg. LGG time: " + lggMon.getAvg() + "ms"); @@ -361,7 +363,7 @@ ? 0 : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExampleTrees.size()); - double coverageScore = Heuristics.getFScore(recall, precision, coverageBeta); + double coverageScore = Heuristics.getFScore(recall, precision, beta); //2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented int nrOfSpecificNodes = 0; @@ -400,8 +402,6 @@ private EvaluatedDescription buildCombinedSolution(){ if(partialSolutions.size() == 1){ EvaluatedDescription combinedSolution = partialSolutions.get(0).asEvaluatedDescription(); - double accuracy = lp.getAccuracy(combinedSolution.getDescription()); - System.out.println(accuracy); return combinedSolution; } List<Description> disjuncts = new ArrayList<Description>(); @@ -423,16 +423,13 @@ Set<Individual> posNotCovered = Sets.difference(lp.getPositiveExamples(), posCovered); Set<Individual> negNotCovered = Sets.difference(lp.getNegativeExamples(), negCovered); - double accuracy = lp.getAccuracy(unionDescription); - System.out.println(accuracy); - //compute the coverage double recall = posCovered.size() / (double)lp.getPositiveExamples().size(); double precision = (posCovered.size() + negCovered.size() == 0) ? 0 : posCovered.size() / (double)(posCovered.size() + negCovered.size()); - double coverageScore = Heuristics.getFScore(recall, precision, coverageBeta); + double coverageScore = Heuristics.getFScore(recall, precision, beta); // ScoreTwoValued score = new ScoreTwoValued(posCovered, posNotCovered, negCovered, negNotCovered); // score.setAccuracy(coverageScore); @@ -442,6 +439,7 @@ } private void reset(){ + currentBestSolution = null; partialSolutions = new ArrayList<EvaluatedQueryTree<String>>(); stop = false; @@ -672,17 +670,18 @@ } /** - * @param coverageBeta the coverageBeta to set + * Default value is 1. Lower values force importance of covering positive examples. + * @param beta the beta to set */ - public void setCoverageBeta(double coverageBeta) { - this.coverageBeta = coverageBeta; + public void setBeta(double beta) { + this.beta = beta; } /** - * @param posWeight the posWeight to set + * @param maxTreeComputationTimeInSeconds the maxTreeComputationTimeInSeconds to set */ - public void setPosWeight(double posWeight) { - this.posWeight = posWeight; + public void setMaxTreeComputationTimeInSeconds(double maxTreeComputationTimeInSeconds) { + this.maxTreeComputationTimeInSeconds = maxTreeComputationTimeInSeconds; } /* (non-Javadoc) Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QueryTreeHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QueryTreeHeuristic.java 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QueryTreeHeuristic.java 2014-05-09 18:13:04 UTC (rev 4267) @@ -26,13 +26,6 @@ private HeuristicType heuristicType = HeuristicType.PRED_ACC; - // F score beta value - private double coverageBeta = 1; - - private double coverageWeight = 0.8; - - private double specifityWeight = 0.1; - private double posExamplesWeight = 1; // syntactic comparison as final comparison criterion @@ -63,7 +56,7 @@ case FMEASURE : score = Heuristics.getFScore(tp/(tp+fn), tp/(tp+fp), posExamplesWeight);break; case PRED_ACC : - score = (tp + posExamplesWeight * tn) / ((tp + fn) + posExamplesWeight * (tn + fp));break; + score = (1/posExamplesWeight * tp + tn) / (1/posExamplesWeight * (tp + fn) + (tn + fp));break; case ENTROPY :{ double total = tp + fn; double pp = tp / total; @@ -154,6 +147,13 @@ } /** + * @return the heuristicType + */ + public HeuristicType getHeuristicType() { + return heuristicType; + } + + /** * @param posExamplesWeight the posExamplesWeight to set */ public void setPosExamplesWeight(double posExamplesWeight) { Modified: trunk/components-core/src/main/java/org/dllearner/core/owl/DoubleMinMaxRange.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/owl/DoubleMinMaxRange.java 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/components-core/src/main/java/org/dllearner/core/owl/DoubleMinMaxRange.java 2014-05-09 18:13:04 UTC (rev 4267) @@ -65,11 +65,11 @@ * @see org.dllearner.core.owl.KBElement#toString(java.lang.String, java.util.Map) */ public String toString(String baseURI, Map<String, String> prefixes) { - return " [>= " + minValue + " <= " + maxValue + "]"; + return " double[>= " + minValue + " <= " + maxValue + "]"; } public String toKBSyntaxString(String baseURI, Map<String, String> prefixes) { - return " [>= " + minValue + " <= " + maxValue + "]"; + return " double[>= " + minValue + " <= " + maxValue + "]"; } public void accept(KBElementVisitor visitor) { @@ -81,7 +81,7 @@ */ @Override public String toManchesterSyntaxString(String baseURI, Map<String, String> prefixes) { - return " [>= " + minValue + " <= " + maxValue + "]"; + return " double[>= " + minValue + " <= " + maxValue + "]"; } /* (non-Javadoc) Modified: trunk/scripts/src/main/java/org/dllearner/scripts/NestedCrossValidation.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/NestedCrossValidation.java 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/scripts/src/main/java/org/dllearner/scripts/NestedCrossValidation.java 2014-05-09 18:13:04 UTC (rev 4267) @@ -22,7 +22,6 @@ import static java.util.Arrays.asList; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.text.DecimalFormat; @@ -43,15 +42,17 @@ import org.apache.commons.beanutils.PropertyUtils; import org.apache.log4j.ConsoleAppender; import org.apache.log4j.FileAppender; +import org.apache.log4j.Layout; import org.apache.log4j.Level; import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; +import org.apache.log4j.Priority; import org.apache.log4j.SimpleLayout; import org.dllearner.cli.CLI; import org.dllearner.core.AbstractCELA; import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.AbstractReasonerComponent; import org.dllearner.core.ComponentInitException; -import org.dllearner.core.ComponentManager; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Individual; import org.dllearner.learningproblems.PosNegLP; @@ -101,7 +102,10 @@ */ public class NestedCrossValidation { - private File outputFile = new File("log/nested-cv.log"); + + private static final Logger logger = Logger.getLogger(NestedCrossValidation.class.getName()); + + private static File logFile = new File("log/nested-cv.log"); DecimalFormat df = new DecimalFormat(); // overall statistics @@ -126,7 +130,7 @@ OptionParser parser = new OptionParser(); parser.acceptsAll(asList("h", "?", "help"), "Show help."); - parser.acceptsAll(asList("c", "conf"), "The comma separated list of conffiles to be used.").withRequiredArg().describedAs("file1, file2, ..."); + parser.acceptsAll(asList("c", "conf"), "The comma separated list of config files to be used.").withRequiredArg().describedAs("file1, file2, ..."); parser.acceptsAll(asList( "v", "verbose"), "Be more verbose."); parser.acceptsAll(asList( "o", "outerfolds"), "Number of outer folds.").withRequiredArg().ofType(Integer.class).describedAs("#folds"); parser.acceptsAll(asList( "i", "innerfolds"), "Number of inner folds.").withRequiredArg().ofType(Integer.class).describedAs("#folds"); @@ -167,13 +171,18 @@ // create logger (a simple logger which outputs // its messages to the console) - SimpleLayout layout = new SimpleLayout(); + Layout layout = new PatternLayout("%m%n"); ConsoleAppender consoleAppender = new ConsoleAppender(layout); Logger logger = Logger.getRootLogger(); logger.removeAllAppenders(); logger.addAppender(consoleAppender); - logger.setLevel(Level.WARN); + logger.setLevel(Level.ERROR); Logger.getLogger("org.dllearner.algorithms").setLevel(Level.INFO); + Logger.getLogger("org.dllearner.scripts").setLevel(Level.INFO); + + FileAppender fileAppender = new FileAppender(layout, logFile.getPath(), false); + logger.addAppender(fileAppender); + fileAppender.setThreshold(Level.INFO); // logger.addAppender(new FileAppender(layout, "nested-cv.log", false)); // disable OWL API info output java.util.logging.Logger.getLogger("").setLevel(java.util.logging.Level.WARNING); @@ -188,15 +197,6 @@ } } - - private void print(String s){ - try { - Files.append(s + "\n", outputFile , Charsets.UTF_8); - } catch (IOException e) { - e.printStackTrace(); - } - System.out.println(s); - } public NestedCrossValidation(File confFile, int outerFolds, int innerFolds, String parameter, double startValue, double endValue, double stepsize, boolean verbose) throws ComponentInitException, ParseException, org.dllearner.confparser.ParseException, IOException { this(Lists.newArrayList(confFile), outerFolds, innerFolds, parameter, startValue, endValue, stepsize, verbose); @@ -205,37 +205,38 @@ public NestedCrossValidation(List<File> confFiles, int outerFolds, int innerFolds, String parameter, double startValue, double endValue, double stepsize, boolean verbose) throws ComponentInitException, ParseException, org.dllearner.confparser.ParseException, IOException { for (File confFile : confFiles) { - print(confFile.getPath()); + logger.info("++++++++++++++++++++++++++++++++++++++++++++++"); + logger.info(confFile.getPath()); + logger.info("++++++++++++++++++++++++++++++++++++++++++++++"); validate(confFile, outerFolds, innerFolds, parameter, startValue, endValue, stepsize, verbose); } - print("********************************************"); - print("********************************************"); - print("********************************************"); + logger.info("############################################"); + logger.info("############################################"); // decide for the best parameter - print(" Summary over parameter values:"); + logger.info(" Overall summary over parameter values:"); double bestPara = startValue; double bestValue = Double.NEGATIVE_INFINITY; for (Entry<Double, Stat> entry : globalParaStats.entrySet()) { double para = entry.getKey(); Stat stat = entry.getValue(); - print(" value " + para + ": " + stat.prettyPrint("%")); + logger.info(" value " + para + ": " + stat.prettyPrint("%")); if (stat.getMean() > bestValue) { bestPara = para; bestValue = stat.getMean(); } } - print(" selected " + bestPara + " as best parameter value (criterion value " + df.format(bestValue) + "%)"); + logger.info(" selected " + bestPara + " as best parameter value (criterion value " + df.format(bestValue) + "%)"); // overall statistics - print("*******************"); - print("* Overall Results *"); - print("*******************"); - print("accuracy: " + globalAcc.prettyPrint("%")); - print("F measure: " + globalF.prettyPrint("%")); - print("precision: " + globalPrecision.prettyPrint("%")); - print("recall: " + globalRecall.prettyPrint("%")); + logger.info("*******************"); + logger.info("* Overall Results *"); + logger.info("*******************"); + logger.info("accuracy: " + globalAcc.prettyPrint("%")); + logger.info("F measure: " + globalF.prettyPrint("%")); + logger.info("precision: " + globalPrecision.prettyPrint("%")); + logger.info("recall: " + globalRecall.prettyPrint("%")); } @@ -255,6 +256,7 @@ Collections.shuffle(negExamples, new Random(2)); AbstractReasonerComponent rc = start.getReasonerComponent(); + rc.init(); String baseURI = rc.getBaseURI(); List<TrainTestList> posLists = getFolds(posExamples, outerFolds); @@ -268,7 +270,7 @@ for(int currOuterFold=0; currOuterFold<outerFolds; currOuterFold++) { - print("Outer fold " + currOuterFold); + logger.info("Outer fold " + currOuterFold); TrainTestList posList = posLists.get(currOuterFold); TrainTestList negList = negLists.get(currOuterFold); @@ -277,7 +279,7 @@ for(double currParaValue=startValue; currParaValue<=endValue; currParaValue+=stepsize) { - print(" Parameter value " + currParaValue + ":"); + logger.info(" Parameter value " + currParaValue + ":"); // split train folds again (computation of inner folds for each parameter // value is redundant, but not a big problem) List<Individual> trainPosList = posList.getTrainList(); @@ -291,7 +293,7 @@ for(int currInnerFold=0; currInnerFold<innerFolds; currInnerFold++) { - print(" Inner fold " + currInnerFold + ":"); + logger.info(" Inner fold " + currInnerFold + ":"); // get positive & negative examples for training run Set<Individual> posEx = new TreeSet<Individual>(innerPosLists.get(currInnerFold).getTrainList()); Set<Individual> negEx = new TreeSet<Individual>(innerNegLists.get(currInnerFold).getTrainList()); @@ -320,13 +322,12 @@ TreeSet<Individual> posTest = new TreeSet<Individual>(innerPosLists.get(currInnerFold).getTestList()); TreeSet<Individual> negTest = new TreeSet<Individual>(innerNegLists.get(currInnerFold).getTestList()); - AbstractReasonerComponent rs = start.getReasonerComponent(); // true positive - Set<Individual> posCorrect = rs.hasType(concept, posTest); + Set<Individual> posCorrect = rc.hasType(concept, posTest); // false negative Set<Individual> posError = Helper.difference(posTest, posCorrect); // false positive - Set<Individual> negError = rs.hasType(concept, negTest); + Set<Individual> negError = rc.hasType(concept, negTest); // true negative Set<Individual> negCorrect = Helper.difference(negTest, negError); @@ -340,19 +341,16 @@ paraCriterionStat.addNumber(accuracy); - print(" hypothesis: " + concept.toManchesterSyntaxString(baseURI, null)); - print(" accuracy: " + df.format(accuracy) + "%"); - print(" precision: " + df.format(precision) + "%"); - print(" recall: " + df.format(recall) + "%"); - print(" F measure: " + df.format(fmeasure) + "%"); + logger.info(" hypothesis: " + concept.toManchesterSyntaxString(baseURI, null)); + logger.info(" accuracy: " + df.format(accuracy) + "%"); + logger.info(" precision: " + df.format(precision) + "%"); + logger.info(" recall: " + df.format(recall) + "%"); + logger.info(" F measure: " + df.format(fmeasure) + "%"); if(verbose) { - print(" false positives (neg. examples classified as pos.): " + formatIndividualSet(posError, baseURI)); - print(" false negatives (pos. examples classified as neg.): " + formatIndividualSet(negError, baseURI)); + logger.info(" false positives (neg. examples classified as pos.): " + formatIndividualSet(posError, baseURI)); + logger.info(" false negatives (pos. examples classified as neg.): " + formatIndividualSet(negError, baseURI)); } - - // free memory - rs.releaseKB(); } paraStats.put(currParaValue, paraCriterionStat); @@ -365,20 +363,20 @@ } // decide for the best parameter - print(" Summary over parameter values:"); + logger.info(" Summary over parameter values:"); double bestPara = startValue; double bestValue = Double.NEGATIVE_INFINITY; for(Entry<Double,Stat> entry : paraStats.entrySet()) { double para = entry.getKey(); Stat stat = entry.getValue(); - print(" value " + para + ": " + stat.prettyPrint("%")); + logger.info(" value " + para + ": " + stat.prettyPrint("%")); if(stat.getMean() > bestValue) { bestPara = para; bestValue = stat.getMean(); } } - print(" selected " + bestPara + " as best parameter value (criterion value " + df.format(bestValue) + "%)"); - print(" Learn on Outer fold:"); + logger.info(" selected " + bestPara + " as best parameter value (criterion value " + df.format(bestValue) + "%)"); + logger.info(" Learn on Outer fold:"); // start a learning process with this parameter and evaluate it on the outer fold start = new CLI(confFile); @@ -418,15 +416,15 @@ double recall = 100 * (double) posCorrect.size() / (posCorrect.size() + posError.size()); double fmeasure = 2 * (precision * recall) / (precision + recall); - print(" hypothesis: " + concept.toManchesterSyntaxString(baseURI, null)); - print(" accuracy: " + df.format(accuracy) + "%"); - print(" precision: " + df.format(precision) + "%"); - print(" recall: " + df.format(recall) + "%"); - print(" F measure: " + df.format(fmeasure) + "%"); + logger.info(" hypothesis: " + concept.toManchesterSyntaxString(baseURI, null)); + logger.info(" accuracy: " + df.format(accuracy) + "%"); + logger.info(" precision: " + df.format(precision) + "%"); + logger.info(" recall: " + df.format(recall) + "%"); + logger.info(" F measure: " + df.format(fmeasure) + "%"); if(verbose) { - print(" false positives (neg. examples classified as pos.): " + formatIndividualSet(posError, baseURI)); - print(" false negatives (pos. examples classified as neg.): " + formatIndividualSet(negError, baseURI)); + logger.info(" false positives (neg. examples classified as pos.): " + formatIndividualSet(posError, baseURI)); + logger.info(" false negatives (pos. examples classified as neg.): " + formatIndividualSet(negError, baseURI)); } // update overall statistics @@ -445,13 +443,13 @@ globalRecall.add(recallOverall); // overall statistics - print("*******************"); - print("* Overall Results *"); - print("*******************"); - print("accuracy: " + accOverall.prettyPrint("%")); - print("F measure: " + fOverall.prettyPrint("%")); - print("precision: " + precisionOverall.prettyPrint("%")); - print("recall: " + recallOverall.prettyPrint("%")); + logger.info("*******************"); + logger.info("* Overall Results *"); + logger.info("*******************"); + logger.info("accuracy: " + accOverall.prettyPrint("%")); + logger.info("F measure: " + fOverall.prettyPrint("%")); + logger.info("precision: " + precisionOverall.prettyPrint("%")); + logger.info("recall: " + recallOverall.prettyPrint("%")); } // convenience methods, which takes a list of examples and divides them in Modified: trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/QTLEvaluation.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/QTLEvaluation.java 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/QTLEvaluation.java 2014-05-09 18:13:04 UTC (rev 4267) @@ -6,43 +6,39 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Random; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; -import org.dllearner.algorithms.qtl.QTL2; import org.dllearner.algorithms.qtl.QTL2Disjunctive; import org.dllearner.algorithms.qtl.QueryTreeFactory; import org.dllearner.algorithms.qtl.datastructures.QueryTree; import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl; import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryImpl; +import org.dllearner.cli.CLI; import org.dllearner.cli.CrossValidation; -import org.dllearner.cli.SPARQLCrossValidation; +import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblemUnsupportedException; import org.dllearner.core.owl.Individual; -import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.OWLAPIOntology; import org.dllearner.learningproblems.PosNegLP; import org.dllearner.learningproblems.PosNegLPStandard; import org.dllearner.reasoning.FastInstanceChecker; -import org.dllearner.reasoning.SPARQLReasoner; +import org.dllearner.scripts.NestedCrossValidation; import org.semanticweb.owlapi.apibinding.OWLManager; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyChange; import org.semanticweb.owlapi.model.OWLOntologyCreationException; import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.reasoner.OWLReasoner; -import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; -import com.clarkparsia.pellet.owlapiv3.PelletReasonerFactory; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; @@ -52,363 +48,20 @@ */ public class QTLEvaluation { - int nrOfFolds = 10; + int nrOfFolds = 3; private int nrOfPosExamples = 300; private int nrOfNegExamples = 300; - List<String> posExamples = Lists.newArrayList( - "http://dl-learner.org/carcinogenesis#d1", - "http://dl-learner.org/carcinogenesis#d10", - "http://dl-learner.org/carcinogenesis#d101", - "http://dl-learner.org/carcinogenesis#d102", - "http://dl-learner.org/carcinogenesis#d103", - "http://dl-learner.org/carcinogenesis#d106", - "http://dl-learner.org/carcinogenesis#d107", - "http://dl-learner.org/carcinogenesis#d108", - "http://dl-learner.org/carcinogenesis#d11", - "http://dl-learner.org/carcinogenesis#d12", - "http://dl-learner.org/carcinogenesis#d13", - "http://dl-learner.org/carcinogenesis#d134", - "http://dl-learner.org/carcinogenesis#d135", - "http://dl-learner.org/carcinogenesis#d136", - "http://dl-learner.org/carcinogenesis#d138", - "http://dl-learner.org/carcinogenesis#d140", - "http://dl-learner.org/carcinogenesis#d141", - "http://dl-learner.org/carcinogenesis#d144", - "http://dl-learner.org/carcinogenesis#d145", - "http://dl-learner.org/carcinogenesis#d146", - "http://dl-learner.org/carcinogenesis#d147", - "http://dl-learner.org/carcinogenesis#d15", - "http://dl-learner.org/carcinogenesis#d17", - "http://dl-learner.org/carcinogenesis#d19", - "http://dl-learner.org/carcinogenesis#d192", - "http://dl-learner.org/carcinogenesis#d193", - "http://dl-learner.org/carcinogenesis#d195", - "http://dl-learner.org/carcinogenesis#d196", - "http://dl-learner.org/carcinogenesis#d197", - "http://dl-learner.org/carcinogenesis#d198", - "http://dl-learner.org/carcinogenesis#d199", - "http://dl-learner.org/carcinogenesis#d2", - "http://dl-learner.org/carcinogenesis#d20", - "http://dl-learner.org/carcinogenesis#d200", - "http://dl-learner.org/carcinogenesis#d201", - "http://dl-learner.org/carcinogenesis#d202", - "http://dl-learner.org/carcinogenesis#d203", - "http://dl-learner.org/carcinogenesis#d204", - "http://dl-learner.org/carcinogenesis#d205", - "http://dl-learner.org/carcinogenesis#d21", - "http://dl-learner.org/carcinogenesis#d22", - "http://dl-learner.org/carcinogenesis#d226", - "http://dl-learner.org/carcinogenesis#d227", - "http://dl-learner.org/carcinogenesis#d228", - "http://dl-learner.org/carcinogenesis#d229", - "http://dl-learner.org/carcinogenesis#d231", - "http://dl-learner.org/carcinogenesis#d232", - "http://dl-learner.org/carcinogenesis#d234", - "http://dl-learner.org/carcinogenesis#d236", - "http://dl-learner.org/carcinogenesis#d239", - "http://dl-learner.org/carcinogenesis#d23_2", - "http://dl-learner.org/carcinogenesis#d242", - "http://dl-learner.org/carcinogenesis#d245", - "http://dl-learner.org/carcinogenesis#d247", - "http://dl-learner.org/carcinogenesis#d249", - "http://dl-learner.org/carcinogenesis#d25", - "http://dl-learner.org/carcinogenesis#d252", - "http://dl-learner.org/carcinogenesis#d253", - "http://dl-learner.org/carcinogenesis#d254", - "http://dl-learner.org/carcinogenesis#d255", - "http://dl-learner.org/carcinogenesis#d26", - "http://dl-learner.org/carcinogenesis#d272", - "http://dl-learner.org/carcinogenesis#d275", - "http://dl-learner.org/carcinogenesis#d277", - "http://dl-learner.org/carcinogenesis#d279", - "http://dl-learner.org/carcinogenesis#d28", - "http://dl-learner.org/carcinogenesis#d281", - "http://dl-learner.org/carcinogenesis#d283", - "http://dl-learner.org/carcinogenesis#d284", - "http://dl-learner.org/carcinogenesis#d288", - "http://dl-learner.org/carcinogenesis#d29", - "http://dl-learner.org/carcinogenesis#d290", - "http://dl-learner.org/carcinogenesis#d291", - "http://dl-learner.org/carcinogenesis#d292", - "http://dl-learner.org/carcinogenesis#d30", - "http://dl-learner.org/carcinogenesis#d31", - "http://dl-learner.org/carcinogenesis#d32", - "http://dl-learner.org/carcinogenesis#d33", - "http://dl-learner.org/carcinogenesis#d34", - "http://dl-learner.org/carcinogenesis#d35", - "http://dl-learner.org/carcinogenesis#d36", - "http://dl-learner.org/carcinogenesis#d37", - "http://dl-learner.org/carcinogenesis#d38", - "http://dl-learner.org/carcinogenesis#d42", - "http://dl-learner.org/carcinogenesis#d43", - "http://dl-learner.org/carcinogenesis#d44", - "http://dl-learner.org/carcinogenesis#d45", - "http://dl-learner.org/carcinogenesis#d46", - "http://dl-learner.org/carcinogenesis#d47", - "http://dl-learner.org/carcinogenesis#d48", - "http://dl-learner.org/carcinogenesis#d49", - "http://dl-learner.org/carcinogenesis#d5", - "http://dl-learner.org/carcinogenesis#d51", - "http://dl-learner.org/carcinogenesis#d52", - "http://dl-learner.org/carcinogenesis#d53", - "http://dl-learner.org/carcinogenesis#d55", - "http://dl-learner.org/carcinogenesis#d58", - "http://dl-learner.org/carcinogenesis#d6", - "http://dl-learner.org/carcinogenesis#d7", - "http://dl-learner.org/carcinogenesis#d84", - "http://dl-learner.org/carcinogenesis#d85_2", - "http://dl-learner.org/carcinogenesis#d86", - "http://dl-learner.org/carcinogenesis#d87", - "http://dl-learner.org/carcinogenesis#d88", - "http://dl-learner.org/carcinogenesis#d89", - "http://dl-learner.org/carcinogenesis#d9", - "http://dl-learner.org/carcinogenesis#d91", - "http://dl-learner.org/carcinogenesis#d92", - "http://dl-learner.org/carcinogenesis#d93", - "http://dl-learner.org/carcinogenesis#d95", - "http://dl-learner.org/carcinogenesis#d96", - "http://dl-learner.org/carcinogenesis#d98", - "http://dl-learner.org/carcinogenesis#d99", - "http://dl-learner.org/carcinogenesis#d100", - "http://dl-learner.org/carcinogenesis#d104", - "http://dl-learner.org/carcinogenesis#d105", - "http://dl-learner.org/carcinogenesis#d109", - "http://dl-learner.org/carcinogenesis#d137", - "http://dl-learner.org/carcinogenesis#d139", - "http://dl-learner.org/carcinogenesis#d14", - "http://dl-learner.org/carcinogenesis#d142", - "http://dl-learner.org/carcinogenesis#d143", - "http://dl-learner.org/carcinogenesis#d148", - "http://dl-learner.org/carcinogenesis#d16", - "http://dl-learner.org/carcinogenesis#d18", - "http://dl-learner.org/carcinogenesis#d191", - "http://dl-learner.org/carcinogenesis#d206", - "http://dl-learner.org/carcinogenesis#d230", - "http://dl-learner.org/carcinogenesis#d233", - "http://dl-learner.org/carcinogenesis#d235", - "http://dl-learner.org/carcinogenesis#d237", - "http://dl-learner.org/carcinogenesis#d238", - "http://dl-learner.org/carcinogenesis#d23_1", - "http://dl-learner.org/carcinogenesis#d24", - "http://dl-learner.org/carcinogenesis#d240", - "http://dl-learner.org/carcinogenesis#d241", - "http://dl-learner.org/carcinogenesis#d243", - "http://dl-learner.org/carcinogenesis#d244", - "http://dl-learner.org/carcinogenesis#d246", - "http://dl-learner.org/carcinogenesis#d248", - "http://dl-learner.org/carcinogenesis#d250", - "http://dl-learner.org/carcinogenesis#d251", - "http://dl-learner.org/carcinogenesis#d27", - "http://dl-learner.org/carcinogenesis#d273", - "http://dl-learner.org/carcinogenesis#d274", - "http://dl-learner.org/carcinogenesis#d278", - "http://dl-learner.org/carcinogenesis#d286", - "http://dl-learner.org/carcinogenesis#d289", - "http://dl-learner.org/carcinogenesis#d3", - "http://dl-learner.org/carcinogenesis#d39", - "http://dl-learner.org/carcinogenesis#d4", - "http://dl-learner.org/carcinogenesis#d40", - "http://dl-learner.org/carcinogenesis#d41", - "http://dl-learner.org/carcinogenesis#d50", - "http://dl-learner.org/carcinogenesis#d54", - "http://dl-learner.org/carcinogenesis#d56", - "http://dl-learner.org/carcinogenesis#d57", - "http://dl-learner.org/carcinogenesis#d8", - "http://dl-learner.org/carcinogenesis#d85_1", - "http://dl-learner.org/carcinogenesis#d90", - "http://dl-learner.org/carcinogenesis#d94", - "http://dl-learner.org/carcinogenesis#d97", - "http://dl-learner.org/carcinogenesis#d296", - "http://dl-learner.org/carcinogenesis#d305", - "http://dl-learner.org/carcinogenesis#d306", - "http://dl-learner.org/carcinogenesis#d307", - "http://dl-learner.org/carcinogenesis#d308", - "http://dl-learner.org/carcinogenesis#d311", - "http://dl-learner.org/carcinogenesis#d314", - "http://dl-learner.org/carcinogenesis#d315", - "http://dl-learner.org/carcinogenesis#d316", - "http://dl-learner.org/carcinogenesis#d320", - "http://dl-learner.org/carcinogenesis#d322", - "http://dl-learner.org/carcinogenesis#d323", - "http://dl-learner.org/carcinogenesis#d325", - "http://dl-learner.org/carcinogenesis#d329", - "http://dl-learner.org/carcinogenesis#d330", - "http://dl-learner.org/carcinogenesis#d331", - "http://dl-learner.org/carcinogenesis#d332", - "http://dl-learner.org/carcinogenesis#d333", - "http://dl-learner.org/carcinogenesis#d336", - "http://dl-learner.org/carcinogenesis#d337" - ); + CLI cli = new CLI(new File("../test/qtl/carcinogenesis/train.conf")); - List<String> negExamples = Lists.newArrayList( - "http://dl-learner.org/carcinogenesis#d110", - "http://dl-learner.org/carcinogenesis#d111", - "http://dl-learner.org/carcinogenesis#d114", - "http://dl-learner.org/carcinogenesis#d116", - "http://dl-learner.org/carcinogenesis#d117", - "http://dl-learner.org/carcinogenesis#d119", - "http://dl-learner.org/carcinogenesis#d121", - "http://dl-learner.org/carcinogenesis#d123", - "http://dl-learner.org/carcinogenesis#d124", - "http://dl-learner.org/carcinogenesis#d125", - "http://dl-learner.org/carcinogenesis#d127", - "http://dl-learner.org/carcinogenesis#d128", - "http://dl-learner.org/carcinogenesis#d130", - "http://dl-learner.org/carcinogenesis#d133", - "http://dl-learner.org/carcinogenesis#d150", - "http://dl-learner.org/carcinogenesis#d151", - "http://dl-learner.org/carcinogenesis#d154", - "http://dl-learner.org/carcinogenesis#d155", - "http://dl-learner.org/carcinogenesis#d156", - "http://dl-learner.org/carcinogenesis#d159", - "http://dl-learner.org/carcinogenesis#d160", - "http://dl-learner.org/carcinogenesis#d161", - "http://dl-learner.org/carcinogenesis#d162", - "http://dl-learner.org/carcinogenesis#d163", - "http://dl-learner.org/carcinogenesis#d164", - "http://dl-learner.org/carcinogenesis#d165", - "http://dl-learner.org/carcinogenesis#d166", - "http://dl-learner.org/carcinogenesis#d169", - "http://dl-learner.org/carcinogenesis#d170", - "http://dl-learner.org/carcinogenesis#d171", - "http://dl-learner.org/carcinogenesis#d172", - "http://dl-learner.org/carcinogenesis#d173", - "http://dl-learner.org/carcinogenesis#d174", - "http://dl-learner.org/carcinogenesis#d178", - "http://dl-learner.org/carcinogenesis#d179", - "http://dl-learner.org/carcinogenesis#d180", - "http://dl-learner.org/carcinogenesis#d181", - "http://dl-learner.org/carcinogenesis#d183", - "http://dl-learner.org/carcinogenesis#d184", - "http://dl-learner.org/carcinogenesis#d185", - "http://dl-learner.org/carcinogenesis#d186", - "http://dl-learner.org/carcinogenesis#d188", - "http://dl-learner.org/carcinogenesis#d190", - "http://dl-learner.org/carcinogenesis#d194", - "http://dl-learner.org/carcinogenesis#d207", - "http://dl-learner.org/carcinogenesis#d208_1", - "http://dl-learner.org/carcinogenesis#d209", - "http://dl-learner.org/carcinogenesis#d210", - "http://dl-learner.org/carcinogenesis#d211", - "http://dl-learner.org/carcinogenesis#d212", - "http://dl-learner.org/carcinogenesis#d213", - "http://dl-learner.org/carcinogenesis#d214", - "http://dl-learner.org/carcinogenesis#d215", - "http://dl-learner.org/carcinogenesis#d217", - "http://dl-learner.org/carcinogenesis#d218", - "http://dl-learner.org/carcinogenesis#d219", - "http://dl-learner.org/carcinogenesis#d220", - "http://dl-learner.org/carcinogenesis#d224", - "http://dl-learner.org/carcinogenesis#d256", - "http://dl-learner.org/carcinogenesis#d257", - "http://dl-learner.org/carcinogenesis#d258", - "http://dl-learner.org/carcinogenesis#d261", - "http://dl-learner.org/carcinogenesis#d262", - "http://dl-learner.org/carcinogenesis#d263", - "http://dl-learner.org/carcinogenesis#d264", - "http://dl-learner.org/carcinogenesis#d265", - "http://dl-learner.org/carcinogenesis#d266", - "http://dl-learner.org/carcinogenesis#d267", - "http://dl-learner.org/carcinogenesis#d269", - "http://dl-learner.org/carcinogenesis#d271", - "http://dl-learner.org/carcinogenesis#d276", - "http://dl-learner.org/carcinogenesis#d280", - "http://dl-learner.org/carcinogenesis#d285", - "http://dl-learner.org/carcinogenesis#d287", - "http://dl-learner.org/carcinogenesis#d293", - "http://dl-learner.org/carcinogenesis#d294", - "http://dl-learner.org/carcinogenesis#d59", - "http://dl-learner.org/carcinogenesis#d60", - "http://dl-learner.org/carcinogenesis#d61", - "http://dl-learner.org/carcinogenesis#d63", - "http://dl-learner.org/carcinogenesis#d64", - "http://dl-learner.org/carcinogenesis#d65", - "http://dl-learner.org/carcinogenesis#d69", - "http://dl-learner.org/carcinogenesis#d70", - "http://dl-learner.org/carcinogenesis#d71", - "http://dl-learner.org/carcinogenesis#d72", - "http://dl-learner.org/carcinogenesis#d73", - "http://dl-learner.org/carcinogenesis#d74", - "http://dl-learner.org/carcinogenesis#d75", - "http://dl-learner.org/carcinogenesis#d76", - "http://dl-learner.org/carcinogenesis#d77", - "http://dl-learner.org/carcinogenesis#d78", - "http://dl-learner.org/carcinogenesis#d79", - "http://dl-learner.org/carcinogenesis#d80", - "http://dl-learner.org/carcinogenesis#d81", - "http://dl-learner.org/carcinogenesis#d82", - "http://dl-learner.org/carcinogenesis#d112", - "http://dl-learner.org/carcinogenesis#d113", - "http://dl-learner.org/carcinogenesis#d115", - "http://dl-learner.org/carcinogenesis#d118", - "http://dl-learner.org/carcinogenesis#d120", - "http://dl-learner.org/carcinogenesis#d122", - "http://dl-learner.org/carcinogenesis#d126", - "http://dl-learner.org/carcinogenesis#d129", - "http://dl-learner.org/carcinogenesis#d131", - "http://dl-learner.org/carcinogenesis#d132", - "http://dl-learner.org/carcinogenesis#d149", - "http://dl-learner.org/carcinogenesis#d152", - "http://dl-learner.org/carcinogenesis#d153", - "http://dl-learner.org/carcinogenesis#d157", - "http://dl-learner.org/carcinogenesis#d158", - "http://dl-learner.org/carcinogenesis#d167", - "http://dl-learner.org/carcinogenesis#d168", - "http://dl-learner.org/carcinogenesis#d175", - "http://dl-learner.org/carcinogenesis#d176", - "http://dl-learner.org/carcinogenesis#d177", - "http://dl-learner.org/carcinogenesis#d182", - "http://dl-learner.org/carcinogenesis#d187", - "http://dl-learner.org/carcinogenesis#d189", - "http://dl-learner.org/carcinogenesis#d208_2", - "http://dl-learner.org/carcinogenesis#d216", - "http://dl-learner.org/carcinogenesis#d221", - "http://dl-learner.org/carcinogenesis#d222", - "http://dl-learner.org/carcinogenesis#d223", - "http://dl-learner.org/carcinogenesis#d225", - "http://dl-learner.org/carcinogenesis#d259", - "http://dl-learner.org/carcinogenesis#d260", - "http://dl-learner.org/carcinogenesis#d268", - "http://dl-learner.org/carcinogenesis#d270", - "http://dl-learner.org/carcinogenesis#d282", - "http://dl-learner.org/carcinogenesis#d295", - "http://dl-learner.org/carcinogenesis#d62", - "http://dl-learner.org/carcinogenesis#d66", - "http://dl-learner.org/carcinogenesis#d67", - "http://dl-learner.org/carcinogenesis#d68", - "http://dl-learner.org/carcinogenesis#d83", - "http://dl-learner.org/carcinogenesis#d297", - "http://dl-learner.org/carcinogenesis#d298", - "http://dl-learner.org/carcinogenesis#d299", - "http://dl-learner.org/carcinogenesis#d300", - "http://dl-learner.org/carcinogenesis#d302", - "http://dl-learner.org/carcinogenesis#d303", - "http://dl-learner.org/carcinogenesis#d304", - "http://dl-learner.org/carcinogenesis#d309", - "http://dl-learner.org/carcinogenesis#d312", - "http://dl-learner.org/carcinogenesis#d313", - "http://dl-learner.org/carcinogenesis#d317", - "http://dl-learner.org/carcinogenesis#d318", - "http://dl-learner.org/carcinogenesis#d319", - "http://dl-learner.org/carcinogenesis#d324", - "http://dl-learner.org/carcinogenesis#d326", - "http://dl-learner.org/carcinogenesis#d327", - "http://dl-learner.org/carcinogenesis#d328", - "http://dl-learner.org/carcinogenesis#d334", - "http://dl-learner.org/carcinogenesis#d335" - ); - private Model model; private OWLOntology ontology; private QueryTreeFactory<String> queryTreeFactory; - private List<QueryTree<String>> posExampleTrees; - private List<QueryTree<String>> negExampleTrees; private PosNegLP lp; - public QTLEvaluation() throws ComponentInitException { + public QTLEvaluation() throws ComponentInitException, IOException { queryTreeFactory = new QueryTreeFactoryImpl(); queryTreeFactory.setMaxDepth(3); @@ -434,59 +87,29 @@ } } - private void loadExamples() throws ComponentInitException{ + private void loadExamples() throws ComponentInitException, IOException{ - Collections.shuffle(posExamples, new Random(1)); - Collections.shuffle(negExamples, new Random(2)); + cli.init(); + lp = (PosNegLP) cli.getLearningProblem(); + + // get examples and shuffle them + List<Individual> posExamples = new LinkedList<Individual>(((PosNegLP)lp).getPositiveExamples()); + Collections.shuffle(posExamples, new Random(1)); + List<Individual> negExamples = new LinkedList<Individual>(((PosNegLP)lp).getNegativeExamples()); + Collections.shuffle(negExamples, new Random(2)); posExamples = posExamples.subList(0, Math.min(posExamples.size(), nrOfPosExamples)); negExamples = negExamples.subList(0, Math.min(negExamples.size(), nrOfNegExamples)); -// posExamples.clear(); -// String string = "http://dl-learner.org/carcinogenesis#d101, http://dl-learner.org/carcinogenesis#d103, http://dl-learner.org/carcinogenesis#d107, http://dl-learner.org/carcinogenesis#d108, http://dl-learner.org/carcinogenesis#d135, http://dl-learner.org/carcinogenesis#d139, http://dl-learner.org/carcinogenesis#d14, http://dl-learner.org/carcinogenesis#d141, http://dl-learner.org/carcinogenesis#d143, http://dl-learner.org/carcinogenesis#d147, http://dl-learner.org/carcinogenesis#d17, http://dl-learner.org/carcinogenesis#d19, http://dl-learner.org/carcinogenesis#d193, http://dl-learner.org/carcinogenesis#d198, http://dl-learner.org/carcinogenesis#d228, http://dl-learner.org/carcinogenesis#d236, http://dl-learner.org/carcinogenesis#d242, http://dl-learner.org/carcinogenesis#d244, http://dl-learner.org/carcinogenesis#d273, http://dl-learner.org/carcinogenesis#d275, http://dl-learner.org/carcinogenesis#d28, http://dl-learner.org/carcinogenesis#d283, http://dl-learner.org/carcinogenesis#d286, http://dl-learner.org/carcinogenesis#d291, http://dl-learner.org/carcinogenesis#d292, http://dl-learner.org/carcinogenesis#d307, http://dl-learner.org/carcinogenesis#d31, http://dl-learner.org/carcinogenesis#d325, http://dl-learner.org/carcinogenesis#d33, http://dl-learner.org/carcinogenesis#d333, http://dl-learner.org/carcinogenesis#d34, http://dl-learner.org/carcinogenesis#d36, http://dl-learner.org/carcinogenesis#d38, http://dl-learner.org/carcinogenesis#d4, http://dl-learner.org/carcinogenesis#d40, http://dl-learner.org/carcinogenesis#d44, http://dl-learner.org/carcinogenesis#d51, http://dl-learner.org/carcinogenesis#d85_2, http://dl-learner.org/carcinogenesis#d98, http://dl-learner.org/carcinogenesis#d99"; -// String[] split = string.split(","); -// for (String s : split) { -// posExamples.add(s.trim()); -// } -// negExamples.clear(); -// string = "http://dl-learner.org/carcinogenesis#d112, http://dl-learner.org/carcinogenesis#d116, http://dl-learner.org/carcinogenesis#d117, http://dl-learner.org/carcinogenesis#d119, http://dl-learner.org/carcinogenesis#d157, http://dl-learner.org/carcinogenesis#d160, http://dl-learner.org/carcinogenesis#d161, http://dl-learner.org/carcinogenesis#d162, http://dl-learner.org/carcinogenesis#d163, http://dl-learner.org/carcinogenesis#d167, http://dl-learner.org/carcinogenesis#d169, http://dl-learner.org/carcinogenesis#d175, http://dl-learner.org/carcinogenesis#d177, http://dl-learner.org/carcinogenesis#d184, http://dl-learner.org/carcinogenesis#d194, http://dl-learner.org/carcinogenesis#d208_2, http://dl-learner.org/carcinogenesis#d209, http://dl-learner.org/carcinogenesis#d217, http://dl-learner.org/carcinogenesis#d256, http://dl-learner.org/carcinogenesis#d257, http://dl-learner.org/carcinogenesis#d260, http://dl-learner.org/carcinogenesis#d271, http://dl-learner.org/carcinogenesis#d276, http://dl-learner.org/carcinogenesis#d282, http://dl-learner.org/carcinogenesis#d287, http://dl-learner.org/carcinogenesis#d294, http://dl-learner.org/carcinogenesis#d298, http://dl-learner.org/carcinogenesis#d300, http://dl-learner.org/carcinogenesis#d309, http://dl-learner.org/carcinogenesis#d319, http://dl-learner.org/carcinogenesis#d326, http://dl-learner.org/carcinogenesis#d328, http://dl-learner.org/carcinogenesis#d334, http://dl-learner.org/carcinogenesis#d60, http://dl-learner.org/carcinogenesis#d61, http://dl-learner.org/carcinogenesis#d66, http://dl-learner.org/carcinogenesis#d75, http://dl-learner.org/carcinogenesis#d79, http://dl-learner.org/carcinogenesis#d80, http://dl-learner.org/carcinogenesis#d83"; -// split = string.split(","); -// for (String s : split) { -// negExamples.add(s.trim()); -// } + Set<Individual> posSet = new TreeSet<Individual>( + NestedCrossValidation.getFolds(NestedCrossValidation.getFolds(posExamples, 3).get(0).getTrainList(), 3).get(0).getTrainList()); + Set<Individual> negSet = new TreeSet<Individual>( + NestedCrossValidation.getFolds(NestedCrossValidation.getFolds(negExamples, 3).get(0).getTrainList(), 3).get(0).getTrainList()); - posExampleTrees = new ArrayList<QueryTree<String>>(); - for (String ex : posExamples) { - QueryTreeImpl<String> tree = queryTreeFactory.getQueryTree(ex, model); - posExampleTrees.add(tree); - } - negExampleTrees = new ArrayList<QueryTree<String>>(); - for (String ex : negExamples) { - QueryTreeImpl<String> tree = queryTreeFactory.getQueryTree(ex, model); - negExampleTrees.add(tree); - } - int cnt = 1; - for(QueryTree<String> tree : posExampleTrees){ -// System.out.println("TREE " + cnt); -// tree.dump(); -// -// System.out.println("-----------------------------"); - cnt++; -// System.out.println(((QueryTreeImpl<String>)tree).toQuery()); - } - - SortedSet<Individual> pos = new TreeSet<Individual>(); - for (String ex : posExamples) { - pos.add(new Individual(ex)); - } - SortedSet<Individual> neg = new TreeSet<Individual>(); - for (String ex : negExamples) { - neg.add(new Individual(ex)); - } - lp = new PosNegLPStandard(); - lp.setPositiveExamples(pos); - lp.setNegativeExamples(neg); + this.lp = new PosNegLPStandard(); + this.lp.setPositiveExamples(posSet); + this.lp.setNegativeExamples(negSet); } public void run(boolean multiThreaded) throws ComponentInitException, LearningProblemUnsupportedException{ @@ -496,13 +119,14 @@ lp.setReasoner(reasoner); lp.init(); QTL2Disjunctive la = new QTL2Disjunctive(lp, reasoner); -// la.init(); -// la.start(); + la.setBeta(0.5); + la.init(); + la.start(); CrossValidation.outputFile = new File("log/qtl-cv.log"); CrossValidation.writeToFile = true; CrossValidation.multiThreaded = multiThreaded; - CrossValidation cv = new CrossValidation(la, lp, reasoner, nrOfFolds, false); +// CrossValidation cv = new CrossValidation(la, lp, reasoner, nrOfFolds, false); long endTime = System.currentTimeMillis(); System.err.println((endTime - startTime) + "ms"); } Modified: trunk/test/qtl/breasttissue/train1.conf =================================================================== --- trunk/test/qtl/breasttissue/train1.conf 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/test/qtl/breasttissue/train1.conf 2014-05-09 18:13:04 UTC (rev 4267) @@ -8,6 +8,8 @@ // QTL configuration alg.type = "qtl2dis" +alg.maxExecutionTimeInSeconds = 60 +alg.maxTreeComputationTimeInSeconds = 10 // learning problem lp.type = "posNegStandard" Modified: trunk/test/qtl/carcinogenesis/train.conf =================================================================== --- trunk/test/qtl/carcinogenesis/train.conf 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/test/qtl/carcinogenesis/train.conf 2014-05-09 18:13:04 UTC (rev 4267) @@ -8,6 +8,11 @@ reasoner.type = "fast instance checker" reasoner.sources = { ks } +// QTL configuration +alg.type = "qtl2dis" +alg.maxExecutionTimeInSeconds = 60 +alg.maxTreeComputationTimeInSeconds = 10 + // learning problem lp.type = "posNegStandard" lp.positiveExamples = { @@ -352,5 +357,3 @@ "kb:d335" } -// QTL configuration -alg.type = "qtl2dis" Modified: trunk/test/qtl/mutagenesis/train1.conf =================================================================== --- trunk/test/qtl/mutagenesis/train1.conf 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/test/qtl/mutagenesis/train1.conf 2014-05-09 18:13:04 UTC (rev 4267) @@ -11,6 +11,8 @@ // QTL configuration alg.type = "qtl2dis" +alg.maxExecutionTimeInSeconds = 60 +alg.maxTreeComputationTimeInSeconds = 10 // learning problem lp.type = "posNegStandard" Modified: trunk/test/qtl/parkinsons/train.conf =================================================================== --- trunk/test/qtl/parkinsons/train.conf 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/test/qtl/parkinsons/train.conf 2014-05-09 18:13:04 UTC (rev 4267) @@ -7,6 +7,8 @@ // QTL configuration alg.type = "qtl2dis" +alg.maxExecutionTimeInSeconds = 60 +alg.maxTreeComputationTimeInSeconds = 10 // learning problem lp.type = "posNegStandard" Modified: trunk/test/qtl/suramin/train.conf =================================================================== --- trunk/test/qtl/suramin/train.conf 2014-05-08 18:19:32 UTC (rev 4266) +++ trunk/test/qtl/suramin/train.conf 2014-05-09 18:13:04 UTC (rev 4267) @@ -30,4 +30,6 @@ } // QTL configuration -alg.type = "qtl2dis" \ No newline at end of file +alg.type = "qtl2dis" +alg.maxExecutionTimeInSeconds = 60 +alg.maxTreeComputationTimeInSeconds = 10 \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |