From: <lor...@us...> - 2013-11-08 09:29:43
|
Revision: 4141 http://sourceforge.net/p/dl-learner/code/4141 Author: lorenz_b Date: 2013-11-08 09:29:40 +0000 (Fri, 08 Nov 2013) Log Message: ----------- Added experiment class. Modified Paths: -------------- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestNoCorpus.java Added Paths: ----------- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/CrossValidation.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/Experiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/SemanticBibleExperiment.java Added: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/CrossValidation.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/CrossValidation.java (rev 0) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/CrossValidation.java 2013-11-08 09:29:40 UTC (rev 4141) @@ -0,0 +1,312 @@ +/** + * Copyright (C) 2007-2008, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
package org.dllearner.algorithms.isle;

import java.io.File;
import java.text.DecimalFormat;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;

import org.dllearner.core.AbstractLearningProblem;
import org.dllearner.core.ComponentInitException;
import org.dllearner.core.AbstractCELA;
import org.dllearner.core.AbstractReasonerComponent;
import org.dllearner.core.owl.Description;
import org.dllearner.core.owl.Individual;
import org.dllearner.learningproblems.Heuristics;
import org.dllearner.learningproblems.PosNegLP;
import org.dllearner.learningproblems.PosOnlyLP;
import org.dllearner.utilities.Helper;
import org.dllearner.utilities.datastructures.Datastructures;
import org.dllearner.utilities.statistics.Stat;
import org.dllearner.utilities.Files;

/**
 * Performs cross validation for the given learning problem. Supports
 * k-fold cross-validation; leave-one-out cross-validation is requested
 * via a flag but not implemented yet (the run aborts with a message).
 *
 * @author Jens Lehmann
 */
public class CrossValidation {

    // statistical values aggregated over all folds
    protected Stat runtime = new Stat();
    protected Stat accuracy = new Stat();
    protected Stat length = new Stat();
    protected Stat accuracyTraining = new Stat();
    protected Stat fMeasure = new Stat();
    protected Stat fMeasureTraining = new Stat();
    // when writeToFile is set, outputWriter() additionally appends to outputFile
    protected static boolean writeToFile = false;
    protected static File outputFile;

    protected Stat trainingCompletenessStat = new Stat();
    protected Stat trainingCorrectnessStat = new Stat();

    protected Stat testingCompletenessStat = new Stat();
    protected Stat testingCorrectnessStat = new Stat();

    public CrossValidation() {

    }

    /**
     * Runs the cross-validation immediately: splits the examples of the given
     * learning problem into {@code folds} stratified folds (positives and
     * negatives are split separately), trains the learning algorithm on each
     * training set and records runtime, accuracy and F-measure statistics.
     *
     * @param la the learning algorithm to evaluate
     * @param lp the learning problem; must be a {@link PosNegLP} or {@link PosOnlyLP}
     * @param rs the reasoner used to classify test individuals
     * @param folds the number of folds k
     * @param leaveOneOut if true, leave-one-out CV is requested (not supported yet)
     */
    public CrossValidation(AbstractCELA la, AbstractLearningProblem lp, AbstractReasonerComponent rs, int folds, boolean leaveOneOut) {

        DecimalFormat df = new DecimalFormat();

        // the training and test sets used later on
        List<Set<Individual>> trainingSetsPos = new LinkedList<Set<Individual>>();
        List<Set<Individual>> trainingSetsNeg = new LinkedList<Set<Individual>>();
        List<Set<Individual>> testSetsPos = new LinkedList<Set<Individual>>();
        List<Set<Individual>> testSetsNeg = new LinkedList<Set<Individual>>();

        // get examples and shuffle them too
        Set<Individual> posExamples;
        Set<Individual> negExamples;
        if(lp instanceof PosNegLP){
            posExamples = ((PosNegLP)lp).getPositiveExamples();
            negExamples = ((PosNegLP)lp).getNegativeExamples();
        } else if(lp instanceof PosOnlyLP){
            // FIX: this branch previously cast lp to PosNegLP, which always
            // threw a ClassCastException for a PosOnlyLP problem
            posExamples = ((PosOnlyLP)lp).getPositiveExamples();
            negExamples = new HashSet<Individual>();
        } else {
            throw new IllegalArgumentException("Only PosNeg and PosOnly learning problems are supported");
        }
        List<Individual> posExamplesList = new LinkedList<Individual>(posExamples);
        List<Individual> negExamplesList = new LinkedList<Individual>(negExamples);
        // fixed seeds keep the fold assignment reproducible across runs
        Collections.shuffle(posExamplesList, new Random(1));
        Collections.shuffle(negExamplesList, new Random(2));

        // sanity check whether nr. of folds makes sense for this benchmark
        if(!leaveOneOut && (posExamples.size()<folds && negExamples.size()<folds)) {
            System.out.println("The number of folds is higher than the number of "
                    + "positive/negative examples. This can result in empty test sets. Exiting.");
            // NOTE(review): exit status 0 hides the failure from calling scripts;
            // consider throwing an IllegalArgumentException instead
            System.exit(0);
        }

        if(leaveOneOut) {
            // note that leave-one-out is not identical to k-fold with
            // k = nr. of examples in the current implementation, because
            // with n folds and n examples there is no guarantee that a fold
            // is never empty (this is an implementation issue)
            System.out.println("Leave-one-out not supported yet.");
            System.exit(1);
        } else {
            // calculating where to split the sets; note that we split
            // positive and negative examples separately such that the
            // distribution of positive and negative examples remains similar
            // (note that there are better but more complex ways to implement this,
            // which guarantee that the sum of the elements of a fold for pos
            // and neg differs by at most 1 - it can differ by 2 in our implementation,
            // e.g. with 3 folds, 4 pos. examples, 4 neg. examples)
            int[] splitsPos = calculateSplits(posExamples.size(),folds);
            int[] splitsNeg = calculateSplits(negExamples.size(),folds);

            // calculating training and test sets
            for(int i=0; i<folds; i++) {
                Set<Individual> testPos = getTestingSet(posExamplesList, splitsPos, i);
                Set<Individual> testNeg = getTestingSet(negExamplesList, splitsNeg, i);
                testSetsPos.add(i, testPos);
                testSetsNeg.add(i, testNeg);
                trainingSetsPos.add(i, getTrainingSet(posExamples, testPos));
                trainingSetsNeg.add(i, getTrainingSet(negExamples, testNeg));
            }

        }

        // run the algorithm once per fold
        for(int currFold=0; currFold<folds; currFold++) {

            Set<String> pos = Datastructures.individualSetToStringSet(trainingSetsPos.get(currFold));
            Set<String> neg = Datastructures.individualSetToStringSet(trainingSetsNeg.get(currFold));
            if(lp instanceof PosNegLP){
                ((PosNegLP)lp).setPositiveExamples(trainingSetsPos.get(currFold));
                ((PosNegLP)lp).setNegativeExamples(trainingSetsNeg.get(currFold));
            } else if(lp instanceof PosOnlyLP){
                ((PosOnlyLP)lp).setPositiveExamples(new TreeSet<Individual>(trainingSetsPos.get(currFold)));
            }

            try {
                lp.init();
                la.init();
            } catch (ComponentInitException e) {
                // NOTE(review): the failure is only logged; the fold is still
                // evaluated with whatever configuration the components have
                e.printStackTrace();
            }

            long algorithmStartTime = System.nanoTime();
            la.start();
            long algorithmDuration = System.nanoTime() - algorithmStartTime;
            runtime.addNumber(algorithmDuration/(double)1000000000);

            Description concept = la.getCurrentlyBestDescription();

            // positives NOT covered by the concept and negatives covered by it
            Set<Individual> tmp = rs.hasType(concept, testSetsPos.get(currFold));
            Set<Individual> tmp2 = Helper.difference(testSetsPos.get(currFold), tmp);
            Set<Individual> tmp3 = rs.hasType(concept, testSetsNeg.get(currFold));

            outputWriter("test set errors pos: " + tmp2);
            outputWriter("test set errors neg: " + tmp3);

            // calculate training accuracies
            int trainingCorrectPosClassified = getCorrectPosClassified(rs, concept, trainingSetsPos.get(currFold));
            int trainingCorrectNegClassified = getCorrectNegClassified(rs, concept, trainingSetsNeg.get(currFold));
            int trainingCorrectExamples = trainingCorrectPosClassified + trainingCorrectNegClassified;
            double trainingAccuracy = 100*((double)trainingCorrectExamples/(trainingSetsPos.get(currFold).size()+
                    trainingSetsNeg.get(currFold).size()));
            accuracyTraining.addNumber(trainingAccuracy);
            // calculate test accuracies
            int correctPosClassified = getCorrectPosClassified(rs, concept, testSetsPos.get(currFold));
            int correctNegClassified = getCorrectNegClassified(rs, concept, testSetsNeg.get(currFold));
            int correctExamples = correctPosClassified + correctNegClassified;
            double currAccuracy = 100*((double)correctExamples/(testSetsPos.get(currFold).size()+
                    testSetsNeg.get(currFold).size()));
            accuracy.addNumber(currAccuracy);
            // calculate training F-Score
            int negAsPosTraining = rs.hasType(concept, trainingSetsNeg.get(currFold)).size();
            double precisionTraining = trainingCorrectPosClassified + negAsPosTraining == 0 ? 0 : trainingCorrectPosClassified / (double) (trainingCorrectPosClassified + negAsPosTraining);
            double recallTraining = trainingCorrectPosClassified / (double) trainingSetsPos.get(currFold).size();
            fMeasureTraining.addNumber(100*Heuristics.getFScore(recallTraining, precisionTraining));
            // calculate test F-Score
            int negAsPos = rs.hasType(concept, testSetsNeg.get(currFold)).size();
            double precision = correctPosClassified + negAsPos == 0 ? 0 : correctPosClassified / (double) (correctPosClassified + negAsPos);
            double recall = correctPosClassified / (double) testSetsPos.get(currFold).size();
            fMeasure.addNumber(100*Heuristics.getFScore(recall, precision));

            length.addNumber(concept.getLength());

            outputWriter("fold " + currFold + ":");
            outputWriter(" training: " + pos.size() + " positive and " + neg.size() + " negative examples");
            outputWriter(" testing: " + correctPosClassified + "/" + testSetsPos.get(currFold).size() + " correct positives, "
                    + correctNegClassified + "/" + testSetsNeg.get(currFold).size() + " correct negatives");
            outputWriter(" concept: " + concept);
            outputWriter(" accuracy: " + df.format(currAccuracy) + "% (" + df.format(trainingAccuracy) + "% on training set)");
            outputWriter(" length: " + df.format(concept.getLength()));
            outputWriter(" runtime: " + df.format(algorithmDuration/(double)1000000000) + "s");

        }

        outputWriter("");
        outputWriter("Finished " + folds + "-folds cross-validation.");
        outputWriter("runtime: " + statOutput(df, runtime, "s"));
        outputWriter("length: " + statOutput(df, length, ""));
        outputWriter("F-Measure on training set: " + statOutput(df, fMeasureTraining, "%"));
        outputWriter("F-Measure: " + statOutput(df, fMeasure, "%"));
        outputWriter("predictive accuracy on training set: " + statOutput(df, accuracyTraining, "%"));
        outputWriter("predictive accuracy: " + statOutput(df, accuracy, "%"));

    }

    /** Positives are correct iff the reasoner classifies them as instances of the concept. */
    protected int getCorrectPosClassified(AbstractReasonerComponent rs, Description concept, Set<Individual> testSetPos) {
        return rs.hasType(concept, testSetPos).size();
    }

    /** Negatives are correct iff the reasoner does NOT classify them as instances of the concept. */
    protected int getCorrectNegClassified(AbstractReasonerComponent rs, Description concept, Set<Individual> testSetNeg) {
        return testSetNeg.size() - rs.hasType(concept, testSetNeg).size();
    }

    /**
     * Returns the test fold with index {@code fold} as a set.
     * {@code splits[i]} is the exclusive end index of fold i (see
     * {@link #calculateSplits(int, int)}), matching subList's exclusive toIndex.
     */
    public static Set<Individual> getTestingSet(List<Individual> examples, int[] splits, int fold) {
        int fromIndex;
        // we either start from 0 or after the last fold ended
        if(fold == 0)
            fromIndex = 0;
        else
            fromIndex = splits[fold-1];
        // the split corresponds to the (exclusive) end of the fold
        int toIndex = splits[fold];

        Set<Individual> testingSet = new HashSet<Individual>();
        // toIndex is exclusive in subList, which matches the split convention
        // (the old "+1" comment here was stale: no +1 is applied)
        testingSet.addAll(examples.subList(fromIndex, toIndex));
        return testingSet;
    }

    /** Training set = all examples minus the current test fold. */
    public static Set<Individual> getTrainingSet(Set<Individual> examples, Set<Individual> testingSet) {
        return Helper.difference(examples, testingSet);
    }

    // takes nr. of examples and the nr. of folds for these examples;
    // returns an array where splits[i] is the EXCLUSIVE end index of fold i
    // in the examples (fixed doc: the old comment claimed "index of the last
    // element", but ceil(i*n/folds) yields n for the final fold, i.e. an
    // exclusive end as consumed by subList in getTestingSet)
    public static int[] calculateSplits(int nrOfExamples, int folds) {
        int[] splits = new int[folds];
        for(int i=1; i<=folds; i++) {
            // we always round up to the next integer
            splits[i-1] = (int)Math.ceil(i*nrOfExamples/(double)folds);
        }
        return splits;
    }

    /** Formats mean/deviation/min/max of a statistic with the given unit suffix. */
    public static String statOutput(DecimalFormat df, Stat stat, String unit) {
        String str = "av. " + df.format(stat.getMean()) + unit;
        str += " (deviation " + df.format(stat.getStandardDeviation()) + unit + "; ";
        str += "min " + df.format(stat.getMin()) + unit + "; ";
        str += "max " + df.format(stat.getMax()) + unit + ")";
        return str;
    }

    public Stat getAccuracy() {
        return accuracy;
    }

    public Stat getLength() {
        return length;
    }

    public Stat getRuntime() {
        return runtime;
    }

    // always echoes to stdout; additionally appends to outputFile when enabled
    // (the duplicated println in the old if/else has been folded together)
    protected void outputWriter(String output) {
        if(writeToFile) {
            Files.appendToFile(outputFile, output +"\n");
        }
        System.out.println(output);
    }

    public Stat getfMeasure() {
        return fMeasure;
    }

    public Stat getfMeasureTraining() {
        return fMeasureTraining;
    }

}
org.dllearner.core.KnowledgeSource;
import org.dllearner.core.owl.Entity;
import org.dllearner.core.owl.NamedClass;
import org.dllearner.kb.OWLAPIOntology;
import org.dllearner.learningproblems.ClassLearningProblem;
import org.dllearner.reasoning.FastInstanceChecker;
import org.dllearner.utilities.Helper;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLClass;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyManager;

import com.google.common.base.Charsets;
import com.google.common.collect.Sets;
import com.google.common.io.Files;

/**
 * Experimental setup: for each class of the ontology supplied by a concrete
 * subclass, learns a class description with ISLE (using an NLP-based
 * heuristic derived from a text corpus) and with plain CELOE, evaluating
 * both by k-fold cross-validation.
 *
 * @author Lorenz Buehmann
 */
public abstract class Experiment {

    /**
     * The number of folds for the cross-validation.
     */
    private final int FOLDS = 10;
    /**
     * Whether to perform leave-one-out instead of k-fold cross-validation.
     */
    private final boolean LEAVE_ONE_OUT = false;

    private ClassLearningProblem lp;
    private RelevanceMetric relevance;
    private AbstractReasonerComponent reasoner;

    private String testFolder = "experiments/logs/";

    private OWLOntology ontology;
    private Set<TextDocument> documents;

    // guards the lazy one-time setup in initIfNecessary()
    private boolean initialized = false;


    /** @return the ontology the experiment is run on */
    protected abstract OWLOntology getOntology();
    /** @return the text corpus used to build the semantic index */
    protected abstract Set<TextDocument> getDocuments();

    /**
     * Lazily builds the semantic index, relevance metric, reasoner and
     * learning problem on first use.
     */
    private void initIfNecessary() {
        if(!initialized){
            ontology = getOntology();
            documents = getDocuments();

            // build semantic index
            SemanticIndex semanticIndex = new SimpleSemanticIndex(ontology, null, false);
            semanticIndex.buildIndex(documents);
            // set the relevance metric
            relevance = new PMIRelevanceMetric(semanticIndex);
            try {
                // set KB
                KnowledgeSource ks = new OWLAPIOntology(ontology);
                // set reasoner
                reasoner = new FastInstanceChecker(ks);
                reasoner.init();
                // set learning problem
                lp = new ClassLearningProblem(reasoner);
            } catch (ComponentInitException e) {
                // FIX: previously the exception was only printed and the object
                // was still marked initialized, leaving lp/reasoner null and
                // guaranteeing a NullPointerException later in run()
                throw new RuntimeException("Initialising the experiment components failed", e);
            }
            initialized = true;
        }
    }

    /**
     * Get the classes on which the experiment is applied.
     * @return all classes in the signature of the ontology
     */
    private Set<NamedClass> getClasses(){
        Set<NamedClass> classes = new HashSet<NamedClass>();

        for(OWLClass cls : ontology.getClassesInSignature()){
            classes.add(new NamedClass(cls.toStringID()));
        }

        return classes;
    }

    /**
     * Run the experiment on all classes; failures of single classes are
     * logged and do not abort the remaining classes.
     */
    public void run(){
        Set<NamedClass> classes = getClasses();

        for (NamedClass cls : classes) {
            try {
                run(cls);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Run the experiment on the given class.
     * @param cls the class to learn a description for
     * @throws ComponentInitException if the learning problem or a learning
     *         algorithm cannot be initialised
     */
    public void run(NamedClass cls) throws ComponentInitException {
        initIfNecessary();

        lp.setClassToDescribe(cls);
        lp.init();

        Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance);
        NLPHeuristic heuristic = new NLPHeuristic(entityRelevance);

        // perform cross validation with ISLE
        ISLE isle = new ISLE(lp, reasoner);
        isle.setHeuristic(heuristic);
        isle.setTerminateOnNoiseReached(true);
        isle.init();
        CrossValidation crossValidationISLE = new CrossValidation(isle, lp, reasoner, FOLDS, LEAVE_ONE_OUT);

        // perform cross validation with CELOE
        CELOE celoe = new CELOE(lp, reasoner);
        celoe.setTerminateOnNoiseReached(true);
        celoe.init();
        // FIX: this cross-validation was previously constructed with `isle`
        // again, so CELOE was configured but never actually evaluated
        CrossValidation crossValidationCELOE = new CrossValidation(celoe, lp, reasoner, FOLDS, LEAVE_ONE_OUT);

        System.out.println(crossValidationISLE.getfMeasure());
        System.out.println(crossValidationCELOE.getfMeasure());
    }

}
/**
 *
 */
package org.dllearner.algorithms.isle;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashSet;
import java.util.Set;

import org.dllearner.algorithms.isle.index.RemoteDataProvider;
import org.dllearner.algorithms.isle.index.TextDocument;
import org.dllearner.core.owl.NamedClass;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.OWLOntologyManager;

import com.google.common.base.Charsets;
import com.google.common.collect.Sets;
import com.google.common.io.Files;

/**
 * Experiment on the Semantic Bible dataset: merges the NTNames schema with
 * its individuals and (currently) uses a single hard-coded chapter as the
 * text corpus.
 *
 * @author Lorenz Buehmann
 */
public class SemanticBibleExperiment extends Experiment{

    /* (non-Javadoc)
     * @see org.dllearner.algorithms.isle.Experiment#getOntology()
     */
    @Override
    protected OWLOntology getOntology() {
        try {
            // merge schema and instance data into one ontology
            OWLOntologyManager man = OWLManager.createOWLOntologyManager();
            OWLOntology schema = man.loadOntology(IRI.create("http://www.semanticbible.com/2006/11/NTNames.owl"));
            OWLOntology instances = OWLManager.createOWLOntologyManager().loadOntology(IRI.create("http://www.semanticbible.com/2006/11/NTN-individuals.owl"));
            OWLOntology mergedOntology = man.createOntology(IRI.create("http://semanticbible.com/merged.owl"));
            man.addAxioms(mergedOntology, schema.getAxioms());
            man.addAxioms(mergedOntology, instances.getAxioms());
            return mergedOntology;
        } catch (OWLOntologyCreationException e) {
            e.printStackTrace();
        }
        // NOTE(review): callers do not expect null; consider rethrowing instead
        return null;
    }

    /* (non-Javadoc)
     * @see org.dllearner.algorithms.isle.Experiment#getDocuments()
     */
    @Override
    protected Set<TextDocument> getDocuments() {
        // FIX: the previous version downloaded and parsed the full remote bible
        // corpus and then unconditionally discarded it in favour of the single
        // hard-coded chapter below - the download was pure wasted I/O. The
        // returned value is unchanged; use loadBibleCorpus() for full runs.
        return Sets.newHashSet(new TextDocument("and in that day seven women shall take hold of one man saying we will eat our own bread and wear our own apparel only let us be called by thy name to take away our reproach in that day shall the branch of the lord be beautiful and glorious and the fruit of the earth excellent and comely for them that are escaped of israel and it shall come to pass left in zion and remaineth in jerusalem shall be called holy every one that is written among the living in jerusalem when the lord shall have washed away the filth of the daughters of zion and shall have purged the blood of jerusalem from the midst thereof by the spirit of judgment and by the spirit of burning and the lord will create upon every dwelling place of mount zion and upon her assemblies a cloud and smoke by day and the shining of a flaming fire by night for upon all the glory a defence and there shall be a tabernacle for a shadow in the daytime from the heat and for a place of refuge and for a covert from storm and from rain"));
    }

    /**
     * Loads the full bible corpus (one text document per chapter) from the
     * remote ZIP archive. I/O problems are logged and skipped, so the result
     * may be partial or empty.
     */
    private Set<TextDocument> loadBibleCorpus() {
        Set<TextDocument> documents = new HashSet<TextDocument>();
        try {
            RemoteDataProvider bibleByChapter = new RemoteDataProvider(
                    new URL("http://gold.linkeddata.org/data/bible/split_by_chapter.zip"));
            File folder = bibleByChapter.getLocalDirectory();
            for (File file : folder.listFiles()) {
                if(!file.isDirectory() && !file.isHidden()){
                    try {
                        String text = Files.toString(file, Charsets.UTF_8);
                        documents.add(new TextDocument(text));
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return documents;
    }

    public static void main(String[] args) throws Exception {
        new SemanticBibleExperiment().run(new NamedClass("http://semanticbible.org/ns/2006/NTNames#Woman"));
    }
}