From: <lor...@us...> - 2013-02-27 13:49:04
Revision: 3906 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3906&view=rev Author: lorenz_b Date: 2013-02-27 13:48:57 +0000 (Wed, 27 Feb 2013) Log Message: ----------- Added cross validation to matching tests. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java trunk/interfaces/src/main/java/org/dllearner/cli/CrossValidation.java trunk/scripts/pom.xml trunk/scripts/src/main/java/org/dllearner/scripts/OntologyMatching.java trunk/scripts/src/test/java/org/dllearner/junit/OntologyMatchingTest.java Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java 2013-02-25 12:24:29 UTC (rev 3905) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java 2013-02-27 13:48:57 UTC (rev 3906) @@ -210,6 +210,18 @@ return new SparqlEndpoint(u, defaultGraphURIs, new LinkedList<String>()); } + public static SparqlEndpoint getEndpointLOD2Cloud() { + URL u = null; + try { + u = new URL("http://lod.openlinksw.com/sparql/"); + } catch (Exception e) { + e.printStackTrace(); + } + LinkedList<String> defaultGraphURIs=new LinkedList<String>(); +// defaultGraphURIs.add("http://dbpedia.org"); + return new SparqlEndpoint(u, defaultGraphURIs, new LinkedList<String>()); + } + public static SparqlEndpoint getEndpointLinkedGeoData() { URL u = null; try { Modified: trunk/interfaces/src/main/java/org/dllearner/cli/CrossValidation.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/CrossValidation.java 2013-02-25 12:24:29 UTC (rev 3905) +++ trunk/interfaces/src/main/java/org/dllearner/cli/CrossValidation.java 2013-02-27 13:48:57 UTC (rev 3906) @@ -27,7 +27,9 @@ import java.util.List; import java.util.Random; import java.util.Set; +import java.util.TreeSet; 
+import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.ComponentInitException; import org.dllearner.core.AbstractCELA; import org.dllearner.core.AbstractReasonerComponent; @@ -35,6 +37,7 @@ import org.dllearner.core.owl.Individual; import org.dllearner.learningproblems.Heuristics; import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.learningproblems.PosOnlyLP; import org.dllearner.utilities.Helper; import org.dllearner.utilities.datastructures.Datastructures; import org.dllearner.utilities.statistics.Stat; @@ -70,7 +73,7 @@ } - public CrossValidation(AbstractCELA la, PosNegLP lp, AbstractReasonerComponent rs, int folds, boolean leaveOneOut) { + public CrossValidation(AbstractCELA la, AbstractLearningProblem lp, AbstractReasonerComponent rs, int folds, boolean leaveOneOut) { DecimalFormat df = new DecimalFormat(); @@ -81,11 +84,20 @@ List<Set<Individual>> testSetsNeg = new LinkedList<Set<Individual>>(); // get examples and shuffle them too - Set<Individual> posExamples = ((PosNegLP)lp).getPositiveExamples(); + Set<Individual> posExamples; + Set<Individual> negExamples; + if(lp instanceof PosNegLP){ + posExamples = ((PosNegLP)lp).getPositiveExamples(); + negExamples = ((PosNegLP)lp).getNegativeExamples(); + } else if(lp instanceof PosOnlyLP){ + posExamples = ((PosNegLP)lp).getPositiveExamples(); + negExamples = new HashSet<Individual>(); + } else { + throw new IllegalArgumentException("Only PosNeg and PosOnly learning problems are supported"); + } List<Individual> posExamplesList = new LinkedList<Individual>(posExamples); + List<Individual> negExamplesList = new LinkedList<Individual>(negExamples); Collections.shuffle(posExamplesList, new Random(1)); - Set<Individual> negExamples = ((PosNegLP)lp).getNegativeExamples(); - List<Individual> negExamplesList = new LinkedList<Individual>(negExamples); Collections.shuffle(negExamplesList, new Random(2)); // sanity check whether nr. 
of folds makes sense for this benchmark @@ -137,8 +149,13 @@ Set<String> pos = Datastructures.individualSetToStringSet(trainingSetsPos.get(currFold)); Set<String> neg = Datastructures.individualSetToStringSet(trainingSetsNeg.get(currFold)); - lp.setPositiveExamples(trainingSetsPos.get(currFold)); - lp.setNegativeExamples(trainingSetsNeg.get(currFold)); + if(lp instanceof PosNegLP){ + ((PosNegLP)lp).setPositiveExamples(trainingSetsPos.get(currFold)); + ((PosNegLP)lp).setNegativeExamples(trainingSetsNeg.get(currFold)); + } else if(lp instanceof PosOnlyLP){ + ((PosOnlyLP)lp).setPositiveExamples(new TreeSet<Individual>(trainingSetsPos.get(currFold))); + } + try { lp.init(); Modified: trunk/scripts/pom.xml =================================================================== --- trunk/scripts/pom.xml 2013-02-25 12:24:29 UTC (rev 3905) +++ trunk/scripts/pom.xml 2013-02-27 13:48:57 UTC (rev 3906) @@ -116,7 +116,7 @@ <dependency> <groupId>net.sourceforge.owlapi</groupId> <artifactId>owlapi-distribution</artifactId> - <version>3.4</version> + <version>3.4.4-SNAPSHOT</version> </dependency> <dependency> <groupId>net.sourceforge.owlapi</groupId> Modified: trunk/scripts/src/main/java/org/dllearner/scripts/OntologyMatching.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/OntologyMatching.java 2013-02-25 12:24:29 UTC (rev 3905) +++ trunk/scripts/src/main/java/org/dllearner/scripts/OntologyMatching.java 2013-02-27 13:48:57 UTC (rev 3906) @@ -44,6 +44,7 @@ import org.dllearner.learningproblems.PosOnlyLP; import org.dllearner.reasoning.FastInstanceChecker; import org.dllearner.reasoning.SPARQLReasoner; +import org.dllearner.utilities.CrossValidation; import org.dllearner.utilities.LabelShortFormProvider; import org.dllearner.utilities.datastructures.Datastructures; import org.dllearner.utilities.datastructures.SetManipulation; @@ -75,6 +76,8 @@ import com.hp.hpl.jena.rdf.model.Statement; import 
com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -95,6 +98,8 @@ private Map<Description, List<? extends EvaluatedDescription>> mappingKB2KB1; private boolean posNegLearning = true; + private final boolean performCrossValidation = true; + private int fragmentDepth = 2; /** * The maximum number of positive examples, used for the SPARQL extraction and learning algorithm @@ -118,6 +123,10 @@ this(new KnowledgeBase(endpoint1), new KnowledgeBase(endpoint2)); } + public void setFragmentDepth(int fragmentDepth) { + this.fragmentDepth = fragmentDepth; + } + public void start(){ mappingKB1KB2 = computeAlignment(kb1, kb2); printMappingPretty(mappingKB1KB2); @@ -283,14 +292,16 @@ fullFragment.add(positiveFragment); fullFragment.add(negativeFragment); - //here is the most difficult task, i.e. find a 'good' fragment of the KB on which we can learn KnowledgeSource ks = convert(fullFragment); //initialize the reasoner + logger.info("Initializing reasoner..."); AbstractReasonerComponent rc = new FastInstanceChecker(ks); rc.init(); + logger.info("Done."); //initialize the learning problem + logger.info("Initializing learning problem..."); AbstractLearningProblem lp; if(posNeg){ lp = new PosNegLPStandard(rc, positiveExamplesSample, negativeExamplesSample); @@ -298,26 +309,34 @@ lp = new PosOnlyLP(rc, positiveExamplesSample); } lp.init(); + logger.info("Done."); - //apply the learning algorithm - logger.info("Running learning algorithm..."); + //initialize the learning algorithm + logger.info("Initializing learning algorithm..."); CELOE la = new CELOE(lp, rc); la.setMaxExecutionTimeInSeconds(10); la.setNoisePercentage(25); la.init(); - la.start(); + logger.info("Done."); - try { - QTL qtl = new QTL(lp, new LocalModelBasedSparqlEndpointKS(fullFragment)); - qtl.init(); - qtl.start(); - 
System.out.println(qtl.getSPARQLQuery()); - } catch (LearningProblemUnsupportedException e) { - e.printStackTrace(); + if(performCrossValidation){ + CrossValidation cv = new CrossValidation(la, lp, rc, 5, false); + } else { + //apply the learning algorithm + logger.info("Running learning algorithm..."); + la.start(); + logger.info(la.getCurrentlyBestEvaluatedDescription()); } - - logger.info(la.getCurrentlyBestEvaluatedDescription()); +// try { +// QTL qtl = new QTL(lp, new LocalModelBasedSparqlEndpointKS(fullFragment)); +// qtl.init(); +// qtl.start(); +// System.out.println(qtl.getSPARQLQuery()); +// } catch (LearningProblemUnsupportedException e) { +// e.printStackTrace(); +// } + return la.getCurrentlyBestEvaluatedDescriptions(10); } catch (ComponentInitException e) { e.printStackTrace(); @@ -382,21 +401,32 @@ logger.info(i++ + "/" + size); fullFragment.add(getFragment(ind, kb)); } - //filter out triples with String literals, as there often occur are some syntax errors and they are not relevant for learning + filter(fullFragment); + return fullFragment; + } + + private void filter(Model model) { + // filter out triples with String literals, as there often occur are + // some syntax errors and they are not relevant for learning List<Statement> statementsToRemove = new ArrayList<Statement>(); - for(Iterator<Statement> iter = fullFragment.listStatements().toList().iterator(); iter.hasNext();){ + for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { Statement st = iter.next(); RDFNode object = st.getObject(); - if(object.isLiteral()){ -// statementsToRemove.add(st); + if (object.isLiteral()) { + // statementsToRemove.add(st); Literal lit = object.asLiteral(); - if(lit.getDatatype() == null || lit.getDatatype().equals(XSD.STRING)){ + if (lit.getDatatype() == null || lit.getDatatype().equals(XSD.STRING)) { st.changeObject("shortened", "en"); - } + } } + //remove statements like <x a owl:Class> + 
if(st.getPredicate().equals(RDF.type)){ + if(object.equals(RDFS.Class.asNode()) || object.equals(OWL.Class.asNode()) || object.equals(RDFS.Literal.asNode())){ + statementsToRemove.add(st); + } + } } - fullFragment.remove(statementsToRemove); - return fullFragment; + model.remove(statementsToRemove); } /** @@ -414,7 +444,7 @@ private Model getFragment(Individual ind, KnowledgeBase kb){ logger.debug("Loading fragment for " + ind.getName()); ConciseBoundedDescriptionGenerator cbdGen = new ConciseBoundedDescriptionGeneratorImpl(kb.getEndpoint(), kb.getCache()); - Model cbd = cbdGen.getConciseBoundedDescription(ind.getName(), 2); + Model cbd = cbdGen.getConciseBoundedDescription(ind.getName(), fragmentDepth); logger.debug("Got " + cbd.size() + " triples."); return cbd; } Modified: trunk/scripts/src/test/java/org/dllearner/junit/OntologyMatchingTest.java =================================================================== --- trunk/scripts/src/test/java/org/dllearner/junit/OntologyMatchingTest.java 2013-02-25 12:24:29 UTC (rev 3905) +++ trunk/scripts/src/test/java/org/dllearner/junit/OntologyMatchingTest.java 2013-02-27 13:48:57 UTC (rev 3906) @@ -33,6 +33,8 @@ private KnowledgeBase worldFactBook; private KnowledgeBase openCyc; private KnowledgeBase linkedGeoData; + + private final int fragmentDepth = 3; @Before public void setUp() throws Exception { @@ -112,6 +114,7 @@ @Test public void testSingleClassLinkedGeoDataToDBpedia() { OntologyMatching matcher = new OntologyMatching(linkedGeoData, dbpedia); + matcher.setFragmentDepth(fragmentDepth); NamedClass nc = new NamedClass("http://linkedgeodata.org/ontology/Aerodrome"); List<? extends EvaluatedDescription> mapping = matcher.computeMapping(nc, linkedGeoData, dbpedia); Map<Description, List<? extends EvaluatedDescription>> alignment = new HashMap<Description, List<? extends EvaluatedDescription>>(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |