From: <lor...@us...> - 2013-12-10 13:47:57
Revision: 4205
          http://sourceforge.net/p/dl-learner/code/4205
Author:   lorenz_b
Date:     2013-12-10 13:47:53 +0000 (Tue, 10 Dec 2013)
Log Message:
-----------
Added PMI test.

Modified Paths:
--------------
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/Experiment.java

Added Paths:
-----------
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetricTest.java

Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java	2013-12-10 13:25:25 UTC (rev 4204)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java	2013-12-10 13:47:53 UTC (rev 4205)
@@ -3,23 +3,42 @@
  */
 package org.dllearner.algorithms.isle;
 
-import com.google.common.collect.Sets;
-import com.hp.hpl.jena.rdf.model.Model;
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
 import org.apache.commons.compress.compressors.CompressorException;
 import org.apache.commons.compress.compressors.CompressorInputStream;
 import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.dllearner.core.owl.NamedClass;
 import org.dllearner.kb.sparql.SparqlEndpoint;
+import org.dllearner.utilities.owl.OWLEntityTypeAdder;
 import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.AxiomType;
 import org.semanticweb.owlapi.model.OWLOntology;
 import org.semanticweb.owlapi.model.OWLOntologyCreationException;
 import org.semanticweb.owlapi.model.OWLOntologyManager;
 
-import java.io.*;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.HashSet;
-import java.util.Set;
+import com.google.common.collect.Sets;
+import com.hp.hpl.jena.rdf.model.Literal;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.Property;
+import com.hp.hpl.jena.rdf.model.RDFNode;
+import com.hp.hpl.jena.rdf.model.Statement;
+import com.hp.hpl.jena.rdf.model.StmtIterator;
+import com.hp.hpl.jena.vocabulary.OWL;
+import com.hp.hpl.jena.vocabulary.RDF;
+import com.hp.hpl.jena.vocabulary.RDFS;
+import com.hp.hpl.jena.vocabulary.XSD;
 
 /**
  * @author Lorenz Buehmann
@@ -28,7 +47,7 @@
 public class DBpediaExperiment extends Experiment{
 
     final SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
-    final int maxNrOfInstancesPerClass = 100;
+    final int maxNrOfInstancesPerClass = 10;
 
 
@@ -38,12 +57,12 @@
     @Override
     protected OWLOntology getOntology() {
         //load the DBpedia schema
+        OWLOntology schema = null;
         try {
             URL url = new URL("http://downloads.dbpedia.org/3.9/dbpedia_3.9.owl.bz2");
             InputStream is = new BufferedInputStream(url.openStream());
             CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream("bzip2", is);
-            OWLOntology schema = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(in);
-            return schema;
+            schema = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(in);
         } catch (MalformedURLException e) {
             e.printStackTrace();
         } catch (IOException e) {
@@ -59,11 +78,23 @@
                 "http://dbpedia.org/ontology/",
                 Sets.newHashSet(new NamedClass("http://dbpedia.org/ontology/Person")),
                 maxNrOfInstancesPerClass);
+        cleanUpModel(sample);
+        filter(sample, "http://dbpedia.org/ontology/");
+        OWLEntityTypeAdder.addEntityTypes(sample);
+        StmtIterator iterator = sample.listStatements();
+        while(iterator.hasNext()){
+            System.out.println(iterator.next());
+        }
+        
         try {
             ByteArrayOutputStream baos = new ByteArrayOutputStream();
             sample.write(baos, "TURTLE", null);
             OWLOntologyManager man = OWLManager.createOWLOntologyManager();
             OWLOntology ontology = man.loadOntologyFromOntologyDocument(new ByteArrayInputStream(baos.toByteArray()));
+            man.addAxioms(ontology, schema.getAxioms());
+            man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_DATA_PROPERTY));
+            man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_OBJECT_PROPERTY));
+            man.removeAxioms(ontology, ontology.getAxioms(AxiomType.DATA_PROPERTY_RANGE));
             return ontology;
         } catch (Exception e) {
             e.printStackTrace();
@@ -72,8 +103,78 @@
         return null;
     }
    
+    /**
+     * Filter triples which are not relevant based on the given knowledge base
+     * namespace.
+     *
+     * @param model
+     * @param namespace
+     */
+    private void filter(Model model, String namespace) {
+        List<Statement> statementsToRemove = new ArrayList<Statement>();
+        for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) {
+            Statement st = iter.next();
+            Property predicate = st.getPredicate();
+            if (predicate.equals(RDF.type)) {
+                if (!st.getObject().asResource().getURI().startsWith(namespace)) {
+                    statementsToRemove.add(st);
+                } else if (st.getObject().equals(OWL.FunctionalProperty)) {
+                    statementsToRemove.add(st);
+                } else if (st.getObject().isLiteral() && st.getObject().asLiteral().getDatatypeURI().equals(XSD.gYear.getURI())) {
+                    statementsToRemove.add(st);
+                }
+            } else if (!predicate.equals(RDFS.subClassOf) && !predicate.equals(OWL.sameAs) && !predicate.asResource().getURI().startsWith(namespace)) {
+                statementsToRemove.add(st);
+            }
+        }
+        model.remove(statementsToRemove);
+    }
+    
+    private static void cleanUpModel(Model model) {
+        // filter out triples with string literals, as they often contain
+        // syntax errors and are not relevant for learning
+        List<Statement> statementsToRemove = new ArrayList<Statement>();
+        for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) {
+            Statement st = iter.next();
+            RDFNode object = st.getObject();
+            if (object.isLiteral()) {
+                Literal lit = object.asLiteral();
+                if (lit.getDatatype() == null || lit.getDatatype().equals(XSD.xstring)) {
+                    st.changeObject("shortened", "en");
+                } else if (lit.getDatatype().getURI().equals(XSD.gYear.getURI())) {
+                    statementsToRemove.add(st);
+                } else if (lit.getDatatype().getURI().equals(XSD.gYearMonth.getURI())) {
+                    statementsToRemove.add(st);
+                }
+            }
+            // remove statements like <x a owl:Class>
+            if (st.getPredicate().equals(RDF.type)) {
+                if (object.equals(RDFS.Class) || object.equals(OWL.Class) || object.equals(RDFS.Literal)
+                        || object.equals(RDFS.Resource)) {
+                    statementsToRemove.add(st);
+                }
+            }
+            // remove unwanted properties
+            String dbo = "http://dbpedia.org/ontology/";
+            Set<String> blackList = Sets.newHashSet(dbo + "wikiPageDisambiguates", dbo + "wikiPageExternalLink",
+                    dbo + "wikiPageID", dbo + "wikiPageInterLanguageLink", dbo + "wikiPageRedirects", dbo + "wikiPageRevisionID",
+                    dbo + "wikiPageWikiLink");
+            for(String bl: blackList){
+                if (st.getPredicate().getURI().equals(bl)) {
+                    statementsToRemove.add(st);
+                }
+            }
+        }
+        
+        model.remove(statementsToRemove);
+    }
+    
+    
+    
     /* (non-Javadoc)
      * @see org.dllearner.algorithms.isle.Experiment#getDocuments()
      */
"wikiPageExternalLink", + dbo + "wikiPageID", dbo + "wikiPageInterLanguageLink", dbo + "wikiPageRedirects", dbo + "wikiPageRevisionID", + dbo + "wikiPageWikiLink"); + for(String bl: blackList){ + if (st.getPredicate().getURI().equals(bl)) { + statementsToRemove.add(st); + } + } + } + + model.remove(statementsToRemove); + } + + + /* (non-Javadoc) * @see org.dllearner.algorithms.isle.Experiment#getDocuments() */ Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/Experiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/Experiment.java 2013-12-10 13:25:25 UTC (rev 4204) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/Experiment.java 2013-12-10 13:47:53 UTC (rev 4205) @@ -93,10 +93,10 @@ documents = getDocuments(); // build semantic index - SemanticIndex semanticIndex = SemanticIndexGenerator.generateIndex(documents, ontology, false); - - // set the relevance metric - relevance = new PMIRelevanceMetric(semanticIndex); +// SemanticIndex semanticIndex = SemanticIndexGenerator.generateIndex(documents, ontology, false); +// +// // set the relevance metric +// relevance = new PMIRelevanceMetric(semanticIndex); try { // set KB KnowledgeSource ks = new OWLAPIOntology(ontology); @@ -233,8 +233,8 @@ //get the start class for the learning algorithms Description startClass = getStartClass(cls, equivalence, true); - Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance); - NLPHeuristic heuristic = new NLPHeuristic(entityRelevance); +// Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance); +// NLPHeuristic heuristic = new NLPHeuristic(entityRelevance); ClassLearningProblem clp = new ClassLearningProblem(reasoner); clp.setClassToDescribe(cls); @@ -247,9 +247,9 @@ rop.init(); // perform cross validation with ISLE - ISLE isle = new ISLE(lp, reasoner); - isle.setHeuristic(heuristic); - isle.setMaxNrOfResults(3); + ISLE isle = new ISLE(clp, reasoner); +// isle.setHeuristic(heuristic); + isle.setMaxNrOfResults(20); isle.setOperator(rop); isle.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); isle.setStartClass(startClass); @@ -260,9 +260,10 @@ // isle.setTerminateOnNoiseReached(true); isle.setIgnoredConcepts(Collections.singleton(cls)); isle.setReplaceSearchTree(true); - isle.setMaxExecutionTimeInSeconds(10); + isle.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); isle.init(); - isle.start();System.exit(1); + isle.start(); + System.exit(1); List<? 
Added: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetricTest.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetricTest.java	                        (rev 0)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetricTest.java	2013-12-10 13:47:53 UTC (rev 4205)
@@ -0,0 +1,83 @@
+/**
+ * 
+ */
+package org.dllearner.algorithms.isle.metrics;
+
+import static org.junit.Assert.fail;
+
+import java.io.BufferedInputStream;
+import java.io.InputStream;
+import java.net.URL;
+
+import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import org.dllearner.algorithms.isle.index.Index;
+import org.dllearner.algorithms.isle.index.syntactic.SolrSyntacticIndex;
+import org.dllearner.core.owl.Entity;
+import org.dllearner.core.owl.NamedClass;
+import org.dllearner.core.owl.ObjectProperty;
+import org.junit.Test;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.OWLOntology;
+
+/**
+ * @author Lorenz Buehmann
+ * 
+ */
+public class PMIRelevanceMetricTest {
+    
+    AbstractRelevanceMetric metric;
+    static final String solrServerURL = "http://[2001:638:902:2010:0:168:35:138]:8080/solr/en_dbpedia_resources/";
+    static final String searchField = "comment";
+    static final String DBPEDIA_NS = "http://dbpedia.org/ontology/";
+    
+    /**
+     * 
+     */
+    public PMIRelevanceMetricTest() {
+        OWLOntology ontology = null;
+        try {
+            URL url = new URL("http://downloads.dbpedia.org/3.9/dbpedia_3.9.owl.bz2");
+            InputStream is = new BufferedInputStream(url.openStream());
+            CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream("bzip2", is);
+            ontology = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(in);
+        } catch (Exception e){
+            e.printStackTrace();
+        }
+        Index index = new SolrSyntacticIndex(ontology, solrServerURL, searchField);
+        metric = new PMIRelevanceMetric(index);
+    }
+
+    /**
+     * Test method for {@link org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric#getRelevance(org.dllearner.core.owl.Entity, org.dllearner.core.owl.Entity)}.
+     */
+    @Test
+    public void testGetRelevance() {
+        //dbo:Person and dbo:Film
+        Entity entity1 = new NamedClass(DBPEDIA_NS + "Person");
+        Entity entity2 = new NamedClass(DBPEDIA_NS + "Film");
+        double relevance = metric.getRelevance(entity1, entity2);
+        System.out.println(relevance);
+        
+        //dbo:Person and dbo:Animal
+        entity1 = new NamedClass(DBPEDIA_NS + "Person");
+        entity2 = new NamedClass(DBPEDIA_NS + "Animal");
+        relevance = metric.getRelevance(entity1, entity2);
+        System.out.println(relevance);
+        
+        //dbo:Person and dbo:birthPlace
+        entity1 = new NamedClass(DBPEDIA_NS + "Person");
+        entity2 = new ObjectProperty(DBPEDIA_NS + "birthPlace");
+        relevance = metric.getRelevance(entity1, entity2);
+        System.out.println(relevance);
+    }
+    
+    /**
+     * Test method for {@link org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric#getNormalizedRelevance(org.dllearner.core.owl.Entity, org.dllearner.core.owl.Entity)}.
+     */
+    @Test
+    public void testGetNormalizedRelevance() {
+        fail("Not yet implemented");
+    }
+    
+}
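To make the expected behaviour of the metric concrete, here is a minimal, self-contained sketch of PMI and its normalized variant computed from raw document frequencies. The class name, the df-based estimation and the zero-count convention are illustrative assumptions, not the actual PMIRelevanceMetric implementation:

import static java.lang.Math.log;

/** Illustrative PMI/NPMI over document frequencies; not the DL-Learner API. */
public class PMISketch {

    private final long totalDocuments; // corpus size N

    public PMISketch(long totalDocuments) {
        this.totalDocuments = totalDocuments;
    }

    /** PMI(a, b) = log( p(a, b) / (p(a) * p(b)) ), probabilities estimated as df / N. */
    public double pmi(long dfA, long dfB, long dfAB) {
        if (dfA == 0 || dfB == 0 || dfAB == 0) {
            return 0; // PMI is undefined for unseen entities; 0 is one common convention
        }
        double pA = (double) dfA / totalDocuments;
        double pB = (double) dfB / totalDocuments;
        double pAB = (double) dfAB / totalDocuments;
        return log(pAB / (pA * pB));
    }

    /** Normalized PMI in [-1, 1]: NPMI(a, b) = PMI(a, b) / -log p(a, b). */
    public double npmi(long dfA, long dfB, long dfAB) {
        if (dfAB == 0 || dfAB == totalDocuments) {
            return 0; // avoid division by zero at the boundaries
        }
        double pAB = (double) dfAB / totalDocuments;
        return pmi(dfA, dfB, dfAB) / -log(pAB);
    }

    public static void main(String[] args) {
        PMISketch sketch = new PMISketch(1000000);
        // hypothetical counts: df(Person) = 50000, df(Film) = 8000, co-occurrence = 3000
        System.out.println(sketch.pmi(50000, 8000, 3000));  // ~2.01, positive association
        System.out.println(sketch.npmi(50000, 8000, 3000)); // ~0.35
    }
}

The normalization is what testGetNormalizedRelevance (still a fail("Not yet implemented") stub above) would eventually exercise: dividing by -log p(e1, e2) bounds the score to [-1, 1], which makes relevance values comparable across entity pairs.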
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.