From: <ki...@us...> - 2012-09-27 16:33:19
|
Revision: 3853 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3853&view=rev Author: kirdie Date: 2012-09-27 16:33:12 +0000 (Thu, 27 Sep 2012) Log Message: ----------- reintegrated the old approach to the learner2. Modified Paths: -------------- branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java branches/hmm/components-ext/pom.xml branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -31,9 +31,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; - import org.dllearner.utilities.Helper; - import com.hp.hpl.jena.query.ResultSetFactory; import com.hp.hpl.jena.query.ResultSetRewindable; import com.hp.hpl.jena.rdf.model.Model; Modified: branches/hmm/components-ext/pom.xml =================================================================== --- branches/hmm/components-ext/pom.xml 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/pom.xml 2012-09-27 16:33:12 UTC (rev 3853) @@ -34,10 +34,10 @@ <groupId>com.jamonapi</groupId> <artifactId>jamon</artifactId> </dependency> - <dependency> + <!-- <dependency> <groupId>org.aksw.commons</groupId> <artifactId>sparql</artifactId> - </dependency> + </dependency> --> <dependency> <groupId>org.apache.solr</groupId> Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -9,15 +9,20 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import org.apache.commons.collections15.MultiMap; import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; @@ -49,7 +54,6 @@ import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -70,7 +74,6 @@ import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; @@ -83,10 +86,6 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.expr.ExprAggregator; -import com.hp.hpl.jena.sparql.expr.ExprVar; -import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; -import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -95,19 +94,18 @@ * */ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm { - public static boolean useHMM = true; - + private static final boolean USE_HMM = false; + /** synonyms are great but are not used yet by the HMM algorithm. **/ + private static final boolean HMM_USE_SYNONYMS = false; + /** The minimum score of items that are accepted from the Sindice search BOA index. **/ + private static final Double BOA_THRESHOLD = 0.9; enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} private Mode mode = Mode.BEST_QUERY; /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - /** synonyms are great but are not used yet by the HMM algorithm. **/ - private static final boolean CREATE_SYNONYMS = false; - /** The minimum score of items that are accepted from the Sindice search BOA index. **/ - private static final Double BOA_THRESHOLD = 0.9; private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); @@ -403,7 +401,7 @@ logger.debug("Generating SPARQL query templates..."); templateMon.start(); if(multiThreaded){ - templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); + templates = templateGenerator.buildTemplatesMultiThreaded(question,!USE_HMM||HMM_USE_SYNONYMS); } else { templates = templateGenerator.buildTemplates(question); } @@ -420,7 +418,7 @@ } //get the weighted query candidates - generatedQueries = getWeightedSPARQLQueries(templates); + generatedQueries = getWeightedSPARQLQueries(templates,USE_HMM); sparqlQueryCandidates = new ArrayList<WeightedQuery>(); int i = 0; for(WeightedQuery wQ : generatedQueries){ @@ -519,13 +517,15 @@ } - public Set<String> getRelevantKeywords(){ - return relevantKeywords; - } + public Set<String> getRelevantKeywords(){return relevantKeywords;} - // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one - private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates, boolean hmm) { + return hmm?getWeightedSPARQLQueriesWithHMM(templates):getWeightedSPARQLQueriesWithoutHMM(templates); + } + + private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithHMM(Set<Template> templates) + { // for testing for(Template template: templates) { @@ -621,7 +621,7 @@ return null; } - private SortedSet<WeightedQuery> getWeightedSPARQLQueriesOld(Set<Template> templates){ + private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithoutHMM(Set<Template> templates){ logger.debug("Generating SPARQL query candidates..."); Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @@ -1295,6 +1295,7 @@ } return indexResultItems; } + class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ private Slot slot; Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -11,7 +11,6 @@ import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.io.Serializable; -import java.io.StringWriter; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -37,11 +36,11 @@ public class QueryTestData implements Serializable { + private static final long serialVersionUID = 1L; public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>(); - private static final int MAXIMUM_QUESTIONS = Integer.MAX_VALUE; private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); @@ -85,8 +84,9 @@ /** reads test data from a QALD2 benchmark XML file, including questions, queries and answers. * each question needs to have a query but not necessarily an answer. * @param file a QALD benchmark XML file + * @param MAX_NUMBER_OF_QUESTIONS the maximum number of questions read from the file. * @return the test data read from the XML file */ - public static QueryTestData readQaldXml(final File file) + public static QueryTestData readQaldXml(final File file, int MAX_NUMBER_OF_QUESTIONS) { QueryTestData testData = new QueryTestData(); try { @@ -99,7 +99,7 @@ for(int i = 0; i < questionNodes.getLength(); i++) { - if(i>=MAXIMUM_QUESTIONS) break; // TODO: remove later? + if(i>MAX_NUMBER_OF_QUESTIONS) break; String question; String query; Set<String> answers = new HashSet<String>(); Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -55,7 +55,6 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; -import org.dllearner.common.index.HierarchicalIndex; import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; @@ -104,6 +103,7 @@ private static final File evaluationFolder = new File("cache/evaluation"); private static final boolean DBPEDIA_PRETAGGED = true; private static final boolean OXFORD_PRETAGGED = false; + private static final int MAX_NUMBER_OF_QUESTIONS = 10; @Test public void testDBpedia() throws Exception { @@ -120,28 +120,28 @@ test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); } -// /*@Test*/ public void testOxford() throws Exception -// { -// Model model = loadOxfordModel(); -// QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); -// // answers are not included at least in the first query TODO: check, why -// testData.generateAnswers(null, null, model); -// QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); -// newTestData.generateAnswers(null, null, model); -// for(int i : testData.id2Question.keySet()) -// { -// logger.info("Comparing answers for question "+testData.id2Question.get(i)); -// String referenceQuery = testData.id2Query.get(i); -// String newQuery = newTestData.id2Query.get(i); -// if(!referenceQuery.equals(newQuery)) -// { -// logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); -// Collection<String> referenceAnswers = testData.id2Answers.get(i); -// Collection<String> newAnswers = newTestData.id2Answers.get(i); -// if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); -// } -// } -// } + // /*@Test*/ public void testOxford() throws Exception + // { + // Model model = loadOxfordModel(); + // QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); + // // answers are not included at least in the first query TODO: check, why + // testData.generateAnswers(null, null, model); + // QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); + // newTestData.generateAnswers(null, null, model); + // for(int i : testData.id2Question.keySet()) + // { + // logger.info("Comparing answers for question "+testData.id2Question.get(i)); + // String referenceQuery = testData.id2Query.get(i); + // String newQuery = newTestData.id2Query.get(i); + // if(!referenceQuery.equals(newQuery)) + // { + // logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); + // Collection<String> referenceAnswers = testData.id2Answers.get(i); + // Collection<String> newAnswers = newTestData.id2Answers.get(i); + // if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); + // } + // } + // } /** For debugging one question in particular. */ @@ -164,23 +164,23 @@ */ /*@Test*/ public void testSingleQueryDBpedia() { -// Logger.getLogger(Templator.class).setLevel(Level.DEBUG); -// Logger.getLogger(Parser.class).setLevel(Level.DEBUG); -// Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // Logger.getLogger(Templator.class).setLevel(Level.DEBUG); + // Logger.getLogger(Parser.class).setLevel(Level.DEBUG); + // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); // String question = "houses for less than 900000 pounds"; String question = "Give/VB me/PRP all/DT video/JJ games/NNS published/VBN by/IN Mean/NNP Hamster/NNP Software/NNP"; -// String question = "give me all video games published by mean hamster software"; -// String question = "Give me all video games published by Mean Hamster Software"; -// question = new StanfordPartOfSpeechTagger().tag(question); -// System.out.println(question); + // String question = "give me all video games published by mean hamster software"; + // String question = "Give me all video games published by Mean Hamster Software"; + // question = new StanfordPartOfSpeechTagger().tag(question); + // System.out.println(question); -// Model model = loadOxfordModel(); + // Model model = loadOxfordModel(); QueryTestData testData = new QueryTestData(); new LearnQueryCallable(question, 0, testData, dbpediaLiveKnowledgebase, true).call(); logger.info("learned query: "+testData.id2Query.get(0)); } - - /*@Test*/ public void generateXMLOxford() throws IOException + + /*@Test*/ public void generateXMLOxford() throws IOException { boolean ADD_POS_TAGS = true; PartOfSpeechTagger posTagger = null; @@ -192,7 +192,7 @@ for(String line;(line=in.readLine())!=null;) { j++; - // if(j>5) break; // TODO: remove later + if(j>5) break; // TODO: remove later String question = line.replace("question: ", "").trim(); if(ADD_POS_TAGS&&!OXFORD_PRETAGGED) {question = posTagger.tag(question);} if(!line.trim().isEmpty()) {questions.add(question);} @@ -291,7 +291,7 @@ public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException - { + { evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index); generateHTML(title); @@ -318,7 +318,7 @@ logger.info("Old test data not loadable, creating it and exiting."); } learnedTestData.write();*/ - } + } private File generateTestDataIfNecessary(final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException { @@ -335,9 +335,10 @@ } private void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, - Knowledgebase kb, Model model, MappingBasedIndex index) + Knowledgebase kb, Model model, MappingBasedIndex index) { - QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); + + QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML,MAX_NUMBER_OF_QUESTIONS); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); @@ -352,11 +353,6 @@ evaluation.write(); } - private void evaluateAndWrite() - { - - } - /** evaluates a data set against a reference. * @param reference the test data assumed to be correct. needs to contain the answers for all queries. * @param suspect the test data to compare with the reference. @@ -673,8 +669,8 @@ // try {testData.id2Answers.put(i,getUris(endpoint, learnedQuery));} // catch(Exception e) {logger.warn("Error with learned query "+learnedQuery+" for question "+question+" at endpoint "+endpoint+": "+e.getLocalizedMessage());} - long end = System.currentTimeMillis(); - // logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); + // long end = System.currentTimeMillis(); + // logger.trace(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); // logger.info(String.format("Learned queries for %d of %d questions.",successes,id2Question.size())); @@ -779,7 +775,7 @@ // int successfullTestThreadRuns = 0; /** */ - private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; + // private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); @@ -806,7 +802,7 @@ Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); boa_propertiesIndex.setSortField("boa-score"); -// propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); + // propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); MappingBasedIndex mappingIndex= new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-11-02 14:53:03
|
Revision: 3866 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3866&view=rev Author: kirdie Date: 2012-11-02 14:52:52 +0000 (Fri, 02 Nov 2012) Log Message: ----------- merged with trunk Modified Paths: -------------- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/AsymmetricObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyDomainAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyRangeAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointDataPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentDataPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/FunctionalDataPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/FunctionalObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/InverseFunctionalObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/InverseObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/IrreflexiveObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyRangeAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/ReflexiveObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/SubDataPropertyOfAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/SubObjectPropertyOfAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/SymmetricObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/TransitiveObjectPropertyAxiomLearner.java branches/hmm/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java branches/hmm/components-core/src/main/java/org/dllearner/core/ComponentManager.java branches/hmm/components-core/src/main/java/org/dllearner/core/owl/DisjointClassesAxiom.java branches/hmm/components-core/src/main/java/org/dllearner/core/owl/EquivalentObjectPropertiesAxiom.java branches/hmm/components-core/src/main/java/org/dllearner/core/owl/Thing.java branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/Cache.java branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/simple/ABoxQueryGenerator.java branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java branches/hmm/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java branches/hmm/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java branches/hmm/scripts/pom.xml branches/hmm/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluation.java branches/hmm/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java branches/hmm/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java branches/hmm/scripts/src/main/resources/log4j.properties branches/hmm/test/newcomponent/AristotlePosNeg.conf branches/hmm/test/phaenotype/mp-equivalence-axioms-subq.owl Added Paths: ----------- branches/hmm/components-core/src/main/java/org/dllearner/core/owl/GenericDatatypePropertyAssertion.java branches/hmm/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluationMultithreaded.java Removed Paths: ------------- branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/SparqlSimpleExtractor.java Property Changed: ---------------- branches/hmm/ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/ocel/ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java branches/hmm/components-core/src/main/java/org/dllearner/algorithms/ocel/ROLearner2.java branches/hmm/components-core/src/main/java/org/dllearner/kb/aquisitors/LinkedDataTupleAquisitor.java branches/hmm/components-core/src/main/java/org/dllearner/kb/aquisitors/SparqlTupleAquisitor.java branches/hmm/components-core/src/main/java/org/dllearner/kb/aquisitors/SparqlTupleAquisitorImproved.java branches/hmm/components-core/src/main/java/org/dllearner/kb/aquisitors/TupleAquisitor.java branches/hmm/components-core/src/main/java/org/dllearner/kb/extraction/ObjectPropertyNode.java branches/hmm/components-core/src/main/java/org/dllearner/kb/manipulator/Manipulator.java branches/hmm/components-core/src/main/java/org/dllearner/kb/manipulator/Rule.java branches/hmm/components-core/src/main/java/org/dllearner/kb/manipulator/SimpleObjectFilterRule.java branches/hmm/components-core/src/main/java/org/dllearner/kb/manipulator/SimplePredicateFilterRule.java branches/hmm/components-core/src/main/java/org/dllearner/learningproblems/EvaluatedDescriptionClass.java branches/hmm/components-core/src/main/java/org/dllearner/learningproblems/EvaluatedDescriptionPosNeg.java branches/hmm/components-core/src/main/java/org/dllearner/learningproblems/PosNegLPStandard.java branches/hmm/components-core/src/main/java/org/dllearner/learningproblems/PosNegLPStrict.java branches/hmm/components-core/src/main/java/org/dllearner/learningproblems/ScorePosNeg.java branches/hmm/components-core/src/main/java/org/dllearner/utilities/owl/EvaluatedDescriptionPosNegComparator.java branches/hmm/scripts/src/main/java/org/dllearner/scripts/SemanticBibleComparison.java branches/hmm/scripts/src/main/java/org/dllearner/scripts/matching/LGDPoint.java branches/hmm/scripts/src/main/java/org/dllearner/scripts/package-info.java Property changes on: branches/hmm ___________________________________________________________________ Added: svn:mergeinfo + /trunk:3846-3863 Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -47,7 +47,6 @@ import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.learningproblems.AxiomScore; -import org.dllearner.learningproblems.Heuristics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -157,7 +156,8 @@ } if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ - runSPARQL1_1_Mode(); +// runSPARQL1_1_Mode(); + runSingleQueryMode(); } else { runSPARQL1_0_Mode(); } @@ -165,6 +165,38 @@ logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } + private void runSingleQueryMode(){ + //compute the overlap if exist + Map<NamedClass, Integer> class2Overlap = new HashMap<NamedClass, Integer>(); + String query = String.format("SELECT ?type (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s a ?type.} GROUP BY ?type", classToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + NamedClass cls = new NamedClass(qs.getResource("type").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + class2Overlap.put(cls, cnt); + } + //for each property in knowledge base + for(NamedClass cls : allClasses){ + //get the popularity + int otherPopularity = reasoner.getPopularity(cls); + if(otherPopularity == 0){//skip empty properties + continue; + } + //get the overlap + int overlap = class2Overlap.containsKey(cls) ? class2Overlap.get(cls) : 0; + //compute the estimated precision + double precision = accuracy(otherPopularity, overlap); + //compute the estimated recall + double recall = accuracy(popularity, overlap); + //compute the final score + double score = 1 - fMEasure(precision, recall); + + currentlyBestEvaluatedDescriptions.add(new EvaluatedDescription(cls, new AxiomScore(score))); + } + } + private void runSPARQL1_0_Mode(){ Model model = ModelFactory.createDefaultModel(); int limit = 1000; Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -19,9 +19,7 @@ package org.dllearner.algorithms; -import java.net.URL; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -109,11 +107,11 @@ @Override public List<EvaluatedAxiom> getCurrentlyBestEvaluatedAxioms(int nrOfAxioms) { - List<EvaluatedAxiom> axioms = new ArrayList<EvaluatedAxiom>(); + currentlyBestAxioms = new ArrayList<EvaluatedAxiom>(); for(EvaluatedDescription ed : getCurrentlyBestEvaluatedDescriptions(nrOfAxioms)){ - axioms.add(new EvaluatedAxiom(new SubClassAxiom(classToDescribe, ed.getDescription()), new AxiomScore(ed.getAccuracy()))); + currentlyBestAxioms.add(new EvaluatedAxiom(new SubClassAxiom(classToDescribe, ed.getDescription()), new AxiomScore(ed.getAccuracy()))); } - return axioms; + return currentlyBestAxioms; } @Override @@ -145,6 +143,16 @@ } } + if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ + runSingleQueryMode(); + } else { + runSPARQL1_0_Mode(); + } + + logger.info("...finished in {}ms. (Got {} rows)", (System.currentTimeMillis()-startTime), fetchedRows); + } + + private void runSPARQL1_0_Mode(){ Map<Individual, SortedSet<Description>> ind2Types = new HashMap<Individual, SortedSet<Description>>(); int limit = 1000; boolean repeat = true; @@ -153,9 +161,26 @@ createEvaluatedDescriptions(ind2Types); fetchedRows += 1000; } - + } + + private void runSingleQueryMode(){ + int total = reasoner.getPopularity(classToDescribe); - logger.info("...finished in {}ms. (Got {} rows)", (System.currentTimeMillis()-startTime), fetchedRows); + if(total > 0){ + String query = String.format("SELECT ?type (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a <%s>. ?s a ?type} GROUP BY ?type ORDER BY DESC(?cnt)", classToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + if(!qs.get("type").isAnon()){ + NamedClass sup = new NamedClass(qs.getResource("type").getURI()); + int overlap = qs.get("cnt").asLiteral().getInt(); + if(!sup.getURI().equals(Thing.uri) && ! classToDescribe.equals(sup)){//omit owl:Thing and the class to describe itself + currentlyBestEvaluatedDescriptions.add(new EvaluatedDescription(sup, computeScore(total, overlap))); + } + } + } + } } public NamedClass getClassToDescribe() { @@ -234,8 +259,7 @@ } public static void main(String[] args) throws Exception{ - SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), - Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); + SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW()); SPARQLReasoner reasoner = new SPARQLReasoner(ks); reasoner.prepareSubsumptionHierarchy(); @@ -244,7 +268,7 @@ l.setReasoner(reasoner); l.setReturnOnlyNewAxioms(true); - ConfigHelper.configure(l, "maxExecutionTimeInSeconds", 10); + ConfigHelper.configure(l, "maxExecutionTimeInSeconds", 50); l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/SoccerClub")); l.init(); l.start(); Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -21,6 +21,7 @@ import java.io.File; import java.text.DecimalFormat; +import java.util.Collection; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -218,11 +219,11 @@ // configurator = new CELOEConfigurator(this); } -// public static Collection<Class<? extends AbstractLearningProblem>> supportedLearningProblems() { -// Collection<Class<? extends AbstractLearningProblem>> problems = new LinkedList<Class<? extends AbstractLearningProblem>>(); -// problems.add(AbstractLearningProblem.class); -// return problems; -// } + public static Collection<Class<? extends AbstractLearningProblem>> supportedLearningProblems() { + Collection<Class<? extends AbstractLearningProblem>> problems = new LinkedList<Class<? extends AbstractLearningProblem>>(); + problems.add(AbstractLearningProblem.class); + return problems; + } public static String getName() { return "CELOE"; Property changes on: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/ocel ___________________________________________________________________ Deleted: svn:mergeinfo - Property changes on: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java ___________________________________________________________________ Deleted: svn:mergeinfo - Property changes on: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/ocel/ROLearner2.java ___________________________________________________________________ Deleted: svn:mergeinfo - Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/AsymmetricObjectPropertyAxiomLearner.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/AsymmetricObjectPropertyAxiomLearner.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/AsymmetricObjectPropertyAxiomLearner.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -22,6 +22,8 @@ import java.net.URL; import java.util.ArrayList; import java.util.Collections; +import java.util.SortedSet; +import java.util.TreeSet; import org.dllearner.core.AbstractAxiomLearningAlgorithm; import org.dllearner.core.ComponentAnn; @@ -29,12 +31,15 @@ import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.ObjectPropertyEditor; import org.dllearner.core.owl.AsymmetricObjectPropertyAxiom; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.core.owl.ObjectProperty; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; @@ -53,6 +58,9 @@ public AsymmetricObjectPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o. FILTER NOT EXISTS{?o ?p ?s}}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o. ?o ?p ?s}"); + } public ObjectProperty getPropertyToDescribe() { @@ -88,18 +96,18 @@ } private void runSPARQL1_0_Mode(){ - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s <%s> ?o.} WHERE {?s <%s> ?o} LIMIT %d OFFSET %d"; String query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); while(!terminationCriteriaSatisfied() && newModel.size() != 0){ - model.add(newModel); + workingModel.add(newModel); // get number of instances of s with <s p o> query = "SELECT (COUNT(*) AS ?total) WHERE {?s <%s> ?o.}"; query = query.replace("%s", propertyToDescribe.getURI().toString()); - ResultSet rs = executeSelectQuery(query, model); + ResultSet rs = executeSelectQuery(query, workingModel); QuerySolution qs; int total = 0; while(rs.hasNext()){ @@ -108,7 +116,7 @@ } query = "SELECT (COUNT(*) AS ?symmetric) WHERE {?s <%s> ?o. ?o <%s> ?s.}"; query = query.replace("%s", propertyToDescribe.getURI().toString()); - rs = executeSelectQuery(query, model); + rs = executeSelectQuery(query, workingModel); int symmetric = 0; while(rs.hasNext()){ qs = rs.next(); @@ -127,26 +135,57 @@ } } - private void runSPARQL1_1_Mode(){ - String query = "SELECT (COUNT(*) AS ?total) WHERE {?s <%s> ?o.}"; - query = query.replace("%s", propertyToDescribe.getURI().toString()); - ResultSet rs = executeSelectQuery(query); - QuerySolution qs; - int total = 0; - while(rs.hasNext()){ - qs = rs.next(); - total = qs.getLiteral("total").getInt(); + @Override + public SortedSet<KBElement> getPositiveExamples(EvaluatedAxiom axiom) { + if(workingModel != null){ + SortedSet<KBElement> allExamples = new TreeSet<KBElement>(); + ParameterizedSparqlString query = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o.}"); + query.setIri("p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query.toString(), workingModel); + while(rs.hasNext()){ + allExamples.add(new Individual(rs.next().get("s").asResource().getURI())); + } + SortedSet<KBElement> negExamples = getNegativeExamples(axiom); + + SortedSet<KBElement> posExamples = new TreeSet<KBElement>(allExamples); + posExamples.removeAll(negExamples); + + + return posExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); } - query = "SELECT (COUNT(*) AS ?symmetric) WHERE {?s <%s> ?o. ?o <%s> ?s.}"; - query = query.replace("%s", propertyToDescribe.getURI().toString()); - rs = executeSelectQuery(query); - int symmetric = 0; - while(rs.hasNext()){ - qs = rs.next(); - symmetric = qs.getLiteral("symmetric").getInt(); + } + + @Override + public SortedSet<KBElement> getNegativeExamples(EvaluatedAxiom axiom) { + if(workingModel != null){ + SortedSet<KBElement> negExamples = new TreeSet<KBElement>(); + ParameterizedSparqlString query = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o.?o ?p ?s}"); + query.setIri("p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query.toString(), workingModel); + while(rs.hasNext()){ + negExamples.add(new Individual(rs.next().get("s").asResource().getURI())); + } + + return negExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); } - int asymmetric = total - symmetric; + } + + private void runSPARQL1_1_Mode(){ + int total = reasoner.getPopularity(propertyToDescribe); + if(total > 0){ + int asymmetric = 0; + String query = "SELECT (COUNT(*) AS ?asymmetric) WHERE {?s <%s> ?o. FILTER NOT EXISTS{?o <%s> ?s.}}"; + query = query.replace("%s", propertyToDescribe.getURI().toString()); + ResultSet rs = executeSelectQuery(query); + if(rs.hasNext()){ + asymmetric = rs.next().getLiteral("asymmetric").getInt(); + } + currentlyBestAxioms.add(new EvaluatedAxiom(new AsymmetricObjectPropertyAxiom(propertyToDescribe), computeScore(total, asymmetric), declaredAsymmetric)); } Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyDomainAxiomLearner.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyDomainAxiomLearner.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyDomainAxiomLearner.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -20,14 +20,7 @@ package org.dllearner.algorithms.properties; import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; import org.apache.log4j.ConsoleAppender; import org.apache.log4j.Level; @@ -40,19 +33,26 @@ import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.DatatypePropertyDomainAxiom; import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Thing; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.reasoning.SPARQLReasoner; -import org.semanticweb.owlapi.model.IRI; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; @ComponentAnn(name="dataproperty domain axiom learner", shortName="dpldomain", version=0.1) public class DataPropertyDomainAxiomLearner extends AbstractAxiomLearningAlgorithm { @@ -62,14 +62,10 @@ @ConfigOption(name="propertyToDescribe", description="", propertyEditorClass=DataPropertyEditor.class) private DatatypeProperty propertyToDescribe; - private static final ParameterizedSparqlString singleQueryTemplate = new ParameterizedSparqlString("SELECT ?type (COUNT(DISTINCT ?ind) AS ?cnt) WHERE {?ind <%s> ?o. ?ind a ?type.}"); - - private Map<Individual, SortedSet<Description>> individual2Types; - public DataPropertyDomainAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; - super.iterativeQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?ind ?type WHERE {?ind ?p ?o. ?ind a ?type.}"); - + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s a ?type}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o. FILTER NOT EXISTS{?s a ?type}}"); } public DatatypeProperty getPropertyToDescribe() { @@ -82,7 +78,6 @@ @Override public void start() { - iterativeQueryTemplate.setIri("p", propertyToDescribe.getName()); logger.info("Start learning..."); startTime = System.currentTimeMillis(); fetchedRows = 0; @@ -104,81 +99,108 @@ } } } - - runIterativeQueryMode(); + if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ + runSingleQueryMode(); + } else { + runSPARQL1_0_Mode(); + } logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } private void runSingleQueryMode(){ - } - - private void runIterativeQueryMode(){ - individual2Types = new HashMap<Individual, SortedSet<Description>>(); - while(!terminationCriteriaSatisfied() && !fullDataLoaded){ - ResultSet rs = fetchData(); - processData(rs); - buildEvaluatedAxioms(); + String query = String.format("SELECT (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s <%s> ?o.}", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + int nrOfSubjects = rs.next().getLiteral("cnt").getInt(); + + query = String.format("SELECT ?type (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s <%s> ?o. ?s a ?type.} GROUP BY ?type", propertyToDescribe.getName()); + rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + NamedClass domain = new NamedClass(qs.getResource("type").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + if(!domain.getURI().equals(Thing.uri)){ + currentlyBestAxioms.add(new EvaluatedAxiom(new DatatypePropertyDomainAxiom(propertyToDescribe, domain), computeScore(nrOfSubjects, cnt))); + } } } - private void processData(ResultSet rs){ - QuerySolution qs; - Individual ind; - Description type; - SortedSet<Description> types; - int cnt = 0; - while(rs.hasNext()){ - cnt++; - qs = rs.next(); - if(qs.get("type").isURIResource()){ - types = new TreeSet<Description>(); - ind = new Individual(qs.getResource("ind").getURI()); - type = new NamedClass(qs.getResource("type").getURI()); - types.add(type); - if(reasoner.isPrepared()){ - if(reasoner.getClassHierarchy().contains(type)){ - types.addAll(reasoner.getClassHierarchy().getSuperClasses(type)); + private void runSPARQL1_0_Mode() { + workingModel = ModelFactory.createDefaultModel(); + int limit = 1000; + int offset = 0; + String baseQuery = "CONSTRUCT {?s a ?type.} WHERE {?s <%s> ?o. ?s a ?type.} LIMIT %d OFFSET %d"; + String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); + Model newModel = executeConstructQuery(query); + while(!terminationCriteriaSatisfied() && newModel.size() != 0){ + workingModel.add(newModel); + // get number of distinct subjects + query = "SELECT (COUNT(DISTINCT ?s) AS ?all) WHERE {?s a ?type.}"; + ResultSet rs = executeSelectQuery(query, workingModel); + QuerySolution qs; + int all = 1; + while (rs.hasNext()) { + qs = rs.next(); + all = qs.getLiteral("all").getInt(); + } + + // get class and number of instances + query = "SELECT ?type (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a ?type.} GROUP BY ?type ORDER BY DESC(?cnt)"; + rs = executeSelectQuery(query, workingModel); + + if (all > 0) { + currentlyBestAxioms.clear(); + while(rs.hasNext()){ + qs = rs.next(); + Resource type = qs.get("type").asResource(); + //omit owl:Thing as trivial domain + if(type.equals(OWL.Thing)){ + continue; } + currentlyBestAxioms.add(new EvaluatedAxiom( + new DatatypePropertyDomainAxiom(propertyToDescribe, new NamedClass(type.getURI())), + computeScore(all, qs.get("cnt").asLiteral().getInt()))); } - addToMap(individual2Types, ind, types); + } + offset += limit; + query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); + newModel = executeConstructQuery(query); + fillWithInference(newModel); } - lastRowCount = cnt; } - - private void buildEvaluatedAxioms(){ - List<EvaluatedAxiom> axioms = new ArrayList<EvaluatedAxiom>(); - Map<Description, Integer> result = new HashMap<Description, Integer>(); - for(Entry<Individual, SortedSet<Description>> entry : individual2Types.entrySet()){ - for(Description nc : entry.getValue()){ - Integer cnt = result.get(nc); - if(cnt == null){ - cnt = Integer.valueOf(1); - } else { - cnt = Integer.valueOf(cnt + 1); + + private void fillWithInference(Model model){ + Model additionalModel = ModelFactory.createDefaultModel(); + if(reasoner.isPrepared()){ + for(StmtIterator iter = model.listStatements(null, RDF.type, (RDFNode)null); iter.hasNext();){ + Statement st = iter.next(); + Description cls = new NamedClass(st.getObject().asResource().getURI()); + if(reasoner.getClassHierarchy().contains(cls)){ + for(Description sup : reasoner.getClassHierarchy().getSuperClasses(cls)){ + additionalModel.add(st.getSubject(), st.getPredicate(), model.createResource(sup.toString())); + } } - result.put(nc, cnt); } } - - //omit owl:Thing - result.remove(new NamedClass(Thing.instance.getURI())); - - EvaluatedAxiom evalAxiom; - int total = individual2Types.keySet().size(); - for(Entry<Description, Integer> entry : sortByValues(result)){ - evalAxiom = new EvaluatedAxiom(new DatatypePropertyDomainAxiom(propertyToDescribe, entry.getKey()), - computeScore(total, entry.getValue())); - if(existingAxioms.contains(evalAxiom.getAxiom())){ - evalAxiom.setAsserted(true); - } - axioms.add(evalAxiom); - } - - currentlyBestAxioms = axioms; + model.add(additionalModel); } + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + DatatypePropertyDomainAxiom axiom = (DatatypePropertyDomainAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("type", axiom.getDomain().toString()); + return super.getPositiveExamples(evAxiom); + } + + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + DatatypePropertyDomainAxiom axiom = (DatatypePropertyDomainAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("type", axiom.getDomain().toString()); + return super.getNegativeExamples(evAxiom); + } + public static void main(String[] args) throws Exception{ org.apache.log4j.Logger.getRootLogger().addAppender(new ConsoleAppender(new SimpleLayout())); org.apache.log4j.Logger.getRootLogger().setLevel(Level.INFO); Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyRangeAxiomLearner.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyRangeAxiomLearner.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyRangeAxiomLearner.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -20,12 +20,7 @@ package org.dllearner.algorithms.properties; import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.Set; import org.dllearner.core.AbstractAxiomLearningAlgorithm; import org.dllearner.core.ComponentAnn; @@ -36,15 +31,19 @@ import org.dllearner.core.owl.Datatype; import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.DatatypePropertyRangeAxiom; -import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.reasoning.SPARQLReasoner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Resource; @ComponentAnn(name="dataproperty range learner", shortName="dblrange", version=0.1) public class DataPropertyRangeAxiomLearner extends AbstractAxiomLearningAlgorithm { @@ -56,6 +55,9 @@ public DataPropertyRangeAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?o ?p ?s. FILTER (DATATYPE(?s) = ?dt)}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?o ?p ?s. FILTER (DATATYPE(?s) != ?dt)}"); + } public DatatypeProperty getPropertyToDescribe() { @@ -72,84 +74,103 @@ startTime = System.currentTimeMillis(); fetchedRows = 0; currentlyBestAxioms = new ArrayList<EvaluatedAxiom>(); - //get existing range - DataRange existingRange = reasoner.getRange(propertyToDescribe); - if(existingRange != null){ - existingAxioms.add(new DatatypePropertyRangeAxiom(propertyToDescribe, existingRange)); - logger.debug("Existing range: " + existingRange); - } - //get objects with datatypes - Map<Individual, SortedSet<Datatype>> individual2Datatypes = new HashMap<Individual, SortedSet<Datatype>>(); - boolean repeat = true; - int limit = 1000; - while(!terminationCriteriaSatisfied() && repeat){ - int ret = addIndividualsWithTypes(individual2Datatypes, limit, fetchedRows); - currentlyBestAxioms = buildEvaluatedAxioms(individual2Datatypes); - fetchedRows += 1000; - repeat = (ret == limit); - } - logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); - } - - private List<EvaluatedAxiom> buildEvaluatedAxioms(Map<Individual, SortedSet<Datatype>> individual2Types){ - List<EvaluatedAxiom> axioms = new ArrayList<EvaluatedAxiom>(); - Map<Datatype, Integer> result = new HashMap<Datatype, Integer>(); - for(Entry<Individual, SortedSet<Datatype>> entry : individual2Types.entrySet()){ - for(Datatype nc : entry.getValue()){ - Integer cnt = result.get(nc); - if(cnt == null){ - cnt = Integer.valueOf(1); - } else { - cnt = Integer.valueOf(cnt + 1); - } - result.put(nc, cnt); + if(returnOnlyNewAxioms){ + //get existing ranges + DataRange existingRange = reasoner.getRange(propertyToDescribe); + if(existingRange != null){ + existingAxioms.add(new DatatypePropertyRangeAxiom(propertyToDescribe, existingRange)); } } - EvaluatedAxiom evalAxiom; - int total = individual2Types.keySet().size(); - for(Entry<Datatype, Integer> entry : sortByValues(result)){ - evalAxiom = new EvaluatedAxiom(new DatatypePropertyRangeAxiom(propertyToDescribe, entry.getKey()), - computeScore(total, entry.getValue())); - if(existingAxioms.contains(evalAxiom.getAxiom())){ - evalAxiom.setAsserted(true); - } - axioms.add(evalAxiom); + if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ + runSingleQueryMode(); + } else { + runSPARQL1_0_Mode(); } - - return axioms; + logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } - - private int addIndividualsWithTypes(Map<Individual, SortedSet<Datatype>> ind2Datatypes, int limit, int offset){ - String query = String.format("SELECT ?ind (DATATYPE(?val) AS ?datatype) WHERE {?ind <%s> ?val.} LIMIT %d OFFSET %d", propertyToDescribe.getName(), limit, offset); + private void runSingleQueryMode(){ + String query = String.format("SELECT (COUNT(DISTINCT ?o) AS ?cnt) WHERE {?s <%s> ?o.}", propertyToDescribe.getName()); ResultSet rs = executeSelectQuery(query); - Individual ind; - Datatype newType; + int nrOfSubjects = rs.next().getLiteral("cnt").getInt(); + + query = String.format("SELECT (DATATYPE(?o) AS ?type) (COUNT(DISTINCT ?o) AS ?cnt) WHERE {?s <%s> ?o.} GROUP BY DATATYPE(?o)", propertyToDescribe.getName()); + rs = executeSelectQuery(query); QuerySolution qs; - SortedSet<Datatype> types; - int cnt = 0; while(rs.hasNext()){ - cnt++; - newType = null; qs = rs.next(); - ind = new Individual(qs.getResource("ind").getURI()); - if(qs.getResource("datatype") != null){ - newType = new Datatype(qs.getResource("datatype").getURI()); - types = ind2Datatypes.get(ind); - if(types == null){ - types = new TreeSet<Datatype>(); - ind2Datatypes.put(ind, types); + if(qs.get("type") != null){ + DataRange range = new Datatype(qs.get("type").asLiteral().getLexicalForm()); + int cnt = qs.getLiteral("cnt").getInt(); + currentlyBestAxioms.add(new EvaluatedAxiom(new DatatypePropertyRangeAxiom(propertyToDescribe, range), computeScore(nrOfSubjects, cnt))); + } + } + } + + private void runSPARQL1_0_Mode() { + workingModel = ModelFactory.createDefaultModel(); + int limit = 1000; + int offset = 0; + String baseQuery = "CONSTRUCT {?s <%s> ?o} WHERE {?s <%s> ?o.} LIMIT %d OFFSET %d"; + String query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); + Model newModel = executeConstructQuery(query); + while(!terminationCriteriaSatisfied() && newModel.size() != 0){ + workingModel.add(newModel); + // get number of distinct subjects + query = "SELECT (COUNT(?o) AS ?all) WHERE {?s ?p ?o.}"; + ResultSet rs = executeSelectQuery(query, workingModel); + QuerySolution qs; + int all = 1; + while (rs.hasNext()) { + qs = rs.next(); + all = qs.getLiteral("all").getInt(); + } + + // get class and number of instances +// query = "SELECT (DATATYPE(?o) AS ?dt) (COUNT(?o) AS ?cnt) WHERE{?s ?p ?o} GROUP BY DATATYPE(?o) ORDER BY DESC(?cnt)"; + query = "SELECT ?dt (COUNT(?o) AS ?cnt) " + + "WHERE {" + + "{" + + "SELECT (DATATYPE(?o) AS ?dt) ?o WHERE{?s ?p ?o}" + + "}" + + "}" + + "GROUP BY ?dt"; + rs = executeSelectQuery(query, workingModel); + + if (all > 0) { + currentlyBestAxioms.clear(); + while(rs.hasNext()){ + qs = rs.next(); + Resource type = qs.get("dt").asResource(); + currentlyBestAxioms.add(new EvaluatedAxiom( + new DatatypePropertyRangeAxiom(propertyToDescribe, new Datatype(type.getURI())), + computeScore(all, qs.get("cnt").asLiteral().getInt()))); } - types.add(newType); + } - + offset += limit; + query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); + newModel = executeConstructQuery(query); } - return cnt; } + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + DatatypePropertyRangeAxiom axiom = (DatatypePropertyRangeAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("dt", axiom.getRange().toString()); + return super.getPositiveExamples(evAxiom); + } + + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + DatatypePropertyRangeAxiom axiom = (DatatypePropertyRangeAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("dt", axiom.getRange().toString()); + return super.getNegativeExamples(evAxiom); + } + public static void main(String[] args) throws Exception{ SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW()); Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointDataPropertyAxiomLearner.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointDataPropertyAxiomLearner.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointDataPropertyAxiomLearner.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -32,8 +33,13 @@ import org.dllearner.core.EvaluatedAxiom; import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.DataPropertyEditor; +import org.dllearner.core.owl.Datatype; import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.DisjointDatatypePropertyAxiom; +import org.dllearner.core.owl.GenericDatatypePropertyAssertion; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; +import org.dllearner.core.owl.ObjectProperty; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SPARQLTasks; @@ -42,8 +48,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.RDFNode; @@ -64,6 +72,9 @@ public DisjointDataPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p1 ?o. FILTER NOT EXISTS{?s ?p ?o}}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p ?o. }"); } public DatatypeProperty getPropertyToDescribe() { @@ -94,7 +105,8 @@ allDataProperties.remove(propertyToDescribe); if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ - runSPARQL1_1_Mode(); +// runSPARQL1_1_Mode(); + runSingleQueryMode(); } else { runSPARQL1_0_Mode(); } @@ -102,21 +114,52 @@ logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } + private void runSingleQueryMode(){ + //compute the overlap if exist + Map<ObjectProperty, Integer> property2Overlap = new HashMap<ObjectProperty, Integer>(); + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?s ?p ?o.} GROUP BY ?p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + ObjectProperty prop = new ObjectProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + property2Overlap.put(prop, cnt); + } + //for each property in knowledge base + for(DatatypeProperty p : allDataProperties){ + //get the popularity + int otherPopularity = reasoner.getPopularity(p); + if(otherPopularity == 0){//skip empty properties + continue; + } + //get the overlap + int overlap = property2Overlap.containsKey(p) ? property2Overlap.get(p) : 0; + //compute the estimated precision + double precision = accuracy(otherPopularity, overlap); + //compute the estimated recall + double recall = accuracy(popularity, overlap); + //compute the final score + double score = 1 - fMEasure(precision, recall); + + currentlyBestAxioms.add(new EvaluatedAxiom(new DisjointDatatypePropertyAxiom(propertyToDescribe, p), new AxiomScore(score))); + } + } + private void runSPARQL1_0_Mode() { - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s ?p ?o.} WHERE {?s <%s> ?o. ?s ?p ?o.} LIMIT %d OFFSET %d"; + String countQuery = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); Map<DatatypeProperty, Integer> result = new HashMap<DatatypeProperty, Integer>(); while(!terminationCriteriaSatisfied() && newModel.size() != 0){ - model.add(newModel); - query = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; - + workingModel.add(newModel); DatatypeProperty prop; Integer oldCnt; - ResultSet rs = executeSelectQuery(query, model); + ResultSet rs = executeSelectQuery(countQuery, workingModel); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); @@ -135,7 +178,7 @@ offset += limit; - query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); + query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); newModel = executeConstructQuery(query); } @@ -253,6 +296,56 @@ return axioms; } + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + DisjointDatatypePropertyAxiom axiom = (DisjointDatatypePropertyAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("p", axiom.getDisjointRole().getName()); + if(workingModel != null){ + Set<KBElement> posExamples = new HashSet<KBElement>(); + + ResultSet rs = executeSelectQuery(posExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Literal object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = qs.getLiteral("o"); + posExamples.add(new GenericDatatypePropertyAssertion( + propertyToDescribe, subject, object.getLexicalForm(), new Datatype(object.getDatatypeURI()))); + } + + return posExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + DisjointDatatypePropertyAxiom axiom = (DisjointDatatypePropertyAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("p", axiom.getDisjointRole().getName()); + if(workingModel != null){ + Set<KBElement> negExamples = new TreeSet<KBElement>(); + + ResultSet rs = executeSelectQuery(negExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Literal object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = qs.getLiteral("o"); + negExamples.add(new GenericDatatypePropertyAssertion( + propertyToDescribe, subject, object.getLexicalForm(), new Datatype(object.getDatatypeURI()))); + } + + return negExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + public static void main(String[] args) throws Exception{ DisjointDataPropertyAxiomLearner l = new DisjointDataPropertyAxiomLearner(new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW())); l.setPropertyToDescribe(new DatatypeProperty("http://dbpedia.org/ontology/accessDate")); Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -33,7 +34,10 @@ import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.ObjectPropertyEditor; import org.dllearner.core.owl.DisjointObjectPropertyAxiom; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.ObjectPropertyAssertion; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SPARQLTasks; @@ -42,6 +46,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; @@ -64,6 +69,9 @@ public DisjointObjectPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p1 ?o. FILTER NOT EXISTS{?s ?p ?o}}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p ?o. }"); } public ObjectProperty getPropertyToDescribe() { @@ -95,7 +103,8 @@ allObjectProperties.remove(propertyToDescribe); if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ - runSPARQL1_1_Mode(); +// runSPARQL1_1_Mode(); + runSingleQueryMode(); } else { runSPARQL1_0_Mode(); } @@ -103,21 +112,53 @@ logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } + private void runSingleQueryMode(){ + //compute the overlap if exist + Map<ObjectProperty, Integer> property2Overlap = new HashMap<ObjectProperty, Integer>(); + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?s ?p ?o.} GROUP BY ?p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + ObjectProperty prop = new ObjectProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + property2Overlap.put(prop, cnt); + } + //for each property in knowledge base + for(ObjectProperty p : allObjectProperties){ + //get the popularity + int otherPopularity = reasoner.getPopularity(p); + if(otherPopularity == 0){//skip empty properties + continue; + } + //get the overlap + int overlap = property2Overlap.containsKey(p) ? property2Overlap.get(p) : 0; + //compute the estimated precision + double precision = accuracy(otherPopularity, overlap); + //compute the estimated recall + double recall = accuracy(popularity, overlap); + //compute the final score + double score = 1 - fMEasure(precision, recall); + + currentlyBestAxioms.add(new EvaluatedAxiom(new DisjointObjectPropertyAxiom(propertyToDescribe, p), new AxiomScore(score))); + } + } + private void runSPARQL1_0_Mode() { - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s ?p ?o.} WHERE {?s <%s> ?o. ?s ?p ?o.} LIMIT %d OFFSET %d"; + String countQuery = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); Map<ObjectProperty, Integer> result = new HashMap<ObjectProperty, Integer>(); while(!terminationCriteriaSatisfied() && newModel.size() != 0){ - model.add(newModel); - query = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; + workingModel.add(newModel); ObjectProperty prop; Integer oldCnt; - ResultSet rs = executeSelectQuery(query, model); + ResultSet rs = executeSelectQuery(countQuery, workingModel); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); @@ -134,7 +175,6 @@ currentlyBestAxioms = buildAxioms(result, allObjectProperties); } - offset += limit; query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); newModel = executeConstructQuery(query); @@ -247,6 +287,54 @@ return axioms; } + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + DisjointObjectPropertyAxiom axiom = (DisjointObjectPropertyAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("p", axiom.getDisjointRole().getName()); + if(workingModel != null){ + Set<KBElement> posExamples = new HashSet<KBElement>(); + + ResultSet rs = executeSelectQuery(posExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Individual object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = new Individual(qs.getResource("o").getURI()); + posExamples.add(new ObjectPropertyAssertion(propertyToDescribe, subject, object)); + } + + return posExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + DisjointObjectPropertyAxiom axiom = (DisjointObjectPropertyAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("p", axiom.getDisjointRole().getName()); + if(workingModel != null){ + Set<KBElement> negExamples = new TreeSet<KBElement>(); + + ResultSet rs = executeSelectQuery(negExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Individual object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = new Individual(qs.getResource("o").getURI()); + negExamples.add(new ObjectPropertyAssertion(propertyToDescribe, subject, object)); + } + + return negExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + public static void main(String[] args) throws Exception{ SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); // endpoint = new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); Modified: branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentDataPropertyAxiomLearner.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentDataPropertyAxiomLearner.java 2012-11-02 14:25:26 UTC (rev 3865) +++ branches/hmm/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentDataPropertyAxiomLearner.java 2012-11-02 14:52:52 UTC (rev 3866) @@ -20,10 +20,8 @@ package org.dllearner.algorithms.properties; import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; +import java.util.HashSet; +import java.util.Set; import java.util.SortedSet; import org.dllearner.core.AbstractAxiomLearningAlgorithm; @@ -31,16 +29,21 @@ import org.dllearner.core.EvaluatedAxiom; import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.DataPropertyEditor; +import org.dllearner.core.owl.Datatype; import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.EquivalentDatatypePropertiesAxiom; +import org.dllearner.core.owl.GenericDatatypePropertyAssertion; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.learningproblems.AxiomScore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; @@ -54,6 +57,9 @@ public EquivalentDataPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p ?o}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p1 ?o. FILTER NOT EXISTS{?s ?p ?o}}"); + } public DatatypeProperty getPropertyToDescribe() { @@ -70,115 +76,131 @@ startTime = System.currentTimeMillis(); fetchedRows = 0; currentlyBestAxioms = new ArrayList<EvaluatedAxiom>(); - //get existing super properties - SortedSet<DatatypeProperty> existingSuperProperties = reasoner.getSuperProperties(propertyToDescribe); - logger.debug("Existing super properties: " + existingSuperProperties); + if(returnOnlyNewAxioms){ + //get existing domains + SortedSet<DatatypeProperty> existingSuperProperties = reasoner.getEquivalentProperties(propertyToDescribe); + if(existingSuperProperties != null && !existingSuperProperties.isEmpty()){ + for(DatatypeProperty supProp : existingSuperProperties){ + existingAxioms.add(new EquivalentDatatypePropertiesAxiom(propertyToDescribe, supProp)); + } + } + } + if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ - runSPARQL1_1_Mode(); + runSingleQueryMode(); } else { runSPARQL1_0_Mode(); } - logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } + private void runSingleQueryMode(){ + int total = reasoner.getPopularity(propertyToDescribe); + + if(total > 0){ + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?s ?p ?o.} GROUP BY ?p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + DatatypeProperty prop = new DatatypeProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + if(!prop.equals(propertyToDescribe)){ + currentlyBestAxioms.add(new EvaluatedAxiom(new EquivalentDatatypePropertiesAxiom(propertyToDescribe, prop), computeScore(total, cnt))); + + } + } + } + } + private void runSPARQL1_0_Mode() { - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s ?p ?o.} WHERE {?s <%s> ?o. ?s ?p ?o.} LIMIT %d OFFSET %d"; String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); - Map<DatatypeProperty, Integer> result = new HashMap<DatatypeProperty, Integer>(); while(!terminationCriteriaSatisfied() && newModel.size() != 0){ - model.add(newModel); - query = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; + workingModel.add(newModel); + // get number of triples + int all = (int)workingModel.size(); - DatatypeProperty prop; - Integer oldCnt; - ResultSet rs = executeSelectQuery(query, model); - QuerySolution qs; - while(rs.hasNext()){ - qs = rs.next(); - prop = new DatatypeProperty(qs.getResource("p").getURI()); - int newCnt = qs.getLiteral("count").getInt(); - oldCnt = result.get(prop); - if(oldCnt == null){ - oldCnt = Integer.valueOf(newCnt); + if (all > 0) { + // get class and number of instances + query = "SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s ?p ?o.} GROUP BY ?p ORDER BY DESC(?cnt)"; + ResultSet rs = executeSelectQuery(query, workingModel); + + currentlyBestAxioms.clear(); + QuerySolution qs; + DatatypeProperty prop; + while(rs.hasNext()){ + qs = rs.next(); + prop = new DatatypeProperty(qs.get("p").asResource().getURI()); + //omit property to describe as it is trivial + if(prop.equals(propertyToDescribe)){ + continue; + } + currentlyBestAxioms.add(new EvaluatedAxiom( + new EquivalentDatatypePropertiesAxiom(propertyToDescribe, prop), + computeScore(all, qs.get("cnt").asLiteral().getInt()))); } - result.put(prop, oldCnt); - qs.getLiteral("count").getInt(); + } - if(!result.isEmpty()){ - currentlyBestAxioms = buildAxioms(result); - } - - offset += limit; query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); newModel = executeConstructQuery(query); } - } - private void runSPARQL1_1_Mode() { - // get subjects with types - int limit = 1000; - int offset = 0; - String queryTemplate = "PREFIX owl: <http://www.w3.org/2002/07/owl#> SELECT ?p COUNT(?s) AS ?count WHERE {?s ?p ?o.?p a owl:DatatypeProperty." - + "{SELECT ?s ?o WHERE {?s <%s> ?o.} LIMIT %d OFFSET %d}" + "}"; - String query; - Map<DatatypeProperty, Integer> result = new HashMap<DatatypeProperty, Integer>(); - DatatypeProperty prop; - Integer oldCnt; - boolean repeat = true; - - while (!terminationCriteriaSatisfied() && repeat) { - query = String.format(queryTemplate, propertyToDescribe, limit, - offset); - ResultSet rs = executeSelectQuery(query); + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom e... [truncated message content] |
From: <ki...@us...> - 2012-11-09 17:15:13
|
Revision: 3872 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3872&view=rev Author: kirdie Date: 2012-11-09 17:15:06 +0000 (Fri, 09 Nov 2012) Log Message: ----------- new test data is now in and benchmark bug fixed but there is still a timeout issue when creating the benchmark xml file out of the oxford evaluation queries. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Property Changed: ---------------- branches/hmm/ Property changes on: branches/hmm ___________________________________________________________________ Modified: svn:mergeinfo - /trunk:3846-3863 + /trunk:3845-3871 Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-09 15:06:31 UTC (rev 3871) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-09 17:15:06 UTC (rev 3872) @@ -1174,7 +1174,7 @@ learnedPos++; List<String> results; try { - logger.debug("Testing query:\n" + query); + logger.trace("Testing query:\n" + query); com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); q.setLimit(1); ResultSet rs = executeSelect(q.toString()); @@ -1217,7 +1217,7 @@ } else if(queryType == SPARQL_QueryType.ASK){ for(WeightedQuery query : queries){ learnedPos++; - logger.debug("Testing query:\n" + query); + logger.trace("Testing query:\n" + query); boolean result = executeAskQuery(query.getQuery().toString()); learnedSPARQLQueries.add(query); // if(stopIfQueryResultNotEmpty && result){ Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-09 15:06:31 UTC (rev 3871) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-09 17:15:06 UTC (rev 3872) @@ -115,15 +115,15 @@ // problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test { - protected static final boolean USE_HMM = true; + protected static final boolean USE_HMM = false; protected static final File evaluationFolder = new File("cache/evaluation"); protected static final boolean DBPEDIA_PRETAGGED = true; protected static final boolean OXFORD_PRETAGGED = false; - protected static final int MAX_NUMBER_OF_QUESTIONS = 100; + protected static final int MAX_NUMBER_OF_QUESTIONS = Integer.MAX_VALUE; protected static final boolean WHITELIST_ONLY = false; protected static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); protected static final boolean GENERATE_HTML_ONLY = false; - protected static final int MAX_THREADS = 10; + protected static final int MAX_THREADS = 1; /*@Test*/ public void testDBpedia() throws Exception { @@ -252,7 +252,7 @@ out.close(); } - @Test public void testOxford() throws Exception + /*@Test*/ public void testOxford() throws Exception { File file = new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex(),OXFORD_PRETAGGED); @@ -318,19 +318,16 @@ logger.info("learned query: "+testData.id2Query.get(0)); } - /*@Test*/ @SuppressWarnings("null") public void generateXMLOxford() throws IOException - { + @Test @SuppressWarnings("null") public void generateXMLOxford() throws IOException + { boolean ADD_POS_TAGS = true; PartOfSpeechTagger posTagger = null; if(ADD_POS_TAGS) {posTagger=new StanfordPartOfSpeechTagger();} Model model = loadOxfordModel(); List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); - int j=0; for(String line;(line=in.readLine())!=null;) { - j++; - if(j>5) break; // TODO: remove later String question = line.replace("question: ", "").trim(); if(ADD_POS_TAGS&&!OXFORD_PRETAGGED) {question = posTagger.tag(question);} if(!line.trim().isEmpty()) {questions.add(question);} @@ -340,9 +337,13 @@ Iterator<String> it = questions.iterator(); for(int i=0;i<questions.size();i++) {id2Question.put(i, it.next());} MappingBasedIndex mappingIndex= getOxfordMappingIndex(); + logger.info("generating sparql queries for questions"); QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); + logger.info("generating answers for sparql queries"); testData.generateAnswers(null, null, model); + logger.info("writing test data to benchmark file"); testData.writeQaldXml(new File("log/test.xml")); + logger.info("finished generating oxford benchmark file, "+testData.id2Question.values().size()+"questions, "+testData.id2Query.size()+" SPARQL queries, "+testData.id2Answers.size()+" answers."); } public static MappingBasedIndex getOxfordMappingIndex() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |