From: <ki...@us...> - 2012-09-27 16:33:19
|
Revision: 3853 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3853&view=rev Author: kirdie Date: 2012-09-27 16:33:12 +0000 (Thu, 27 Sep 2012) Log Message: ----------- reintegrated the old approach to the learner2. Modified Paths: -------------- branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java branches/hmm/components-ext/pom.xml branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -31,9 +31,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; - import org.dllearner.utilities.Helper; - import com.hp.hpl.jena.query.ResultSetFactory; import com.hp.hpl.jena.query.ResultSetRewindable; import com.hp.hpl.jena.rdf.model.Model; Modified: branches/hmm/components-ext/pom.xml =================================================================== --- branches/hmm/components-ext/pom.xml 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/pom.xml 2012-09-27 16:33:12 UTC (rev 3853) @@ -34,10 +34,10 @@ <groupId>com.jamonapi</groupId> <artifactId>jamon</artifactId> </dependency> - <dependency> + <!-- <dependency> <groupId>org.aksw.commons</groupId> <artifactId>sparql</artifactId> - </dependency> + </dependency> --> <dependency> <groupId>org.apache.solr</groupId> Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -9,15 +9,20 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import org.apache.commons.collections15.MultiMap; import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; @@ -49,7 +54,6 @@ import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -70,7 +74,6 @@ import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; @@ -83,10 +86,6 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.expr.ExprAggregator; -import com.hp.hpl.jena.sparql.expr.ExprVar; -import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; -import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -95,19 +94,18 @@ * */ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm { - public static boolean useHMM = true; - + private static final boolean USE_HMM = false; + /** synonyms are great but are not used yet by the HMM algorithm. **/ + private static final boolean HMM_USE_SYNONYMS = false; + /** The minimum score of items that are accepted from the Sindice search BOA index. **/ + private static final Double BOA_THRESHOLD = 0.9; enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} private Mode mode = Mode.BEST_QUERY; /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - /** synonyms are great but are not used yet by the HMM algorithm. **/ - private static final boolean CREATE_SYNONYMS = false; - /** The minimum score of items that are accepted from the Sindice search BOA index. **/ - private static final Double BOA_THRESHOLD = 0.9; private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); @@ -403,7 +401,7 @@ logger.debug("Generating SPARQL query templates..."); templateMon.start(); if(multiThreaded){ - templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); + templates = templateGenerator.buildTemplatesMultiThreaded(question,!USE_HMM||HMM_USE_SYNONYMS); } else { templates = templateGenerator.buildTemplates(question); } @@ -420,7 +418,7 @@ } //get the weighted query candidates - generatedQueries = getWeightedSPARQLQueries(templates); + generatedQueries = getWeightedSPARQLQueries(templates,USE_HMM); sparqlQueryCandidates = new ArrayList<WeightedQuery>(); int i = 0; for(WeightedQuery wQ : generatedQueries){ @@ -519,13 +517,15 @@ } - public Set<String> getRelevantKeywords(){ - return relevantKeywords; - } + public Set<String> getRelevantKeywords(){return relevantKeywords;} - // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one - private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates, boolean hmm) { + return hmm?getWeightedSPARQLQueriesWithHMM(templates):getWeightedSPARQLQueriesWithoutHMM(templates); + } + + private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithHMM(Set<Template> templates) + { // for testing for(Template template: templates) { @@ -621,7 +621,7 @@ return null; } - private SortedSet<WeightedQuery> getWeightedSPARQLQueriesOld(Set<Template> templates){ + private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithoutHMM(Set<Template> templates){ logger.debug("Generating SPARQL query candidates..."); Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @@ -1295,6 +1295,7 @@ } return indexResultItems; } + class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ private Slot slot; Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -11,7 +11,6 @@ import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.io.Serializable; -import java.io.StringWriter; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -37,11 +36,11 @@ public class QueryTestData implements Serializable { + private static final long serialVersionUID = 1L; public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>(); - private static final int MAXIMUM_QUESTIONS = Integer.MAX_VALUE; private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); @@ -85,8 +84,9 @@ /** reads test data from a QALD2 benchmark XML file, including questions, queries and answers. * each question needs to have a query but not necessarily an answer. * @param file a QALD benchmark XML file + * @param MAX_NUMBER_OF_QUESTIONS the maximum number of questions read from the file. * @return the test data read from the XML file */ - public static QueryTestData readQaldXml(final File file) + public static QueryTestData readQaldXml(final File file, int MAX_NUMBER_OF_QUESTIONS) { QueryTestData testData = new QueryTestData(); try { @@ -99,7 +99,7 @@ for(int i = 0; i < questionNodes.getLength(); i++) { - if(i>=MAXIMUM_QUESTIONS) break; // TODO: remove later? + if(i>MAX_NUMBER_OF_QUESTIONS) break; String question; String query; Set<String> answers = new HashSet<String>(); Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -55,7 +55,6 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; -import org.dllearner.common.index.HierarchicalIndex; import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; @@ -104,6 +103,7 @@ private static final File evaluationFolder = new File("cache/evaluation"); private static final boolean DBPEDIA_PRETAGGED = true; private static final boolean OXFORD_PRETAGGED = false; + private static final int MAX_NUMBER_OF_QUESTIONS = 10; @Test public void testDBpedia() throws Exception { @@ -120,28 +120,28 @@ test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); } -// /*@Test*/ public void testOxford() throws Exception -// { -// Model model = loadOxfordModel(); -// QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); -// // answers are not included at least in the first query TODO: check, why -// testData.generateAnswers(null, null, model); -// QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); -// newTestData.generateAnswers(null, null, model); -// for(int i : testData.id2Question.keySet()) -// { -// logger.info("Comparing answers for question "+testData.id2Question.get(i)); -// String referenceQuery = testData.id2Query.get(i); -// String newQuery = newTestData.id2Query.get(i); -// if(!referenceQuery.equals(newQuery)) -// { -// logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); -// Collection<String> referenceAnswers = testData.id2Answers.get(i); -// Collection<String> newAnswers = newTestData.id2Answers.get(i); -// if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); -// } -// } -// } + // /*@Test*/ public void testOxford() throws Exception + // { + // Model model = loadOxfordModel(); + // QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); + // // answers are not included at least in the first query TODO: check, why + // testData.generateAnswers(null, null, model); + // QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); + // newTestData.generateAnswers(null, null, model); + // for(int i : testData.id2Question.keySet()) + // { + // logger.info("Comparing answers for question "+testData.id2Question.get(i)); + // String referenceQuery = testData.id2Query.get(i); + // String newQuery = newTestData.id2Query.get(i); + // if(!referenceQuery.equals(newQuery)) + // { + // logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); + // Collection<String> referenceAnswers = testData.id2Answers.get(i); + // Collection<String> newAnswers = newTestData.id2Answers.get(i); + // if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); + // } + // } + // } /** For debugging one question in particular. */ @@ -164,23 +164,23 @@ */ /*@Test*/ public void testSingleQueryDBpedia() { -// Logger.getLogger(Templator.class).setLevel(Level.DEBUG); -// Logger.getLogger(Parser.class).setLevel(Level.DEBUG); -// Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // Logger.getLogger(Templator.class).setLevel(Level.DEBUG); + // Logger.getLogger(Parser.class).setLevel(Level.DEBUG); + // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); // String question = "houses for less than 900000 pounds"; String question = "Give/VB me/PRP all/DT video/JJ games/NNS published/VBN by/IN Mean/NNP Hamster/NNP Software/NNP"; -// String question = "give me all video games published by mean hamster software"; -// String question = "Give me all video games published by Mean Hamster Software"; -// question = new StanfordPartOfSpeechTagger().tag(question); -// System.out.println(question); + // String question = "give me all video games published by mean hamster software"; + // String question = "Give me all video games published by Mean Hamster Software"; + // question = new StanfordPartOfSpeechTagger().tag(question); + // System.out.println(question); -// Model model = loadOxfordModel(); + // Model model = loadOxfordModel(); QueryTestData testData = new QueryTestData(); new LearnQueryCallable(question, 0, testData, dbpediaLiveKnowledgebase, true).call(); logger.info("learned query: "+testData.id2Query.get(0)); } - - /*@Test*/ public void generateXMLOxford() throws IOException + + /*@Test*/ public void generateXMLOxford() throws IOException { boolean ADD_POS_TAGS = true; PartOfSpeechTagger posTagger = null; @@ -192,7 +192,7 @@ for(String line;(line=in.readLine())!=null;) { j++; - // if(j>5) break; // TODO: remove later + if(j>5) break; // TODO: remove later String question = line.replace("question: ", "").trim(); if(ADD_POS_TAGS&&!OXFORD_PRETAGGED) {question = posTagger.tag(question);} if(!line.trim().isEmpty()) {questions.add(question);} @@ -291,7 +291,7 @@ public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException - { + { evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index); generateHTML(title); @@ -318,7 +318,7 @@ logger.info("Old test data not loadable, creating it and exiting."); } learnedTestData.write();*/ - } + } private File generateTestDataIfNecessary(final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException { @@ -335,9 +335,10 @@ } private void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, - Knowledgebase kb, Model model, MappingBasedIndex index) + Knowledgebase kb, Model model, MappingBasedIndex index) { - QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); + + QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML,MAX_NUMBER_OF_QUESTIONS); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); @@ -352,11 +353,6 @@ evaluation.write(); } - private void evaluateAndWrite() - { - - } - /** evaluates a data set against a reference. * @param reference the test data assumed to be correct. needs to contain the answers for all queries. * @param suspect the test data to compare with the reference. @@ -673,8 +669,8 @@ // try {testData.id2Answers.put(i,getUris(endpoint, learnedQuery));} // catch(Exception e) {logger.warn("Error with learned query "+learnedQuery+" for question "+question+" at endpoint "+endpoint+": "+e.getLocalizedMessage());} - long end = System.currentTimeMillis(); - // logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); + // long end = System.currentTimeMillis(); + // logger.trace(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); // logger.info(String.format("Learned queries for %d of %d questions.",successes,id2Question.size())); @@ -779,7 +775,7 @@ // int successfullTestThreadRuns = 0; /** */ - private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; + // private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); @@ -806,7 +802,7 @@ Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); boa_propertiesIndex.setSortField("boa-score"); -// propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); + // propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); MappingBasedIndex mappingIndex= new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |