[DL-Learner SVN] SF.net SVN: dl-learner:[3853] branches/hmm

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 3853
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3853&view=rev
Author:   kirdie
Date:     2012-09-27 16:33:12 +0000 (Thu, 27 Sep 2012)
Log Message:
-----------
Reintegrated the old (non-HMM) approach into SPARQLTemplateBasedLearner2.

Modified Paths:
--------------
    branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java
    branches/hmm/components-ext/pom.xml
    branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java
    branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java

Modified: branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java
===================================================================

--- branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java	2012-09-27 13:18:05 UTC (rev 3852)
+++ branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java	2012-09-27 16:33:12 UTC (rev 3853)
@@ -31,9 +31,7 @@
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
-
 import org.dllearner.utilities.Helper;
-
 import com.hp.hpl.jena.query.ResultSetFactory;
 import com.hp.hpl.jena.query.ResultSetRewindable;
 import com.hp.hpl.jena.rdf.model.Model;

Modified: branches/hmm/components-ext/pom.xml
===================================================================
--- branches/hmm/components-ext/pom.xml	2012-09-27 13:18:05 UTC (rev 3852)
+++ branches/hmm/components-ext/pom.xml	2012-09-27 16:33:12 UTC (rev 3853)
@@ -34,10 +34,10 @@
             <groupId>com.jamonapi</groupId>
             <artifactId>jamon</artifactId>
         </dependency>
-        <dependency>
+        <!-- <dependency>
             <groupId>org.aksw.commons</groupId>
             <artifactId>sparql</artifactId>
-        </dependency>
+        </dependency> -->
 
         <dependency>
             <groupId>org.apache.solr</groupId>

Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-09-27 13:18:05 UTC (rev 3852)
+++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-09-27 16:33:12 UTC (rev 3853)
@@ -9,15 +9,20 @@
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
-import java.util.SortedMap;
 import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import org.apache.commons.collections15.MultiMap;
 import org.apache.log4j.Logger;
 import org.dllearner.algorithm.tbsl.nlp.Lemmatizer;
@@ -49,7 +54,6 @@
 import org.dllearner.common.index.IndexResultItem;
 import org.dllearner.common.index.IndexResultSet;
 import org.dllearner.common.index.MappingBasedIndex;
-import org.dllearner.common.index.SOLRIndex;
 import org.dllearner.common.index.SPARQLDatatypePropertiesIndex;
 import org.dllearner.common.index.SPARQLIndex;
 import org.dllearner.common.index.SPARQLObjectPropertiesIndex;
@@ -70,7 +74,6 @@
 import org.dllearner.kb.sparql.SparqlEndpoint;
 import org.dllearner.kb.sparql.SparqlQuery;
 import org.dllearner.reasoning.SPARQLReasoner;
-import org.ini4j.InvalidFileFormatException;
 import org.ini4j.Options;
 import org.semanticweb.owlapi.model.IRI;
 import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
@@ -83,10 +86,6 @@
 import com.hp.hpl.jena.rdf.model.Model;
 import com.hp.hpl.jena.rdf.model.ModelFactory;
 import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;
-import com.hp.hpl.jena.sparql.expr.ExprAggregator;
-import com.hp.hpl.jena.sparql.expr.ExprVar;
-import com.hp.hpl.jena.sparql.expr.aggregate.AggCount;
-import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator;
 import com.jamonapi.Monitor;
 import com.jamonapi.MonitorFactory;
 
@@ -95,19 +94,18 @@
  * */
 public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm
 {
-	public static boolean useHMM = true;
-	
+	private static final boolean USE_HMM = false;
+	/** synonyms are great but are not used yet by the HMM algorithm. **/
+	private static final boolean	HMM_USE_SYNONYMS	= false;
+		/** The minimum score of items that are accepted from the Sindice search BOA index. **/
+	private static final Double	BOA_THRESHOLD	=  0.9;
 	enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY}
 	private Mode mode = Mode.BEST_QUERY;
 	
 	/** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/
 	private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider();
+	private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class);
 
-	private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class);
-	/** synonyms are great but are not used yet by the HMM algorithm. **/
-	private static final boolean	CREATE_SYNONYMS	= false;
-	/** The minimum score of items that are accepted from the Sindice search BOA index. **/
-	private static final Double	BOA_THRESHOLD	=  0.9;
 	private Monitor templateMon = MonitorFactory.getTimeMonitor("template");
 	private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql");
 
@@ -403,7 +401,7 @@
 		logger.debug("Generating SPARQL query templates...");
 		templateMon.start();
 		if(multiThreaded){
-			templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS);
+			templates = templateGenerator.buildTemplatesMultiThreaded(question,!USE_HMM||HMM_USE_SYNONYMS);
 		} else {
 			templates = templateGenerator.buildTemplates(question);
 		}
@@ -420,7 +418,7 @@
 		}
 
 		//get the weighted query candidates
-		generatedQueries = getWeightedSPARQLQueries(templates);
+		generatedQueries = getWeightedSPARQLQueries(templates,USE_HMM);
 		sparqlQueryCandidates = new ArrayList<WeightedQuery>();
 		int i = 0;
 		for(WeightedQuery wQ : generatedQueries){
@@ -519,13 +517,15 @@
 
 	}
 
-	public Set<String> getRelevantKeywords(){
-		return relevantKeywords;
-	}
+	public Set<String> getRelevantKeywords(){return relevantKeywords;}
 
-	// just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one
-	private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates)
+	private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates, boolean hmm)
 	{
+		return hmm?getWeightedSPARQLQueriesWithHMM(templates):getWeightedSPARQLQueriesWithoutHMM(templates);
+	}
+	
+	private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithHMM(Set<Template> templates)
+	{
 		// for testing 
 		for(Template template: templates)
 		{
@@ -621,7 +621,7 @@
 		return null;
 	}
 
-	private SortedSet<WeightedQuery> getWeightedSPARQLQueriesOld(Set<Template> templates){
+	private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithoutHMM(Set<Template> templates){
 		logger.debug("Generating SPARQL query candidates...");
 
 		Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() {
@@ -1295,6 +1295,7 @@
 		}
 		return indexResultItems;
 	}
+	
 	class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{
 
 		private Slot slot;

Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java
===================================================================
--- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java	2012-09-27 13:18:05 UTC (rev 3852)
+++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java	2012-09-27 16:33:12 UTC (rev 3853)
@@ -11,7 +11,6 @@
 import java.io.ObjectOutputStream;
 import java.io.PrintWriter;
 import java.io.Serializable;
-import java.io.StringWriter;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Set;
@@ -37,11 +36,11 @@
 
 public class QueryTestData implements Serializable
 {
+	private static final long	serialVersionUID	= 1L;
 	public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>();
 	public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>();
 	public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>();
 	public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>();
-	private static final int	MAXIMUM_QUESTIONS	= Integer.MAX_VALUE;
 
 	private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName();
 
@@ -85,8 +84,9 @@
 	/** reads test data from a QALD2 benchmark XML file, including questions, queries and answers.
 	 * each question needs to have a query but not necessarily an answer.
 	 * @param file a QALD benchmark XML file 
+	 * @param MAX_NUMBER_OF_QUESTIONS the maximum number of questions read from the file. 
 	 * @return the test data read from the XML file */
-	public static QueryTestData readQaldXml(final File file)
+	public static QueryTestData readQaldXml(final File file, int MAX_NUMBER_OF_QUESTIONS)
 	{
 		QueryTestData testData = new QueryTestData();
 		try {
@@ -99,7 +99,7 @@
 
 			for(int i = 0; i < questionNodes.getLength(); i++)
 			{
-				if(i>=MAXIMUM_QUESTIONS) break; // TODO: remove later?
+				if(i>MAX_NUMBER_OF_QUESTIONS) break;
 				String question;
 				String query;
 				Set<String> answers = new HashSet<String>();

Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java
===================================================================
--- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java	2012-09-27 13:18:05 UTC (rev 3852)
+++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java	2012-09-27 16:33:12 UTC (rev 3853)
@@ -55,7 +55,6 @@
 import org.dllearner.algorithm.tbsl.nlp.WordNet;
 import org.dllearner.algorithm.tbsl.templator.Templator;
 import org.dllearner.algorithm.tbsl.util.Knowledgebase;
-import org.dllearner.common.index.HierarchicalIndex;
 import org.dllearner.common.index.Index;
 import org.dllearner.common.index.MappingBasedIndex;
 import org.dllearner.common.index.SOLRIndex;
@@ -104,6 +103,7 @@
 	private static final File evaluationFolder = new File("cache/evaluation");
 	private static final boolean	DBPEDIA_PRETAGGED	= true;
 	private static final boolean	OXFORD_PRETAGGED	= false;
+	private static final int MAX_NUMBER_OF_QUESTIONS = 10;
 
 	@Test public void testDBpedia() throws Exception
 	{
@@ -120,28 +120,28 @@
 		test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex());
 	}
 
-//	/*@Test*/ public void testOxford() throws Exception
-//	{
-//		Model model = loadOxfordModel();
-//		QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml"));
-//		// answers are not included at least in the first query TODO: check, why
-//		testData.generateAnswers(null, null, model);
-//		QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED);
-//		newTestData.generateAnswers(null, null, model);
-//		for(int i : testData.id2Question.keySet())
-//		{
-//			logger.info("Comparing answers for question "+testData.id2Question.get(i));
-//			String referenceQuery = testData.id2Query.get(i);
-//			String newQuery = newTestData.id2Query.get(i);			
-//			if(!referenceQuery.equals(newQuery))
-//			{
-//				logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery);
-//				Collection<String> referenceAnswers = testData.id2Answers.get(i);
-//				Collection<String> newAnswers = newTestData.id2Answers.get(i);			
-//				if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers);
-//			}
-//		}
-//	}
+	//	/*@Test*/ public void testOxford() throws Exception
+	//	{
+	//		Model model = loadOxfordModel();
+	//		QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml"));
+	//		// answers are not included at least in the first query TODO: check, why
+	//		testData.generateAnswers(null, null, model);
+	//		QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED);
+	//		newTestData.generateAnswers(null, null, model);
+	//		for(int i : testData.id2Question.keySet())
+	//		{
+	//			logger.info("Comparing answers for question "+testData.id2Question.get(i));
+	//			String referenceQuery = testData.id2Query.get(i);
+	//			String newQuery = newTestData.id2Query.get(i);			
+	//			if(!referenceQuery.equals(newQuery))
+	//			{
+	//				logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery);
+	//				Collection<String> referenceAnswers = testData.id2Answers.get(i);
+	//				Collection<String> newAnswers = newTestData.id2Answers.get(i);			
+	//				if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers);
+	//			}
+	//		}
+	//	}
 
 	/** For debugging one question in particular.
 	 */
@@ -164,23 +164,23 @@
 	 */
 	/*@Test*/ public void testSingleQueryDBpedia()
 	{
-//		Logger.getLogger(Templator.class).setLevel(Level.DEBUG);
-//		Logger.getLogger(Parser.class).setLevel(Level.DEBUG);
-//		Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG);
+		//		Logger.getLogger(Templator.class).setLevel(Level.DEBUG);
+		//		Logger.getLogger(Parser.class).setLevel(Level.DEBUG);
+		//		Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG);
 		//		String question = "houses for less than 900000 pounds";
 		String question = "Give/VB me/PRP all/DT video/JJ games/NNS published/VBN by/IN Mean/NNP Hamster/NNP Software/NNP";
-//		String question = "give me all video games published by mean hamster software";
-//		String question = "Give me all video games published by Mean Hamster Software";		
-//		question = new StanfordPartOfSpeechTagger().tag(question);
-//		System.out.println(question);
+		//		String question = "give me all video games published by mean hamster software";
+		//		String question = "Give me all video games published by Mean Hamster Software";		
+		//		question = new StanfordPartOfSpeechTagger().tag(question);
+		//		System.out.println(question);
 
-//		Model model = loadOxfordModel();
+		//		Model model = loadOxfordModel();
 		QueryTestData testData = new QueryTestData();
 		new LearnQueryCallable(question, 0, testData, dbpediaLiveKnowledgebase, true).call();
 		logger.info("learned query: "+testData.id2Query.get(0));
 	}
-	
-	/*@Test*/ public void generateXMLOxford() throws IOException
+
+	/*@Test*/  public void generateXMLOxford() throws IOException
 	{
 		boolean ADD_POS_TAGS = true;
 		PartOfSpeechTagger posTagger = null;
@@ -192,7 +192,7 @@
 		for(String line;(line=in.readLine())!=null;)
 		{
 			j++;
-			//			if(j>5) break; // TODO: remove later
+			if(j>5) break; // TODO: remove later
 			String question = line.replace("question: ", "").trim();
 			if(ADD_POS_TAGS&&!OXFORD_PRETAGGED) {question = posTagger.tag(question);}
 			if(!line.trim().isEmpty()) {questions.add(question);}
@@ -291,7 +291,7 @@
 
 	public void test(String title, final File referenceXML,final  SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index)
 			throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException
-	{		
+			{		
 		evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index);
 		generateHTML(title); 
 
@@ -318,7 +318,7 @@
 					logger.info("Old test data not loadable, creating it and exiting.");					
 				}
 				learnedTestData.write();*/
-	}
+			}
 
 	private File generateTestDataIfNecessary(final File referenceXML,final  SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException
 	{
@@ -335,9 +335,10 @@
 	}
 
 	private void evaluateAndWrite(String title,final File updatedReferenceXML, final  SparqlEndpoint endpoint,ExtractionDBCache cache,
-		Knowledgebase kb, Model model, MappingBasedIndex index)
+			Knowledgebase kb, Model model, MappingBasedIndex index)
 	{
-		QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML);
+
+		QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML,MAX_NUMBER_OF_QUESTIONS);
 		logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions.");
 
 		long startLearning = System.currentTimeMillis();
@@ -352,11 +353,6 @@
 		evaluation.write();
 	}
 
-	private void evaluateAndWrite()
-	{
-
-	}
-
 	/** evaluates a data set against a reference.
 	 * @param reference the test data assumed to be correct. needs to contain the answers for all queries.
 	 * @param suspect the test data to compare with the reference.
@@ -673,8 +669,8 @@
 		//			try {testData.id2Answers.put(i,getUris(endpoint, learnedQuery));}
 		//			catch(Exception e) {logger.warn("Error with learned query "+learnedQuery+" for question "+question+" at endpoint "+endpoint+": "+e.getLocalizedMessage());}
 
-		long end = System.currentTimeMillis();
-		//			logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start));
+		//		long end = System.currentTimeMillis();
+		//		logger.trace(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start));
 
 
 		//		logger.info(String.format("Learned queries for %d of %d questions.",successes,id2Question.size()));
@@ -779,7 +775,7 @@
 	//	int successfullTestThreadRuns = 0;
 
 	/** */
-	private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING	= "http://live.dbpedia.org/sparql";
+	//	private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING	= "http://live.dbpedia.org/sparql";
 
 	private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class);
 
@@ -806,7 +802,7 @@
 		Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties");
 		SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail");
 		boa_propertiesIndex.setSortField("boa-score");
-//		propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex);
+		//		propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex);
 		MappingBasedIndex mappingIndex= new MappingBasedIndex(
 				SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), 
 				SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(),

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.