From: <ki...@us...> - 2012-09-27 13:18:18
Revision: 3852
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3852&view=rev
Author:   kirdie
Date:     2012-09-27 13:18:05 +0000 (Thu, 27 Sep 2012)

Log Message:
-----------
The last commit merged the two SPARQLTemplateBasedLearner2 classes into one file; this commit renames the file to the correct name.

Added Paths:
-----------
    branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java

Removed Paths:
-------------
    branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java

Copied: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java (from rev 3851, branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java)
===================================================================
--- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java (rev 0)
+++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 13:18:05 UTC (rev 3852)
@@ -0,0 +1,1440 @@
+package org.dllearner.algorithm.tbsl.learning; + +import hmm.HiddenMarkovModel; +import hmm.ResourceInfo; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import org.apache.commons.collections15.MultiMap; +import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; +import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.PlingStemmer; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.WordNet; +import org.dllearner.algorithm.tbsl.sparql.Allocation; +import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; +import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.SlotType; +import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; +import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.algorithm.tbsl.util.PopularityMap; +import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; +import org.dllearner.algorithm.tbsl.util.Similarity; +import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.IndexResultItem; +import org.dllearner.common.index.IndexResultSet; +import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SOLRIndex; +import 
org.dllearner.common.index.SPARQLDatatypePropertiesIndex; +import org.dllearner.common.index.SPARQLIndex; +import org.dllearner.common.index.SPARQLObjectPropertiesIndex; +import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; +import org.dllearner.common.index.VirtuosoObjectPropertiesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.LearningProblem; +import org.dllearner.core.SparqlQueryLearningAlgorithm; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.Thing; +import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.reasoning.SPARQLReasoner; +import org.ini4j.InvalidFileFormatException; +import org.ini4j.Options; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; +import com.hp.hpl.jena.ontology.OntModelSpec; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.sparql.expr.ExprAggregator; +import com.hp.hpl.jena.sparql.expr.ExprVar; +import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; +import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +/** The old learner taken over by Konrad Höffner for experiments with the Hidden Markov Algorithm by Saedeeh Shekarpur. + * + * */ +public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm +{ + public static boolean useHMM = true; + + enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} + private Mode mode = Mode.BEST_QUERY; + + /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ + private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); + /** synonyms are great but are not used yet by the HMM algorithm. **/ + private static final boolean CREATE_SYNONYMS = false; + /** The minimum score of items that are accepted from the Sindice search BOA index. 
**/ + private static final Double BOA_THRESHOLD = 0.9; + private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); + private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); + + private boolean useRemoteEndpointValidation; + private boolean stopIfQueryResultNotEmpty; + private int maxTestedQueriesPerTemplate = 50; + private int maxQueryExecutionTimeInSeconds; + private int maxTestedQueries = 200; + private int maxIndexResults; + + private SparqlEndpoint endpoint = null; + private Model model = null; + + private ExtractionDBCache cache = new ExtractionDBCache("cache"); + + private Index resourcesIndex; + private Index classesIndex; + private Index propertiesIndex; + + private Index datatypePropertiesIndex; + private Index objectPropertiesIndex; + + private MappingBasedIndex mappingIndex; + + private Templator templateGenerator = null; + private Lemmatizer lemmatizer; + private PartOfSpeechTagger posTagger; + private WordNet wordNet; + + private String question; + private int learnedPos = -1; + + private Set<Template> templates; + private Map<Template, Collection<? extends Query>> template2Queries; + private Map<Slot, List<String>> slot2URI; + + private Collection<WeightedQuery> sparqlQueryCandidates; + private SortedSet<WeightedQuery> learnedSPARQLQueries; + private SortedSet<WeightedQuery> generatedQueries; + + private SPARQLReasoner reasoner; + + private String currentlyExecutedQuery; + + private boolean dropZeroScoredQueries = true; + private boolean useManualMappingsIfExistOnly = true; + + private boolean multiThreaded = true; + + private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; + + private PopularityMap popularityMap; + + private Set<String> relevantKeywords; + + private boolean useDomainRangeRestriction = true; + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); + } + + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){ + this(endpoint, index, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){ + this(endpoint, index, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, WordNet 
wordNet){ + this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this.endpoint = endpoint; + this.resourcesIndex = resourcesIndex; + this.classesIndex = classesIndex; + this.propertiesIndex = propertiesIndex; + this.posTagger = posTagger; + this.wordNet = wordNet; + this.cache = cache; + + setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ + this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ + this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger 
posTagger) + { + this(model, new SPARQLIndex(model),new SPARQLIndex(model),new SPARQLIndex(model),posTagger); + setMappingIndex(mappingBasedIndex); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this.model = model; + this.resourcesIndex = resourcesIndex; + this.classesIndex = classesIndex; + this.propertiesIndex = propertiesIndex; + this.posTagger = posTagger; + this.wordNet = wordNet; + this.cache = cache; + + setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM, model)), cache); + } + + public void setGrammarFiles(String[] grammarFiles) + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} + templateGenerator.setGrammarFiles(grammarFiles); + } + + @Override + public void init() throws ComponentInitException { + templateGenerator = new Templator(posTagger, wordNet, grammarFiles); + lemmatizer = new LingPipeLemmatizer(); + } + + public void setMappingIndex(MappingBasedIndex mappingIndex) { + this.mappingIndex = mappingIndex; + } + + public void setCache(ExtractionDBCache cache) { + this.cache = cache; + } + + public void setKnowledgebase(Knowledgebase knowledgebase){ + this.endpoint = knowledgebase.getEndpoint(); + this.resourcesIndex = knowledgebase.getResourceIndex(); + this.classesIndex = knowledgebase.getClassIndex(); + this.propertiesIndex = knowledgebase.getPropertyIndex(); + this.mappingIndex = knowledgebase.getMappingIndex(); + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + } + + public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { + this.useDomainRangeRestriction = useDomainRangeRestriction; + }
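For reference, the configuration keys consumed by setOptions() further below can be supplied programmatically through an ini4j Options object. A minimal sketch, assuming a Knowledgebase, a PartOfSpeechTagger and a WordNet instance have already been constructed (the values shown are only the defaults that setOptions() falls back to, not recommendations):

    // Illustrative only: keys and values mirror the defaults read in setOptions() below.
    Options options = new Options();
    options.put("solr.query.limit", "10");
    options.put("sparql.query.maxExecutionTimeInSeconds", "20");
    options.put("learning.validationType", "remote");
    options.put("learning.stopAfterFirstNonEmptyQueryResult", "true");
    options.put("learning.maxTestedQueriesPerTemplate", "20");
    options.put("wordnet.dictionary", "tbsl/dict");
    SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(knowledgebase, posTagger, wordNet, options);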
 + + /* + * Only useful for evaluation. + */ + public void setUseIdealTagger(boolean value){ + templateGenerator.setUNTAGGED_INPUT(!value); + } + + private void setOptions(Options options){ + maxIndexResults = Integer.parseInt(options.get("solr.query.limit", "10")); + + maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); + cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); + + useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote"); + stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); + maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); + + String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); + wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); + System.setProperty("wordnet.database.dir", wordnetPath); + } + + public void setEndpoint(SparqlEndpoint endpoint){ + this.endpoint = endpoint; + + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + reasoner.setCache(cache); + reasoner.prepareSubsumptionHierarchy(); + } + + public void setQuestion(String question){ + this.question = question; + } + + public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ + this.useRemoteEndpointValidation = useRemoteEndpointValidation; + } + + public int getMaxQueryExecutionTimeInSeconds() { + return maxQueryExecutionTimeInSeconds; + } + + public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) { + this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds; + } + + public int getMaxTestedQueriesPerTemplate() { + return maxTestedQueriesPerTemplate; + } + + public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { + this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; + } + + private void reset(){ + learnedSPARQLQueries = new TreeSet<WeightedQuery>(); + template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); + slot2URI = new HashMap<Slot, List<String>>(); + relevantKeywords = new HashSet<String>(); + currentlyExecutedQuery = null; + + // templateMon.reset(); + // sparqlMon.reset(); + } + + public void learnSPARQLQueries() throws NoTemplateFoundException{ + reset(); + //generate SPARQL query templates + logger.debug("Generating SPARQL query templates..."); + templateMon.start(); + if(multiThreaded){ + templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); + } else { + templates = templateGenerator.buildTemplates(question); + } + templateMon.stop(); + logger.debug("Done in " + templateMon.getLastValue() + "ms."); + relevantKeywords.addAll(templateGenerator.getUnknownWords()); + if(templates.isEmpty()){ + throw new NoTemplateFoundException(); + + } + logger.debug("Templates:"); + for(Template t : templates){ + logger.debug(t); + } + + //get the weighted query candidates + generatedQueries = getWeightedSPARQLQueries(templates); + sparqlQueryCandidates = new ArrayList<WeightedQuery>(); + int i = 0; + for(WeightedQuery wQ : generatedQueries){ + logger.debug(wQ.explain()); + sparqlQueryCandidates.add(wQ); + if(i == maxTestedQueries){ + break; + } + i++; + } + + if(mode == Mode.BEST_QUERY){ + double bestScore = -1; + for(WeightedQuery candidate : generatedQueries){ + double score = candidate.getScore(); + if(score >= bestScore){ + bestScore = score; + learnedSPARQLQueries.add(candidate); + } else { + break; + } + } + } else if(mode == Mode.BEST_NON_EMPTY_QUERY){ + //test candidates + if(useRemoteEndpointValidation){ //on remote endpoint + validateAgainstRemoteEndpoint(sparqlQueryCandidates); + } else {//on local model + + } + } + } + + public SortedSet<WeightedQuery> getGeneratedQueries() { + return generatedQueries; + } + + public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { + SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); + int max = Math.min(topN, generatedQueries.size()); + for(WeightedQuery wQ : generatedQueries){ + topNQueries.add(wQ); + if(topNQueries.size() == max){ + break; + } + } + return topNQueries; + } + + public Set<Template> getTemplates(){ + return templates; + } + + public List<String> getGeneratedSPARQLQueries(){ + List<String> queries = new ArrayList<String>(); + for(WeightedQuery wQ : sparqlQueryCandidates){ + queries.add(wQ.getQuery().toString()); + } + + return queries; + } + + public Map<Template, Collection<? 
extends Query>> getTemplates2SPARQLQueries(){ + return template2Queries; + } + + public Map<Slot, List<String>> getSlot2URIs(){ + return slot2URI; + } + + private void normProminenceValues(Set<Allocation> allocations){ + double min = 0; + double max = 0; + for(Allocation a : allocations){ + if(a.getProminence() < min){ + min = a.getProminence(); + } + if(a.getProminence() > max){ + max = a.getProminence(); + } + } + if(min==max) {return;} + for(Allocation a : allocations){ + double prominence = a.getProminence()/(max-min); + a.setProminence(prominence); + } + } + + private void computeScore(Set<Allocation> allocations){ + double alpha = 0.8; + double beta = 1 - alpha; + + for(Allocation a : allocations){ + double score = alpha * a.getSimilarity() + beta * a.getProminence(); + a.setScore(score); + } + + } + + public Set<String> getRelevantKeywords(){ + return relevantKeywords; + } + + // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + { + // for testing + for(Template template: templates) + { + { + ArrayList<String> keywords = new ArrayList<String>(); + for(Slot slot: template.getSlots()) + { + keywords.add(slot.getWords().get(0)); + } + if(template.getSlots().size()!=3) {continue;} +// if(!keywords.contains("Mean Hamster Software")) {continue;} +// if(!keywords.contains("published")) {continue;} + System.out.println("\"keywords\": "+keywords); + } + System.out.println(template); + SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); + Query query = template.getQuery(); + double score = 0; + + Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); + Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); + for(Slot slot: template.getSlots()) + { + List<String> segment = new LinkedList<String>(); + segment.addAll(Arrays.asList(slot.getWords().get(0).split("\\s"))); + List<ResourceInfo> resourceInfos = new LinkedList<ResourceInfo>(); + + for(IndexResultItem item : getIndexResultItems(slot)) + { + // if this gets used at another place, create a function IndexResultItemToResourceInfo() + ResourceInfo info = new ResourceInfo(); + info.setUri(item.getUri()); + String label = item.getLabel(); + // in DBpedia, the last part of the URI is derived from the English label; reverse the transformation (should almost always work for DBpedia article resources) + info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); + // in Saedeeh's algorithm, the emission probability is formed by the string similarity, + // but we use the lucene index score + double max = 0; + for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} + if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); + info.setStringSimilarityScore(max); + if(!info.setTypeFromDBpediaURI()) throw new AssertionError("could not set type for info "+info); + System.err.println("info with type: "+info); + resourceInfos.add(info); + } + segmentToURIs.put(segment,resourceInfos); + }
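The comment at the top of that loop already suggests extracting the item-to-ResourceInfo conversion into a helper once it is needed elsewhere. A minimal sketch of such a method, mirroring the inline code above (the method itself is hypothetical and not part of this commit):

    // Hypothetical helper, as suggested by the comment above; mirrors the
    // conversion done inline in getWeightedSPARQLQueries().
    private static ResourceInfo indexResultItemToResourceInfo(IndexResultItem item, Slot slot)
    {
        ResourceInfo info = new ResourceInfo();
        info.setUri(item.getUri());
        String label = item.getLabel();
        // fall back to the short form of the URI if the endpoint offers no label
        info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri())));
        // emission score: best string similarity over all words of the slot
        double max = 0;
        for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));}
        if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max);
        info.setStringSimilarityScore(max);
        if(!info.setTypeFromDBpediaURI()) throw new AssertionError("could not set type for info "+info);
        return info;
    }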
 + HiddenMarkovModel hmm = new HiddenMarkovModel(); + hmm.initialization(); + hmm.startMarkovModel(segmentToURIs,true); + MultiMap<Double,List<String>> paths = hmm.getPaths(); + + // System.out.println(hmm.getPaths()); + // now feed the keywords into Saedeeh's algorithm + // this yields paths with different probabilities + // HiddenMarkovModel HMM = new HiddenMarkovModel(); + // HMM.StartMarkovModel(); + // now replace the variables in the query with the candidates + // ranked list of the paths that generate the observation sequence + + for(Double d : paths.keySet()) + { + for(List<String> path : paths.get(d)) + { + Query q = new Query(query); + // TODO: which variable stands for which resource? do it randomly now to check if the replacement works and then correct the order later + System.out.println(q.getVariablesAsStringList()); + System.out.println(); + int i = 0; + for(String var : q.getVariablesAsStringList()) + { + q.replaceVarWithURI(var, path.get(i)); + i++; + } + System.out.println(q); + + + WeightedQuery wQuery = new WeightedQuery(q, score); + queries.add(wQuery); + } + } + //System.exit(0); + return queries; + // >> SLOTS: + // y0: RESOURCE {Mean Hamster Software} + // p0: OBJECTPROPERTY {published,print} + // p1: CLASS {video games} + + + // System.out.println(template); + } + // + return null; + } + + private SortedSet<WeightedQuery> getWeightedSPARQLQueriesOld(Set<Template> templates){ + logger.debug("Generating SPARQL query candidates..."); + + Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { + + @Override + public int compare(Slot o1, Slot o2) { + if(o1.getSlotType() == o2.getSlotType()){ + return o1.getToken().compareTo(o2.getToken()); + } else { + return -1; + } + } + }); + slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); + + + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + + Set<Allocation> allocations; + + for(Template t : templates){ + logger.info("Processing template:\n" + t.toString()); + allocations = new TreeSet<Allocation>(); + boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); + + ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); + List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); + + long startTime = System.currentTimeMillis(); + + for (Slot slot : t.getSlots()) { + if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); + Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); + Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); + list.add(submit); + } + } + + for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { + try { + Map<Slot, SortedSet<Allocation>> result = future.get(); + Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); + slot2Allocations.put(item.getKey(), item.getValue()); + } catch (InterruptedException e) { + e.printStackTrace(); + } catch (ExecutionException e) { + e.printStackTrace(); + } + } + + executor.shutdown(); + + + /*for(Slot slot : t.getSlots()){ + allocations = slot2Allocations2.get(slot); + if(allocations == null){ + allocations = computeAllocations(slot, 10); + slot2Allocations2.put(slot, allocations); + } + slot2Allocations.put(slot, allocations); + + //for tests add the property URI with http://dbpedia.org/property/ namespace + //TODO should be replaced by usage of a separate SOLR index + Set<Allocation> tmp = new HashSet<Allocation>(); + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(Allocation a : allocations){ + String uri = "http://dbpedia.org/property/" + a.getUri().substring(a.getUri().lastIndexOf("/")+1); + Allocation newA = 
new Allocation(uri, a.getSimilarity(), a.getProminence()); + newA.setScore(a.getScore()-0.000001); + tmp.add(newA); + } + } + allocations.addAll(tmp); + }*/ + logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); + + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); + Query cleanQuery = t.getQuery(); + queries.add(new WeightedQuery(cleanQuery)); + + Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); + List<Slot> sortedSlots = new ArrayList<Slot>(); + Set<Slot> classSlots = new HashSet<Slot>(); + for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.CLASS){ + sortedSlots.add(slot); + classSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY || slot.getSlotType() == SlotType.DATATYPEPROPERTY){ + sortedSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ + if(!sortedSlots.contains(slot)){ + sortedSlots.add(slot); + } + } + //add for each SYMPROPERTY Slot the reversed query + for(Slot slot : sortedSlots){ + for(WeightedQuery wQ : queries){ + if(slot.getSlotType() == SlotType.SYMPROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY){ + Query reversedQuery = new Query(wQ.getQuery()); + reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); + tmp.add(new WeightedQuery(reversedQuery)); + } + tmp.add(wQ); + } + queries.clear(); + queries.addAll(tmp); + tmp.clear(); + } + + for(Slot slot : sortedSlots){ + if(!slot2Allocations.get(slot).isEmpty()){ + for(Allocation a : slot2Allocations.get(slot)){ + for(WeightedQuery query : queries){ + Query q = new Query(query.getQuery()); + + boolean drop = false; + if(useDomainRangeRestriction){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); + // System.out.println(triple); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ + // System.out.println(typeTriple); + if(true){//reasoner.isObjectProperty(a.getUri())){ + Description range = reasoner.getRange(new ObjectProperty(a.getUri())); + // System.out.println(a); + if(range != null){ + Set<Description> allRanges = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(range instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(range); + allRanges.addAll(superClasses); + } else { + for(Description nc : range.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allRanges.addAll(superClasses); + } + } + allRanges.add(range); + allRanges.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } + } + } else { + drop = true; + } + + } + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ + Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); + // System.out.println(a); + if(domain != null){ + Set<Description> allDomains = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(domain instanceof NamedClass){ + superClasses = 
reasoner.getSuperClasses(domain); + allDomains.addAll(superClasses); + } else { + for(Description nc : domain.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allDomains.addAll(superClasses); + } + } + allDomains.add(domain); + allDomains.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ + drop = true; + } else { + + } + } + } + } + } + } + + if(!drop){ + if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + w.addAllocations(query.getAllocations()); + w.addAllocation(a); + tmp.add(w); + } + + + } + } + //lower queries with FILTER-REGEX + if(containsRegex){ + for(WeightedQuery wQ : tmp){ + wQ.setScore(wQ.getScore() - 0.01); + } + } + + queries.clear(); + queries.addAll(tmp);//System.out.println(tmp); + tmp.clear(); + } else {//Add REGEX FILTER if resource slot is empty and predicate is datatype property + if(slot.getSlotType() == SlotType.RESOURCE){ + for(WeightedQuery query : queries){ + Query q = query.getQuery(); + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + } + } + } + } + + } + + } else { + if(slot.getSlotType() == SlotType.SYMPROPERTY){ + for(WeightedQuery wQ : queries){ + List<SPARQL_Triple> triples = wQ.getQuery().getTriplesWithVar(slot.getAnchor()); + for(SPARQL_Triple triple : triples){ + String typeVar; + String resourceURI; + SymPropertyDirection direction; + if(triple.getValue().isVariable()){ + direction = SymPropertyDirection.VAR_RIGHT; + typeVar = triple.getValue().getName(); + resourceURI = 
triple.getVariable().getName(); + } else { + direction = SymPropertyDirection.VAR_LEFT; + typeVar = triple.getVariable().getName(); + resourceURI = triple.getValue().getName(); + } + resourceURI = resourceURI.replace("<", "").replace(">", ""); + List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); + for(SPARQL_Triple typeTriple : typeTriples){ + String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); + // List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); + // for(Entry<String, Integer> property : mostFrequentProperties){ + // wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); + // wQ.setScore(wQ.getScore() + 0.1); + // } + } + + } + } + } + } + // else if(slot.getSlotType() == SlotType.CLASS){ + // String token = slot.getWords().get(0); + // if(slot.getToken().contains("house")){ + // String regexToken = token.replace("houses", "").replace("house", "").trim(); + // try { + // Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); + // SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); + // if(alloc != null && !alloc.isEmpty()){ + // String uri = alloc.first().getUri(); + // for(WeightedQuery query : queries){ + // Query q = query.getQuery(); + // for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + // SPARQL_Term subject = triple.getVariable(); + // SPARQL_Term object = new SPARQL_Term("desc"); + // object.setIsVariable(true); + // object.setIsURI(false); + // q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); + // q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + // object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); + // } + // q.replaceVarWithURI(slot.getAnchor(), uri); + // + // } + // } + // } catch (Exception e) { + // e.printStackTrace(); + // } + // } + // } + + + } + + } + for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { + WeightedQuery wQ = iterator.next(); + if(dropZeroScoredQueries){ + if(wQ.getScore() <= 0){ + iterator.remove(); + } + } else { + if(t.getSlots().size()==0) throw new AssertionError("no slots for query "+wQ); + wQ.setScore(wQ.getScore()/t.getSlots().size()); + } + + } + allQueries.addAll(queries); + List<Query> qList = new ArrayList<Query>(); + for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); + qList.add(wQ.getQuery()); + } + template2Queries.put(t, qList); + } + logger.debug("...done in "); + return allQueries; + } + + private double getProminenceValue(String uri, SlotType type){ + Integer popularity = null; + if(popularityMap != null){ + if(type == SlotType.CLASS){ + popularity = popularityMap.getPopularity(uri, EntityType.CLASS); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ + popularity = popularityMap.getPopularity(uri, EntityType.PROPERTY); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + popularity = popularityMap.getPopularity(uri, EntityType.RESOURCE); + } + } + if(popularity == null){ + String query = null; + if(type == SlotType.CLASS){ + query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == 
SlotType.OBJECTPROPERTY){ + query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; + } + query = String.format(query, uri); + + ResultSet rs = executeSelect(query); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + popularity = qs.get(projectionVar).asLiteral().getInt(); + } + } + if(popularity == null){ + popularity = Integer.valueOf(0); + } + System.out.println(popularity); + + + // if(cnt == 0){ + // return 0; + // } + // return Math.log(cnt); + if(popularity!=popularity) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} + return popularity; + } + + public void setPopularityMap(PopularityMap popularityMap) { + this.popularityMap = popularityMap; + } + + + private List<String> pruneList(List<String> words){ + List<String> prunedList = new ArrayList<String>(); + for(String w1 : words){ + boolean smallest = true; + for(String w2 : words){ + if(!w1.equals(w2)){ + if(w1.contains(w2)){ + smallest = false; + break; + } + } + } + if(smallest){ + prunedList.add(w1); + } + } + logger.info("Pruned list: " + prunedList); + // return getLemmatizedWords(words); + return prunedList; + } + + private List<String> getLemmatizedWords(List<String> words){ + logger.info("Pruning word list " + words + "..."); + // mon.start(); + List<String> pruned = new ArrayList<String>(); + for(String word : words){ + //currently only stem single words + if(word.contains(" ")){ + pruned.add(word); + } else { + String lemWord = lemmatizer.stem(word); + if(!pruned.contains(lemWord)){ + pruned.add(lemWord); + } + } + + } + // mon.stop(); + // logger.info("Done in " + mon.getLastValue() + "ms."); + logger.info("Pruned list: " + pruned); + return pruned; + } + + + private Index getIndexBySlotType(Slot slot){ + Index index = null; + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + index = classesIndex; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + index = propertiesIndex; + } else if(type == SlotType.DATATYPEPROPERTY){ + index = datatypePropertiesIndex; + } else if(type == SlotType.OBJECTPROPERTY){ + index = objectPropertiesIndex; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + index = resourcesIndex; + } + return index; + } + + private void validateAgainstRemoteEndpoint(Collection<WeightedQuery> queries){ + SPARQL_QueryType queryType = queries.iterator().next().getQuery().getQt(); + validate(queries, queryType); + } + + private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){ + logger.debug("Testing candidate SPARQL queries on remote endpoint..."); + sparqlMon.start(); + if(queryType == SPARQL_QueryType.SELECT){ + for(WeightedQuery query : queries){ + learnedPos++; + List<String> results; + try { + logger.debug("Testing query:\n" + query); + com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); + q.setLimit(1); + ResultSet rs = executeSelect(q.toString()); + + results = new ArrayList<String>(); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + if(qs.get(projectionVar).isLiteral()){ + results.add(qs.get(projectionVar).asLiteral().getLexicalForm()); + } else if(qs.get(projectionVar).isURIResource()){ + results.add(qs.get(projectionVar).asResource().getURI()); + } + + } + if(!results.isEmpty()){ + try{ + 
int cnt = Integer.parseInt(results.get(0)); + if(cnt > 0){ + learnedSPARQLQueries.add(query); + if(stopIfQueryResultNotEmpty){ + return; + } + } + } catch (NumberFormatException e){ + learnedSPARQLQueries.add(query); + if(stopIfQueryResultNotEmpty){ + return; + } + } + logger.debug("Result: " + results); + } + } catch (Exception e) { + e.printStackTrace(); + } + + } + } else if(queryType == SPARQL_QueryType.ASK){ + for(WeightedQuery query : queries){ + learnedPos++; + logger.debug("Testing query:\n" + query); + boolean result = executeAskQuery(query.getQuery().toString()); + learnedSPARQLQueries.add(query); + // if(stopIfQueryResultNotEmpty && result){ + // return; + // } + if(stopIfQueryResultNotEmpty){ + return; + } + logger.debug("Result: " + result); + } + } + + sparqlMon.stop(); + logger.debug("Done in " + sparqlMon.getLastValue() + "ms."); + } + + private boolean executeAskQuery(String query) + { + if(query==null) throw new NullPointerException("Parameter query == null"); + currentlyExecutedQuery = query; + + boolean ret; + if (model == null) + { + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + ret = qe.execAsk(); + } + else {ret = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model).execAsk();} + return ret; + } + + private ResultSet executeSelect(String query) + { + if(query==null) throw new NullPointerException("Parameter query == null"); + currentlyExecutedQuery = query; + ResultSet rs; + if (model == null) { + if (cache == null) { + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + rs = qe.execSelect(); + } else { + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + } + } else { + rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model) + .execSelect(); + } + + return rs; + } + + public String getCurrentlyExecutedQuery() { + return currentlyExecutedQuery; + } + + public int getLearnedPosition() { + if(learnedPos >= 0){ + return learnedPos+1; + } + return learnedPos; + } + + @Override + public void start() { + } + + @Override + public List<String> getCurrentlyBestSPARQLQueries(int nrOfSPARQLQueries) { + List<String> bestQueries = new ArrayList<String>(); + for(WeightedQuery wQ : learnedSPARQLQueries){ + bestQueries.add(wQ.getQuery().toString()); + } + return bestQueries; + } + + @Override + public String getBestSPARQLQuery() { + if(!learnedSPARQLQueries.isEmpty()){ + return learnedSPARQLQueries.iterator().next().getQuery().toString(); + } else { + return null; + } + } + + public SortedSet<WeightedQuery> getLearnedSPARQLQueries() { + return learnedSPARQLQueries; + } + + @Override + public LearningProblem getLearningProblem() { + // TODO Auto-generated method stub + return null; + } + + @Override + public void setLearningProblem(LearningProblem learningProblem) { + // TODO Auto-generated method stub + + } + + private Set<IndexResultItem> getIndexResultItems(Slot slot) + { + // List<String> uris = new LinkedList<String>(); + Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); + + Index index = getIndexBySlotType(slot); + + for(String word : slot.getWords()) + { + IndexResultSet rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type 
== SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); + } + } + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 20,0)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + IndexResultSet tmp = index.getResourcesWithScores(word, 20,0,Collections.singleton("boa-score")); + for(IndexResultItem item : tmp.getItems()) + {System.out.println(item); + Double boaScore = (Double) item.getFields().get("boa-score"); + if(boaScore==null||boaScore>BOA_THRESHOLD) rs.addItem(item); + } + } + } + // for(IndexResultItem item: rs.getItems()) + // { + // uris.add(item.getUri()); + // } + indexResultItems.addAll(rs.getItems()); + } + return indexResultItems; + } + class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ + + private Slot slot; + + public SlotProcessor(Slot slot) { + this.slot = slot; + } + + @Override + public Map<Slot, SortedSet<Allocation>> call() throws Exception { + Map<Slot, SortedSet<Allocation>> result = new HashMap<Slot, SortedSet<Allocation>>(); + result.put(slot, computeAllocations(slot)); + return result; + } + + private SortedSet<Allocation> computeAllocations(Slot slot){ + logger.debug("Computing allocations for slot: " + slot); + SortedSet<Allocation> allocations = new TreeSet<Allocation>(); + + Index index = getIndexBySlotType(slot); + + IndexResultSet rs; + for(String word : slot.getWords()){ + rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); + } + } + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 20)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + rs.add(index.getResourcesWithScores(word, 20)); + } + } + + + for(IndexResultItem item : rs.getItems()){ + double similarity = Similarity.getSimilarity(word, item.getLabel()); + // //get the labels of the redirects and compute the highest similarity + // if(slot.getSlotType() == SlotType.RESOURCE){ + // Set<String> labels = getRedirectLabels(item.getUri()); + // for(String label : labels){ + // double tmp = Similarity.getSimilarity(word, label); + // if(tmp > similarity){ + // similarity = tmp; + // } + // } + // } + double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); + allocations.add(new 
Allocation(item.getUri(), prominence, similarity)); + } + + } + + normProminenceValues(allocations); + + computeScore(allocations); + logger.debug("Found " + allocations.size() + " allocations for slot " + slot); + return new TreeSet<Allocation>(allocations); + } + + private Index getIndexBySlotType(Slot slot){ + Index index = null; + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + index = classesIndex; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + index = propertiesIndex; + } else if(type == SlotType.DATATYPEPROPERTY){ + index = datatypePropertiesIndex; + } else if(type == SlotType.OBJECTPROPERTY){ + index = objectPropertiesIndex; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + index = resourcesIndex; + } + return index; + } + + } + + public String getTaggedInput() + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} + return templateGenerator.getTaggedInput(); + } + + private boolean isDatatypeProperty(String uri){ + Boolean isDatatypeProperty = null; + if(mappingIndex != null){ + isDatatypeProperty = mappingIndex.isDataProperty(uri); + } + if(isDatatypeProperty == null){ + String query = String.format("ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty> .}", uri); + isDatatypeProperty = executeAskQuery(query); + } + return isDatatypeProperty; + } + + // /** + // * @param args + // * @throws NoTemplateFoundException + // * @throws IOException + // * @throws FileNotFoundException + // * @throws InvalidFileFormatException + // */ + // public static void main(String[] args) throws Exception { + // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), + // Collections.<String>singletonList(""), Collections.<String>emptyList()); + // Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); + // Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); + // Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + // + // SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2HMM(endpoint, resourcesIndex, classesIndex, propertiesIndex); + // learner.init(); + // + // String question = "What is the highest mountain?"; + // + // learner.setQuestion(question); + // learner.learnSPARQLQueries(); + // System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); + // System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); + // System.out.println(learner.getLearnedPosition()); + // + // } + + + +} Deleted: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java 2012-09-27 13:16:49 UTC (rev 3851) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java 2012-09-27 13:18:05 UTC (rev 3852) @@ -1,1440 +0,0 @@ -package org.dllearner.algorithm.tbsl.learning; - -import hmm.HiddenMarkovModel; -import hmm.ResourceInfo; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import 
java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import org.apache.commons.collections15.MultiMap; -import org.apache.log4j.Logger; -import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; -import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; -import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.PlingStemmer; -import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.WordNet; -import org.dllearner.algorithm.tbsl.sparql.Allocation; -import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; -import org.dllearner.algorithm.tbsl.sparql.Slot; -import org.dllearner.algorithm.tbsl.sparql.SlotType; -import org.dllearner.algorithm.tbsl.sparql.Template; -import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; -import org.dllearner.algorithm.tbsl.templator.Templator; -import org.dllearner.algorithm.tbsl.util.Knowledgebase; -import org.dllearner.algorithm.tbsl.util.PopularityMap; -import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; -import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; -import org.dllearner.common.index.Index; -import org.dllearner.common.index.IndexResultItem; -import org.dllearner.common.index.IndexResultSet; -import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SOLRIndex; -import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; -import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.SPARQLObjectPropertiesIndex; -import org.dllearner.common.index.SPARQLPropertiesIndex; -import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; -import org.dllearner.common.index.VirtuosoObjectPropertiesIndex; -import org.dllearner.common.index.VirtuosoPropertiesIndex; -import org.dllearner.core.ComponentInitException; -import org.dllearner.core.LearningProblem; -import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.Thing; -import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; -import org.dllearner.kb.SparqlEndpointKS; -import org.dllearner.kb.sparql.ExtractionDBCache; -import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; -import org.ini4j.Options; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; -import com.hp.hpl.jena.ontology.OntModelSpec; -import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory; -import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.query.Syntax; -import 
com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.expr.ExprAggregator; -import com.hp.hpl.jena.s... [truncated message content]
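For completeness, a runnable usage sketch along the lines of the commented-out main method in the added file above. The endpoint and SOLR index URLs are taken verbatim from that comment and may no longer be reachable; after this rename the plain SPARQLTemplateBasedLearner2 constructor is used rather than the old HMM-suffixed class:

    // Usage sketch based on the commented-out main method in SPARQLTemplateBasedLearner2;
    // endpoint and index URLs are historical and only illustrative.
    public static void main(String[] args) throws Exception {
        SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"),
                Collections.<String>singletonList(""), Collections.<String>emptyList());
        Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources");
        Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes");
        Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties");

        SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex);
        learner.init();
        learner.setQuestion("What is the highest mountain?");
        learner.learnSPARQLQueries();
        System.out.println("Learned query:\n" + learner.getBestSPARQLQuery());
    }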