Thread: [DL-Learner SVN] SF.net SVN: dl-learner:[3700] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl (Page 2)

Status: Beta

Brought to you by: jenslehmann, patrickwestphal

dl-learner-svn

[DL-Learner SVN] SF.net SVN: dl-learner:[3700] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <chr...@us...> - 2012-05-10 07:08:39

Revision: 3700
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3700&view=rev
Author:   christinaunger
Date:     2012-05-10 07:08:30 +0000 (Thu, 10 May 2012)
Log Message:
-----------
[tbsl.exploration] repaired empty-property-template

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj	2012-05-09 15:34:39 UTC (rev 3699)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj	2012-05-10 07:08:30 UTC (rev 3700)
@@ -502,7 +502,7 @@
 //TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>}
 
 TOKEN: {<A: (["a"-"z","A"-"Z","0"-"9"])+>}
-TOKEN: {<B: (["a"-"z","A"-"Z","_",".","#","0"-"9"])+":"(["a"-"z","A"-"Z","_",".","#","0"-"9"])+>}
+TOKEN: {<B: (["a"-"z","A"-"Z","_",".","#","0"-"9"])+":"(["a"-"z","A"-"Z","_",".","#","0"-"9"])+>} // oder eher: SLOT_([...])+
 TOKEN: {<C: ["?","!"](["a"-"z","A"-"Z","0"-"9"])+>}
 
 Token dr() : { Token t; }{ (t=<A> | t=<C>) { return t; } }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java	2012-05-09 15:34:39 UTC (rev 3699)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java	2012-05-10 07:08:30 UTC (rev 3700)
@@ -337,11 +337,11 @@
 				slot = "SLOT_" + token + "/PROPERTY/";
 				String[] npAdjunct = {token,
 						"(NP NP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))",
-						"<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" +
+						"<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" +
 								" ;; <x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"};
 				String[] vpAdjunct = {token,
 						"(VP VP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))",
-						"<x,l1,t,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" +
+						"<x,l1,t,[ l1:[ | SLOT_" + token + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" +
 								" ;; <x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"};
 				result.add(npAdjunct);
 				result.add(vpAdjunct);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3754] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <chr...@us...> - 2012-06-16 11:22:01

Revision: 3754
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3754&view=rev
Author:   christinaunger
Date:     2012-06-16 11:21:55 +0000 (Sat, 16 Jun 2012)
Log Message:
-----------
[tbsl] repaired resource slot problem

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java	2012-06-16 10:15:00 UTC (rev 3753)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java	2012-06-16 11:21:55 UTC (rev 3754)
@@ -117,12 +117,12 @@
 //            System.out.println("--- referent: " + referent.toString()); // DEBUG
             for (Slot s : slots) {
 //           	System.out.println("--- slot: " + s.toString()); // DEBUG
-        		if (s.getAnchor().equals(referent.getValue()) || s.getAnchor().equals(referent.toString())) {
+                if (s.getAnchor().equals(referent.getValue()) || s.getAnchor().equals(referent.toString())) {
 //        			System.out.println("    fits!"); // DEBUG
-       			template.addSlot(s);
-        			break;
-        		}
-        	}
+                    template.addSlot(s);
+                    break;
+                }
+            }
         }
         
         for (Slot s : slots) if (s.getAnchor().equals("SLOT_arg")) template.addSlot(s);
@@ -410,16 +410,22 @@
             if (firstIsURI || firstIsInt) {
                 drs.replaceEqualRef(secondArg, firstArg, true);
                 for (Slot s : slots) {
-                	if (s.getAnchor().equals(secondArg.getValue())) {
-                		s.setAnchor(firstArg.getValue());
-                	}
+                	if (s.getAnchor().equals(secondArg.getValue()))
+                            s.setAnchor(firstArg.getValue());
+                	if (s.getWords().contains(secondArg.getValue())) {
+                            s.getWords().remove(secondArg.getValue());
+                            s.getWords().add(firstArg.getValue());
+                        }
                 }
             } else if (secondIsURI || secondIsInt) {
                 drs.replaceEqualRef(firstArg, secondArg, true);
                 for (Slot s : slots) {
-                	if (s.getAnchor().equals(firstArg.getValue())) {
-                		s.setAnchor(secondArg.getValue());
-                	}
+                	if (s.getAnchor().equals(firstArg.getValue()))
+                            s.setAnchor(secondArg.getValue());
+                	if (s.getWords().contains(firstArg.getValue())) {
+                            s.getWords().remove(firstArg.getValue());
+                            s.getWords().add(secondArg.getValue());
+                        }
                 }
             } else {
                 drs.replaceEqualRef(firstArg, secondArg, false);

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java	2012-06-16 10:15:00 UTC (rev 3753)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java	2012-06-16 11:21:55 UTC (rev 3754)
@@ -96,8 +96,8 @@
 		Pattern whenPattern       = Pattern.compile("\\A(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))");
 		Pattern wherePattern      = Pattern.compile("\\A(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))");
 		Pattern adjsPattern       = Pattern.compile("((\\w+)/JJ.(\\w+)/JJ)");
-                Pattern adjnnpPattern     = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NNP(S)?)");
-		Pattern adjnounPattern    = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?)");
+//              Pattern adjnnpPattern     = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NNP(S)?)");
+		Pattern adjnounPattern    = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?(\\s|\\z))");
 		Pattern adjnprepPattern   = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NPREP)");
 		
 		m = compAdjPattern.matcher(condensedstring); 
@@ -219,15 +219,12 @@
 			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJ");
 		}
-                m = adjnnpPattern.matcher(condensedstring); 
-		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NNP");
-			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NNP");
-		}
 		m = adjnounPattern.matcher(condensedstring); 
 		while (m.find()) {
+//                    if (!m.group(4).startsWith("NNP")) {
 			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN");
+//                    }
 		}
 		m = adjnprepPattern.matcher(condensedstring); 
 		while (m.find()) {

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java	2012-06-16 10:15:00 UTC (rev 3753)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java	2012-06-16 11:21:55 UTC (rev 3754)
@@ -33,11 +33,14 @@
         public Template checkandrefine() {
             
             Set<Slot> argslots = new HashSet<Slot>();
-            for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) {
+            for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) argslots.add(slot);
+            
+            for (Slot slot : argslots) {
                 String var = slot.words.get(0);
                 // check for clash (v=LITERAL && v=RESOURCE)
-                for (Slot s : argslots) {
-                    if (s.words.get(0).equals(slot.words.get(0)) && !s.type.equals(slot.type)) 
+                for (Slot s : slots) {
+                    if ((s.words.get(0).equals(slot.words.get(0)) || s.anchor.equals(slot.words.get(0)))
+                            && !s.type.equals(slot.type)) 
                         return null;
                 }
                 // check for clash (v=LITERAL && p(...,v)=OBJECTPROPERTY) || (v=RESOURCE && p(...,v)=DATATYPEPROPERTY)
@@ -53,7 +56,6 @@
                         }
                     }
                 }
-                argslots.add(slot);
             }
             
             for (Slot slot : slots) {
@@ -99,7 +101,14 @@
             }
             
             // finally remove all argslots
-            slots.removeAll(argslots);
+//            slots.removeAll(argslots); // removes all (argslots + resource slots)
+//            for (Slot sl : argslots) slots.remove(sl); // removes resource slots
+            List<Slot> keep = new ArrayList<Slot>();
+            for (Slot s : slots) {
+                if (!s.anchor.startsWith("SLOT_arg"))
+                    keep.add(s);
+            }
+            slots = keep; 
             
             return this;
         }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3755] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-06-17 20:10:09

Revision: 3755
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3755&view=rev
Author:   lorenz_b
Date:     2012-06-17 20:10:02 +0000 (Sun, 17 Jun 2012)
Log Message:
-----------
Started faster implementation of template generation process.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-06-16 11:21:55 UTC (rev 3754)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-06-17 20:10:02 UTC (rev 3755)
@@ -9,6 +9,7 @@
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -86,7 +87,8 @@
 	
 	
 	private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class);
-	private Monitor mon = MonitorFactory.getTimeMonitor("tbsl");
+	private Monitor templateMon = MonitorFactory.getTimeMonitor("template");
+	private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql");
 	
 	private boolean useRemoteEndpointValidation;
 	private boolean stopIfQueryResultNotEmpty;
@@ -129,6 +131,11 @@
 	
 	private String currentlyExecutedQuery;
 	
+	private boolean dropZeroScoredQueries = true;
+	private boolean useManualMappingsIfExistOnly = true;
+	
+	private boolean multiThreaded = true;
+	
 	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){
 		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger());
 	}
@@ -280,16 +287,23 @@
 		template2Queries = new HashMap<Template, Collection<? extends Query>>();
 		slot2URI = new HashMap<Slot, List<String>>();
 		currentlyExecutedQuery = null;
+		
+//		templateMon.reset();
+//		sparqlMon.reset();
 	}
 	
 	public void learnSPARQLQueries() throws NoTemplateFoundException{
 		reset();
 		//generate SPARQL query templates
 		logger.info("Generating SPARQL query templates...");
-		mon.start();
-		templates = templateGenerator.buildTemplates(question);
-		mon.stop();
-		logger.info("Done in " + mon.getLastValue() + "ms.");
+		templateMon.start();
+		if(multiThreaded){
+			templates = templateGenerator.buildTemplatesMultiThreaded(question);
+		} else {
+			templates = templateGenerator.buildTemplates(question);
+		}
+		templateMon.stop();
+		logger.info("Done in " + templateMon.getLastValue() + "ms.");
 		if(templates.isEmpty()){
 			throw new NoTemplateFoundException();
 		}
@@ -672,8 +686,16 @@
 				}
 				
 			}
-			for(WeightedQuery q : queries){
-				q.setScore(q.getScore()/t.getSlots().size());
+			for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) {
+				WeightedQuery wQ = iterator.next();
+				if(dropZeroScoredQueries){
+					if(wQ.getScore() == 0){
+						iterator.remove();
+					}
+				} else {
+					wQ.setScore(wQ.getScore()/t.getSlots().size());
+				}
+				
 			}
 			allQueries.addAll(queries);
 			List<Query> qList = new ArrayList<Query>();
@@ -752,7 +774,7 @@
 	
 	private List<String> getLemmatizedWords(List<String> words){
 		logger.info("Pruning word list " + words + "...");
-		mon.start();
+//		mon.start();
 		List<String> pruned = new ArrayList<String>();
 		for(String word : words){
 			//currently only stem single words
@@ -766,8 +788,8 @@
 			}
 			
 		}
-		mon.stop();
-		logger.info("Done in " + mon.getLastValue() + "ms.");
+//		mon.stop();
+//		logger.info("Done in " + mon.getLastValue() + "ms.");
 		logger.info("Pruned list: " + pruned);
 		return pruned;
 	}
@@ -806,46 +828,51 @@
 	
 	private void validate(List<String> queries, SPARQL_QueryType queryType){
 		logger.info("Testing candidate SPARQL queries on remote endpoint...");
-		mon.start();
+		sparqlMon.start();
 		if(queryType == SPARQL_QueryType.SELECT){
 			for(String query : queries){
-				logger.info("Testing query:\n" + query);
-				com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ);
-				q.setLimit(1);
-				ResultSet rs = executeSelect(q.toString());//executeSelect(query);
-				
-				List<String> results = new ArrayList<String>();
-				QuerySolution qs;
-				String projectionVar;
-				while(rs.hasNext()){
-					qs = rs.next();
-					projectionVar = qs.varNames().next();
-					if(qs.get(projectionVar).isLiteral()){
-						results.add(qs.get(projectionVar).asLiteral().getLexicalForm());
-					} else if(qs.get(projectionVar).isURIResource()){
-						results.add(qs.get(projectionVar).asResource().getURI());
+				List<String> results;
+				try {
+					logger.info("Testing query:\n" + query);
+					com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ);
+					q.setLimit(1);
+					ResultSet rs = executeSelect(q.toString());//executeSelect(query);
+					
+					results = new ArrayList<String>();
+					QuerySolution qs;
+					String projectionVar;
+					while(rs.hasNext()){
+						qs = rs.next();
+						projectionVar = qs.varNames().next();
+						if(qs.get(projectionVar).isLiteral()){
+							results.add(qs.get(projectionVar).asLiteral().getLexicalForm());
+						} else if(qs.get(projectionVar).isURIResource()){
+							results.add(qs.get(projectionVar).asResource().getURI());
+						}
+						
 					}
-					
-				}
-				if(!results.isEmpty()){
-					try{
-						int cnt = Integer.parseInt(results.get(0));
-						if(cnt > 0){learnedPos = queries.indexOf(query);
+					if(!results.isEmpty()){
+						try{
+							int cnt = Integer.parseInt(results.get(0));
+							if(cnt > 0){learnedPos = queries.indexOf(query);
+								learnedSPARQLQueries.put(query, results);
+								if(stopIfQueryResultNotEmpty){
+									return;
+								}
+							}
+						} catch (NumberFormatException e){
 							learnedSPARQLQueries.put(query, results);
+							learnedPos = queries.indexOf(query);
 							if(stopIfQueryResultNotEmpty){
 								return;
 							}
 						}
-					} catch (NumberFormatException e){
-						learnedSPARQLQueries.put(query, results);
-						learnedPos = queries.indexOf(query);
-						if(stopIfQueryResultNotEmpty){
-							return;
-						}
+						logger.info("Result: " + results);
 					}
-					
+				} catch (Exception e) {
+					e.printStackTrace();
 				}
-				logger.info("Result: " + results);
+				
 			}
 		} else if(queryType == SPARQL_QueryType.ASK){
 			for(String query : queries){
@@ -862,8 +889,8 @@
 			}
 		}
 		
-		mon.stop();
-		logger.info("Done in " + mon.getLastValue() + "ms.");
+		sparqlMon.stop();
+		logger.info("Done in " + sparqlMon.getLastValue() + "ms.");
 	}
 	
 	private boolean executeAskQuery(String query){
@@ -976,15 +1003,19 @@
 						rs.add(mappingIndex.getResourcesWithScores(word));
 					}
 				}
-				if(slot.getSlotType() == SlotType.RESOURCE){
-					rs.add(index.getResourcesWithScores(word, 50));
-				} else {
-					if(slot.getSlotType() == SlotType.CLASS){
-						word = PlingStemmer.stem(word); 
+				//use the non manual indexes only if mapping based resultset is not empty and option is set
+				if(!useManualMappingsIfExistOnly || rs.isEmpty()){
+					if(slot.getSlotType() == SlotType.RESOURCE){
+						rs.add(index.getResourcesWithScores(word, 50));
+					} else {
+						if(slot.getSlotType() == SlotType.CLASS){
+							word = PlingStemmer.stem(word); 
+						}
+						rs.add(index.getResourcesWithScores(word, 20));
 					}
-					rs.add(index.getResourcesWithScores(word, 20));
 				}
 				
+				
 				for(IndexResultItem item : rs.getItems()){
 					double similarity = Similarity.getSimilarity(word, item.getLabel());
 //					//get the labels of the redirects and compute the highest similarity
@@ -1012,6 +1043,10 @@
 		
 	}
 	
+	public String getTaggedInput(){
+		return templateGenerator.getTaggedInput();
+	}
+	
 	private boolean isDatatypeProperty(String uri){
 		Boolean isDatatypeProperty = null;
 		if(mappingIndex != null){

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java	2012-06-16 11:21:55 UTC (rev 3754)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java	2012-06-17 20:10:02 UTC (rev 3755)
@@ -2,6 +2,8 @@
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 
 import org.apache.log4j.Logger;
 import org.dllearner.algorithm.tbsl.ltag.data.TreeNode;
@@ -9,6 +11,9 @@
 import org.dllearner.algorithm.tbsl.sem.dudes.reader.ParseException;
 import org.dllearner.algorithm.tbsl.sem.util.Pair;
 
+import com.jamonapi.Monitor;
+import com.jamonapi.MonitorFactory;
+
 public class Parser {
 	
 	private static final Logger logger = Logger.getLogger(Parser.class);
@@ -91,7 +96,54 @@
 		return derivationTrees;
 
 	}
+	
+	public List<DerivationTree> parseMultiThreaded(String taggeduserinput, LTAGLexicon grammar) {
 
+		derivationTrees.clear();
+		derivedTrees.clear();
+		dudes.clear();
+		temporaryEntries.clear();
+		
+		if (!VERBOSE) GrammarFilter.VERBOSE = false;
+
+		/*
+		 * create a local copy of the grammar with own treeIDs. This is
+		 * necessary since if an input string contains the same token multiple
+		 * times, a tree for each token is added. Both trees need to have
+		 * different treeIDs for the parser to work correctly.
+		 */
+		parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE);
+
+		String inputNoTags = "";		
+		for (String s : taggeduserinput.split(" ")) {
+			inputNoTags += s.substring(0,s.indexOf("/")) + " ";
+		}
+
+		this.input = ("# ".concat(inputNoTags.replaceAll("'","").trim())).split(" ");
+		int n = this.input.length;
+		
+		
+		if (SHOW_GRAMMAR) {
+			logger.trace(parseGrammar);
+		}
+		if (SHOW_LEXICAL_COVERAGE) {
+			logger.trace("# OF TREES FOUND: " + parseGrammar.size());
+			logger.trace("# OF INPUT TOKENS: " + n);
+		}
+
+		List<Pair<TreeNode, Short>> initTrees = parseGrammar.getInitTrees();
+		
+		internalParseMultiThreaded(initTrees, n);
+
+		if (USE_DPS_AS_INITTREES && derivationTrees.isEmpty()) {
+			internalParseMultiThreaded(parseGrammar.getDPInitTrees(), n);
+		}
+
+		if (VERBOSE) logger.trace("Constructed " + derivationTrees.size() + " derivation trees.\n");
+		return derivationTrees;
+
+	}
+
 	private void internalParse(List<Pair<TreeNode, Short>> initTrees, int n) {
 		
 		TREELOOP: for (int k = 0; k < initTrees.size(); k++) {
@@ -211,6 +263,23 @@
 		}
 
 	}
+	
+	private void internalParseMultiThreaded(List<Pair<TreeNode, Short>> initTrees, int n) {
+		Monitor parseMon = MonitorFactory.getTimeMonitor("parse");
+		ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+		parseMon.start();
+		for (int k = 0; k < initTrees.size(); k++) {
+			Pair<TreeNode, Short> pair = initTrees.get(k);
+			TreeNode tree = pair.getFirst();
+			short tid = pair.getSecond();
+			threadPool.execute(new TreeProcessor(tree, tid, n));
+		}
+		threadPool.shutdown();
+		while(!threadPool.isTerminated()){
+			
+		}
+		parseMon.start();
+	}
 
 	private List<List<ParseState>> makeStateSets() {
 
@@ -298,7 +367,21 @@
 		return derivedTrees;
 
 	}
+	
+	public List<TreeNode> buildDerivedTreesMultiThreaded(LTAGLexicon G) throws ParseException {
+		ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+		for (DerivationTree dTree : derivationTrees) {
+			threadPool.execute(new DerivationTreeProcessor(dTree, G));
+		}
+		threadPool.shutdown();
+		while(!threadPool.isTerminated()){
+			
+		}
 
+		return derivedTrees;
+
+	}
+
 	/**
 	 * get List of Dudes parallely constructed by Parser.buildDerivedTrees()
 	 */
@@ -342,5 +425,169 @@
 		grammar.clear(temps);
 
 	}
+	
+	class TreeProcessor implements Runnable{
 
+		
+		private TreeNode tree;
+		private short tid;
+		private int n;
+		
+		public TreeProcessor(TreeNode tree, short tid, int n) {
+			this.tree = tree;
+			this.tid = tid;
+			this.n = n;
+		}
+
+		@Override
+		public void run() {
+			List<List<ParseState>> stateSets = makeStateSets();
+
+			ParseState start = new ParseState(tree, tid);
+			// the inittree is already used
+			start.getUsedTrees().add(tid);
+			
+			stateSets.get(0).add(start);
+			boolean skip = false;
+			for (int i = 0; i < n; i++) {
+
+				if (i > 0) {
+					stateSets.get(i - 1).clear();
+					if (USE_LESS_MEMORY) {
+						System.gc();
+					}
+				}
+
+				List<ParseState> localStateSet = new ArrayList<ParseState>(
+						stateSets.get(i));
+				List<ParseState> localStateSet2 = new ArrayList<ParseState>();
+
+				stateSets.get(i).clear();
+
+				while (localStateSet.size() > 0) {
+
+					for (int j = 0; j < localStateSet.size(); j++) {
+						ParseState state = localStateSet.get(j);
+
+						List<ParseState> newStates;
+
+						OPLOOP: for (Class<?> c : operations) {
+
+							try {
+
+								ParserOperation op = (ParserOperation) c
+										.newInstance();
+
+								newStates = (op.go(i, state, input,
+										parseGrammar));
+
+								if (!newStates.isEmpty()) {
+
+									for (ParseState newState : newStates) {
+										if (newState.i.equals(i)) {
+											localStateSet2.add(newState);
+										}
+
+										if ((op instanceof Scanner)
+												|| (newState.isEndState() && newState.i == n - 1)) {
+											stateSets.get(newState.i).add(
+													newState);
+										}
+									}
+
+									op = null;
+									break OPLOOP;
+
+								}
+
+							} catch (InstantiationException e) {
+								e.printStackTrace();
+
+							} catch (IllegalAccessException e) {
+								e.printStackTrace();
+
+							}
+
+						}
+
+					}
+
+					localStateSet = null;
+					localStateSet = new ArrayList<ParseState>(localStateSet2);
+					localStateSet2 = new ArrayList<ParseState>();
+
+				}
+
+				localStateSet = null;
+				localStateSet2 = null;
+
+				/*
+				 * if the parser could not scan the next input token this run /
+				 * initial tree is rejected
+				 */
+				if (i < n - 1 && stateSets.get(i + 1).isEmpty()) {
+
+					stateSets.get(i).clear();
+					skip = true;
+					break;
+
+				}
+
+			}
+
+			if(!skip){
+				for (ParseState state : stateSets.get(n - 1)) {
+
+					
+//					if (state.isEndState() && state.t.equals(tree)) {
+					if (state.isEndState()) {
+						if (state.t.equals(tree)) {
+
+						derivationTrees.add(createDerivationTree(state,
+								parseGrammar));
+
+						}
+					}
+
+				}
+			}
+			
+			
+		}
+		
+	}
+	
+	class DerivationTreeProcessor implements Runnable{
+		
+		private DerivationTree dTree;
+		private LTAGLexicon lexicon;
+		
+		public DerivationTreeProcessor(DerivationTree dTree, LTAGLexicon lexicon) {
+			this.dTree = dTree;
+			this.lexicon = lexicon;
+		}
+
+		@Override
+		public void run() {
+			try {
+				List<Pair<TreeNode, Dude>> pairs = DerivedTree.build(dTree, parseGrammar, lexicon, CONSTRUCT_SEMANTICS);
+				
+				for (Pair<TreeNode,Dude> pair : pairs) {
+					TreeNode x = pair.getFirst();
+					Dude dude = pair.getSecond();
+
+					if (!derivedTrees.contains(x) || !dudes.contains(dude)) {
+						derivedTrees.add(x);
+						dudes.add(dude);
+					}
+					
+				}
+			} catch (ParseException e) {
+				e.printStackTrace();
+			}
+			
+		}
+		
+	}
+
 }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-06-16 11:21:55 UTC (rev 3754)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-06-17 20:10:02 UTC (rev 3755)
@@ -6,11 +6,12 @@
 import java.util.Hashtable;
 import java.util.List;
 import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 
 import net.didion.jwnl.data.POS;
 
 import org.apache.log4j.Logger;
-
 import org.dllearner.algorithm.tbsl.converter.DRS2SPARQL_Converter;
 import org.dllearner.algorithm.tbsl.converter.DUDE2UDRS_Converter;
 import org.dllearner.algorithm.tbsl.ltag.parser.LTAGLexicon;
@@ -58,6 +59,11 @@
 	boolean USE_WORDNET = true;
 	boolean VERBOSE = true;
 	
+	private String taggedInput;
+	
+	private Set<Template> templates;
+	private Set<DRS> drses;
+	
 	public Templator() {
 		this(new StanfordPartOfSpeechTagger(), new WordNet());
 	}
@@ -141,7 +147,7 @@
 			tagged = s;
 			s = extractSentence(tagged);
 		}
-		
+		taggedInput = tagged;
 		String newtagged;
 		if (USE_NER) {
 			newtagged = pp.condenseNominals(pp.findNEs(tagged,s));
@@ -244,9 +250,6 @@
 			                					newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att));
 		                					}
 	                					}
-	                					if(newwords.isEmpty()){
-	                						
-	                					}
 	                					if (newwords.isEmpty()) {
 	                						newwords.add(slot.getWords().get(0));
 	                					}
@@ -271,11 +274,171 @@
         if (clearAgain) {
         	p.clear(g,p.getTemps());
         }
-        System.gc();
+//        System.gc();
         
         return templates;
     }
 	
+	public Set<Template> buildTemplatesMultiThreaded(String s) {
+		
+		boolean clearAgain = true;
+        
+		String tagged;
+		if (UNTAGGED_INPUT) {		
+			s = pp.normalize(s);
+			tagged = tagger.tag(s);
+			if (VERBOSE) logger.trace("Tagged input: " + tagged);
+		}
+		else {
+			tagged = s;
+			s = extractSentence(tagged);
+		}
+		taggedInput = tagged;
+		String newtagged;
+		if (USE_NER) {
+			newtagged = pp.condenseNominals(pp.findNEs(tagged,s));
+		} 
+		else newtagged = pp.condenseNominals(tagged);
+		
+		newtagged = pp.condense(newtagged);
+		if (VERBOSE) logger.trace("Preprocessed: " + newtagged); 
+        
+        p.parseMultiThreaded(newtagged,g);
+        
+        if (p.getDerivationTrees().isEmpty()) {
+            p.clear(g,p.getTemps());
+            clearAgain = false;
+            if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed.");
+        }
+        else {
+        try {
+        	p.buildDerivedTreesMultiThreaded(g);
+        } catch (ParseException e) {
+        	if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e);
+        }
+        }
+
+        // build pairs <String,POStag> from tagged
+        Hashtable<String,String> postable = new Hashtable<String,String>();
+        for (String st : newtagged.split(" ")) {
+			postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));;
+		}
+        //
+        
+        drses = new HashSet<DRS>();
+        templates = new HashSet<Template>();
+        
+//        ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+//        for (Dude dude : p.getDudes()) {
+//           threadPool.execute(new DudeProcessor(dude, postable));
+//        }
+//        threadPool.shutdown();
+//		while(!threadPool.isTerminated()){}
+        
+        for (Dude dude : p.getDudes()) {
+           
+           UDRS udrs = d2u.convert(dude);
+           if (udrs != null) { 
+               
+           	for (DRS drs : udrs.initResolve()) {
+               	
+               	List<Slot> slots = new ArrayList<Slot>();
+           		slots.addAll(dude.getSlots());
+           		d2s.setSlots(slots);
+               	d2s.redundantEqualRenaming(drs);
+               	
+               	if (!containsModuloRenaming(drses,drs)) {
+//                   	// DEBUG
+               		if (VERBOSE) {
+	                		System.out.println(dude);
+	                		System.out.println(drs);
+	                		for (Slot sl : slots) {
+	                			System.out.println(sl.toString());
+	                		}
+               		}
+//               		//
+               		drses.add(drs);
+               		
+               		try {
+               			Template temp = d2s.convert(drs,slots);
+                                       temp = temp.checkandrefine();
+               			if (temp == null) {
+               				continue;
+               			}
+               			
+       					if (USE_WORDNET) { // find WordNet synonyms
+	            				List<String> newwords;
+	            				String word; 
+	            				String pos;
+	                			for (Slot slot : temp.getSlots()) {
+	                				if (!slot.getWords().isEmpty()) {
+	                					
+	                					word = slot.getWords().get(0);
+	                					pos = postable.get(word.toLowerCase().replace(" ","_"));
+	                					
+	                					POS wordnetpos = null;
+	                					if (pos != null) {
+		                					if (equalsOneOf(pos,noun)) {
+		                						wordnetpos = POS.NOUN;
+		                					}
+		                					else if (equalsOneOf(pos,adjective)) {
+		                						wordnetpos = POS.ADJECTIVE;
+		                					}
+		                					else if (equalsOneOf(pos,verb)) {
+		                						wordnetpos = POS.VERB;
+		                					}
+		                				}
+	                					
+	               						List<String> strings = new ArrayList<String>();
+	               						if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) {
+	               							strings = wordnet.getAttributes(word);
+	               						}
+	                					
+	                					newwords = new ArrayList<String>();
+	                					newwords.addAll(slot.getWords());
+	                					newwords.addAll(strings);            					
+	                					
+	                					if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) {
+	                						newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word)));
+		                					for (String att : getLemmatizedWords(strings)) {
+			                					newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att));
+		                					}
+	                					}
+	                					if (newwords.isEmpty()) {
+	                						newwords.add(slot.getWords().get(0));
+	                					}
+	                					List<String> newwordslist = new ArrayList<String>();
+	                					newwordslist.addAll(newwords);
+	                					slot.setWords(newwordslist);
+	                				}
+	                			}
+               			}
+               			// 
+               			
+               			templates.add(temp);
+               		} catch (java.lang.ClassCastException e) {
+               			continue;
+               		}
+               		if (ONE_SCOPE_ONLY) { break; }
+               	}	
+               }
+           
+	}
+        }
+        
+ 
+        if (clearAgain) {
+        	p.clear(g,p.getTemps());
+        }
+//        System.gc();
+        
+        return templates;
+    }
+	
+	public String getTaggedInput() {
+		return taggedInput;
+	}
+	
 	private List<String> getLemmatizedWords(List<String> words){
 		List<String> stemmed = new ArrayList<String>();
 		for(String word : words){
@@ -330,5 +493,107 @@
     	return taggedSentence;
     	
     }
+	
+	class DudeProcessor implements Runnable{
+		
+		private Dude dude;
+		private Hashtable<String,String> postable;
+		
+		public DudeProcessor(Dude dude, Hashtable<String,String> postable) {
+			this.dude = dude;
+			this.postable = postable;
+		}
 
+		@Override
+		public void run() {
+			 UDRS udrs = d2u.convert(dude);
+	            if (udrs != null) { 
+	                
+	            	for (DRS drs : udrs.initResolve()) {
+	                	
+	                	List<Slot> slots = new ArrayList<Slot>();
+	            		slots.addAll(dude.getSlots());
+	            		d2s.setSlots(slots);
+	                	d2s.redundantEqualRenaming(drs);
+	                	
+	                	if (!containsModuloRenaming(drses,drs)) {
+//	                    	// DEBUG
+	                		if (VERBOSE) {
+		                		System.out.println(dude);
+		                		System.out.println(drs);
+		                		for (Slot sl : slots) {
+		                			System.out.println(sl.toString());
+		                		}
+	                		}
+//	                		//
+	                		drses.add(drs);
+	                		
+	                		try {
+	                			Template temp = d2s.convert(drs,slots);
+	                                        temp = temp.checkandrefine();
+	                			if (temp == null) {
+	                				continue;
+	                			}
+	                			
+	        					if (USE_WORDNET) { // find WordNet synonyms
+		            				List<String> newwords;
+		            				String word; 
+		            				String pos;
+		                			for (Slot slot : temp.getSlots()) {
+		                				if (!slot.getWords().isEmpty()) {
+		                					
+		                					word = slot.getWords().get(0);
+		                					pos = postable.get(word.toLowerCase().replace(" ","_"));
+		                					
+		                					POS wordnetpos = null;
+		                					if (pos != null) {
+			                					if (equalsOneOf(pos,noun)) {
+			                						wordnetpos = POS.NOUN;
+			                					}
+			                					else if (equalsOneOf(pos,adjective)) {
+			                						wordnetpos = POS.ADJECTIVE;
+			                					}
+			                					else if (equalsOneOf(pos,verb)) {
+			                						wordnetpos = POS.VERB;
+			                					}
+			                				}
+		                					
+		               						List<String> strings = new ArrayList<String>();
+		               						if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) {
+		               							strings = wordnet.getAttributes(word);
+		               						}
+		                					
+		                					newwords = new ArrayList<String>();
+		                					newwords.addAll(slot.getWords());
+		                					newwords.addAll(strings);            					
+		                					
+		                					if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) {
+		                						newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word)));
+			                					for (String att : getLemmatizedWords(strings)) {
+				                					newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att));
+			                					}
+		                					}
+		                					if (newwords.isEmpty()) {
+		                						newwords.add(slot.getWords().get(0));
+		                					}
+		                					List<String> newwordslist = new ArrayList<String>();
+		                					newwordslist.addAll(newwords);
+		                					slot.setWords(newwordslist);
+		                				}
+		                			}
+	                			}
+	                			// 
+	                			
+	                			templates.add(temp);
+	                		} catch (java.lang.ClassCastException e) {
+	                			continue;
+	                		}
+	                		if (ONE_SCOPE_ONLY) { break; }
+	                	}	
+	                }
+	            }
+		}
+		
+	}
+
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3761] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <chr...@us...> - 2012-06-18 13:56:05

Revision: 3761
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3761&view=rev
Author:   christinaunger
Date:     2012-06-18 13:55:56 +0000 (Mon, 18 Jun 2012)
Log Message:
-----------
[tbsl] re-arrange regex parts in the correct order (if the user says "gas central heating", he's getting "gas central heating"...)

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java	2012-06-18 12:51:44 UTC (rev 3760)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java	2012-06-18 13:55:56 UTC (rev 3761)
@@ -29,6 +29,7 @@
 
     private boolean silent = true; // suppresses console output
     private boolean oxford = true;
+    private String inputstring = null;
     List<Slot> slots;
     Template template;
     List<Integer> usedInts;
@@ -44,6 +45,9 @@
         usedInts = new ArrayList<Integer>();
     }
     
+    public void setInputString(String s) {
+        inputstring = s;
+    }
     public void setSlots(List<Slot> ls) {
     	slots = ls;
     }
@@ -457,7 +461,7 @@
         
         String var;
         String newvar;
-        String regex = "";
+        List<String> regexs = new ArrayList<String>();
         String[] forbidden = {"regextoken","regex","count","minimum","maximum","greater","less","greaterorequal","lessorequal","equal","sum","location","description"};
         Set<Simple_DRS_Condition> used = new HashSet<Simple_DRS_Condition>();
         
@@ -473,7 +477,9 @@
                     }
                 }
                 if (takeit) {
-                    regex += cond.getPredicate().replace("SLOT","").replaceAll("_"," ");
+                    for (String s : cond.getPredicate().replace("SLOT","").replaceAll("_"," ").trim().split(" ")) {
+                        regexs.add(s);
+                    }
                     used.add(cond);
                 }
                 else if (!cond.getPredicate().equals("regextoken")) {
@@ -482,9 +488,9 @@
                     }
                 }
             }
-            if (!regex.isEmpty()) {
+            if (!regexs.isEmpty()) {
                 c.getArguments().remove(1);
-                c.getArguments().add(new DiscourseReferent("'"+regex.trim()+"'"));
+                c.getArguments().add(new DiscourseReferent("'"+orderedRegex(regexs)+"'"));
                 c.setPredicate("regex");
             }
             else { used.add(c); } // TODO should not happen!
@@ -503,7 +509,7 @@
         for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) {
             String d = "";
             String d2 = "";
-            String newregex = "";
+            List<String> regextokens = new ArrayList<String>();
             if (c.getPredicate().equals("SLOT_description")) {
                 d = c.getArguments().get(0).getValue();
                 d2 = c.getArguments().get(1).getValue();
@@ -519,14 +525,16 @@
                 for (Simple_DRS_Condition cond : drs.getAllSimpleConditions()) {
                     if (cond.getPredicate().equals("regex") && 
                             (cond.getArguments().get(0).getValue().equals(d) || cond.getArguments().get(0).getValue().equals(d2))) {
-                        newregex += cond.getArguments().get(1).getValue().replaceAll("'","").replaceAll("_"," ").trim() + " ";
+                        for (String s : cond.getArguments().get(1).getValue().replaceAll("'","").replaceAll("_"," ").trim().split(" ")) {
+                            regextokens.add(s);
+                        }
                         oldconds.add(cond);
                     }
                 }
                 for (Simple_DRS_Condition cond : oldconds) drs.removeCondition(cond);
                 List<DiscourseReferent> newrefs = new ArrayList<DiscourseReferent>();
                 newrefs.add(new DiscourseReferent(d));
-                newrefs.add(new DiscourseReferent("'"+newregex.trim()+"'"));
+                newrefs.add(new DiscourseReferent("'"+orderedRegex(regextokens)+"'"));
                 drs.addCondition(new Simple_DRS_Condition("regex",newrefs));
                 break;
             }
@@ -605,13 +613,37 @@
         return false; // TODO
     }
     
-	private int createFresh() {
+    private int createFresh() {
 		
-		int fresh = 0;
-		for (int i = 0; usedInts.contains(i); i++) {
-			fresh = i+1 ;
-		}
-		usedInts.add(fresh);
-		return fresh;
+        int fresh = 0;
+	for (int i = 0; usedInts.contains(i); i++) {
+            fresh = i+1 ;
 	}
+	usedInts.add(fresh);
+	return fresh;
+    }
+    
+    private String orderedRegex(List<String> regextokens) {
+        
+        String newregex = "";
+        if (inputstring != null) {
+            String[] inputparts = inputstring.split(" ");
+            TreeMap<Integer,String> regexparts = new TreeMap<Integer,String>();
+            for (String s : regextokens) {
+                for (int i = 0; i < inputparts.length; i++) {
+                    if (inputparts[i].matches(s+"(/\\w+)?")) {
+                        regexparts.put(i,s);
+                        break;
+                    }
+                }
+            }
+            for (int n : regexparts.descendingKeySet()) {
+                newregex = regexparts.get(n) + " " + newregex;
+            }
+         } 
+         else for (String s : regextokens) newregex += s + " ";
+        
+        return newregex.trim();
+        }
+    
 }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-06-18 12:51:44 UTC (rev 3760)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-06-18 13:55:56 UTC (rev 3761)
@@ -135,6 +135,8 @@
 
 	public Set<Template> buildTemplates(String s) {
 		
+            d2s.setInputString(s);
+            
 		boolean clearAgain = true;
         
 		String tagged;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3763] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-06-23 07:37:09

Revision: 3763
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3763&view=rev
Author:   lorenz_b
Date:     2012-06-23 07:37:03 +0000 (Sat, 23 Jun 2012)
Log Message:
-----------
Some extensions for the TBSL web UI.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java

Added Paths:
-----------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Knowledgebase.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-06-20 12:48:53 UTC (rev 3762)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-06-23 07:37:03 UTC (rev 3763)
@@ -45,6 +45,7 @@
 import org.dllearner.algorithm.tbsl.sparql.Template;
 import org.dllearner.algorithm.tbsl.sparql.WeightedQuery;
 import org.dllearner.algorithm.tbsl.templator.Templator;
+import org.dllearner.algorithm.tbsl.util.Knowledgebase;
 import org.dllearner.algorithm.tbsl.util.Similarity;
 import org.dllearner.common.index.Index;
 import org.dllearner.common.index.IndexResultItem;
@@ -140,18 +141,46 @@
 		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger());
 	}
 	
+	public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){
+		this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getPropertyIndex(), knowledgebase.getClassIndex(), posTagger, wordNet, options);
+	}
+	
+	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){
+		this(endpoint, index, new StanfordPartOfSpeechTagger());
+	}
+	
 	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){
 		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options());
 	}
 	
+	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){
+		this(endpoint, index, posTagger, new WordNet(), new Options());
+	}
+	
 	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){
 		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options());
 	}
 	
+	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, WordNet wordNet){
+		this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options());
+	}
+	
+	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){
+		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache"));
+	}
+	
+	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){
+		this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache"));
+	}
+	
 	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){
 		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache"));
 	}
 	
+	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){
+		this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache"));
+	}
+	
 	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){
 		this.endpoint = endpoint;
 		this.resourcesIndex = resourcesIndex;
@@ -228,6 +257,13 @@
 		this.mappingIndex = mappingIndex;
 	}
 	
+	public void setKnowledgebase(Knowledgebase knowledgebase){
+		this.endpoint = knowledgebase.getEndpoint();
+		this.resourcesIndex = knowledgebase.getResourceIndex();
+		this.classesIndex = knowledgebase.getPropertyIndex();
+		this.propertiesIndex = knowledgebase.getClassIndex();
+	}
+	
 	/*
 	 * Only for Evaluation useful.
 	 */
@@ -689,7 +725,7 @@
 			for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) {
 				WeightedQuery wQ = iterator.next();
 				if(dropZeroScoredQueries){
-					if(wQ.getScore() == 0){
+					if(wQ.getScore() <= 0){
 						iterator.remove();
 					}
 				} else {

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java	2012-06-20 12:48:53 UTC (rev 3762)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java	2012-06-23 07:37:03 UTC (rev 3763)
@@ -1,5 +1,6 @@
 package org.dllearner.algorithm.tbsl.nlp;
 
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -40,6 +41,15 @@
 		}
 	}
 	
+	public WordNet(InputStream propertiesStream) {
+		try {
+			JWNL.initialize(propertiesStream);
+			dict = Dictionary.getInstance();
+		} catch (JWNLException e) {
+			e.printStackTrace();
+		}
+	}
+	
 	public List<String> getBestSynonyms(POS pos, String s) {
 		
 		List<String> synonyms = new ArrayList<String>();

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-06-20 12:48:53 UTC (rev 3762)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-06-23 07:37:03 UTC (rev 3763)
@@ -57,7 +57,7 @@
 	boolean UNTAGGED_INPUT = true;
 	boolean USE_NER = false;
 	boolean USE_WORDNET = true;
-	boolean VERBOSE = false;
+	boolean VERBOSE = true;
 	
 	private String taggedInput;
 	

Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Knowledgebase.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Knowledgebase.java	                        (rev 0)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Knowledgebase.java	2012-06-23 07:37:03 UTC (rev 3763)
@@ -0,0 +1,68 @@
+package org.dllearner.algorithm.tbsl.util;
+
+import org.dllearner.common.index.Index;
+import org.dllearner.common.index.MappingBasedIndex;
+import org.dllearner.kb.sparql.SparqlEndpoint;
+
+public class Knowledgebase {
+
+	private String label;
+	private SparqlEndpoint endpoint;
+	private String description;
+
+	private Index resourceIndex;
+	private Index propertyIndex;
+	private Index classIndex;
+	
+	private MappingBasedIndex mappingIndex;
+
+	public Knowledgebase(SparqlEndpoint endpoint, String label, String description,
+			Index resourceIndex, Index propertyIndex, Index classIndex) {
+		this(endpoint, label, description, resourceIndex, propertyIndex, classIndex, null);
+	}
+	
+	public Knowledgebase(SparqlEndpoint endpoint, String label, String description,
+			Index resourceIndex, Index propertyIndex, Index classIndex, MappingBasedIndex mappingIndex) {
+		this.label = label;
+		this.endpoint = endpoint;
+		this.description = description;
+		this.resourceIndex = resourceIndex;
+		this.propertyIndex = propertyIndex;
+		this.classIndex = classIndex;
+		this.mappingIndex = mappingIndex;
+	}
+
+	public String getLabel() {
+		return label;
+	}
+
+	public SparqlEndpoint getEndpoint() {
+		return endpoint;
+	}
+
+	public String getDescription() {
+		return description;
+	}
+
+	public Index getResourceIndex() {
+		return resourceIndex;
+	}
+
+	public Index getPropertyIndex() {
+		return propertyIndex;
+	}
+
+	public Index getClassIndex() {
+		return classIndex;
+	}
+	
+	public MappingBasedIndex getMappingIndex() {
+		return mappingIndex;
+	}
+
+	@Override
+	public String toString() {
+		return label;
+	}
+
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3764] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-06-25 13:22:09

Revision: 3764
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3764&view=rev
Author:   lorenz_b
Date:     2012-06-25 13:21:58 +0000 (Mon, 25 Jun 2012)
Log Message:
-----------
Added option to set grammar files.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-06-23 07:37:03 UTC (rev 3763)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-06-25 13:21:58 UTC (rev 3764)
@@ -137,12 +137,14 @@
 	
 	private boolean multiThreaded = true;
 	
+	private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"};
+	
 	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){
 		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger());
 	}
 	
 	public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){
-		this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getPropertyIndex(), knowledgebase.getClassIndex(), posTagger, wordNet, options);
+		this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options);
 	}
 	
 	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){
@@ -247,9 +249,13 @@
 		}
 	}
 	
+	public void setGrammarFiles(String[] grammarFiles){
+		templateGenerator.setGrammarFiles(grammarFiles);
+	}
+	
 	@Override
 	public void init() throws ComponentInitException {
-		 templateGenerator = new Templator(posTagger, wordNet);
+		 templateGenerator = new Templator(posTagger, wordNet, grammarFiles);
 		 lemmatizer = new LingPipeLemmatizer();
 	}
 	
@@ -260,8 +266,8 @@
 	public void setKnowledgebase(Knowledgebase knowledgebase){
 		this.endpoint = knowledgebase.getEndpoint();
 		this.resourcesIndex = knowledgebase.getResourceIndex();
-		this.classesIndex = knowledgebase.getPropertyIndex();
-		this.propertiesIndex = knowledgebase.getClassIndex();
+		this.classesIndex = knowledgebase.getClassIndex();
+		this.propertiesIndex = knowledgebase.getPropertyIndex();
 	}
 	
 	/*

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-06-23 07:37:03 UTC (rev 3763)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-06-25 13:21:58 UTC (rev 3764)
@@ -92,6 +92,27 @@
 	    pp = new Preprocessor(USE_NER);
 	}
 	
+	public Templator(final PartOfSpeechTagger tagger, WordNet wordnet, String[] GRAMMAR_FILES) {
+        this.tagger = tagger;
+        this.wordnet = wordnet;
+        this.GRAMMAR_FILES = GRAMMAR_FILES;
+
+        List<InputStream> grammarFiles = new ArrayList<InputStream>();
+        for(int i = 0; i < GRAMMAR_FILES.length; i++){
+	grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i]));
+        }
+	
+        g = LTAG_Constructor.construct(grammarFiles);
+	
+    p = new Parser();
+    p.SHOW_GRAMMAR = true;
+    p.USE_DPS_AS_INITTREES = true;
+    p.CONSTRUCT_SEMANTICS = true;
+    p.MODE = "LEIPZIG";
+    
+    pp = new Preprocessor(USE_NER);
+}
+	
 	public Templator(boolean b) {
             this.tagger = new StanfordPartOfSpeechTagger();
             this.USE_WORDNET = false;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3767] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-06-29 12:18:50

Revision: 3767
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3767&view=rev
Author:   lorenz_b
Date:     2012-06-29 12:18:39 +0000 (Fri, 29 Jun 2012)
Log Message:
-----------
Using the popularity map as an execution cache.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-06-28 13:44:49 UTC (rev 3766)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-06-29 12:18:39 UTC (rev 3767)
@@ -47,6 +47,7 @@
 import org.dllearner.algorithm.tbsl.templator.Templator;
 import org.dllearner.algorithm.tbsl.util.Knowledgebase;
 import org.dllearner.algorithm.tbsl.util.PopularityMap;
+import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType;
 import org.dllearner.algorithm.tbsl.util.Similarity;
 import org.dllearner.common.index.Index;
 import org.dllearner.common.index.IndexResultItem;
@@ -782,9 +783,13 @@
 	private double getProminenceValue(String uri, SlotType type){
 		Integer popularity = null;
 		if(popularityMap != null){
-			if(type == SlotType.CLASS || type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY 
+			if(type == SlotType.CLASS){
+				popularity = popularityMap.getPopularity(uri, EntityType.CLASS);
+			} else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY 
 					|| type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){
-				popularity = popularityMap.getPopularity(uri);
+				popularity = popularityMap.getPopularity(uri, EntityType.PROPERTY);
+			} else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){
+				popularity = popularityMap.getPopularity(uri, EntityType.RESOURCE);
 			} 
 		} 
 		if(popularity == null){
@@ -808,6 +813,9 @@
 				popularity = qs.get(projectionVar).asLiteral().getInt();
 			}
 		}
+		if(popularity == null){
+			popularity = Integer.valueOf(0);
+		}
 		
 		
 //		if(cnt == 0){

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java	2012-06-28 13:44:49 UTC (rev 3766)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java	2012-06-29 12:18:39 UTC (rev 3767)
@@ -7,7 +7,9 @@
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.dllearner.core.owl.DatatypeProperty;
@@ -23,7 +25,7 @@
 
 public class PopularityMap {
 	
-	enum EntityType {
+	public enum EntityType {
 		CLASS, PROPERTY, RESOURCE
 	}
 	
@@ -48,22 +50,19 @@
 			// load popularity of classes
 			for (NamedClass nc : new SPARQLTasks(endpoint).getAllClasses()) {
 				System.out.println("Computing popularity for " + nc);
-				String query = String.format("SELECT COUNT(?s) WHERE {?s a <%s>}", nc.getName());
-				int popularity = loadPopularity(query);
+				int popularity = loadPopularity(nc.getName(), EntityType.CLASS);
 				class2Popularity.put(nc.getName(), Integer.valueOf(popularity));
 			}
 			// load popularity of properties
 			for (ObjectProperty op : new SPARQLTasks(endpoint).getAllObjectProperties()) {
 				System.out.println("Computing popularity for " + op);
-				String query = String.format("SELECT COUNT(*) WHERE {?s <%s> ?o}", op.getName());
-				int popularity = loadPopularity(query);
-				class2Popularity.put(op.getName(), Integer.valueOf(popularity));
+				int popularity = loadPopularity(op.getName(), EntityType.PROPERTY);
+				property2Popularity.put(op.getName(), Integer.valueOf(popularity));
 			}
 			for (DatatypeProperty dp : new SPARQLTasks(endpoint).getAllDataProperties()) {
 				System.out.println("Computing popularity for " + dp);
-				String query = String.format("SELECT COUNT(*) WHERE {?s <%s> ?o}", dp.getName());
-				int popularity = loadPopularity(query);
-				class2Popularity.put(dp.getName(), Integer.valueOf(popularity));
+				int popularity = loadPopularity(dp.getName(), EntityType.PROPERTY);
+				property2Popularity.put(dp.getName(), Integer.valueOf(popularity));
 			}
 			serialize();
 		}
@@ -73,7 +72,11 @@
 		ObjectOutputStream oos = null;
 		try {
 			oos = new ObjectOutputStream(new FileOutputStream(new File(file)));
-			oos.writeObject(class2Popularity);
+			List<Map<String, Integer>> mapList = new ArrayList<Map<String,Integer>>();
+			mapList.add(class2Popularity);
+			mapList.add(property2Popularity);
+			mapList.add(resource2Popularity);
+			oos.writeObject(mapList);
 		} catch (FileNotFoundException e) {
 			// TODO Auto-generated catch block
 			e.printStackTrace();
@@ -98,7 +101,10 @@
 			ObjectInputStream ois = null;
 			try {
 				ois = new ObjectInputStream(new FileInputStream(new File(file)));
-				class2Popularity = (Map<String, Integer>) ois.readObject();
+				List<Map<String, Integer>> mapList = (List<Map<String, Integer>>) ois.readObject();
+				class2Popularity = mapList.get(0);
+				property2Popularity = mapList.get(1);
+				resource2Popularity = mapList.get(2);
 			} catch (FileNotFoundException e) {
 				e.printStackTrace();
 			} catch (IOException e) {
@@ -115,12 +121,21 @@
 				}
 				
 			}
+			System.out.println("Loaded popularity map.");
 			return true;
 		} 
 		return false;
 	}
 	
-	private int loadPopularity(String query){
+	private int loadPopularity(String uri, EntityType entityType){
+		String query;
+		if(entityType == EntityType.CLASS){
+			query = String.format("SELECT COUNT(?s) WHERE {?s a <%s>}", uri);
+		} else if(entityType == EntityType.PROPERTY){
+			query = String.format("SELECT COUNT(*) WHERE {?s <%s> ?o}", uri);
+		} else {
+			query = String.format("SELECT COUNT(*) WHERE {?s ?p <%s>}", uri);
+		}
 		int pop = 0;
 		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
 		QuerySolution qs;
@@ -137,10 +152,22 @@
 		Integer popularity;
 		if(entityType == EntityType.CLASS){
 			popularity = class2Popularity.get(uri);
+			if(popularity == null){
+				popularity = loadPopularity(uri, entityType);
+				class2Popularity.put(uri, popularity);
+			}
 		} else if(entityType == EntityType.PROPERTY){
 			popularity = property2Popularity.get(uri);
+			if(popularity == null){
+				popularity = loadPopularity(uri, entityType);
+				property2Popularity.put(uri, popularity);
+			}
 		} else {
 			popularity = resource2Popularity.get(uri);
+			if(popularity == null){
+				popularity = loadPopularity(uri, entityType);
+				resource2Popularity.put(uri, popularity);
+			}
 		}
 		return popularity;
 	}
@@ -157,7 +184,9 @@
 	}
 	
 	public static void main(String[] args) {
-		new PopularityMap("dbpedia_popularity.map", SparqlEndpoint.getEndpointDBpedia(), new ExtractionDBCache("cache")).init();
+		PopularityMap map = new PopularityMap("dbpedia_popularity.map", SparqlEndpoint.getEndpointDBpediaLiveAKSW(), new ExtractionDBCache("cache"));
+		map.init();
+		System.out.println(map.getPopularity("http://dbpedia.org/ontology/Book"));
 	}
 
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3784] trunk/components-ext/src/main/java/org/ dllearner/algorithm/tbsl

From: <chr...@us...> - 2012-07-12 12:13:13

Revision: 3784
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3784&view=rev
Author:   christinaunger
Date:     2012-07-12 12:13:02 +0000 (Thu, 12 Jul 2012)
Log Message:
-----------
[tbsl] if COUNT then GROUP BY + HAVING. also removed number replacement.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java	2012-07-12 11:34:09 UTC (rev 3783)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java	2012-07-12 12:13:02 UTC (rev 3784)
@@ -264,7 +264,7 @@
             if (!predicate.contains(":")) prop.setIsVariable(true);
             
             boolean literal = false; 
-            if (simple.getArguments().size() > 1 && simple.getArguments().get(1).getValue().matches("\\d+")) {
+            if (simple.getArguments().size() > 1 && (simple.getArguments().get(1).getValue().startsWith("\'") || simple.getArguments().get(1).getValue().matches("[0-9]+"))) {
             	literal = true;
             }
 
@@ -273,11 +273,7 @@
                 if (simple.getArguments().get(1).getValue().matches("[0-9]+")) {
                     String fresh = "v"+createFresh();
                     out.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, fresh));
-                    out.addFilter(new SPARQL_Filter(
-                        new SPARQL_Pair(
-                        new SPARQL_Term(fresh,false),
-                        new SPARQL_Term(simple.getArguments().get(1).getValue(),literal),
-                        SPARQL_PairType.EQ)));
+                    out.addHaving(new SPARQL_Having("?"+fresh + " = " + simple.getArguments().get(1).getValue()));
                 } else {
                     out.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, simple.getArguments().get(1).getValue()));
                 }
@@ -326,7 +322,7 @@
             } else if (predicate.equals("equal")) {
                 out.addFilter(new SPARQL_Filter(
                         new SPARQL_Pair(
-                        new SPARQL_Term(simple.getArguments().get(0).getValue(),true),
+                        new SPARQL_Term(simple.getArguments().get(0).getValue(),false),
                         new SPARQL_Term(simple.getArguments().get(1).getValue(),literal),
                         SPARQL_PairType.EQ)));
                 return out;
@@ -335,14 +331,14 @@
             	out.addFilter(new SPARQL_Filter(
             			new SPARQL_Pair(
                         new SPARQL_Term(simple.getArguments().get(0).getValue(),false),
-                        new SPARQL_Term("'^"+simple.getArguments().get(1).getValue()+"'",true),
+                        new SPARQL_Term("'^"+simple.getArguments().get(1).getValue()+"'",false),
                         SPARQL_PairType.REGEX)));
             }
             else if (predicate.equals("regex")) {
             	out.addFilter(new SPARQL_Filter(
             			new SPARQL_Pair(
                         new SPARQL_Term(simple.getArguments().get(0).getValue(),false),
-                        new SPARQL_Term(simple.getArguments().get(1).getValue().replace("_","").trim(),true),
+                        new SPARQL_Term(simple.getArguments().get(1).getValue().replace("_","").trim(),false),
                         SPARQL_PairType.REGEX)));
             }
             else {
@@ -403,10 +399,13 @@
     }
 
     public void redundantEqualRenaming(DRS drs) {
-
+        
         Set<Simple_DRS_Condition> equalsConditions = new HashSet<Simple_DRS_Condition>();
         for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) {
-        	if(c.getPredicate().equals("equal")) equalsConditions.add(c);
+        	if(c.getPredicate().equals("equal")
+                        && !c.getArguments().get(0).getValue().matches("[0-9]+")
+                        && !c.getArguments().get(1).getValue().matches("[0-9]+")) 
+                    equalsConditions.add(c);
         }
         
         DiscourseReferent firstArg;
@@ -426,7 +425,7 @@
             secondIsInt = secondArg.getValue().matches("(\\?)?[0..9]+");
 
             drs.removeCondition(c);
-            if (firstIsURI || firstIsInt) {
+            if (firstIsURI) { //  firstIsURI || firstIsInt
                 drs.replaceEqualRef(secondArg, firstArg, true);
                 for (Slot s : slots) {
                 	if (s.getAnchor().equals(secondArg.getValue()))
@@ -436,7 +435,7 @@
                             s.getWords().add(firstArg.getValue());
                         }
                 }
-            } else if (secondIsURI || secondIsInt) {
+            } else if (secondIsURI) { // secondIsURI || secondIsInt
                 drs.replaceEqualRef(firstArg, secondArg, true);
                 for (Slot s : slots) {
                 	if (s.getAnchor().equals(firstArg.getValue()))

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java	2012-07-12 11:34:09 UTC (rev 3783)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java	2012-07-12 12:13:02 UTC (rev 3784)
@@ -14,6 +14,7 @@
 	Set<SPARQL_Triple> conditions;
 	Set<SPARQL_Term> orderBy;
 	Set<SPARQL_Filter> filter;
+        Set<SPARQL_Having> having;
         Set<SPARQL_Union> unions;
 	SPARQL_QueryType qt = SPARQL_QueryType.SELECT;
 
@@ -28,6 +29,7 @@
 		conditions = new HashSet<SPARQL_Triple>();
 		orderBy = new HashSet<SPARQL_Term>();
 		filter = new HashSet<SPARQL_Filter>();
+                having  = new HashSet<SPARQL_Having>();
                 unions = new HashSet<SPARQL_Union>();
 	}
 
@@ -38,6 +40,7 @@
 		this.prefixes = prefixes;
 		this.conditions = conditions;
                 filter = new HashSet<SPARQL_Filter>();
+                having = new HashSet<SPARQL_Having>();
                 unions = new HashSet<SPARQL_Union>();
 	}
 
@@ -50,6 +53,8 @@
 		this.orderBy = orderBy;
 		this.limit = limit;
 		this.offset = offset;
+                filter = new HashSet<SPARQL_Filter>();
+                having = new HashSet<SPARQL_Having>();
                 unions = new HashSet<SPARQL_Union>();
 	}
 	
@@ -107,6 +112,7 @@
 			}
 		}
 		this.filter = filters;
+                this.having = having;
                 this.unions = query.unions; // TODO copy unions
 		
 		this.limit = query.getLimit();
@@ -195,6 +201,10 @@
 		if(groupBy != null){
 			retVal += "GROUP BY " + groupBy + "\n";
 		}
+                
+                if (!having.isEmpty()) {
+                    for (SPARQL_Having h : having) retVal += h.toString() + "\n";
+                }
 
 		if (orderBy != null && !orderBy.isEmpty())
 		{
@@ -275,6 +285,10 @@
 
 		this.filter.add(f);
 	}
+        public void addHaving(SPARQL_Having h)
+	{
+		this.having.add(h);
+	}
 
 	public Set<SPARQL_Term> getOrderBy()
 	{

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java	2012-07-12 11:34:09 UTC (rev 3783)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java	2012-07-12 12:13:02 UTC (rev 3784)
@@ -79,7 +79,7 @@
 	
 	public boolean isString()
 	{
-		return name.startsWith("'") || name.matches("\\d+");
+		return name.startsWith("'");
 	}
 	
 	public void setIsURI(boolean isURI){

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3788] trunk/components-ext/src/main/java/org/ dllearner/algorithm/tbsl

From: <chr...@us...> - 2012-07-13 09:50:44

Revision: 3788
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3788&view=rev
Author:   christinaunger
Date:     2012-07-13 09:50:38 +0000 (Fri, 13 Jul 2012)
Log Message:
-----------
- again tried to fix the number problem :P
- introduced distinction between equal and equals

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java	2012-07-13 08:39:06 UTC (rev 3787)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java	2012-07-13 09:50:38 UTC (rev 3788)
@@ -319,7 +319,7 @@
                 out.addOrderBy(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_OrderBy.ASC));
                 out.setLimit(1);
                 return out;
-            } else if (predicate.equals("equal")) {
+            } else if (predicate.equals("equals")) {
                 out.addFilter(new SPARQL_Filter(
                         new SPARQL_Pair(
                         new SPARQL_Term(simple.getArguments().get(0).getValue(),false),
@@ -347,9 +347,11 @@
 	            	out.addCondition(new SPARQL_Triple(term,new SPARQL_Property("type",new SPARQL_Prefix("rdf","")),prop));
 	            }
 	            else if (arity == 2) {
-	            	String arg1 = simple.getArguments().get(0).getValue();SPARQL_Term term1 = new SPARQL_Term(arg1,false);term1.setIsVariable(true);
-	            	String arg2 = simple.getArguments().get(1).getValue();SPARQL_Term term2 = new SPARQL_Term(arg2,false);term2.setIsVariable(true);
-	            	out.addCondition(new SPARQL_Triple(term1, prop, term2));
+	            	String arg1 = simple.getArguments().get(0).getValue();                   
+                        SPARQL_Term term1 = new SPARQL_Term(arg1,arg1.contains(":"),!arg1.matches("(\\?)?[0-9]+"));
+	            	String arg2 = simple.getArguments().get(1).getValue();
+                        SPARQL_Term term2 = new SPARQL_Term(arg2,arg2.contains(":"),!arg2.matches("(\\?)?[0-9]+"));
+	            	out.addCondition(new SPARQL_Triple(term1,prop,term2));
 	            }
 	            else if (arity > 2) {
 	            	// TODO
@@ -371,11 +373,11 @@
                     if (s.getAnchor().equals(v1)) v1isSlotVar = true; 
                     if (s.getAnchor().equals(v2)) v2isSlotVar = true;
                 }
-                if (!v1isSlotVar && !v1.matches("[0..9]+") && !v1.contains("count")) {
+                if (!v1isSlotVar && !v1.matches("(\\?)?[0-9]+") && !v1.contains("count")) {
                     if (vs.containsKey(v1)) vs.put(v1,vs.get(v1)+1);
                     else vs.put(v1,1);
                 }
-                if (!v2isSlotVar && !v2.matches("[0..9]+") && !v2.contains("count")) {
+                if (!v2isSlotVar && !v2.matches("(\\?)?[0-9]+") && !v2.contains("count")) {
                     if (vs.containsKey(v2)) vs.put(v2,vs.get(v2)+1);
                     else vs.put(v2,1);
                 }
@@ -402,9 +404,7 @@
         
         Set<Simple_DRS_Condition> equalsConditions = new HashSet<Simple_DRS_Condition>();
         for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) {
-        	if(c.getPredicate().equals("equal")
-                        && !c.getArguments().get(0).getValue().matches("[0-9]+")
-                        && !c.getArguments().get(1).getValue().matches("[0-9]+")) 
+        	if(c.getPredicate().equals("equal"))
                     equalsConditions.add(c);
         }
         

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java	2012-07-13 08:39:06 UTC (rev 3787)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java	2012-07-13 09:50:38 UTC (rev 3788)
@@ -312,7 +312,7 @@
 			next = true;
 		}
 		m_DiscourseReferents.remove(dr2);
-		if (!isInUpperUniverse) {
+		if (!isInUpperUniverse && !dr2.m_Referent.matches("[0-9]+")) {
 			m_DiscourseReferents.add(new DiscourseReferent(dr2.m_Referent,marked,nonex));
 		}
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3794] trunk/components-ext/src/main/java/org/ dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-07-16 12:52:49

Revision: 3794
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3794&view=rev
Author:   lorenz_b
Date:     2012-07-16 12:52:38 +0000 (Mon, 16 Jul 2012)
Log Message:
-----------
Added class to compute PMI.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java

Added Paths:
-----------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-07-16 07:10:22 UTC (rev 3793)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-07-16 12:52:38 UTC (rev 3794)
@@ -155,6 +155,8 @@
 	
 	private Set<String> relevantKeywords;
 	
+	private boolean useDomainRangeRestriction = true;
+	
 	public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){
 		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger());
 	}
@@ -305,6 +307,10 @@
 		reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint));
 	}
 	
+	public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) {
+		this.useDomainRangeRestriction = useDomainRangeRestriction;
+	}
+	
 	/*
 	 * Only for Evaluation useful.
 	 */
@@ -619,31 +625,66 @@
 								Query q = new Query(query.getQuery());
 								
 								boolean drop = false;
-								if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){
-									for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){
-										String objectVar = triple.getValue().getName();
-										String subjectVar = triple.getVariable().getName();
-//										System.out.println(triple);
-										for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){
-//											System.out.println(typeTriple);
-											if(true){//reasoner.isObjectProperty(a.getUri())){
-												Description range = reasoner.getRange(new ObjectProperty(a.getUri()));
+								if(useDomainRangeRestriction){
+									if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){
+										for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){
+											String objectVar = triple.getValue().getName();
+											String subjectVar = triple.getVariable().getName();
+//											System.out.println(triple);
+											for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){
+//												System.out.println(typeTriple);
+												if(true){//reasoner.isObjectProperty(a.getUri())){
+													Description range = reasoner.getRange(new ObjectProperty(a.getUri()));
+//													System.out.println(a);
+													if(range != null){
+														Set<Description> allRanges = new HashSet<Description>();
+														SortedSet<Description> superClasses;
+														if(range instanceof NamedClass){
+															superClasses = reasoner.getSuperClasses(range);
+															allRanges.addAll(superClasses);
+														} else {
+															for(Description nc : range.getChildren()){
+																superClasses = reasoner.getSuperClasses(nc);
+																allRanges.addAll(superClasses);
+															}
+														}
+														allRanges.add(range);
+														allRanges.remove(new NamedClass(Thing.instance.getURI()));
+														
+														Set<Description> allTypes = new HashSet<Description>();
+														String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1);
+														Description type = new NamedClass(typeURI);
+														superClasses = reasoner.getSuperClasses(type);
+														allTypes.addAll(superClasses);
+														allTypes.add(type);
+														
+														if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){
+															drop = true;
+														} 
+													}
+												} else {
+													drop = true;
+												}
+												
+											}
+											for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){
+												Description domain = reasoner.getDomain(new ObjectProperty(a.getUri()));
 //												System.out.println(a);
-												if(range != null){
-													Set<Description> allRanges = new HashSet<Description>();
+												if(domain != null){
+													Set<Description> allDomains = new HashSet<Description>();
 													SortedSet<Description> superClasses;
-													if(range instanceof NamedClass){
-														superClasses = reasoner.getSuperClasses(range);
-														allRanges.addAll(superClasses);
+													if(domain instanceof NamedClass){
+														superClasses = reasoner.getSuperClasses(domain);
+														allDomains.addAll(superClasses);
 													} else {
-														for(Description nc : range.getChildren()){
+														for(Description nc : domain.getChildren()){
 															superClasses = reasoner.getSuperClasses(nc);
-															allRanges.addAll(superClasses);
+															allDomains.addAll(superClasses);
 														}
 													}
-													allRanges.add(range);
-													allRanges.remove(new NamedClass(Thing.instance.getURI()));
-													
+													allDomains.add(domain);
+													allDomains.remove(new NamedClass(Thing.instance.getURI()));
+												
 													Set<Description> allTypes = new HashSet<Description>();
 													String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1);
 													Description type = new NamedClass(typeURI);
@@ -651,46 +692,13 @@
 													allTypes.addAll(superClasses);
 													allTypes.add(type);
 													
-													if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){
+													if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){
 														drop = true;
-													} 
-												}
-											} else {
-												drop = true;
-											}
-											
-										}
-										for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){
-											Description domain = reasoner.getDomain(new ObjectProperty(a.getUri()));
-//											System.out.println(a);
-											if(domain != null){
-												Set<Description> allDomains = new HashSet<Description>();
-												SortedSet<Description> superClasses;
-												if(domain instanceof NamedClass){
-													superClasses = reasoner.getSuperClasses(domain);
-													allDomains.addAll(superClasses);
-												} else {
-													for(Description nc : domain.getChildren()){
-														superClasses = reasoner.getSuperClasses(nc);
-														allDomains.addAll(superClasses);
+														System.err.println("DROPPING: \n" + q.toString());
+													} else {
+															
 													}
 												}
-												allDomains.add(domain);
-												allDomains.remove(new NamedClass(Thing.instance.getURI()));
-											
-												Set<Description> allTypes = new HashSet<Description>();
-												String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1);
-												Description type = new NamedClass(typeURI);
-												superClasses = reasoner.getSuperClasses(type);
-												allTypes.addAll(superClasses);
-												allTypes.add(type);
-												
-												if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){
-													drop = true;
-//													System.err.println("DROPPING: \n" + q.toString());
-												} else {
-														
-												}
 											}
 										}
 									}

Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java	                        (rev 0)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java	2012-07-16 12:52:38 UTC (rev 3794)
@@ -0,0 +1,187 @@
+package org.dllearner.algorithm.tbsl.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.dllearner.core.owl.NamedClass;
+import org.dllearner.core.owl.ObjectProperty;
+import org.dllearner.core.owl.Property;
+import org.dllearner.kb.sparql.ExtractionDBCache;
+import org.dllearner.kb.sparql.SparqlEndpoint;
+import org.dllearner.kb.sparql.SparqlQuery;
+
+import com.hp.hpl.jena.query.QuerySolution;
+import com.hp.hpl.jena.query.ResultSet;
+
+public class PMI {
+	
+	private SparqlEndpoint endpoint;
+	private ExtractionDBCache cache;
+	
+	public PMI(SparqlEndpoint endpoint, ExtractionDBCache cache) {
+		this.endpoint = endpoint;
+		this.cache = cache;
+	}
+	
+	public double getDirectedPMI(ObjectProperty prop, NamedClass cls){
+		System.out.println(String.format("Computing PMI(%s, %s)", prop, cls));
+		String query  = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName());
+		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		double classOccurenceCnt = rs.next().getLiteral("cnt").getInt();
+		System.out.println("Class occurence: " + classOccurenceCnt);
+		
+		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName());
+		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt();
+		System.out.println("Property occurence: " + propertyOccurenceCnt);
+		
+		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?o a <%s>}", prop.getName(), cls.getName());
+		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		double coOccurenceCnt = rs.next().getLiteral("cnt").getInt();
+		System.out.println("Co-occurence: " + coOccurenceCnt);
+		
+		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}");
+		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		double total = rs.next().getLiteral("cnt").getInt();
+		System.out.println("Total: " + total);
+		
+		if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){
+			return 0;
+		}
+		
+		double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) );
+		
+		return pmi;
+	}
+	
+	public double getDirectedPMI(NamedClass cls, Property prop){
+		System.out.println(String.format("Computing PMI(%s, %s)", cls, prop));
+		String query  = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName());
+		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		double classOccurenceCnt = rs.next().getLiteral("cnt").getInt();
+		System.out.println("Class occurence: " + classOccurenceCnt);
+		
+		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName());
+		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt();
+		System.out.println("Property occurence: " + propertyOccurenceCnt);
+		
+		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName());
+		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		double coOccurenceCnt = rs.next().getLiteral("cnt").getInt();
+		System.out.println("Co-occurence: " + coOccurenceCnt);
+		
+		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}");
+		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		double total = rs.next().getLiteral("cnt").getInt();
+		System.out.println("Total: " + total);
+		
+		if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){
+			return 0;
+		}
+		
+		double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) );
+		
+		return pmi;
+	}
+	
+	/**
+	 * Returns the direction of the given triple, computed by calculating the PMI values of each combination.
+	 * @param subject
+	 * @param predicate
+	 * @param object
+	 * @return -1 if the given triple should be reversed, else 1.
+	 */
+	public int getDirection(NamedClass subject, ObjectProperty predicate, NamedClass object){
+		System.out.println(String.format("Computing direction between [%s, %s, %s]", subject, predicate, object));
+		double pmi_obj_pred = getDirectedPMI(object, predicate);System.out.println("PMI(OBJECT, PREDICATE): " + pmi_obj_pred);
+		double pmi_pred_subj = getDirectedPMI(predicate, subject);System.out.println("PMI(PREDICATE, SUBJECT): " + pmi_pred_subj);
+		double pmi_subj_pred = getDirectedPMI(subject, predicate);System.out.println("PMI(SUBJECT, PREDICATE): " + pmi_subj_pred);
+		double pmi_pred_obj = getDirectedPMI(predicate, object);System.out.println("PMI(PREDICATE, OBJECT): " + pmi_pred_obj);
+		
+		double threshold = 2.0;
+		
+		double value = ((pmi_obj_pred + pmi_pred_subj) - (pmi_subj_pred + pmi_pred_obj));
+		System.out.println("(PMI(OBJECT, PREDICATE) + PMI(PREDICATE, SUBJECT)) - (PMI(SUBJECT, PREDICATE) + PMI(PREDICATE, OBJECT)) = " + value);
+		
+		if( value > threshold){
+			System.out.println(object + "---" + predicate + "--->" + subject);
+			return -1;
+		} else {
+			System.out.println(subject + "---" + predicate + "--->" + object);
+			return 1;
+		}
+	}
+	
+	public Map<ObjectProperty, Integer> getMostFrequentProperties(NamedClass cls1, NamedClass cls2){
+		Map<ObjectProperty, Integer> prop2Cnt = new HashMap<ObjectProperty, Integer>();
+		String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?x1 a <%s>. ?x2 a <%s>. ?x1 ?p ?x2} GROUP BY ?p", cls1, cls2);
+		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		QuerySolution qs;
+		while(rs.hasNext()){
+			qs = rs.next();
+			ObjectProperty p = new ObjectProperty(qs.getResource("p").getURI());
+			int cnt = qs.getLiteral("cnt").getInt();
+			prop2Cnt.put(p, cnt);
+		}
+		return prop2Cnt;
+	}
+	
+	public static void main(String[] args) {
+		SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
+		ExtractionDBCache cache = new ExtractionDBCache("cache");
+		String NS = "http://dbpedia.org/ontology/";
+		
+		PMI pmiGen = new PMI(endpoint, cache);
+		System.out.println(pmiGen.getDirectedPMI(
+				new ObjectProperty(NS + "author"), 
+				new NamedClass(NS+ "Person")));
+		
+		System.out.println("#########################################");
+		
+		System.out.println(pmiGen.getDirectedPMI(
+				new ObjectProperty(NS + "author"), 
+				new NamedClass(NS+ "Writer")));
+		
+		System.out.println("#########################################");
+		
+		System.out.println(pmiGen.getDirectedPMI(
+				new NamedClass(NS+ "Book"),
+				new ObjectProperty(NS + "author")) 
+				);
+		
+		System.out.println("#########################################");
+		
+		System.out.println(pmiGen.getDirection(
+				new NamedClass(NS+ "Writer"), 
+				new ObjectProperty(NS + "author"), 
+				new NamedClass(NS+ "Book")));
+		
+		System.out.println("#########################################");
+		
+		System.out.println(pmiGen.getDirection(
+				new NamedClass(NS+ "Person"), 
+				new ObjectProperty(NS + "starring"), 
+				new NamedClass(NS+ "Film")));
+		
+		System.out.println("#########################################");
+		
+		System.out.println(pmiGen.getMostFrequentProperties(
+				new NamedClass(NS+ "Person"), 
+				new NamedClass(NS+ "Film")));
+		
+		System.out.println("#########################################");
+		
+		System.out.println(pmiGen.getMostFrequentProperties(
+				new NamedClass(NS+ "Film"), 
+				new NamedClass(NS+ "Actor")));
+		
+		System.out.println("#########################################");
+		
+		System.out.println(pmiGen.getMostFrequentProperties(
+				new NamedClass(NS+ "Film"), 
+				new NamedClass(NS+ "Person")));
+		
+	}
+
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3797] trunk/components-ext/src/main/java/org/ dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-07-18 13:08:53

Revision: 3797
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3797&view=rev
Author:   lorenz_b
Date:     2012-07-18 13:08:45 +0000 (Wed, 18 Jul 2012)
Log Message:
-----------
Started metrics class for SPARQL endpoints.

Added Paths:
-----------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java

Removed Paths:
-------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java

Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java	                        (rev 0)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java	2012-07-18 13:08:45 UTC (rev 3797)
@@ -0,0 +1,1031 @@
+package org.dllearner.algorithm.tbsl.learning;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.log4j.Logger;
+import org.dllearner.algorithm.tbsl.nlp.Lemmatizer;
+import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer;
+import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger;
+import org.dllearner.algorithm.tbsl.nlp.PlingStemmer;
+import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger;
+import org.dllearner.algorithm.tbsl.nlp.WordNet;
+import org.dllearner.algorithm.tbsl.sparql.Allocation;
+import org.dllearner.algorithm.tbsl.sparql.Query;
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter;
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair;
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType;
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property;
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType;
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term;
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple;
+import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value;
+import org.dllearner.algorithm.tbsl.sparql.Slot;
+import org.dllearner.algorithm.tbsl.sparql.SlotType;
+import org.dllearner.algorithm.tbsl.sparql.Template;
+import org.dllearner.algorithm.tbsl.sparql.WeightedQuery;
+import org.dllearner.algorithm.tbsl.templator.Templator;
+import org.dllearner.algorithm.tbsl.util.Knowledgebase;
+import org.dllearner.algorithm.tbsl.util.PopularityMap;
+import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType;
+import org.dllearner.algorithm.tbsl.util.SPARQLEndpointMetrics;
+import org.dllearner.algorithm.tbsl.util.Similarity;
+import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper;
+import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection;
+import org.dllearner.common.index.Index;
+import org.dllearner.common.index.IndexResultItem;
+import org.dllearner.common.index.IndexResultSet;
+import org.dllearner.common.index.MappingBasedIndex;
+import org.dllearner.common.index.SOLRIndex;
+import org.dllearner.common.index.SPARQLDatatypePropertiesIndex;
+import org.dllearner.common.index.SPARQLObjectPropertiesIndex;
+import org.dllearner.common.index.SPARQLPropertiesIndex;
+import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex;
+import org.dllearner.common.index.VirtuosoObjectPropertiesIndex;
+import org.dllearner.common.index.VirtuosoPropertiesIndex;
+import org.dllearner.core.ComponentInitException;
+import org.dllearner.core.LearningProblem;
+import org.dllearner.core.SparqlQueryLearningAlgorithm;
+import org.dllearner.core.owl.Description;
+import org.dllearner.core.owl.Individual;
+import org.dllearner.core.owl.Intersection;
+import org.dllearner.core.owl.NamedClass;
+import org.dllearner.core.owl.ObjectProperty;
+import org.dllearner.core.owl.Thing;
+import org.dllearner.kb.SparqlEndpointKS;
+import org.dllearner.kb.sparql.ExtractionDBCache;
+import org.dllearner.kb.sparql.SparqlEndpoint;
+import org.dllearner.kb.sparql.SparqlQuery;
+import org.dllearner.reasoning.SPARQLReasoner;
+import org.ini4j.InvalidFileFormatException;
+import org.ini4j.Options;
+import org.semanticweb.HermiT.Configuration.DirectBlockingType;
+
+import com.hp.hpl.jena.query.QueryExecutionFactory;
+import com.hp.hpl.jena.query.QueryFactory;
+import com.hp.hpl.jena.query.QuerySolution;
+import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.query.Syntax;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.shared.UnknownPropertyException;
+import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;
+import com.hp.hpl.jena.vocabulary.RDFS;
+import com.jamonapi.Monitor;
+import com.jamonapi.MonitorFactory;
+
+public class SPARQLTemplateBasedLearner3 implements SparqlQueryLearningAlgorithm{
+	
+	
+	/**
+	 * Selection strategy evaluated by learnSPARQLQueries().
+	 * BEST_QUERY keeps the leading generated candidates without executing them;
+	 * BEST_NON_EMPTY_QUERY hands the candidates to the validation step.
+	 */
+	enum Mode{
+		BEST_QUERY, BEST_NON_EMPTY_QUERY
+	}
+	
+	private Mode mode = Mode.BEST_QUERY;
+	
+	private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3.class);
+	private Monitor templateMon = MonitorFactory.getTimeMonitor("template");
+	private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql");
+	
+	private boolean useRemoteEndpointValidation;
+	private boolean stopIfQueryResultNotEmpty;
+	private int maxTestedQueriesPerTemplate = 50;
+	private int maxQueryExecutionTimeInSeconds;
+	private int maxTestedQueries = 200;
+	private int maxIndexResults;
+	
+	private SparqlEndpoint endpoint;
+	private Model model;
+	
+	private ExtractionDBCache cache = new ExtractionDBCache("cache");
+	
+	private Index resourcesIndex;
+	private Index classesIndex;
+	private Index propertiesIndex;
+	
+	private Index datatypePropertiesIndex;
+	private Index objectPropertiesIndex;
+	
+	private MappingBasedIndex mappingIndex;
+	
+	private Templator templateGenerator;
+	private Lemmatizer lemmatizer;
+	private PartOfSpeechTagger posTagger;
+	private WordNet wordNet;
+	
+	private String question;
+	private int learnedPos = -1;
+	
+	private Set<Template> templates;
+	private Map<Template, Collection<? extends Query>> template2Queries;
+	private Map<Slot, List<String>> slot2URI;
+	
+	private Collection<WeightedQuery> sparqlQueryCandidates;
+	private SortedSet<WeightedQuery> learnedSPARQLQueries;
+	private SortedSet<WeightedQuery> generatedQueries;
+	
+	private SPARQLReasoner reasoner;
+	
+	private String currentlyExecutedQuery;
+	
+	private boolean dropZeroScoredQueries = true;
+	private boolean useManualMappingsIfExistOnly = true;
+	
+	private boolean multiThreaded = true;
+	
+	private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"};
+	
+	private PopularityMap popularityMap;
+	
+	private Set<String> relevantKeywords;
+	
+	private boolean useDomainRangeRestriction = true;
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint with separate resource, class and property indexes,
+	 * using a newly created StanfordPartOfSpeechTagger.
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){
+		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger());
+	}
+	
+	/**
+	 * Creates a learner from a knowledge base by unpacking its endpoint and its resource, class and
+	 * property indexes.
+	 * NOTE(review): the knowledge base's mapping index is not copied here, although
+	 * setKnowledgebase(...) does copy it - confirm whether that is intended.
+	 */
+	public SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){
+		this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options);
+	}
+	
+	/**
+	 * Creates a learner from a knowledge base by unpacking its endpoint and its resource, class and
+	 * property indexes, using the given query cache.
+	 * NOTE(review): the knowledge base's mapping index is not copied here, although
+	 * setKnowledgebase(...) does copy it - confirm whether that is intended.
+	 */
+	public SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){
+		this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options, cache);
+	}
+	
+	/**
+	 * Creates a learner from a knowledge base with a newly created StanfordPartOfSpeechTagger,
+	 * WordNet instance and empty Options.
+	 * NOTE(review): the knowledge base's mapping index is not copied here, although
+	 * setKnowledgebase(...) does copy it - confirm whether that is intended.
+	 */
+	public SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase){
+		this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options());
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint with a single index, using a newly created
+	 * StanfordPartOfSpeechTagger.
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index){
+		this(endpoint, index, new StanfordPartOfSpeechTagger());
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint with separate indexes and the given tagger, using a
+	 * newly created WordNet instance and empty Options.
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){
+		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options());
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint with a single index and the given tagger, using a
+	 * newly created WordNet instance and empty Options.
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){
+		this(endpoint, index, posTagger, new WordNet(), new Options());
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint with separate indexes and the given WordNet instance,
+	 * using a newly created StanfordPartOfSpeechTagger and empty Options.
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){
+		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options());
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint with a single index and the given WordNet instance,
+	 * using a newly created StanfordPartOfSpeechTagger and empty Options.
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index, WordNet wordNet){
+		this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options());
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint with separate indexes, the given tagger and WordNet
+	 * instance, empty Options and a new ExtractionDBCache named "cache".
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){
+		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache"));
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint that uses the same index for resources, classes and
+	 * properties, with empty Options and a new ExtractionDBCache named "cache".
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){
+		this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache"));
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint with separate indexes and the given options, using a
+	 * new ExtractionDBCache named "cache".
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){
+		this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache"));
+	}
+	
+	/**
+	 * Creates a learner for a SPARQL endpoint that uses the same index for resources, classes and
+	 * properties, with the given options and a new ExtractionDBCache named "cache".
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){
+		this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache"));
+	}
+	
+	/**
+	 * Primary constructor for learning against a SPARQL endpoint; all other endpoint-based
+	 * constructors end up here.
+	 * <p>
+	 * Stores the collaborators, applies the options, derives the datatype/object property indexes
+	 * from the property index and creates the SPARQL reasoner for the endpoint.
+	 */
+	public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){
+		this.endpoint = endpoint;
+		this.posTagger = posTagger;
+		this.wordNet = wordNet;
+		this.resourcesIndex = resourcesIndex;
+		this.classesIndex = classesIndex;
+		this.propertiesIndex = propertiesIndex;
+		this.cache = cache;
+		
+		// setOptions(...) configures the cache, so the cache field has to be assigned before this call
+		setOptions(options);
+		
+		if(!(propertiesIndex instanceof SPARQLPropertiesIndex)){
+			// a generic index serves datatype and object properties alike
+			datatypePropertiesIndex = propertiesIndex;
+			objectPropertiesIndex = propertiesIndex;
+		} else {
+			SPARQLPropertiesIndex sparqlIndex = (SPARQLPropertiesIndex)propertiesIndex;
+			if(sparqlIndex instanceof VirtuosoPropertiesIndex){
+				datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex(sparqlIndex);
+				objectPropertiesIndex = new VirtuosoObjectPropertiesIndex(sparqlIndex);
+			} else {
+				datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex(sparqlIndex);
+				objectPropertiesIndex = new SPARQLObjectPropertiesIndex(sparqlIndex);
+			}
+		}
+		reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache);
+	}
+	
+	/**
+	 * Creates a learner that queries a local Jena model, using a newly created
+	 * StanfordPartOfSpeechTagger.
+	 */
+	public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){
+		this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger());
+	}
+	
+	/**
+	 * Creates a learner that queries a local Jena model with the given tagger, using a newly
+	 * created WordNet instance and empty Options.
+	 */
+	public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){
+		this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options());
+	}
+	
+	/**
+	 * Creates a learner that queries a local Jena model with the given WordNet instance, using a
+	 * newly created StanfordPartOfSpeechTagger and empty Options.
+	 */
+	public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){
+		this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options());
+	}
+	
+	/**
+	 * Creates a learner that queries a local Jena model with the given options, using a new
+	 * ExtractionDBCache named "cache".
+	 */
+	public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){
+		this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache"));
+	}
+	
+	/**
+	 * Primary constructor for learning against a local Jena model; all other model-based
+	 * constructors end up here.
+	 * <p>
+	 * Stores the collaborators, applies the options and derives the datatype/object property
+	 * indexes from the property index. No endpoint is set and no reasoner is created here.
+	 */
+	public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){
+		this.model = model;
+		this.posTagger = posTagger;
+		this.wordNet = wordNet;
+		this.resourcesIndex = resourcesIndex;
+		this.classesIndex = classesIndex;
+		this.propertiesIndex = propertiesIndex;
+		this.cache = cache;
+		
+		// setOptions(...) configures the cache, so the cache field has to be assigned before this call
+		setOptions(options);
+		
+		if(!(propertiesIndex instanceof SPARQLPropertiesIndex)){
+			// a generic index serves datatype and object properties alike
+			datatypePropertiesIndex = propertiesIndex;
+			objectPropertiesIndex = propertiesIndex;
+		} else {
+			SPARQLPropertiesIndex sparqlIndex = (SPARQLPropertiesIndex)propertiesIndex;
+			if(sparqlIndex instanceof VirtuosoPropertiesIndex){
+				datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex(sparqlIndex);
+				objectPropertiesIndex = new VirtuosoObjectPropertiesIndex(sparqlIndex);
+			} else {
+				datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex(sparqlIndex);
+				objectPropertiesIndex = new SPARQLObjectPropertiesIndex(sparqlIndex);
+			}
+		}
+	}
+	
+	/**
+	 * Sets the grammar (lexicon) files used for template generation.
+	 * <p>
+	 * The files are remembered so that a later init() builds the Templator with them; if the
+	 * Templator already exists they are forwarded to it as well. Previously this method threw a
+	 * NullPointerException when called before init() and never updated the stored files.
+	 *
+	 * @param grammarFiles paths of the grammar files
+	 */
+	public void setGrammarFiles(String[] grammarFiles){
+		this.grammarFiles = grammarFiles;
+		// templateGenerator is only created in init()
+		if(templateGenerator != null){
+			templateGenerator.setGrammarFiles(grammarFiles);
+		}
+	}
+	
+	/**
+	 * Creates the Templator from the configured tagger, WordNet instance and grammar files, and
+	 * the LingPipe lemmatizer. Methods that use templateGenerator require this to have run first.
+	 */
+	@Override
+	public void init() throws ComponentInitException {
+		 templateGenerator = new Templator(posTagger, wordNet, grammarFiles);
+		 lemmatizer = new LingPipeLemmatizer();
+	}
+	
+	/**
+	 * Sets the mapping-based index that is consulted during slot allocation when it is non-null.
+	 */
+	public void setMappingIndex(MappingBasedIndex mappingIndex) {
+		this.mappingIndex = mappingIndex;
+	}
+	
+	/**
+	 * Switches the learner to another knowledge base: takes over its endpoint and its resource,
+	 * class, property and mapping indexes, re-derives the datatype/object property indexes and
+	 * creates a new reasoner for the endpoint.
+	 * <p>
+	 * The reasoner is created with the learner's cache, as the endpoint constructor and
+	 * setEndpoint(...) do; previously it was created without a cache here.
+	 *
+	 * @param knowledgebase the knowledge base to use from now on
+	 */
+	public void setKnowledgebase(Knowledgebase knowledgebase){
+		this.endpoint = knowledgebase.getEndpoint();
+		this.resourcesIndex = knowledgebase.getResourceIndex();
+		this.classesIndex = knowledgebase.getClassIndex();
+		this.propertiesIndex = knowledgebase.getPropertyIndex();
+		this.mappingIndex = knowledgebase.getMappingIndex();
+		if(propertiesIndex instanceof SPARQLPropertiesIndex){
+			if(propertiesIndex instanceof VirtuosoPropertiesIndex){
+				datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex);
+				objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex);
+			} else {
+				datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex);
+				objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex);
+			}
+		} else {
+			// a generic index serves datatype and object properties alike
+			datatypePropertiesIndex = propertiesIndex;
+			objectPropertiesIndex = propertiesIndex;
+		}
+		reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache);
+	}
+	
+	/**
+	 * Replaces the query cache used by executeSelect(...).
+	 * NOTE(review): an already created reasoner is not updated with the new cache here.
+	 */
+	public void setCache(ExtractionDBCache cache) {
+		this.cache = cache;
+	}
+	
+	/**
+	 * Sets the useDomainRangeRestriction flag (enabled by default).
+	 */
+	public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) {
+		this.useDomainRangeRestriction = useDomainRangeRestriction;
+	}
+	
+	/**
+	 * Only useful for evaluation: passes the negated value to the Templator's UNTAGGED_INPUT
+	 * setting. The Templator is created in init(), so this must be called after init().
+	 */
+	public void setUseIdealTagger(boolean value){
+		templateGenerator.setUNTAGGED_INPUT(!value);
+	}
+	
+	/**
+	 * Reads the learner configuration from the given options, falling back to the defaults given
+	 * inline, and publishes the WordNet dictionary location as the system property
+	 * "wordnet.database.dir".
+	 * <p>
+	 * A null cache is tolerated (executeSelect(...) supports running without one). If the WordNet
+	 * dictionary cannot be found on the classpath, the configured path is used unchanged instead
+	 * of failing with a NullPointerException.
+	 *
+	 * @param options the configuration to apply
+	 */
+	private void setOptions(Options options){
+		maxIndexResults = Integer.parseInt(options.get("solr.query.limit", "10"));
+		
+		maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "100"));
+		if(cache != null){
+			cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds);
+		}
+		
+		useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote");
+		stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true"));
+		maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20"));
+		
+		String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict");
+		// getResource(...) returns null when the dictionary is not on the classpath
+		URL wordnetResource = this.getClass().getClassLoader().getResource(wordnetPath);
+		if(wordnetResource != null){
+			wordnetPath = wordnetResource.getPath();
+		} else {
+			logger.warn("WordNet dictionary '" + wordnetPath + "' not found on the classpath, using the path as given.");
+		}
+		System.setProperty("wordnet.database.dir", wordnetPath);
+	}
+
+	/**
+	 * Switches to another SPARQL endpoint and creates a new reasoner for it, which is given the
+	 * learner's cache and asked to prepare its subsumption hierarchy.
+	 */
+	public void setEndpoint(SparqlEndpoint endpoint){
+		this.endpoint = endpoint;
+		
+		reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint));
+		reasoner.setCache(cache);
+		reasoner.prepareSubsumptionHierarchy();
+	}
+	
+	/**
+	 * Sets the natural language question from which learnSPARQLQueries() builds its templates.
+	 */
+	public void setQuestion(String question){
+		this.question = question;
+	}
+	
+	/**
+	 * Sets whether candidate queries are validated on the remote endpoint; only evaluated by
+	 * learnSPARQLQueries() in BEST_NON_EMPTY_QUERY mode.
+	 */
+	public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){
+		this.useRemoteEndpointValidation = useRemoteEndpointValidation;
+	}
+	
+	/**
+	 * Returns the configured maximum query execution time in seconds.
+	 */
+	public int getMaxQueryExecutionTimeInSeconds() {
+		return maxQueryExecutionTimeInSeconds;
+	}
+
+	/**
+	 * Sets the maximum query execution time in seconds and, as setOptions(...) does, passes it on
+	 * to the query cache when one is configured. Previously only the field was updated, so the
+	 * new limit never reached the cache.
+	 *
+	 * @param maxQueryExecutionTimeInSeconds the new limit in seconds
+	 */
+	public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) {
+		this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds;
+		if(cache != null){
+			cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds);
+		}
+	}
+
+	/**
+	 * Returns the configured maximum number of tested queries per template.
+	 */
+	public int getMaxTestedQueriesPerTemplate() {
+		return maxTestedQueriesPerTemplate;
+	}
+
+	/**
+	 * Sets the maximum number of tested queries per template.
+	 */
+	public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) {
+		this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate;
+	}
+
+	/**
+	 * Clears all per-question state before a new learning run.
+	 * <p>
+	 * Also rewinds learnedPos to its initial value; otherwise the position reported by
+	 * getLearnedPosition() would keep growing across consecutive questions.
+	 */
+	private void reset(){
+		learnedSPARQLQueries = new TreeSet<WeightedQuery>();
+		template2Queries = new HashMap<Template, Collection<? extends Query>>();
+		slot2URI = new HashMap<Slot, List<String>>();
+		relevantKeywords = new HashSet<String>();
+		currentlyExecutedQuery = null;
+		learnedPos = -1;
+		
+//		templateMon.reset();
+//		sparqlMon.reset();
+	}
+	
+	/**
+	 * Runs the learning pipeline for the current question: builds the query templates, generates
+	 * the weighted query candidates and selects the learned queries according to the mode.
+	 * <p>
+	 * At most maxTestedQueries candidates are kept (the previous loop kept one more), and the
+	 * validation step is skipped when there are no candidates instead of failing on the empty
+	 * collection.
+	 *
+	 * @throws NoTemplateFoundException if no template could be built for the question
+	 */
+	public void learnSPARQLQueries() throws NoTemplateFoundException{
+		reset();
+		//generate SPARQL query templates
+		logger.info("Generating SPARQL query templates...");
+		templateMon.start();
+		if(multiThreaded){
+			templates = templateGenerator.buildTemplatesMultiThreaded(question);
+		} else {
+			templates = templateGenerator.buildTemplates(question);
+		}
+		templateMon.stop();
+		logger.info("Done in " + templateMon.getLastValue() + "ms.");
+		relevantKeywords.addAll(templateGenerator.getUnknownWords());
+		if(templates.isEmpty()){
+			throw new NoTemplateFoundException();
+		}
+		logger.info("Templates:");
+		for(Template t : templates){
+			logger.info(t);
+		}
+		
+		//get the weighted query candidates
+		generatedQueries = getWeightedSPARQLQueries(templates);
+		sparqlQueryCandidates = new ArrayList<WeightedQuery>();
+		for(WeightedQuery wQ : generatedQueries){
+			// check the limit before adding, so that exactly maxTestedQueries candidates are kept
+			if(sparqlQueryCandidates.size() >= maxTestedQueries){
+				break;
+			}
+			System.out.println(wQ.explain());
+			sparqlQueryCandidates.add(wQ);
+		}
+		
+		if(mode == Mode.BEST_QUERY){
+			// take the leading candidates for as long as the score does not drop
+			double bestScore = -1;
+			for(WeightedQuery candidate : generatedQueries){
+				double score = candidate.getScore();
+				if(score >= bestScore){
+					bestScore = score;
+					learnedSPARQLQueries.add(candidate);
+				} else {
+					break;
+				}
+			}
+		} else if(mode == Mode.BEST_NON_EMPTY_QUERY){
+			//test candidates
+			if(useRemoteEndpointValidation){ //on remote endpoint
+				if(!sparqlQueryCandidates.isEmpty()){
+					validateAgainstRemoteEndpoint(sparqlQueryCandidates);
+				}
+			} else {//on local model
+				// validation on a local model is not implemented
+			}
+		}
+	}
+	
+	/**
+	 * Returns all weighted queries generated by the last learnSPARQLQueries() run; null if no run
+	 * has generated queries yet.
+	 */
+	public SortedSet<WeightedQuery> getGeneratedQueries() {
+		return generatedQueries;
+	}
+	
+	/**
+	 * Returns the first topN generated queries in their sort order.
+	 * <p>
+	 * Returns an empty set when topN is not positive or no queries have been generated yet.
+	 * Previously a topN of 0 or less returned all queries, because the size check ran only after
+	 * an element had been added.
+	 *
+	 * @param topN maximum number of queries to return
+	 * @return a new sorted set holding at most topN queries
+	 */
+	public SortedSet<WeightedQuery> getGeneratedQueries(int topN) {
+		SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>();
+		if(generatedQueries == null || topN <= 0){
+			return topNQueries;
+		}
+		for(WeightedQuery wQ : generatedQueries){
+			if(topNQueries.size() >= topN){
+				break;
+			}
+			topNQueries.add(wQ);
+		}
+		return topNQueries;
+	}
+	
+	/**
+	 * Returns the templates built by the last learnSPARQLQueries() run.
+	 */
+	public Set<Template> getTemplates(){
+		return templates;
+	}
+	
+	public List<String> getGeneratedSPARQLQueries(){
+		List<String> queries = new ArrayList<String>();
+		for(WeightedQuery wQ : sparqlQueryCandidates){
+			queries.add(wQ.getQuery().toString());
+		}
+		
+		return queries;
+	}
+	
+	/**
+	 * Returns the mapping from each processed template to the queries generated from it.
+	 */
+	public Map<Template, Collection<? extends Query>> getTemplates2SPARQLQueries(){
+		return template2Queries;
+	}
+	
+	/**
+	 * Returns the slot-to-URIs map; it is re-created empty by every learning run.
+	 */
+	public Map<Slot, List<String>> getSlot2URIs(){
+		return slot2URI;
+	}
+	
+	/**
+	 * Scales the prominence of every allocation by the spread (max - min) of all prominence
+	 * values, where both bounds start at 0.
+	 * <p>
+	 * When the spread is 0 (empty set, or every prominence is 0) the values are left untouched;
+	 * previously this divided by zero and stored NaN as the prominence.
+	 *
+	 * @param allocations the allocations whose prominence values are scaled in place
+	 */
+	private void normProminenceValues(Set<Allocation> allocations){
+		double min = 0;
+		double max = 0;
+		for(Allocation a : allocations){
+			double prominence = a.getProminence();
+			if(prominence < min){
+				min = prominence;
+			}
+			if(prominence > max){
+				max = prominence;
+			}
+		}
+		double range = max - min;
+		if(range == 0){
+			// both bounds start at 0, so a zero spread means every prominence is 0
+			return;
+		}
+		for(Allocation a : allocations){
+			a.setProminence(a.getProminence()/range);
+		}
+	}
+	
+	private void computeScore(Set<Allocation> allocations){
+		double alpha = 0.8;
+		double beta = 1 - alpha;
+		
+		for(Allocation a : allocations){
+			double score = alpha * a.getSimilarity() + beta * a.getProminence();
+			a.setScore(score);
+		}
+		
+	}
+	
+	/**
+	 * Returns the keywords collected during the last learning run; learnSPARQLQueries() fills
+	 * this set with the template generator's unknown words.
+	 */
+	public Set<String> getRelevantKeywords(){
+		return relevantKeywords;
+	}
+	
+	private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){
+		logger.info("Generating SPARQL query candidates...");
+		
+		Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() {
+
+			@Override
+			public int compare(Slot o1, Slot o2) {
+				if(o1.getSlotType() == o2.getSlotType()){
+					return o1.getToken().compareTo(o2.getToken());
+				} else {
+					return -1;
+				}
+			}
+		});
+		slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>());
+		
+		
+		SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>();
+		
+		Set<Allocation> allocations;
+		
+		for(Template t : templates){
+			logger.info("Processing template:\n" + t.toString());
+			allocations = new TreeSet<Allocation>();
+			
+			ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size());
+			List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>();
+			
+			long startTime = System.currentTimeMillis();
+			
+			for (Slot slot : t.getSlots()) {
+				if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations);
+					Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot);
+					Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker);
+					list.add(submit);
+				} else {
+					System.out.println("CACHE HIT");
+				}
+			}
+			
+			for (Future<Map<Slot, SortedSet<Allocation>>> future : list) {
+				try {
+					Map<Slot, SortedSet<Allocation>> result = future.get();
+					Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next();
+					slot2Allocations.put(item.getKey(), item.getValue());
+				} catch (InterruptedException e) {
+					e.printStackTrace();
+				} catch (ExecutionException e) {
+					e.printStackTrace();
+				}
+			}
+			
+			executor.shutdown();
+			System.out.println("Time needed: " + (System.currentTimeMillis() - startTime) + "ms");
+			
+			Set<WeightedQuery> queries = new HashSet<WeightedQuery>();
+			Query cleanQuery = t.getQuery();
+			queries.add(new WeightedQuery(cleanQuery));
+			
+			Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>();
+			List<Slot> sortedSlots = new ArrayList<Slot>();
+			Set<Slot> classSlots = new HashSet<Slot>();
+			for(Slot slot : t.getSlots()){
+				if(slot.getSlotType() == SlotType.CLASS){
+					sortedSlots.add(slot);
+					classSlots.add(slot);
+				}
+			}
+			for(Slot slot : t.getSlots()){
+				if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY || slot.getSlotType() == SlotType.DATATYPEPROPERTY){
+					sortedSlots.add(slot);
+				}
+			}
+			for(Slot slot : t.getSlots()){
+				if(!sortedSlots.contains(slot)){
+					sortedSlots.add(slot);
+				}
+			}
+			//add for each SYMPROPERTY Slot the reversed query
+			for(Slot slot : sortedSlots){
+				for(WeightedQuery wQ : queries){
+					if(slot.getSlotType() == SlotType.SYMPROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY){
+						Query reversedQuery = new Query(wQ.getQuery());
+						reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse();
+						tmp.add(new WeightedQuery(reversedQuery));
+					}
+					tmp.add(wQ);
+				}
+				queries.clear();
+				queries.addAll(tmp);
+				tmp.clear();
+			}
+		
+			for(Slot slot : sortedSlots){
+				if(!slot2Allocations.get(slot).isEmpty()){
+					for(Allocation a : slot2Allocations.get(slot)){
+						for(WeightedQuery query : queries){
+								Query q = new Query(query.getQuery());
+								q.replaceVarWithURI(slot.getAnchor(), a.getUri());
+								WeightedQuery w = new WeightedQuery(q);
+								double newScore = query.getScore() + a.getScore();
+								w.setScore(newScore);
+								w.addAllocations(query.getAllocations());
+								w.addAllocation(a);
+								tmp.add(w);
+								
+							
+						}
+					}
+					queries.clear();
+					queries.addAll(tmp);
+					tmp.clear();
+					
+					
+				}
+				
+			}
+			SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, cache);
+			for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) {
+				WeightedQuery wQ = iterator.next();
+				Query q = wQ.getQuery();
+				for(SPARQL_Triple triple : q.getConditions()){
+					SPARQL_Term subject = triple.getVariable();
+					SPARQL_Property predicate = triple.getProperty();
+					SPARQL_Value object = triple.getValue();
+					
+					if(!predicate.isVariable() && !predicate.getName().equals("type")){
+						if(subject.isVariable() && !object.isVariable()){
+							String varName = triple.getVariable().getName();
+							Set<String> types = new HashSet<String>();
+							for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(varName)){
+								types.add(typeTriple.getValue().getName().replace(">", "").replace("<", ""));
+							}
+							for(String type : types){System.out.println(type);
+								metrics.getGoodness(new NamedClass(type), 
+										new ObjectProperty(predicate.getName().replace(">", "").replace("<", "")), 
+										new Individual(object.getName().replace(">", "").replace("<", "")));
+							}
+						} else if(object.isVariable() && !subject.isVariable()){
+							String varName = triple.getVariable().getName();
+							Set<String> types = new HashSet<String>();
+							for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(varName)){
+								types.add(typeTriple.getValue().getName().replace(">", "").replace("<", ""));
+							}
+							for(String type : types){
+								metrics.getGoodness(new Individual(subject.getName().replace(">", "").replace("<", "")), 
+										new ObjectProperty(predicate.getName().replace(">", "").replace("<", "")), 
+										new NamedClass(type));
+							}
+						}
+					}
+				}
+				
+			}
+			for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) {
+				WeightedQuery wQ = iterator.next();
+				if(dropZeroScoredQueries){
+					if(wQ.getScore() <= 0){
+						iterator.remove();
+					}
+				} else {
+					wQ.setScore(wQ.getScore()/t.getSlots().size());
+				}
+				
+			}
+			allQueries.addAll(queries);
+			List<Query> qList = new ArrayList<Query>();
+			for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery());
+				qList.add(wQ.getQuery());
+			}
+			template2Queries.put(t, qList);
+		}
+		logger.info("...done in ");
+		return allQueries;
+	}
+	
+	/**
+	 * Returns the prominence of a URI for the given slot type.
+	 * <p>
+	 * The popularity map is consulted first when one is set. If it yields no value, a COUNT query
+	 * matching the slot type is executed. Slot types without a matching query now yield 0;
+	 * previously the null query string caused a NullPointerException in String.format.
+	 *
+	 * @param uri the URI of the class, property or resource
+	 * @param type the slot type that decides which lookup and which query is used
+	 * @return the popularity, or 0 if none could be determined
+	 */
+	private double getProminenceValue(String uri, SlotType type){
+		Integer popularity = null;
+		if(popularityMap != null){
+			if(type == SlotType.CLASS){
+				popularity = popularityMap.getPopularity(uri, EntityType.CLASS);
+			} else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY 
+					|| type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){
+				popularity = popularityMap.getPopularity(uri, EntityType.PROPERTY);
+			} else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){
+				popularity = popularityMap.getPopularity(uri, EntityType.RESOURCE);
+			} 
+		} 
+		if(popularity == null){
+			String query = null;
+			if(type == SlotType.CLASS){
+				query = "SELECT COUNT(?s) WHERE {?s a <%s>}";
+			} else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY 
+					|| type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){
+				query = "SELECT COUNT(*) WHERE {?s <%s> ?o}";
+			} else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){
+				query = "SELECT COUNT(*) WHERE {?s ?p <%s>}";
+			}
+			// no query template exists for other slot types
+			if(query != null){
+				query = String.format(query, uri);
+				
+				ResultSet rs = executeSelect(query);
+				while(rs.hasNext()){
+					QuerySolution qs = rs.next();
+					// the query projects a single anonymous count column
+					String projectionVar = qs.varNames().next();
+					popularity = qs.get(projectionVar).asLiteral().getInt();
+				}
+			}
+		}
+		if(popularity == null){
+			popularity = Integer.valueOf(0);
+		}
+		
+		return popularity;
+	}
+	
+	/**
+	 * Sets the popularity map that getProminenceValue(...) consults before querying.
+	 */
+	public void setPopularityMap(PopularityMap popularityMap) {
+		this.popularityMap = popularityMap;
+	}
+	
+	
+	/**
+	 * Validates the given candidates, using the query type of the first candidate for all of
+	 * them. Does nothing when there are no candidates; previously an empty collection caused a
+	 * NoSuchElementException.
+	 *
+	 * @param queries the candidate queries to test
+	 */
+	private void validateAgainstRemoteEndpoint(Collection<WeightedQuery> queries){
+		if(queries == null || queries.isEmpty()){
+			return;
+		}
+		SPARQL_QueryType queryType = queries.iterator().next().getQuery().getQt();
+		validate(queries, queryType);
+	}
+	
+	/**
+	 * Tests the candidate queries one by one and adds the accepted ones to learnedSPARQLQueries.
+	 * <p>
+	 * SELECT queries are executed with LIMIT 1 and accepted when they return a result; a result
+	 * that parses as an integer (a count) must be greater than 0. ASK queries are executed and
+	 * added regardless of their result. When stopIfQueryResultNotEmpty is set, testing stops
+	 * after the first accepted query.
+	 * <p>
+	 * The SPARQL monitor is now stopped on every exit path; previously the early returns left it
+	 * running. Failures of a single query are logged instead of printed to stderr.
+	 *
+	 * @param queries the candidate queries to test
+	 * @param queryType the query type shared by the candidates
+	 */
+	private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){
+		logger.info("Testing candidate SPARQL queries on remote endpoint...");
+		sparqlMon.start();
+		try {
+			if(queryType == SPARQL_QueryType.SELECT){
+				for(WeightedQuery query : queries){
+					learnedPos++;
+					List<String> results;
+					try {
+						logger.info("Testing query:\n" + query);
+						com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ);
+						// one row is enough to decide whether the query has results
+						q.setLimit(1);
+						ResultSet rs = executeSelect(q.toString());
+						
+						results = new ArrayList<String>();
+						QuerySolution qs;
+						String projectionVar;
+						while(rs.hasNext()){
+							qs = rs.next();
+							projectionVar = qs.varNames().next();
+							if(qs.get(projectionVar).isLiteral()){
+								results.add(qs.get(projectionVar).asLiteral().getLexicalForm());
+							} else if(qs.get(projectionVar).isURIResource()){
+								results.add(qs.get(projectionVar).asResource().getURI());
+							}
+							
+						}
+						if(!results.isEmpty()){
+							try{
+								// a numeric result is treated as a count, which must be positive
+								int cnt = Integer.parseInt(results.get(0));
+								if(cnt > 0){
+									learnedSPARQLQueries.add(query);
+									if(stopIfQueryResultNotEmpty){
+										return;
+									}
+								}
+							} catch (NumberFormatException e){
+								// not a count: any non-empty result is accepted
+								learnedSPARQLQueries.add(query);
+								if(stopIfQueryResultNotEmpty){
+									return;
+								}
+							}
+							logger.info("Result: " + results);
+						}
+					} catch (Exception e) {
+						// a failing candidate must not abort the test of the remaining ones
+						logger.error("Error while testing query:\n" + query, e);
+					}
+					
+				}
+			} else if(queryType == SPARQL_QueryType.ASK){
+				for(WeightedQuery query : queries){
+					learnedPos++;
+					logger.info("Testing query:\n" + query);
+					boolean result = executeAskQuery(query.getQuery().toString());
+					learnedSPARQLQueries.add(query);
+//					if(stopIfQueryResultNotEmpty && result){
+//						return;
+//					}
+					if(stopIfQueryResultNotEmpty){
+						return;
+					}
+					logger.info("Result: " + result);
+				}
+			}
+		} finally {
+			sparqlMon.stop();
+			logger.info("Done in " + sparqlMon.getLastValue() + "ms.");
+		}
+	}
+	
+	/**
+	 * Executes an ASK query on the remote endpoint, using the endpoint's default graphs, and
+	 * releases the HTTP query execution afterwards (it was never closed before).
+	 *
+	 * @param query the ASK query string
+	 * @return the answer of the endpoint
+	 */
+	private boolean executeAskQuery(String query){
+		currentlyExecutedQuery = query;
+		QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query);
+		try {
+			for(String uri : endpoint.getDefaultGraphURIs()){
+				qe.addDefaultGraph(uri);
+			}
+			return qe.execAsk();
+		} finally {
+			qe.close();
+		}
+	}
+	
+	/**
+	 * Executes a SELECT query and remembers it as the currently executed query.
+	 * A local model takes precedence; otherwise the query goes to the endpoint, through the
+	 * cache when one is set and directly over HTTP when not.
+	 */
+	private ResultSet executeSelect(String query) {
+		currentlyExecutedQuery = query;
+		if (model != null) {
+			return QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model)
+					.execSelect();
+		}
+		if (cache != null) {
+			return SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		}
+		QueryEngineHTTP remoteExecution = new QueryEngineHTTP(endpoint.getURL().toString(), query);
+		remoteExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
+		return remoteExecution.execSelect();
+	}
+	
+	/**
+	 * Returns the query string most recently passed to executeSelect(...) or
+	 * executeAskQuery(...); null after a reset.
+	 */
+	public String getCurrentlyExecutedQuery() {
+		return currentlyExecutedQuery;
+	}
+	
+	/**
+	 * Returns the 1-based position of the last tested candidate, or the raw negative marker when
+	 * no candidate has been tested.
+	 */
+	public int getLearnedPosition() {
+		return learnedPos >= 0 ? learnedPos + 1 : learnedPos;
+	}
+
+	/**
+	 * Intentionally empty here; learning is triggered through learnSPARQLQueries().
+	 * NOTE(review): confirm that callers of start() do not expect it to run the learner.
+	 */
+	@Override
+	public void start() {
+	}
+
+	/**
+	 * Returns the string form of the best learned queries, in their sort order.
+	 * <p>
+	 * The result is now limited to nrOfSPARQLQueries entries; previously the parameter was
+	 * ignored and all learned queries were returned. Returns an empty list when nothing has been
+	 * learned yet or the limit is not positive.
+	 *
+	 * @param nrOfSPARQLQueries maximum number of queries to return
+	 * @return at most nrOfSPARQLQueries query strings
+	 */
+	@Override
+	public List<String> getCurrentlyBestSPARQLQueries(int nrOfSPARQLQueries) {
+		List<String> bestQueries = new ArrayList<String>();
+		if(learnedSPARQLQueries == null){
+			return bestQueries;
+		}
+		for(WeightedQuery wQ : learnedSPARQLQueries){
+			if(bestQueries.size() >= nrOfSPARQLQueries){
+				break;
+			}
+			bestQueries.add(wQ.getQuery().toString());
+		}
+		return bestQueries;
+	}
+
+	@Override
+	public String getBestSPARQLQuery() {
+		if(!learnedSPARQLQueries.isEmpty()){
+			return learnedSPARQLQueries.iterator().next().getQuery().toString();
+		} else {
+			return null;
+		}
+	}
+	
+	/**
+	 * @return the internal sorted set of learned queries (not a copy)
+	 */
+	public SortedSet<WeightedQuery> getLearnedSPARQLQueries() {
+		return this.learnedSPARQLQueries;
+	}
+
+	/**
+	 * Not implemented: this class does not hold a learning problem.
+	 * 
+	 * @return always <code>null</code>
+	 */
+	@Override
+	public LearningProblem getLearningProblem() {
+		// TODO Auto-generated method stub
+		return null;
+	}
+
+	/**
+	 * Not implemented: the given learning problem is ignored.
+	 * 
+	 * @param learningProblem ignored
+	 */
+	@Override
+	public void setLearningProblem(LearningProblem learningProblem) {
+		// TODO Auto-generated method stub
+		
+	}
+	
+	/**
+	 * Task computing the candidate allocations of a single slot, so that the slots of a
+	 * template can be submitted to an executor.
+	 */
+	class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{
+		
+		private final Slot slot;
+		
+		public SlotProcessor(Slot slot) {
+			this.slot = slot;
+		}
+
+		/**
+		 * @return a map with a single entry: the slot of this task mapped to its allocations
+		 */
+		@Override
+		public Map<Slot, SortedSet<Allocation>> call() throws Exception {
+			Map<Slot, SortedSet<Allocation>> slot2Allocations = new HashMap<Slot, SortedSet<Allocation>>();
+			SortedSet<Allocation> computed = computeAllocations(slot);
+			slot2Allocations.put(slot, computed);
+			return slot2Allocations;
+		}
+		
+		/**
+		 * Looks up every word of the slot in the manual mapping index (if present) and in the
+		 * index matching the slot type, and turns each hit into an allocation carrying its
+		 * prominence and its string similarity to the word.
+		 */
+		private SortedSet<Allocation> computeAllocations(Slot slot){
+			logger.info("Computing allocations for slot: " + slot);
+			SortedSet<Allocation> allocations = new TreeSet<Allocation>();
+			Index index = getIndexBySlotType(slot);
+			
+			for(String word : slot.getWords()){
+				IndexResultSet candidates = new IndexResultSet();
+				
+				//step 1: manually defined mappings
+				if(mappingIndex != null){
+					addManualMappings(candidates, word, slot.getSlotType());
+				}
+				
+				//step 2: regular index. It is skipped only if the option is set AND the manual
+				//mappings already returned something.
+				String matchedWord = word;
+				if(!useManualMappingsIfExistOnly || candidates.isEmpty()){
+					//class labels are looked up (and later compared) in their stemmed form
+					if(slot.getSlotType() == SlotType.CLASS){
+						matchedWord = PlingStemmer.stem(word);
+					}
+					candidates.add(index.getResourcesWithScores(matchedWord, 20));
+				}
+				
+				//step 3: one allocation per hit
+				for(IndexResultItem hit : candidates.getItems()){
+					String uri = hit.getUri();
+					double similarity = Similarity.getSimilarity(matchedWord, hit.getLabel());
+					double prominence = getProminenceValue(uri, slot.getSlotType());
+					allocations.add(new Allocation(uri, prominence, similarity));
+				}
+			}
+			
+			normProminenceValues(allocations);
+			computeScore(allocations);
+			
+			logger.info("Found " + allocations.size() + " allocations for slot " + slot);
+			return new TreeSet<Allocation>(allocations);
+		}
+		
+		/**
+		 * Adds the entries of the manual mapping index for the given word to the result set,
+		 * using the lookup method that belongs to the slot type. Unhandled types add nothing.
+		 */
+		private void addManualMappings(IndexResultSet target, String word, SlotType type){
+			if(type == SlotType.CLASS){
+				target.add(mappingIndex.getClassesWithScores(word));
+				return;
+			}
+			if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){
+				target.add(mappingIndex.getPropertiesWithScores(word));
+				return;
+			}
+			if(type == SlotType.DATATYPEPROPERTY){
+				target.add(mappingIndex.getDatatypePropertiesWithScores(word));
+				return;
+			}
+			if(type == SlotType.OBJECTPROPERTY){
+				target.add(mappingIndex.getObjectPropertiesWithScores(word));
+				return;
+			}
+			if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){
+				target.add(mappingIndex.getResourcesWithScores(word));
+			}
+		}
+		
+		/**
+		 * @return the index responsible for the type of the given slot, or <code>null</code>
+		 *         if the type is none of the handled ones
+		 */
+		private Index getIndexBySlotType(Slot slot){
+			SlotType type = slot.getSlotType();
+			if(type == SlotType.CLASS){
+				return classesIndex;
+			}
+			if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){
+				return propertiesIndex;
+			}
+			if(type == SlotType.DATATYPEPROPERTY){
+				return datatypePropertiesIndex;
+			}
+			if(type == SlotType.OBJECTPROPERTY){
+				return objectPropertiesIndex;
+			}
+			if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){
+				return resourcesIndex;
+			}
+			return null;
+		}
+		
+	}
+	
+	public String getTaggedInput(){
+		return templateGenerator.getTaggedInput();
+	}
+	
+	/**
+	 * Checks whether the given URI denotes a datatype property. The mapping index is asked
+	 * first (if there is one); if it gives no answer, an ASK query is sent to the endpoint.
+	 * 
+	 * @param uri the URI of the property
+	 * @return <code>true</code> if the URI is reported to be an owl:DatatypeProperty
+	 */
+	private boolean isDatatypeProperty(String uri){
+		Boolean fromMapping = null;
+		if(mappingIndex != null){
+			fromMapping = mappingIndex.isDataProperty(uri);
+		}
+		if(fromMapping != null){
+			return fromMapping;
+		}
+		//no answer from the mapping index, ask the endpoint
+		String askQuery = String.format("ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty> .}", uri);
+		return executeAskQuery(askQuery);
+	}
+	
+	/**
+	 * Demo entry point: creates a learner on the DBpedia Live (AKSW) endpoint with three SOLR
+	 * indexes, runs it on one fixed question and prints the best query, the lexical answer
+	 * type of the first template and the learned position.
+	 * 
+	 * @param args not used
+	 * @throws Exception any exception raised while setting up or running the learner
+	 */
+	public static void main(String[] args) throws Exception {
+		SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW();
+		Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources");
+		Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes");
+		Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties");
+		
+		SPARQLTemplateBasedLearner3 learner = new SPARQLTemplateBasedLearner3(endpoint, resourcesIndex, classesIndex, propertiesIndex);
+		learner.init();
+		
+		String question = "Give me all books written by Dan Brown";
+		
+		learner.setQuestion(question);
+		learner.learnSPARQLQueries();
+		System.out.println("Learned query:\n" + learner.getBestSPARQLQuery());
+		System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType());
+		System.out.println(learner.getLearnedPosition());
+		
+	}
+
+	
+
+}

Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java	2012-07-17 14:27:32 UTC (rev 3796)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java	2012-07-18 13:08:45 UTC (rev 3797)
@@ -1,187 +0,0 @@
-package org.dllearner.algorithm.tbsl.util;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import org.dllearner.core.owl.NamedClass;
-import org.dllearner.core.owl.ObjectProperty;
-import org.dllearner.core.owl.Property;
-import org.dllearner.kb.sparql.ExtractionDBCache;
-import org.dllearner.kb.sparql.SparqlEndpoint;
-import org.dllearner.kb.sparql.SparqlQuery;
-
-import com.hp.hpl.jena.query.QuerySolution;
-import com.hp.hpl.jena.query.ResultSet;
-
-public class PMI {
-	
-	private SparqlEndpoint endpoint;
-	private ExtractionDBCache cache;
-	
-	public PMI(SparqlEndpoint endpoint, ExtractionDBCache cache) {
-		this.endpoint = endpoint;
-		this.cache = cache;
-	}
-	
-	public double getDirectedPMI(ObjectProperty prop, NamedClass cls){
-		System.out.println(String.format("Computing PMI(%s, %s)", prop, cls));
-		String query  = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName());
-		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		double classOccurenceCnt = rs.next().getLiteral("cnt").getInt();
-		System.out.println("Class occurence: " + classOccurenceCnt);
-		
-		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName());
-		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt();
-		System.out.println("Property occurence: " + propertyOccurenceCnt);
-		
-		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?o a <%s>}", prop.getName(), cls.getName());
-		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		double coOccurenceCnt = rs.next().getLiteral("cnt").getInt();
-		System.out.println("Co-occurence: " + coOccurenceCnt);
-		
-		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}");
-		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		double total = rs.next().getLiteral("cnt").getInt();
-		System.out.println("Total: " + total);
-		
-		if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){
-			return 0;
-		}
-		
-		double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) );
-		
-		return pmi;
-	}
-	
-	public double getDirectedPMI(NamedClass cls, Property prop){
-		System.out.println(String.format("Computing PMI(%s, %s)", cls, prop));
-		String query  = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName());
-		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		double classOccurenceCnt = rs.next().getLiteral("cnt").getInt();
-		System.out.println("Class occurence: " + classOccurenceCnt);
-		
-		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName());
-		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt();
-		System.out.println("Property occurence: " + propertyOccurenceCnt);
-		
-		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName());
-		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		double coOccurenceCnt = rs.next().getLiteral("cnt").getInt();
-		System.out.println("Co-occurence: " + coOccurenceCnt);
-		
-		query  = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}");
-		rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		double total = rs.next().getLiteral("cnt").getInt();
-		System.out.println("Total: " + total);
-		
-		if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){
-			return 0;
-		}
-		
-		double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) );
-		
-		return pmi;
-	}
-	
-	/**
-	 * Returns the direction of the given triple, computed by calculated the PMI values of each combination.
-	 * @param subject
-	 * @param predicate
-	 * @param object
-	 * @return -1 if the given triple should by reversed, else 1.
-	 */
-	public int getDirection(NamedClass subject, ObjectProperty predicate, NamedClass object){
-		System.out.println(String.format("Computing direction between [%s, %s, %s]", subject, predicate, object));
-		double pmi_obj_pred = getDirectedPMI(object, predicate);System.out.println("PMI(OBJECT, PREDICATE): " + pmi_obj_pred);
-		double pmi_pred_subj = getDirectedPMI(predicate, subject);System.out.println("PMI(PREDICATE, SUBJECT): " + pmi_pred_subj);
-		double pmi_subj_pred = getDirectedPMI(subject, predicate);System.out.println("PMI(SUBJECT, PREDICATE): " + pmi_subj_pred);
-		double pmi_pred_obj = getDirectedPMI(predicate, object);System.out.println("PMI(PREDICATE, OBJECT): " + pmi_pred_obj);
-		
-		double threshold = 2.0;
-		
-		double value = ((pmi_obj_pred + pmi_pred_subj) - (pmi_subj_pred + pmi_pred_obj));
-		System.out.println("(PMI(OBJECT, PREDICATE) + PMI(PREDICATE, SUBJECT)) - (PMI(SUBJECT, PREDICATE) + PMI(PREDICATE, OBJECT)) = " + value);
-		
-		if( value > threshold){
-			System.out.println(object + "---" + predicate + "--->" + subject);
-			return -1;
-		} else {
-			System.out.println(subject + "---" + predicate + "--->" + object);
-			return 1;
-		}
-	}
-	
-	public Map<ObjectProperty, Integer> getMostFrequentProperties(NamedClass cls1, NamedClass cls2){
-		Map<ObjectProperty, Integer> prop2Cnt = new HashMap<ObjectProperty, Integer>();
-		String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?x1 a <%s>. ?x2 a <%s>. ?x1 ?p ?x2} GROUP BY ?p", cls1, cls2);
-		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
-		QuerySolution qs;
-		while(rs.hasNext()){
-			qs = rs.next();
-			ObjectProperty p = new ObjectProperty(qs.getResource("p").getURI());
-			int cnt = qs.getLiteral("cnt").getInt();
-			prop2Cnt.put(p, cnt);
-		}
-		return prop2Cnt;
-	}
-	
-	public static void main(String[] args) {
-		SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
-		ExtractionDBCache cache = new ExtractionDBCache("cache");
-		String NS = "http://dbpedia.org/ontology/";
-		
-		PMI pmiGen = new PMI(endpoint, cache);
-		System.out.println(pmiGen.getDirectedPMI(
-				new ObjectProperty(NS + "author"), 
-				new NamedClass(NS+ "Person")));
-		
-		System.out.println("#########################################");
-		
-		System.out.println(pmiGen.getDirectedPMI(
-				new ObjectProperty(NS + "author"), 
-				new NamedClass(NS+ "Writer")));
-		
-		System.out.println("#########################################");
-		
-		System.out.println(pmiGen.getDirectedPMI(
-				new NamedClass(NS+ "Book"),
-				new ObjectProperty(NS + "author")) 
-				);
-		
-		System.out.println("#########################################");
-		
-		System.out.println(pmiGen.getDirection(
-				new NamedClass(NS+ "Writer"), 
-				new ObjectProperty(NS + "author"), 
-				new NamedClass(NS+ "Book")));
-		
-		System.out.println("#########################################");
-		
-		System.out.println(pmiGen.getDirection(
-				new NamedClass(NS+ "Person"), 
-				new ObjectProperty(NS + "starring"), 
-				new NamedClass(NS+ "Film")));
-		
-		System.out.println("#########################################");
-		
-		System.out.println(pmiGen.getMostFrequentProperties(
-				new NamedClass(NS+ "Person"), 
-				new NamedClass(NS+ "Film")));
-		
-		System.out.println("#########################################");
-		
-		System.out.println(pmiGen.getMostFrequentProperties(
-				new NamedClass(NS+ "Film"), 
-				new NamedClass(NS+ "Actor")));
-		
-		System.out.println("#########################################");
-		
-		System.out.println(pmiGen.getMostFrequentProperties(
-				new NamedClass(NS+ "Film"), 
-				new NamedClass(NS+ "Person")));
-		
-	}
-
-}

Copied: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java (from rev 3794, trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java)
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java	                        (rev 0)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java	2012-07-18 13:08:45 UTC (rev 3797)
@@ -0,0 +1,446 @@
+package org.dllearner.algorithm.tbsl.util;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.log4j.Logger;
+import org.dllearner.core.owl.Individual;
+import org.dllearner.core.owl.NamedClass;
+import org.dllearner.core.owl.ObjectProperty;
+import org.dllearner.core.owl.Property;
+import org.dllearner.kb.SparqlEndpointKS;
+import org.dllearner.kb.sparql.ExtractionDBCache;
+import org.dllearner.kb.sparql.SparqlEndpoint;
+import org.dllearner.kb.sparql.SparqlQuery;
+import org.dllearner.reasoning.SPARQLReasoner;
+
+import com.hp.hpl.jena.query.QuerySolution;
+import com.hp.hpl.jena.query.ResultSet;
+
+public class SPARQLEndpointMetrics {
+	
+	private static final Logger log = Logger.getLogger(SPARQLEndpointMetrics.class);
+	
+	private SparqlEndpoint endpoint;
+	private ExtractionDBCache cache;
+	private SPARQLReasoner reasoner;
+	
+	/**
+	 * Creates the metrics for the given endpoint. All queries go through the given cache;
+	 * a SPARQL reasoner on the same endpoint and cache is created as well.
+	 * 
+	 * @param endpoint the SPARQL endpoint to query
+	 * @param cache the cache used for query execution
+	 */
+	public SPARQLEndpointMetrics(SparqlEndpoint endpoint, ExtractionDBCache cache) {
+		this.endpoint = endpoint;
+		this.cache = cache;
+		
+		SparqlEndpointKS knowledgeSource = new SparqlEndpointKS(endpoint);
+		this.reasoner = new SPARQLReasoner(knowledgeSource, cache);
+	}
+	
+	/**
+	 * Computes the directed Pointwise Mutual Information (PMI) of a property and a class in
+	 * object position. Formula: log( (f(prop, cls) * N) / (f(cls) * f(prop)) ).
+	 * 
+	 * @param prop the property
+	 * @param cls the class of the objects
+	 * @return the PMI value, or 0 if one of the three frequencies is not positive
+	 */
+	public double getDirectedPMI(ObjectProperty prop, NamedClass cls){
+		log.debug(String.format("Computing PMI(%s, %s)", prop, cls));
+		
+		double fCls = getOccurencesInObjectPosition(cls);
+		double fProp = getOccurences(prop);
+		double fBoth = getOccurencesPredicateObject(prop, cls);
+		double n = getTotalTripleCount();
+		
+		boolean allPositive = fBoth > 0 && fCls > 0 && fProp > 0;
+		double pmi = allPositive ? Math.log( (fBoth * n) / (fCls * fProp) ) : 0;
+		
+		log.info(String.format("PMI(%s, %s) = %f", prop, cls, pmi));
+		return pmi;
+	}
+	
+	/**
+	 * Computes the directed Pointwise Mutual Information (PMI) of a class in subject position
+	 * and a property. Formula: log( (f(cls, prop) * N) / (f(cls) * f(prop)) ).
+	 * 
+	 * @param cls the class of the subjects
+	 * @param prop the property
+	 * @return the PMI value, or 0 if one of the three frequencies is not positive
+	 */
+	public double getDirectedPMI(NamedClass cls, Property prop){
+		log.debug(String.format("Computing PMI(%s, %s)...", cls, prop));
+		
+		double fCls = getOccurencesInSubjectPosition(cls);
+		double fProp = getOccurences(prop);
+		double fBoth = getOccurencesSubjectPredicate(cls, prop);
+		double n = getTotalTripleCount();
+		
+		boolean allPositive = fBoth > 0 && fCls > 0 && fProp > 0;
+		double pmi = allPositive ? Math.log( (fBoth * n) / (fCls * fProp) ) : 0;
+		
+		log.info(String.format("PMI(%s, %s) = %f", cls, prop, pmi));
+		return pmi;
+	}
+	
+	/**
+	 * Computes the Pointwise Mutual Information (PMI) of a class in subject position and a
+	 * class in object position. Formula: log( (f(subject, object) * N) / (f(subject) * f(object)) ).
+	 * 
+	 * @param subject the class of the subjects
+	 * @param object the class of the objects
+	 * @return the PMI value, or 0 if one of the three frequencies is not positive
+	 */
+	public double getPMI(NamedClass subject, NamedClass object){
+		log.debug(String.format("Computing PMI(%s, %s)", subject, object));
+		
+		double fBoth = getOccurencesSubjectObject(subject, object);
+		double fSubject = getOccurencesInSubjectPosition(subject);
+		double fObject = getOccurencesInObjectPosition(object);
+		double n = getTotalTripleCount();
+		
+		boolean allPositive = fBoth > 0 && fSubject > 0 && fObject > 0;
+		double pmi = allPositive ? Math.log( (fBoth * n) / (fSubject * fObject) ) : 0;
+		
+		log.info(String.format("PMI(%s, %s) = %f", subject, object, pmi));
+		return pmi;
+	}
+	
+	/**
+	 * Determines the direction of the given triple by comparing the summed PMI values of the
+	 * reversed reading (object, predicate, subject) with those of the given reading.
+	 * The triple is considered reversed if the difference exceeds a fixed threshold of 2.0.
+	 * 
+	 * @param subject the class in subject position
+	 * @param predicate the property
+	 * @param object the class in object position
+	 * @return -1 if the given triple should be reversed, else 1
+	 */
+	public int getDirection(NamedClass subject, ObjectProperty predicate, NamedClass object){
+		log.info(String.format("Computing direction between [%s, %s, %s]", subject, predicate, object));
+		
+		//reversed reading: object --predicate--> subject
+		double reversedLeft = getDirectedPMI(object, predicate);
+		double reversedRight = getDirectedPMI(predicate, subject);
+		//given reading: subject --predicate--> object
+		double givenLeft = getDirectedPMI(subject, predicate);
+		double givenRight = getDirectedPMI(predicate, object);
+		
+		final double threshold = 2.0;
+		double difference = (reversedLeft + reversedRight) - (givenLeft + givenRight);
+		log.info("(PMI(OBJECT, PREDICATE) + PMI(PREDICATE, SUBJECT)) - (PMI(SUBJECT, PREDICATE) + PMI(PREDICATE, OBJECT)) = " + difference);
+		
+		boolean reverse = difference > threshold;
+		if(!reverse){
+			log.info(subject + "---" + predicate + "--->" + object);
+			return 1;
+		}
+		log.info(object + "---" + predicate + "--->" + subject);
+		return -1;
+	}
+	
+	/**
+	 * Returns, for each property that connects an instance of the first class (as subject)
+	 * with an instance of the second class (as object), the number of such triples.
+	 * 
+	 * @param cls1 the class of the subjects
+	 * @param cls2 the class of the objects
+	 * @return a map from property to the number of connecting triples
+	 */
+	public Map<ObjectProperty, Integer> getMostFrequentProperties(NamedClass cls1, NamedClass cls2){
+		Map<ObjectProperty, Integer> prop2Cnt = new HashMap<ObjectProperty, Integer>();
+		//use the class URIs explicitly (as all other queries in this class do) instead of relying on toString()
+		String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?x1 a <%s>. ?x2 a <%s>. ?x1 ?p ?x2} GROUP BY ?p", cls1.getName(), cls2.getName());
+		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		while(rs.hasNext()){
+			QuerySolution qs = rs.next();
+			ObjectProperty p = new ObjectProperty(qs.getResource("p").getURI());
+			int cnt = qs.getLiteral("cnt").getInt();
+			prop2Cnt.put(p, cnt);
+		}
+		return prop2Cnt;
+	}
+	
+	/**
+	 * Returns the number of triples with the given property as predicate and a subject that
+	 * is an instance of the given class.
+	 * 
+	 * @param cls the class of the subjects
+	 * @param prop the property
+	 * @return the value of the count query
+	 */
+	public int getOccurencesSubjectPredicate(NamedClass cls, Property prop){
+		String countQuery = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName());
+		ResultSet counts = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, countQuery));
+		return counts.next().getLiteral("cnt").getInt();
+	}
+	
+	/**
+	 * Returns the number of triples with the given property as predicate and an object that
+	 * is an instance of the given class.
+	 * 
+	 * @param prop the property
+	 * @param cls the class of the objects
+	 * @return the value of the count query
+	 */
+	public int getOccurencesPredicateObject(Property prop, NamedClass cls){
+		String countQuery = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?o a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName());
+		ResultSet counts = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, countQuery));
+		return counts.next().getLiteral("cnt").getInt();
+	}
+	
+	/**
+	 * Returns the number of triples whose subject is an instance of the first class and
+	 * whose object is an instance of the second class.
+	 * 
+	 * @param subject the class of the subjects
+	 * @param object the class of the objects
+	 * @return the value of the count query
+	 */
+	public int getOccurencesSubjectObject(NamedClass subject, NamedClass object){
+		String countQuery = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s ?p ?o. ?o a <%s>}", subject.getName(), object.getName());
+		ResultSet counts = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, countQuery));
+		return counts.next().getLiteral("cnt").getInt();
+	}
+	
+	/**
+	 * Returns the number of triples whose subject is an instance of the given class.
+	 * 
+	 * @param cls the class of the subjects
+	 * @return the value of the count query
+	 */
+	public int getOccurencesInSubjectPosition(NamedClass cls){
+		String countQuery = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?s a <%s>. ?s ?p ?o.}", cls.getName());
+		ResultSet counts = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, countQuery));
+		return counts.next().getLiteral("cnt").getInt();
+	}
+	
+	/**
+	 * Returns the number of triples whose object is an instance of the given class.
+	 * 
+	 * @param cls the class of the objects
+	 * @return the value of the count query
+	 */
+	public int getOccurencesInObjectPosition(NamedClass cls){
+		String countQuery = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?o a <%s>. ?s ?p ?o.}", cls.getName());
+		ResultSet counts = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, countQuery));
+		return counts.next().getLiteral("cnt").getInt();
+	}
+	
+	/**
+	 * Returns the number of triples with the given property as predicate.
+	 * 
+	 * @param prop the property
+	 * @return the value of the count query
+	 */
+	public int getOccurences(Property prop){
+		String countQuery = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName());
+		ResultSet counts = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, countQuery));
+		return counts.next().getLiteral("cnt").getInt();
+	}
+	
+	/**
+	 * Returns the number of triples in which an instance of the given class occurs as
+	 * subject or as object.
+	 * See also {@link SPARQLEndpointMetrics#getPopularity(NamedClass)}.
+	 * 
+	 * @param cls the class
+	 * @return the value of the count query
+	 */
+	public int getOccurences(NamedClass cls){
+		String countQuery = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?s a <%s>.{?s ?p1 ?o1.} UNION {?o2 ?p2 ?s} }", cls.getName());
+		ResultSet counts = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, countQuery));
+		return counts.next().getLiteral("cnt").getInt();
+	}
+	
+	/**
+	 * Returns the number of instances of the given class, i.e. the number of resources
+	 * that have an rdf:type statement for the class.
+	 * 
+	 * @param cls the class
+	 * @return the number of instances
+	 */
+	public int getPopularity(NamedClass cls){
+		//count the type statements only; joining with all triples of ?s (as done before) counts
+		//triple occurrences of the instances and not the instances themselves
+		String query  = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?s a <%s>.}", cls.getName());
+		ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));
+		int instanceCnt = rs.next().getLiteral("cnt").getInt();
+		return instanceCnt;
+	}
+	
+	/**
+	 * Returns the total number of triples in the endpoint. For now this is the fixed number
+	 * 275494030 (taken from DBpedia Live, 18 July, 14:00) instead of the result of a
+	 * <code>SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}</code> query.
+	 * 
+	 * @return the fixed triple count
+	 */
+	public int getTotalTripleCount(){
+		final int fixedTripleCount = 275494030;
+		return fixedTripleCount;
+	}
+	
+	/**
+	 * Computes the goodness of the triple pattern as
+	 * PMI(subject, predicate) + PMI(predicate, object) + 2 * PMI(subject, object).
+	 * 
+	 * @param subject the class in subject position
+	 * @param predicate the property
+	 * @param object the class in object position
+	 * @return the goodness value
+	 */
+	public double getGoodness(NamedClass subject, ObjectProperty predicate, NamedClass object){
+		double subjectPredicate = getDirectedPMI(subject, predicate);
+		double predicateObject = getDirectedPMI(predicate, object);
+		double subjectObject = getPMI(subject, object);
+		
+		return subjectPredicate + predicateObject + 2*subjectObject;
+	}
+	
+	/**
+	 * Computes the goodness of the triple pattern for an individual in subject position:
+	 * the highest value of PMI(type, predicate) + PMI(predicate, object) + 2 * PMI(type, object)
+	 * over all types the reasoner returns for the subject.
+	 * 
+	 * @param subject the individual in subject position
+	 * @param predicate the property
+	 * @param object the class in object position
+	 * @return the highest goodness over the types of the subject; if the subject has no types,
+	 *         {@link Double#MIN_VALUE} is returned as before
+	 */
+	public double getGoodness(Individual subject, ObjectProperty predicate, NamedClass object){
+		//this is independent of the subject types
+		double pmi_preciate_object = getDirectedPMI(predicate, object);
+		
+		//get all asserted classes of subject and get the highest value
+		//TODO inference
+		Set<NamedClass> types = reasoner.getTypes(subject);
+		if(types.isEmpty()){
+			//kept for backward compatibility with the previous result for untyped individuals
+			return Double.MIN_VALUE;
+		}
+		//Double.MIN_VALUE is the smallest POSITIVE double, so it must not be the start value
+		//of a maximum search: PMI values can be negative and would never be selected
+		double goodness = Double.NEGATIVE_INFINITY;
+		for(NamedClass type : types){
+			double pmi_subject_predicate = getDirectedPMI(type, predicate);
+			double pmi_subject_object = getPMI(type, object);
+			double tmpGoodness = pmi_subject_predicate + pmi_preciate_object + 2*pmi_subject_object;
+			if(tmpGoodness >= goodness){
+				goodness = tmpGoodness;
+			}
+		}
+		return goodness;
+	}
+	
+	/**
+	 * Computes the goodness of the triple pattern for an individual in object position:
+	 * the highest value of PMI(subject, predicate) + PMI(predicate, type) + 2 * PMI(subject, type)
+	 * over all types the reasoner returns for the object.
+	 * 
+	 * @param subject the class in subject position
+	 * @param predicate the property
+	 * @param object the individual in object position
+	 * @return the highest goodness over the types of the object; if the object has no types,
+	 *         {@link Double#MIN_VALUE} is returned as before
+	 */
+	public double getGoodness(NamedClass subject, ObjectProperty predicate, Individual object){
+		//this is independent of the object types
+		double pmi_subject_predicate = getDirectedPMI(subject, predicate);
+		
+		//get all asserted classes of object and get the highest value
+		//TODO inference
+		Set<NamedClass> types = reasoner.getTypes(object);
+		if(types.isEmpty()){
+			//kept for backward compatibility with the previous result for untyped individuals
+			return Double.MIN_VALUE;
+		}
+		//Double.MIN_VALUE is the smallest POSITIVE double, so it must not be the start value
+		//of a maximum search: PMI values can be negative and would never be selected
+		double goodness = Double.NEGATIVE_INFINITY;
+		for(NamedClass type : types){
+			double pmi_preciate_object = getDirectedPMI(predicate, type);
+			double pmi_subject_object = getPMI(subject, type);
+			double tmpGoodness = pmi_subject_predicate + pmi_preciate_object + 2*pmi_subject_object;
+			if(tmpGoodness >= goodness){
+				goodness = tmpGoodness;
+			}
+		}
+		return goodness;
+	}
+	
+	/**
+	 * Computes the goodness of the triple pattern like
+	 * {@link #getGoodness(NamedClass, ObjectProperty, NamedClass)}, but weights each PMI
+	 * value with the similarity values of the two entities it relates.
+	 * 
+	 * @param subject the class in subject position
+	 * @param predicate the property
+	 * @param object the class in object position
+	 * @param subjectSim the similarity value of the subject
+	 * @param predicateSim the similarity value of the predicate
+	 * @param objectSim the similarity value of the object
+	 * @return the weighted goodness value
+	 */
+	public double getGoodnessConsideringSimilarity(NamedClass subject, ObjectProperty predicate, NamedClass object, 
+			double subjectSim, double predicateSim, double objectSim){
+		
+		double subjectPredicate = getDirectedPMI(subject, predicate);
+		double predicateObject = getDirectedPMI(predicate, object);
+		double subjectObject = getPMI(subject, object);
+		
+		double subjectPredicatePart = subjectPredicate * subjectSim * predicateSim;
+		double predicateObjectPart = predicateObject * objectSim * predicateSim;
+		double subjectObjectPart = 2 * subjectObject * subjectSim * objectSim;
+		
+		return subjectPredicatePart + predicateObjectPart + subjectObjectPart;
+	}
+	
+	/**
+	 * Runs the precomputation with an empty collection of namespaces.
+	 */
+	public void precompute(){
+		Set<String> noNamespaces = Collections.<String>emptySet();
+		precompute(noNamespaces);
+	}
+	
+	public void precompute(Collection<String> namespaces){
+		log.info("Precomputing...");
+		long startTime = System.currentTimeMillis();
+		SortedSet<NamedClass> classes = new TreeSet<NamedClass>();
+		String query = "SELECT DISTINCT ?class WHERE {?s a ...
 
[truncated message content]

[DL-Learner SVN] SF.net SVN: dl-learner:[3804] trunk/components-ext/src/main/java/org/ dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-07-25 10:34:00

Revision: 3804
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3804&view=rev
Author:   lorenz_b
Date:     2012-07-25 10:33:43 +0000 (Wed, 25 Jul 2012)
Log Message:
-----------
Changed default log level to DEBUG.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-07-23 14:25:40 UTC (rev 3803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-07-25 10:33:43 UTC (rev 3804)
@@ -65,7 +65,6 @@
 import org.dllearner.core.LearningProblem;
 import org.dllearner.core.SparqlQueryLearningAlgorithm;
 import org.dllearner.core.owl.Description;
-import org.dllearner.core.owl.Intersection;
 import org.dllearner.core.owl.NamedClass;
 import org.dllearner.core.owl.ObjectProperty;
 import org.dllearner.core.owl.Thing;
@@ -76,7 +75,6 @@
 import org.dllearner.reasoning.SPARQLReasoner;
 import org.ini4j.InvalidFileFormatException;
 import org.ini4j.Options;
-import org.semanticweb.HermiT.Configuration.DirectBlockingType;
 
 import com.hp.hpl.jena.query.QueryExecutionFactory;
 import com.hp.hpl.jena.query.QueryFactory;
@@ -84,7 +82,6 @@
 import com.hp.hpl.jena.query.ResultSet;
 import com.hp.hpl.jena.query.Syntax;
 import com.hp.hpl.jena.rdf.model.Model;
-import com.hp.hpl.jena.shared.UnknownPropertyException;
 import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;
 import com.hp.hpl.jena.vocabulary.RDFS;
 import com.jamonapi.Monitor;
@@ -92,7 +89,6 @@
 
 public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{
 	
-	
 	enum Mode{
 		BEST_QUERY, BEST_NON_EMPTY_QUERY
 	}
@@ -383,7 +379,7 @@
 	public void learnSPARQLQueries() throws NoTemplateFoundException{
 		reset();
 		//generate SPARQL query templates
-		logger.info("Generating SPARQL query templates...");
+		logger.debug("Generating SPARQL query templates...");
 		templateMon.start();
 		if(multiThreaded){
 			templates = templateGenerator.buildTemplatesMultiThreaded(question);
@@ -391,15 +387,15 @@
 			templates = templateGenerator.buildTemplates(question);
 		}
 		templateMon.stop();
-		logger.info("Done in " + templateMon.getLastValue() + "ms.");
+		logger.debug("Done in " + templateMon.getLastValue() + "ms.");
 		relevantKeywords.addAll(templateGenerator.getUnknownWords());
 		if(templates.isEmpty()){
 			throw new NoTemplateFoundException();
 		
 		}
-		logger.info("Templates:");
+		logger.debug("Templates:");
 		for(Template t : templates){
-			logger.info(t);
+			logger.debug(t);
 		}
 		
 		//get the weighted query candidates
@@ -407,7 +403,7 @@
 		sparqlQueryCandidates = new ArrayList<WeightedQuery>();
 		int i = 0;
 		for(WeightedQuery wQ : generatedQueries){
-			System.out.println(wQ.explain());
+			logger.debug(wQ.explain());
 			sparqlQueryCandidates.add(wQ);
 			if(i == maxTestedQueries){
 				break;
@@ -506,7 +502,7 @@
 	}
 	
 	private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){
-		logger.info("Generating SPARQL query candidates...");
+		logger.debug("Generating SPARQL query candidates...");
 		
 		Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() {
 
@@ -527,7 +523,7 @@
 		Set<Allocation> allocations;
 		
 		for(Template t : templates){
-			logger.info("Processing template:\n" + t.toString());
+			logger.debug("Processing template:\n" + t.toString());
 			allocations = new TreeSet<Allocation>();
 			boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex(");
 			
@@ -541,9 +537,7 @@
 					Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot);
 					Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker);
 					list.add(submit);
-				} else {
-					System.out.println("CACHE HIT");
-				}
+				} 
 			}
 			
 			for (Future<Map<Slot, SortedSet<Allocation>>> future : list) {
@@ -582,7 +576,7 @@
 				}
 				allocations.addAll(tmp);
 			}*/
-			System.out.println("Time needed: " + (System.currentTimeMillis() - startTime) + "ms");
+			logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms");
 			
 			Set<WeightedQuery> queries = new HashSet<WeightedQuery>();
 			Query cleanQuery = t.getQuery();
@@ -794,10 +788,8 @@
 									List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar);
 									for(SPARQL_Triple typeTriple : typeTriples){
 										String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", "");
-										System.out.println(typeURI + "---" + resourceURI);
 										List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction);
 										for(Entry<String, Integer> property : mostFrequentProperties){
-											System.out.println(property);
 											wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey());
 											wQ.setScore(wQ.getScore() + 0.1);
 										}
@@ -859,7 +851,7 @@
 			}
 			template2Queries.put(t, qList);
 		}
-		logger.info("...done in ");
+		logger.debug("...done in ");
 		return allQueries;
 	}
 
@@ -993,14 +985,14 @@
 	}
 	
 	private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){
-		logger.info("Testing candidate SPARQL queries on remote endpoint...");
+		logger.debug("Testing candidate SPARQL queries on remote endpoint...");
 		sparqlMon.start();
 		if(queryType == SPARQL_QueryType.SELECT){
 			for(WeightedQuery query : queries){
 				learnedPos++;
 				List<String> results;
 				try {
-					logger.info("Testing query:\n" + query);
+					logger.debug("Testing query:\n" + query);
 					com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ);
 					q.setLimit(1);
 					ResultSet rs = executeSelect(q.toString());
@@ -1033,7 +1025,7 @@
 								return;
 							}
 						}
-						logger.info("Result: " + results);
+						logger.debug("Result: " + results);
 					}
 				} catch (Exception e) {
 					e.printStackTrace();
@@ -1043,7 +1035,7 @@
 		} else if(queryType == SPARQL_QueryType.ASK){
 			for(WeightedQuery query : queries){
 				learnedPos++;
-				logger.info("Testing query:\n" + query);
+				logger.debug("Testing query:\n" + query);
 				boolean result = executeAskQuery(query.getQuery().toString());
 				learnedSPARQLQueries.add(query);
 //				if(stopIfQueryResultNotEmpty && result){
@@ -1052,12 +1044,12 @@
 				if(stopIfQueryResultNotEmpty){
 					return;
 				}
-				logger.info("Result: " + result);
+				logger.debug("Result: " + result);
 			}
 		}
 		
 		sparqlMon.stop();
-		logger.info("Done in " + sparqlMon.getLastValue() + "ms.");
+		logger.debug("Done in " + sparqlMon.getLastValue() + "ms.");
 	}
 	
 	private boolean executeAskQuery(String query){
@@ -1154,7 +1146,7 @@
 		}
 		
 		private SortedSet<Allocation> computeAllocations(Slot slot){
-			logger.info("Computing allocations for slot: " + slot);
+			logger.debug("Computing allocations for slot: " + slot);
 			SortedSet<Allocation> allocations = new TreeSet<Allocation>();
 			
 			Index index = getIndexBySlotType(slot);
@@ -1210,7 +1202,7 @@
 			normProminenceValues(allocations);
 			
 			computeScore(allocations);
-			logger.info("Found " + allocations.size() + " allocations for slot " + slot);
+			logger.debug("Found " + allocations.size() + " allocations for slot " + slot);
 			return new TreeSet<Allocation>(allocations);
 		}
 		

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java	2012-07-23 14:25:40 UTC (rev 3803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java	2012-07-25 10:33:43 UTC (rev 3804)
@@ -145,7 +145,7 @@
 			internalParseMultiThreaded(parseGrammar.getDPInitTrees(), n);
 		}
 
-		if (VERBOSE) logger.trace("Constructed " + derivationTrees.size() + " derivation trees.\n");
+		if (VERBOSE) logger.debug("Constructed " + derivationTrees.size() + " derivation trees.\n");
 		return derivationTrees;
 
 	}

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java	2012-07-23 14:25:40 UTC (rev 3803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java	2012-07-25 10:33:43 UTC (rev 3804)
@@ -102,67 +102,67 @@
 		
 		m = compAdjPattern.matcher(condensedstring); 
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJR");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/JJR");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJR");
 		}
 //		m = superAdjPattern.matcher(condensedstring); 
 //		while (m.find()) {
-//			logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJS");
+//			logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/JJS");
 //			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJS");
 //		}
 		m = howManyPattern.matcher(condensedstring); 
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by how/WLEX many/WLEX");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by how/WLEX many/WLEX");
 			condensedstring = condensedstring.replaceFirst(m.group(1),"how/WLEX many/WLEX");
 		}
 		m = howAdjPattern.matcher(condensedstring); 
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJH");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/JJH");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJH");
 		}
 		m = thesameasPattern.matcher(condensedstring); 
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/NNSAME");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/NNSAME");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/NNSAME");
 		}
 		m = nprepPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/NPREP");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/NPREP");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/NPREP");
 		}
 		m = didPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by \"\"");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by \"\"");
 			condensedstring = condensedstring.replaceFirst(m.group(1),"");
 		}
 		m = prepfrontPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by \"\"");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by \"\"");
 			condensedstring = condensedstring.replaceFirst(m.group(1),"");
 		}
 		m = passivePattern1a.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(6)+"/PASSIVE");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(6)+"/PASSIVE");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6)+"/PASSIVE");
 		}
 		m = passivePattern1b.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(6)+m.group(7)+"/PASSIVE");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(6)+m.group(7)+"/PASSIVE");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6) + m.group(7)+"/PASSIVE");
 		}
 		m = passivePattern2a.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE");
 		}
 		m = pseudopassPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/VPREP");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(7)+"/VPREP");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/VPREP");
 		}
 		m = pseudopwhPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+m.group(8)+"/VPREP");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(7)+m.group(8)+"/VPREP");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+" "+m.group(8)+"/VPREP");
 		}
 		m = saveIsThere.matcher(condensedstring);
@@ -171,64 +171,64 @@
 		}
 		m = passivePattern2b.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE");
 		}
 		m = passpartPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/PASSPART");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/PASSPART");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/PASSPART");
 		}
 		m = vpassPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASS");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASS");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASS");
 		}
 		m = vpassinPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASSIN");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASSIN");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASSIN");
 		}
 		m = gerundinPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/GERUNDIN");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/GERUNDIN");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/GERUNDIN");
 		}
 		m = vprepPattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPREP");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/VPREP");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPREP");
 		}
 		m = whenPattern.matcher(condensedstring);
 		while (m.find()) {
                     if (m.group(4).equals("VPREP")) {
-                        if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHENPREP");
+                        if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHENPREP");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHENPREP");
                     } else {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHEN");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHEN");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHEN");
                     }
 		}
 		m = wherePattern.matcher(condensedstring);
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHERE");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHERE");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHERE");
 		}
 		m = adjsPattern.matcher(condensedstring); 
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJ");
 		}
 		m = adjnounPattern.matcher(condensedstring); 
 		while (m.find()) {
 //                    if (!m.group(4).startsWith("NNP")) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN ");
 //                    }
 		}
 		m = adjnprepPattern.matcher(condensedstring); 
 		while (m.find()) {
-			if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NPREP");
+			if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NPREP");
 			condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NPREP");
 		}
 		
@@ -291,7 +291,7 @@
 		List<String> namedentities = ner.getNamedEntitites(untagged);
 		List<String> usefulnamedentities = new ArrayList<String>();
 		
-		if (VERBOSE) logger.trace("Proposed NEs: " + namedentities);
+		if (VERBOSE) logger.debug("Proposed NEs: " + namedentities);
 		
 		// keep only longest matches (e.g. keep 'World of Warcraft' and forget about 'Warcraft') 
 		// containing at least one upper case letter (in order to filter out errors like 'software')
@@ -309,7 +309,7 @@
 			}
 		}
 		
-		if (VERBOSE) logger.trace("Accepted NEs: " + usefulnamedentities);
+		if (VERBOSE) logger.debug("Accepted NEs: " + usefulnamedentities);
 		
 		// replace POS tags accordingly
 		for (String ne : usefulnamedentities) {

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-07-23 14:25:40 UTC (rev 3803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-07-25 10:33:43 UTC (rev 3804)
@@ -164,7 +164,7 @@
 		if (UNTAGGED_INPUT) {		
 			s = pp.normalize(s);
 			tagged = tagger.tag(s);
-			if (VERBOSE) logger.trace("Tagged input: " + tagged);
+			logger.debug("Tagged input: " + tagged);
 		}
 		else {
 			tagged = s;
@@ -178,7 +178,7 @@
 		else newtagged = pp.condenseNominals(tagged);
 		
 		newtagged = pp.condense(newtagged);
-		if (VERBOSE) logger.trace("Preprocessed: " + newtagged); 
+		logger.debug("Preprocessed: " + newtagged); 
         
         parser.parse(newtagged,g);
         
@@ -219,10 +219,10 @@
                 	if (!containsModuloRenaming(drses,drs)) {
 //                    	// DEBUG
                 		if (VERBOSE) {
-	                		System.out.println(">>> DUDE:\n" + dude.toString());
-	                		System.out.println("\n>>> DRS:\n"+ drs.toString());
+	                		logger.debug(">>> DUDE:\n" + dude.toString());
+	                		logger.debug("\n>>> DRS:\n"+ drs.toString());
 	                		for (Slot sl : slots) {
-	                			System.out.println(sl.toString());
+	                			logger.debug(sl.toString());
 	                		}
                 		}
 //                		//
@@ -310,7 +310,7 @@
 		if (UNTAGGED_INPUT) {		
 			s = pp.normalize(s);
 			tagged = tagger.tag(s);
-			if (VERBOSE) logger.trace("Tagged input: " + tagged);
+			logger.debug("Tagged input: " + tagged);
 		}
 		else {
 			tagged = s;
@@ -324,20 +324,20 @@
 		else newtagged = pp.condenseNominals(tagged);
 		
 		newtagged = pp.condense(newtagged);
-		if (VERBOSE) logger.trace("Preprocessed: " + newtagged); 
+		logger.debug("Preprocessed: " + newtagged); 
         
         parser.parseMultiThreaded(newtagged,g);
         
         if (parser.getDerivationTrees().isEmpty()) {
             parser.clear(g,parser.getTemps());
             clearAgain = false;
-            if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed.");
+            logger.error("[Templator.java] '" + s + "' could not be parsed.");
         }
         else {
         try {
         	parser.buildDerivedTreesMultiThreaded(g);
         } catch (ParseException e) {
-        	if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e);
+        	logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e);
         }
         }
 
@@ -372,13 +372,11 @@
                	
                	if (!containsModuloRenaming(drses,drs)) {
 //                   	// DEBUG
-               		if (VERBOSE) {
-	                		System.out.println(dude);
-	                		System.out.println(drs);
+               			logger.debug(dude);
+               			logger.debug(drs);
 	                		for (Slot sl : slots) {
-	                			System.out.println(sl.toString());
+	                			logger.debug(sl.toString());
 	                		}
-               		}
 //               		//
                		drses.add(drs);
                		
@@ -546,10 +544,10 @@
 	                	if (!containsModuloRenaming(drses,drs)) {
 //	                    	// DEBUG
 	                		if (VERBOSE) {
-		                		System.out.println(dude);
-		                		System.out.println(drs);
+	                			logger.debug(dude);
+	                			logger.debug(drs);
 		                		for (Slot sl : slots) {
-		                			System.out.println(sl.toString());
+		                			logger.debug(sl.toString());
 		                		}
 	                		}
 //	                		//

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3811] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-07-31 10:36:17

Revision: 3811
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3811&view=rev
Author:   lorenz_b
Date:     2012-07-31 10:36:11 +0000 (Tue, 31 Jul 2012)
Log Message:
-----------
Added synchronized POS tagger.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java

Added Paths:
-----------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java	2012-07-30 13:54:13 UTC (rev 3810)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java	2012-07-31 10:36:11 UTC (rev 3811)
@@ -630,7 +630,7 @@
 				}
 				
 			}
-			SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, cache);
+			SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, new ExtractionDBCache("/opt/tbsl/cache2"));
 			for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) {
 				WeightedQuery wQ = iterator.next();
 				Query q = wQ.getQuery();
@@ -1009,7 +1009,7 @@
 	 * @throws InvalidFileFormatException 
 	 */
 	public static void main(String[] args) throws Exception {
-		SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW();
+		SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
 		Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources");
 		Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes");
 		Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties");

Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java	                        (rev 0)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java	2012-07-31 10:36:11 UTC (rev 3811)
@@ -0,0 +1,10 @@
+package org.dllearner.algorithm.tbsl.nlp;
+
+public class SynchronizedStanfordPartOfSpeechTagger extends StanfordPartOfSpeechTagger {
+
+	@Override
+	public synchronized String tag(String sentence) {
+		return super.tag(sentence);
+	}
+
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

[DL-Learner SVN] SF.net SVN: dl-learner:[3820] trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl

From: <lor...@us...> - 2012-08-09 12:47:35

Revision: 3820
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3820&view=rev
Author:   lorenz_b
Date:     2012-08-09 12:47:29 +0000 (Thu, 09 Aug 2012)
Log Message:
-----------
Got rid of annoying NPE.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-08-09 10:46:16 UTC (rev 3819)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-08-09 12:47:29 UTC (rev 3820)
@@ -787,11 +787,11 @@
 									List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar);
 									for(SPARQL_Triple typeTriple : typeTriples){
 										String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", "");
-										List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction);
-										for(Entry<String, Integer> property : mostFrequentProperties){
-											wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey());
-											wQ.setScore(wQ.getScore() + 0.1);
-										}
+//										List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction);
+//										for(Entry<String, Integer> property : mostFrequentProperties){
+//											wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey());
+//											wQ.setScore(wQ.getScore() + 0.1);
+//										}
 									}
 									
 								}

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java	2012-08-09 10:46:16 UTC (rev 3819)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java	2012-08-09 12:47:29 UTC (rev 3820)
@@ -51,7 +51,8 @@
                     if (clashing != null && s.type.equals(clashing)) {
                         for (SPARQL_Triple triple : query.conditions) {
                             if (triple.property.toString().equals("?"+s.anchor)) {
-                                if (triple.value.toString().equals("?"+var)) return null;
+                                if (triple.value.toString().equals("?"+var))
+                                	return null;
                             }
                         }
                     }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-08-09 10:46:16 UTC (rev 3819)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java	2012-08-09 12:47:29 UTC (rev 3820)
@@ -382,8 +382,9 @@
                		
                		try {
                			Template temp = d2s.convert(drs,slots);
+               			temp = temp.checkandrefine();
                			if (temp == null) {continue;}
-                                       temp = temp.checkandrefine();
+                                       
                			
                			
        					if (USE_WORDNET) { // find WordNet synonyms

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

<< < 1 2 (Page 2 of 2)