From: <chr...@us...> - 2012-05-10 07:08:39
|
Revision: 3700 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3700&view=rev Author: christinaunger Date: 2012-05-10 07:08:30 +0000 (Thu, 10 May 2012) Log Message: ----------- [tbsl.exploration] repaired empty-property-template Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-05-09 15:34:39 UTC (rev 3699) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-05-10 07:08:30 UTC (rev 3700) @@ -502,7 +502,7 @@ //TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} TOKEN: {<A: (["a"-"z","A"-"Z","0"-"9"])+>} -TOKEN: {<B: (["a"-"z","A"-"Z","_",".","#","0"-"9"])+":"(["a"-"z","A"-"Z","_",".","#","0"-"9"])+>} +TOKEN: {<B: (["a"-"z","A"-"Z","_",".","#","0"-"9"])+":"(["a"-"z","A"-"Z","_",".","#","0"-"9"])+>} // oder eher: SLOT_([...])+ TOKEN: {<C: ["?","!"](["a"-"z","A"-"Z","0"-"9"])+>} Token dr() : { Token t; }{ (t=<A> | t=<C>) { return t; } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2012-05-09 15:34:39 UTC (rev 3699) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2012-05-10 07:08:30 UTC (rev 3700) @@ -337,11 +337,11 @@ slot = "SLOT_" + token + "/PROPERTY/"; String[] npAdjunct = {token, "(NP NP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + 
"(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + + "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + " ;; <x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; String[] vpAdjunct = {token, "(VP VP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))", - "<x,l1,t,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + + "<x,l1,t,[ l1:[ | SLOT_" + token + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + " ;; <x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; result.add(npAdjunct); result.add(vpAdjunct); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-16 11:22:01
|
Revision: 3754 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3754&view=rev Author: christinaunger Date: 2012-06-16 11:21:55 +0000 (Sat, 16 Jun 2012) Log Message: ----------- [tbsl] repaired resource slot problem Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-16 10:15:00 UTC (rev 3753) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-16 11:21:55 UTC (rev 3754) @@ -117,12 +117,12 @@ // System.out.println("--- referent: " + referent.toString()); // DEBUG for (Slot s : slots) { // System.out.println("--- slot: " + s.toString()); // DEBUG - if (s.getAnchor().equals(referent.getValue()) || s.getAnchor().equals(referent.toString())) { + if (s.getAnchor().equals(referent.getValue()) || s.getAnchor().equals(referent.toString())) { // System.out.println(" fits!"); // DEBUG - template.addSlot(s); - break; - } - } + template.addSlot(s); + break; + } + } } for (Slot s : slots) if (s.getAnchor().equals("SLOT_arg")) template.addSlot(s); @@ -410,16 +410,22 @@ if (firstIsURI || firstIsInt) { drs.replaceEqualRef(secondArg, firstArg, true); for (Slot s : slots) { - if (s.getAnchor().equals(secondArg.getValue())) { - s.setAnchor(firstArg.getValue()); - } + if (s.getAnchor().equals(secondArg.getValue())) + s.setAnchor(firstArg.getValue()); + if (s.getWords().contains(secondArg.getValue())) { + s.getWords().remove(secondArg.getValue()); + 
s.getWords().add(firstArg.getValue()); + } } } else if (secondIsURI || secondIsInt) { drs.replaceEqualRef(firstArg, secondArg, true); for (Slot s : slots) { - if (s.getAnchor().equals(firstArg.getValue())) { - s.setAnchor(secondArg.getValue()); - } + if (s.getAnchor().equals(firstArg.getValue())) + s.setAnchor(secondArg.getValue()); + if (s.getWords().contains(firstArg.getValue())) { + s.getWords().remove(firstArg.getValue()); + s.getWords().add(secondArg.getValue()); + } } } else { drs.replaceEqualRef(firstArg, secondArg, false); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-16 10:15:00 UTC (rev 3753) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-16 11:21:55 UTC (rev 3754) @@ -96,8 +96,8 @@ Pattern whenPattern = Pattern.compile("\\A(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); Pattern wherePattern = Pattern.compile("\\A(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); Pattern adjsPattern = Pattern.compile("((\\w+)/JJ.(\\w+)/JJ)"); - Pattern adjnnpPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NNP(S)?)"); - Pattern adjnounPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?)"); +// Pattern adjnnpPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NNP(S)?)"); + Pattern adjnounPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?(\\s|\\z))"); Pattern adjnprepPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NPREP)"); m = compAdjPattern.matcher(condensedstring); @@ -219,15 +219,12 @@ if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJ"); } - m = adjnnpPattern.matcher(condensedstring); - while (m.find()) { - 
if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NNP"); - condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NNP"); - } m = adjnounPattern.matcher(condensedstring); while (m.find()) { +// if (!m.group(4).startsWith("NNP")) { if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN"); +// } } m = adjnprepPattern.matcher(condensedstring); while (m.find()) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-16 10:15:00 UTC (rev 3753) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-16 11:21:55 UTC (rev 3754) @@ -33,11 +33,14 @@ public Template checkandrefine() { Set<Slot> argslots = new HashSet<Slot>(); - for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) { + for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) argslots.add(slot); + + for (Slot slot : argslots) { String var = slot.words.get(0); // check for clash (v=LITERAL && v=RESOURCE) - for (Slot s : argslots) { - if (s.words.get(0).equals(slot.words.get(0)) && !s.type.equals(slot.type)) + for (Slot s : slots) { + if ((s.words.get(0).equals(slot.words.get(0)) || s.anchor.equals(slot.words.get(0))) + && !s.type.equals(slot.type)) return null; } // check for clash (v=LITERAL && p(...,v)=OBJECTPROPERTY) || (v=RESOURCE && p(...,v)=DATATYPEPROPERTY) @@ -53,7 +56,6 @@ } } } - argslots.add(slot); } for (Slot slot : slots) { @@ -99,7 +101,14 @@ } // finally remove all argslots - slots.removeAll(argslots); +// slots.removeAll(argslots); // removes all (argslots + resource slots) +// for (Slot sl : argslots) slots.remove(sl); // removes resource 
slots + List<Slot> keep = new ArrayList<Slot>(); + for (Slot s : slots) { + if (!s.anchor.startsWith("SLOT_arg")) + keep.add(s); + } + slots = keep; return this; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-17 20:10:09
|
Revision: 3755 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3755&view=rev Author: lorenz_b Date: 2012-06-17 20:10:02 +0000 (Sun, 17 Jun 2012) Log Message: ----------- Started faster implementation of template generation process. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-16 11:21:55 UTC (rev 3754) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-17 20:10:02 UTC (rev 3755) @@ -9,6 +9,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -86,7 +87,8 @@ private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); + private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); + private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); private boolean useRemoteEndpointValidation; private boolean stopIfQueryResultNotEmpty; @@ -129,6 +131,11 @@ private String currentlyExecutedQuery; + private boolean dropZeroScoredQueries = true; + private boolean useManualMappingsIfExistOnly = true; + + private boolean multiThreaded = true; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, 
classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -280,16 +287,23 @@ template2Queries = new HashMap<Template, Collection<? extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); currentlyExecutedQuery = null; + +// templateMon.reset(); +// sparqlMon.reset(); } public void learnSPARQLQueries() throws NoTemplateFoundException{ reset(); //generate SPARQL query templates logger.info("Generating SPARQL query templates..."); - mon.start(); - templates = templateGenerator.buildTemplates(question); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); + templateMon.start(); + if(multiThreaded){ + templates = templateGenerator.buildTemplatesMultiThreaded(question); + } else { + templates = templateGenerator.buildTemplates(question); + } + templateMon.stop(); + logger.info("Done in " + templateMon.getLastValue() + "ms."); if(templates.isEmpty()){ throw new NoTemplateFoundException(); } @@ -672,8 +686,16 @@ } } - for(WeightedQuery q : queries){ - q.setScore(q.getScore()/t.getSlots().size()); + for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { + WeightedQuery wQ = iterator.next(); + if(dropZeroScoredQueries){ + if(wQ.getScore() == 0){ + iterator.remove(); + } + } else { + wQ.setScore(wQ.getScore()/t.getSlots().size()); + } + } allQueries.addAll(queries); List<Query> qList = new ArrayList<Query>(); @@ -752,7 +774,7 @@ private List<String> getLemmatizedWords(List<String> words){ logger.info("Pruning word list " + words + "..."); - mon.start(); +// mon.start(); List<String> pruned = new ArrayList<String>(); for(String word : words){ //currently only stem single words @@ -766,8 +788,8 @@ } } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); +// mon.stop(); +// logger.info("Done in " + mon.getLastValue() + "ms."); logger.info("Pruned list: " + pruned); return pruned; } @@ -806,46 +828,51 @@ private void validate(List<String> queries, SPARQL_QueryType queryType){ logger.info("Testing 
candidate SPARQL queries on remote endpoint..."); - mon.start(); + sparqlMon.start(); if(queryType == SPARQL_QueryType.SELECT){ for(String query : queries){ - logger.info("Testing query:\n" + query); - com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ); - q.setLimit(1); - ResultSet rs = executeSelect(q.toString());//executeSelect(query); - - List<String> results = new ArrayList<String>(); - QuerySolution qs; - String projectionVar; - while(rs.hasNext()){ - qs = rs.next(); - projectionVar = qs.varNames().next(); - if(qs.get(projectionVar).isLiteral()){ - results.add(qs.get(projectionVar).asLiteral().getLexicalForm()); - } else if(qs.get(projectionVar).isURIResource()){ - results.add(qs.get(projectionVar).asResource().getURI()); + List<String> results; + try { + logger.info("Testing query:\n" + query); + com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ); + q.setLimit(1); + ResultSet rs = executeSelect(q.toString());//executeSelect(query); + + results = new ArrayList<String>(); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + if(qs.get(projectionVar).isLiteral()){ + results.add(qs.get(projectionVar).asLiteral().getLexicalForm()); + } else if(qs.get(projectionVar).isURIResource()){ + results.add(qs.get(projectionVar).asResource().getURI()); + } + } - - } - if(!results.isEmpty()){ - try{ - int cnt = Integer.parseInt(results.get(0)); - if(cnt > 0){learnedPos = queries.indexOf(query); + if(!results.isEmpty()){ + try{ + int cnt = Integer.parseInt(results.get(0)); + if(cnt > 0){learnedPos = queries.indexOf(query); + learnedSPARQLQueries.put(query, results); + if(stopIfQueryResultNotEmpty){ + return; + } + } + } catch (NumberFormatException e){ learnedSPARQLQueries.put(query, results); + learnedPos = queries.indexOf(query); if(stopIfQueryResultNotEmpty){ return; } } - } catch (NumberFormatException e){ - learnedSPARQLQueries.put(query, results); 
- learnedPos = queries.indexOf(query); - if(stopIfQueryResultNotEmpty){ - return; - } + logger.info("Result: " + results); } - + } catch (Exception e) { + e.printStackTrace(); } - logger.info("Result: " + results); + } } else if(queryType == SPARQL_QueryType.ASK){ for(String query : queries){ @@ -862,8 +889,8 @@ } } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); + sparqlMon.stop(); + logger.info("Done in " + sparqlMon.getLastValue() + "ms."); } private boolean executeAskQuery(String query){ @@ -976,15 +1003,19 @@ rs.add(mappingIndex.getResourcesWithScores(word)); } } - if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 50)); - } else { - if(slot.getSlotType() == SlotType.CLASS){ - word = PlingStemmer.stem(word); + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 50)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + rs.add(index.getResourcesWithScores(word, 20)); } - rs.add(index.getResourcesWithScores(word, 20)); } + for(IndexResultItem item : rs.getItems()){ double similarity = Similarity.getSimilarity(word, item.getLabel()); // //get the labels of the redirects and compute the highest similarity @@ -1012,6 +1043,10 @@ } + public String getTaggedInput(){ + return templateGenerator.getTaggedInput(); + } + private boolean isDatatypeProperty(String uri){ Boolean isDatatypeProperty = null; if(mappingIndex != null){ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-06-16 11:21:55 UTC (rev 3754) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-06-17 20:10:02 UTC (rev 3755) @@ -2,6 +2,8 @@ import java.util.ArrayList; import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.ltag.data.TreeNode; @@ -9,6 +11,9 @@ import org.dllearner.algorithm.tbsl.sem.dudes.reader.ParseException; import org.dllearner.algorithm.tbsl.sem.util.Pair; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + public class Parser { private static final Logger logger = Logger.getLogger(Parser.class); @@ -91,7 +96,54 @@ return derivationTrees; } + + public List<DerivationTree> parseMultiThreaded(String taggeduserinput, LTAGLexicon grammar) { + derivationTrees.clear(); + derivedTrees.clear(); + dudes.clear(); + temporaryEntries.clear(); + + if (!VERBOSE) GrammarFilter.VERBOSE = false; + + /* + * create a local copy of the grammar with own treeIDs. This is + * necessary since if an input string contains the same token multiple + * times, a tree for each token is added. Both trees need to have + * different treeIDs for the parser to work correctly. 
+ */ + parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); + + String inputNoTags = ""; + for (String s : taggeduserinput.split(" ")) { + inputNoTags += s.substring(0,s.indexOf("/")) + " "; + } + + this.input = ("# ".concat(inputNoTags.replaceAll("'","").trim())).split(" "); + int n = this.input.length; + + + if (SHOW_GRAMMAR) { + logger.trace(parseGrammar); + } + if (SHOW_LEXICAL_COVERAGE) { + logger.trace("# OF TREES FOUND: " + parseGrammar.size()); + logger.trace("# OF INPUT TOKENS: " + n); + } + + List<Pair<TreeNode, Short>> initTrees = parseGrammar.getInitTrees(); + + internalParseMultiThreaded(initTrees, n); + + if (USE_DPS_AS_INITTREES && derivationTrees.isEmpty()) { + internalParseMultiThreaded(parseGrammar.getDPInitTrees(), n); + } + + if (VERBOSE) logger.trace("Constructed " + derivationTrees.size() + " derivation trees.\n"); + return derivationTrees; + + } + private void internalParse(List<Pair<TreeNode, Short>> initTrees, int n) { TREELOOP: for (int k = 0; k < initTrees.size(); k++) { @@ -211,6 +263,23 @@ } } + + private void internalParseMultiThreaded(List<Pair<TreeNode, Short>> initTrees, int n) { + Monitor parseMon = MonitorFactory.getTimeMonitor("parse"); + ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + parseMon.start(); + for (int k = 0; k < initTrees.size(); k++) { + Pair<TreeNode, Short> pair = initTrees.get(k); + TreeNode tree = pair.getFirst(); + short tid = pair.getSecond(); + threadPool.execute(new TreeProcessor(tree, tid, n)); + } + threadPool.shutdown(); + while(!threadPool.isTerminated()){ + + } + parseMon.start(); + } private List<List<ParseState>> makeStateSets() { @@ -298,7 +367,21 @@ return derivedTrees; } + + public List<TreeNode> buildDerivedTreesMultiThreaded(LTAGLexicon G) throws ParseException { + ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + for (DerivationTree dTree : 
derivationTrees) { + threadPool.execute(new DerivationTreeProcessor(dTree, G)); + } + threadPool.shutdown(); + while(!threadPool.isTerminated()){ + + } + return derivedTrees; + + } + /** * get List of Dudes parallely constructed by Parser.buildDerivedTrees() */ @@ -342,5 +425,169 @@ grammar.clear(temps); } + + class TreeProcessor implements Runnable{ + + private TreeNode tree; + private short tid; + private int n; + + public TreeProcessor(TreeNode tree, short tid, int n) { + this.tree = tree; + this.tid = tid; + this.n = n; + } + + @Override + public void run() { + List<List<ParseState>> stateSets = makeStateSets(); + + ParseState start = new ParseState(tree, tid); + // the inittree is already used + start.getUsedTrees().add(tid); + + stateSets.get(0).add(start); + boolean skip = false; + for (int i = 0; i < n; i++) { + + if (i > 0) { + stateSets.get(i - 1).clear(); + if (USE_LESS_MEMORY) { + System.gc(); + } + } + + List<ParseState> localStateSet = new ArrayList<ParseState>( + stateSets.get(i)); + List<ParseState> localStateSet2 = new ArrayList<ParseState>(); + + stateSets.get(i).clear(); + + while (localStateSet.size() > 0) { + + for (int j = 0; j < localStateSet.size(); j++) { + ParseState state = localStateSet.get(j); + + List<ParseState> newStates; + + OPLOOP: for (Class<?> c : operations) { + + try { + + ParserOperation op = (ParserOperation) c + .newInstance(); + + newStates = (op.go(i, state, input, + parseGrammar)); + + if (!newStates.isEmpty()) { + + for (ParseState newState : newStates) { + if (newState.i.equals(i)) { + localStateSet2.add(newState); + } + + if ((op instanceof Scanner) + || (newState.isEndState() && newState.i == n - 1)) { + stateSets.get(newState.i).add( + newState); + } + } + + op = null; + break OPLOOP; + + } + + } catch (InstantiationException e) { + e.printStackTrace(); + + } catch (IllegalAccessException e) { + e.printStackTrace(); + + } + + } + + } + + localStateSet = null; + localStateSet = new 
ArrayList<ParseState>(localStateSet2); + localStateSet2 = new ArrayList<ParseState>(); + + } + + localStateSet = null; + localStateSet2 = null; + + /* + * if the parser could not scan the next input token this run / + * initial tree is rejected + */ + if (i < n - 1 && stateSets.get(i + 1).isEmpty()) { + + stateSets.get(i).clear(); + skip = true; + break; + + } + + } + + if(!skip){ + for (ParseState state : stateSets.get(n - 1)) { + + +// if (state.isEndState() && state.t.equals(tree)) { + if (state.isEndState()) { + if (state.t.equals(tree)) { + + derivationTrees.add(createDerivationTree(state, + parseGrammar)); + + } + } + + } + } + + + } + + } + + class DerivationTreeProcessor implements Runnable{ + + private DerivationTree dTree; + private LTAGLexicon lexicon; + + public DerivationTreeProcessor(DerivationTree dTree, LTAGLexicon lexicon) { + this.dTree = dTree; + this.lexicon = lexicon; + } + + @Override + public void run() { + try { + List<Pair<TreeNode, Dude>> pairs = DerivedTree.build(dTree, parseGrammar, lexicon, CONSTRUCT_SEMANTICS); + + for (Pair<TreeNode,Dude> pair : pairs) { + TreeNode x = pair.getFirst(); + Dude dude = pair.getSecond(); + + if (!derivedTrees.contains(x) || !dudes.contains(dude)) { + derivedTrees.add(x); + dudes.add(dude); + } + + } + } catch (ParseException e) { + e.printStackTrace(); + } + + } + + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-16 11:21:55 UTC (rev 3754) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-17 20:10:02 UTC (rev 3755) @@ -6,11 +6,12 @@ import java.util.Hashtable; import java.util.List; import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import net.didion.jwnl.data.POS; import 
org.apache.log4j.Logger; - import org.dllearner.algorithm.tbsl.converter.DRS2SPARQL_Converter; import org.dllearner.algorithm.tbsl.converter.DUDE2UDRS_Converter; import org.dllearner.algorithm.tbsl.ltag.parser.LTAGLexicon; @@ -58,6 +59,11 @@ boolean USE_WORDNET = true; boolean VERBOSE = true; + private String taggedInput; + + private Set<Template> templates; + private Set<DRS> drses; + public Templator() { this(new StanfordPartOfSpeechTagger(), new WordNet()); } @@ -141,7 +147,7 @@ tagged = s; s = extractSentence(tagged); } - + taggedInput = tagged; String newtagged; if (USE_NER) { newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); @@ -244,9 +250,6 @@ newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); } } - if(newwords.isEmpty()){ - - } if (newwords.isEmpty()) { newwords.add(slot.getWords().get(0)); } @@ -271,11 +274,171 @@ if (clearAgain) { p.clear(g,p.getTemps()); } - System.gc(); +// System.gc(); return templates; } + public Set<Template> buildTemplatesMultiThreaded(String s) { + + boolean clearAgain = true; + + String tagged; + if (UNTAGGED_INPUT) { + s = pp.normalize(s); + tagged = tagger.tag(s); + if (VERBOSE) logger.trace("Tagged input: " + tagged); + } + else { + tagged = s; + s = extractSentence(tagged); + } + taggedInput = tagged; + String newtagged; + if (USE_NER) { + newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); + } + else newtagged = pp.condenseNominals(tagged); + + newtagged = pp.condense(newtagged); + if (VERBOSE) logger.trace("Preprocessed: " + newtagged); + + p.parseMultiThreaded(newtagged,g); + + if (p.getDerivationTrees().isEmpty()) { + p.clear(g,p.getTemps()); + clearAgain = false; + if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); + } + else { + try { + p.buildDerivedTreesMultiThreaded(g); + } catch (ParseException e) { + if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + } + } + + // build pairs <String,POStag> from tagged + Hashtable<String,String> 
postable = new Hashtable<String,String>(); + for (String st : newtagged.split(" ")) { + postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; + } + // + + drses = new HashSet<DRS>(); + templates = new HashSet<Template>(); + +// ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); +// for (Dude dude : p.getDudes()) { +// threadPool.execute(new DudeProcessor(dude, postable)); +// } +// threadPool.shutdown(); +// while(!threadPool.isTerminated()){} + + for (Dude dude : p.getDudes()) { + + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { +// // DEBUG + if (VERBOSE) { + System.out.println(dude); + System.out.println(drs); + for (Slot sl : slots) { + System.out.println(sl.toString()); + } + } +// // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); + if (temp == null) { + continue; + } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if (wordnetpos != null && 
!slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + + } + } + + + if (clearAgain) { + p.clear(g,p.getTemps()); + } +// System.gc(); + + return templates; + } + + public String getTaggedInput() { + return taggedInput; + } + private List<String> getLemmatizedWords(List<String> words){ List<String> stemmed = new ArrayList<String>(); for(String word : words){ @@ -330,5 +493,107 @@ return taggedSentence; } + + class DudeProcessor implements Runnable{ + + private Dude dude; + private Hashtable<String,String> postable; + + public DudeProcessor(Dude dude, Hashtable<String,String> postable) { + this.dude = dude; + this.postable = postable; + } + @Override + public void run() { + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { +// // DEBUG + if (VERBOSE) { + System.out.println(dude); + System.out.println(drs); + for (Slot sl : slots) { + System.out.println(sl.toString()); + } + } +// // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); + if (temp == null) { + continue; + } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = 
slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + } + } + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-18 13:56:05
|
Revision: 3761 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3761&view=rev Author: christinaunger Date: 2012-06-18 13:55:56 +0000 (Mon, 18 Jun 2012) Log Message: ----------- [tbsl] re-arrange regex parts in the correct order (if the user says "gas central heating", he's getting "gas central heating"...) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-18 12:51:44 UTC (rev 3760) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-18 13:55:56 UTC (rev 3761) @@ -29,6 +29,7 @@ private boolean silent = true; // suppresses console output private boolean oxford = true; + private String inputstring = null; List<Slot> slots; Template template; List<Integer> usedInts; @@ -44,6 +45,9 @@ usedInts = new ArrayList<Integer>(); } + public void setInputString(String s) { + inputstring = s; + } public void setSlots(List<Slot> ls) { slots = ls; } @@ -457,7 +461,7 @@ String var; String newvar; - String regex = ""; + List<String> regexs = new ArrayList<String>(); String[] forbidden = {"regextoken","regex","count","minimum","maximum","greater","less","greaterorequal","lessorequal","equal","sum","location","description"}; Set<Simple_DRS_Condition> used = new HashSet<Simple_DRS_Condition>(); @@ -473,7 +477,9 @@ } } if (takeit) { - regex += cond.getPredicate().replace("SLOT","").replaceAll("_"," "); + for (String s : cond.getPredicate().replace("SLOT","").replaceAll("_"," ").trim().split(" ")) { + regexs.add(s); + } used.add(cond); } else if 
(!cond.getPredicate().equals("regextoken")) { @@ -482,9 +488,9 @@ } } } - if (!regex.isEmpty()) { + if (!regexs.isEmpty()) { c.getArguments().remove(1); - c.getArguments().add(new DiscourseReferent("'"+regex.trim()+"'")); + c.getArguments().add(new DiscourseReferent("'"+orderedRegex(regexs)+"'")); c.setPredicate("regex"); } else { used.add(c); } // TODO should not happen! @@ -503,7 +509,7 @@ for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { String d = ""; String d2 = ""; - String newregex = ""; + List<String> regextokens = new ArrayList<String>(); if (c.getPredicate().equals("SLOT_description")) { d = c.getArguments().get(0).getValue(); d2 = c.getArguments().get(1).getValue(); @@ -519,14 +525,16 @@ for (Simple_DRS_Condition cond : drs.getAllSimpleConditions()) { if (cond.getPredicate().equals("regex") && (cond.getArguments().get(0).getValue().equals(d) || cond.getArguments().get(0).getValue().equals(d2))) { - newregex += cond.getArguments().get(1).getValue().replaceAll("'","").replaceAll("_"," ").trim() + " "; + for (String s : cond.getArguments().get(1).getValue().replaceAll("'","").replaceAll("_"," ").trim().split(" ")) { + regextokens.add(s); + } oldconds.add(cond); } } for (Simple_DRS_Condition cond : oldconds) drs.removeCondition(cond); List<DiscourseReferent> newrefs = new ArrayList<DiscourseReferent>(); newrefs.add(new DiscourseReferent(d)); - newrefs.add(new DiscourseReferent("'"+newregex.trim()+"'")); + newrefs.add(new DiscourseReferent("'"+orderedRegex(regextokens)+"'")); drs.addCondition(new Simple_DRS_Condition("regex",newrefs)); break; } @@ -605,13 +613,37 @@ return false; // TODO } - private int createFresh() { + private int createFresh() { - int fresh = 0; - for (int i = 0; usedInts.contains(i); i++) { - fresh = i+1 ; - } - usedInts.add(fresh); - return fresh; + int fresh = 0; + for (int i = 0; usedInts.contains(i); i++) { + fresh = i+1 ; } + usedInts.add(fresh); + return fresh; + } + + private String orderedRegex(List<String> 
regextokens) { + + String newregex = ""; + if (inputstring != null) { + String[] inputparts = inputstring.split(" "); + TreeMap<Integer,String> regexparts = new TreeMap<Integer,String>(); + for (String s : regextokens) { + for (int i = 0; i < inputparts.length; i++) { + if (inputparts[i].matches(s+"(/\\w+)?")) { + regexparts.put(i,s); + break; + } + } + } + for (int n : regexparts.descendingKeySet()) { + newregex = regexparts.get(n) + " " + newregex; + } + } + else for (String s : regextokens) newregex += s + " "; + + return newregex.trim(); + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-18 12:51:44 UTC (rev 3760) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-18 13:55:56 UTC (rev 3761) @@ -135,6 +135,8 @@ public Set<Template> buildTemplates(String s) { + d2s.setInputString(s); + boolean clearAgain = true; String tagged; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-23 07:37:09
|
Revision: 3763 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3763&view=rev Author: lorenz_b Date: 2012-06-23 07:37:03 +0000 (Sat, 23 Jun 2012) Log Message: ----------- Some extension for TBSL web UI. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Knowledgebase.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-20 12:48:53 UTC (rev 3762) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-23 07:37:03 UTC (rev 3763) @@ -45,6 +45,7 @@ import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; @@ -140,18 +141,46 @@ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getPropertyIndex(), knowledgebase.getClassIndex(), posTagger, wordNet, options); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){ + 
this(endpoint, index, new StanfordPartOfSpeechTagger()); + } + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); } + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){ + this(endpoint, index, posTagger, new WordNet(), new Options()); + } + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); } + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, WordNet wordNet){ + this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); } + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, index, index, index, 
posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.endpoint = endpoint; this.resourcesIndex = resourcesIndex; @@ -228,6 +257,13 @@ this.mappingIndex = mappingIndex; } + public void setKnowledgebase(Knowledgebase knowledgebase){ + this.endpoint = knowledgebase.getEndpoint(); + this.resourcesIndex = knowledgebase.getResourceIndex(); + this.classesIndex = knowledgebase.getPropertyIndex(); + this.propertiesIndex = knowledgebase.getClassIndex(); + } + /* * Only for Evaluation useful. */ @@ -689,7 +725,7 @@ for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { WeightedQuery wQ = iterator.next(); if(dropZeroScoredQueries){ - if(wQ.getScore() == 0){ + if(wQ.getScore() <= 0){ iterator.remove(); } } else { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2012-06-20 12:48:53 UTC (rev 3762) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2012-06-23 07:37:03 UTC (rev 3763) @@ -1,5 +1,6 @@ package org.dllearner.algorithm.tbsl.nlp; +import java.io.InputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -40,6 +41,15 @@ } } + public WordNet(InputStream propertiesStream) { + try { + JWNL.initialize(propertiesStream); + dict = Dictionary.getInstance(); + } catch (JWNLException e) { + e.printStackTrace(); + } + } + public List<String> getBestSynonyms(POS pos, String s) { List<String> synonyms = new ArrayList<String>(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-20 12:48:53 UTC (rev 3762) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-23 07:37:03 UTC (rev 3763) @@ -57,7 +57,7 @@ boolean UNTAGGED_INPUT = true; boolean USE_NER = false; boolean USE_WORDNET = true; - boolean VERBOSE = false; + boolean VERBOSE = true; private String taggedInput; Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Knowledgebase.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Knowledgebase.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Knowledgebase.java 2012-06-23 07:37:03 UTC (rev 3763) @@ -0,0 +1,68 @@ +package org.dllearner.algorithm.tbsl.util; + +import org.dllearner.common.index.Index; +import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.kb.sparql.SparqlEndpoint; + +public class Knowledgebase { + + private String label; + private SparqlEndpoint endpoint; + private String description; + + private Index resourceIndex; + private Index propertyIndex; + private Index classIndex; + + private MappingBasedIndex mappingIndex; + + public Knowledgebase(SparqlEndpoint endpoint, String label, String description, + Index resourceIndex, Index propertyIndex, Index classIndex) { + this(endpoint, label, description, resourceIndex, propertyIndex, classIndex, null); + } + + public Knowledgebase(SparqlEndpoint endpoint, String label, String description, + Index resourceIndex, Index propertyIndex, Index classIndex, MappingBasedIndex mappingIndex) { + this.label = label; + this.endpoint = endpoint; + this.description = description; + this.resourceIndex = resourceIndex; + this.propertyIndex = propertyIndex; + this.classIndex = classIndex; + this.mappingIndex = 
mappingIndex; + } + + public String getLabel() { + return label; + } + + public SparqlEndpoint getEndpoint() { + return endpoint; + } + + public String getDescription() { + return description; + } + + public Index getResourceIndex() { + return resourceIndex; + } + + public Index getPropertyIndex() { + return propertyIndex; + } + + public Index getClassIndex() { + return classIndex; + } + + public MappingBasedIndex getMappingIndex() { + return mappingIndex; + } + + @Override + public String toString() { + return label; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-25 13:22:09
|
Revision: 3764 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3764&view=rev Author: lorenz_b Date: 2012-06-25 13:21:58 +0000 (Mon, 25 Jun 2012) Log Message: ----------- Added option to set grammar files. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-23 07:37:03 UTC (rev 3763) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-25 13:21:58 UTC (rev 3764) @@ -137,12 +137,14 @@ private boolean multiThreaded = true; + private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ - this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getPropertyIndex(), knowledgebase.getClassIndex(), posTagger, wordNet, options); + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); } public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){ @@ -247,9 +249,13 @@ } } + public void setGrammarFiles(String[] grammarFiles){ + templateGenerator.setGrammarFiles(grammarFiles); + } + @Override public void init() throws 
ComponentInitException { - templateGenerator = new Templator(posTagger, wordNet); + templateGenerator = new Templator(posTagger, wordNet, grammarFiles); lemmatizer = new LingPipeLemmatizer(); } @@ -260,8 +266,8 @@ public void setKnowledgebase(Knowledgebase knowledgebase){ this.endpoint = knowledgebase.getEndpoint(); this.resourcesIndex = knowledgebase.getResourceIndex(); - this.classesIndex = knowledgebase.getPropertyIndex(); - this.propertiesIndex = knowledgebase.getClassIndex(); + this.classesIndex = knowledgebase.getClassIndex(); + this.propertiesIndex = knowledgebase.getPropertyIndex(); } /* Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-23 07:37:03 UTC (rev 3763) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-25 13:21:58 UTC (rev 3764) @@ -92,6 +92,27 @@ pp = new Preprocessor(USE_NER); } + public Templator(final PartOfSpeechTagger tagger, WordNet wordnet, String[] GRAMMAR_FILES) { + this.tagger = tagger; + this.wordnet = wordnet; + this.GRAMMAR_FILES = GRAMMAR_FILES; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + + p = new Parser(); + p.SHOW_GRAMMAR = true; + p.USE_DPS_AS_INITTREES = true; + p.CONSTRUCT_SEMANTICS = true; + p.MODE = "LEIPZIG"; + + pp = new Preprocessor(USE_NER); +} + public Templator(boolean b) { this.tagger = new StanfordPartOfSpeechTagger(); this.USE_WORDNET = false; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-29 12:18:50
|
Revision: 3767 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3767&view=rev Author: lorenz_b Date: 2012-06-29 12:18:39 +0000 (Fri, 29 Jun 2012) Log Message: ----------- Using popularity map as executing cache. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-28 13:44:49 UTC (rev 3766) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-29 12:18:39 UTC (rev 3767) @@ -47,6 +47,7 @@ import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.algorithm.tbsl.util.PopularityMap; +import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; @@ -782,9 +783,13 @@ private double getProminenceValue(String uri, SlotType type){ Integer popularity = null; if(popularityMap != null){ - if(type == SlotType.CLASS || type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + if(type == SlotType.CLASS){ + popularity = popularityMap.getPopularity(uri, EntityType.CLASS); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ - popularity = popularityMap.getPopularity(uri); + popularity = popularityMap.getPopularity(uri, EntityType.PROPERTY); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + popularity = 
popularityMap.getPopularity(uri, EntityType.RESOURCE); } } if(popularity == null){ @@ -808,6 +813,9 @@ popularity = qs.get(projectionVar).asLiteral().getInt(); } } + if(popularity == null){ + popularity = Integer.valueOf(0); + } // if(cnt == 0){ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java 2012-06-28 13:44:49 UTC (rev 3766) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java 2012-06-29 12:18:39 UTC (rev 3767) @@ -7,7 +7,9 @@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.dllearner.core.owl.DatatypeProperty; @@ -23,7 +25,7 @@ public class PopularityMap { - enum EntityType { + public enum EntityType { CLASS, PROPERTY, RESOURCE } @@ -48,22 +50,19 @@ // load popularity of classes for (NamedClass nc : new SPARQLTasks(endpoint).getAllClasses()) { System.out.println("Computing popularity for " + nc); - String query = String.format("SELECT COUNT(?s) WHERE {?s a <%s>}", nc.getName()); - int popularity = loadPopularity(query); + int popularity = loadPopularity(nc.getName(), EntityType.CLASS); class2Popularity.put(nc.getName(), Integer.valueOf(popularity)); } // load popularity of properties for (ObjectProperty op : new SPARQLTasks(endpoint).getAllObjectProperties()) { System.out.println("Computing popularity for " + op); - String query = String.format("SELECT COUNT(*) WHERE {?s <%s> ?o}", op.getName()); - int popularity = loadPopularity(query); - class2Popularity.put(op.getName(), Integer.valueOf(popularity)); + int popularity = loadPopularity(op.getName(), EntityType.PROPERTY); + property2Popularity.put(op.getName(), Integer.valueOf(popularity)); } for 
(DatatypeProperty dp : new SPARQLTasks(endpoint).getAllDataProperties()) { System.out.println("Computing popularity for " + dp); - String query = String.format("SELECT COUNT(*) WHERE {?s <%s> ?o}", dp.getName()); - int popularity = loadPopularity(query); - class2Popularity.put(dp.getName(), Integer.valueOf(popularity)); + int popularity = loadPopularity(dp.getName(), EntityType.PROPERTY); + property2Popularity.put(dp.getName(), Integer.valueOf(popularity)); } serialize(); } @@ -73,7 +72,11 @@ ObjectOutputStream oos = null; try { oos = new ObjectOutputStream(new FileOutputStream(new File(file))); - oos.writeObject(class2Popularity); + List<Map<String, Integer>> mapList = new ArrayList<Map<String,Integer>>(); + mapList.add(class2Popularity); + mapList.add(property2Popularity); + mapList.add(resource2Popularity); + oos.writeObject(mapList); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -98,7 +101,10 @@ ObjectInputStream ois = null; try { ois = new ObjectInputStream(new FileInputStream(new File(file))); - class2Popularity = (Map<String, Integer>) ois.readObject(); + List<Map<String, Integer>> mapList = (List<Map<String, Integer>>) ois.readObject(); + class2Popularity = mapList.get(0); + property2Popularity = mapList.get(1); + resource2Popularity = mapList.get(2); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { @@ -115,12 +121,21 @@ } } + System.out.println("Loaded popularity map."); return true; } return false; } - private int loadPopularity(String query){ + private int loadPopularity(String uri, EntityType entityType){ + String query; + if(entityType == EntityType.CLASS){ + query = String.format("SELECT COUNT(?s) WHERE {?s a <%s>}", uri); + } else if(entityType == EntityType.PROPERTY){ + query = String.format("SELECT COUNT(*) WHERE {?s <%s> ?o}", uri); + } else { + query = String.format("SELECT COUNT(*) WHERE {?s ?p <%s>}", uri); + } int pop = 0; ResultSet rs = 
SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); QuerySolution qs; @@ -137,10 +152,22 @@ Integer popularity; if(entityType == EntityType.CLASS){ popularity = class2Popularity.get(uri); + if(popularity == null){ + popularity = loadPopularity(uri, entityType); + class2Popularity.put(uri, popularity); + } } else if(entityType == EntityType.PROPERTY){ popularity = property2Popularity.get(uri); + if(popularity == null){ + popularity = loadPopularity(uri, entityType); + property2Popularity.put(uri, popularity); + } } else { popularity = resource2Popularity.get(uri); + if(popularity == null){ + popularity = loadPopularity(uri, entityType); + resource2Popularity.put(uri, popularity); + } } return popularity; } @@ -157,7 +184,9 @@ } public static void main(String[] args) { - new PopularityMap("dbpedia_popularity.map", SparqlEndpoint.getEndpointDBpedia(), new ExtractionDBCache("cache")).init(); + PopularityMap map = new PopularityMap("dbpedia_popularity.map", SparqlEndpoint.getEndpointDBpediaLiveAKSW(), new ExtractionDBCache("cache")); + map.init(); + System.out.println(map.getPopularity("http://dbpedia.org/ontology/Book")); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-07-12 12:13:13
|
Revision: 3784 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3784&view=rev Author: christinaunger Date: 2012-07-12 12:13:02 +0000 (Thu, 12 Jul 2012) Log Message: ----------- [tbsl] if COUNT then GROUP BY + HAVING. also removed number replacement. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-07-12 11:34:09 UTC (rev 3783) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-07-12 12:13:02 UTC (rev 3784) @@ -264,7 +264,7 @@ if (!predicate.contains(":")) prop.setIsVariable(true); boolean literal = false; - if (simple.getArguments().size() > 1 && simple.getArguments().get(1).getValue().matches("\\d+")) { + if (simple.getArguments().size() > 1 && (simple.getArguments().get(1).getValue().startsWith("\'") || simple.getArguments().get(1).getValue().matches("[0-9]+"))) { literal = true; } @@ -273,11 +273,7 @@ if (simple.getArguments().get(1).getValue().matches("[0-9]+")) { String fresh = "v"+createFresh(); out.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, fresh)); - out.addFilter(new SPARQL_Filter( - new SPARQL_Pair( - new SPARQL_Term(fresh,false), - new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), - SPARQL_PairType.EQ))); + out.addHaving(new SPARQL_Having("?"+fresh + " = " + simple.getArguments().get(1).getValue())); } else { out.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), 
SPARQL_Aggregate.COUNT, simple.getArguments().get(1).getValue())); } @@ -326,7 +322,7 @@ } else if (predicate.equals("equal")) { out.addFilter(new SPARQL_Filter( new SPARQL_Pair( - new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.EQ))); return out; @@ -335,14 +331,14 @@ out.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new SPARQL_Term("'^"+simple.getArguments().get(1).getValue()+"'",true), + new SPARQL_Term("'^"+simple.getArguments().get(1).getValue()+"'",false), SPARQL_PairType.REGEX))); } else if (predicate.equals("regex")) { out.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new SPARQL_Term(simple.getArguments().get(1).getValue().replace("_","").trim(),true), + new SPARQL_Term(simple.getArguments().get(1).getValue().replace("_","").trim(),false), SPARQL_PairType.REGEX))); } else { @@ -403,10 +399,13 @@ } public void redundantEqualRenaming(DRS drs) { - + Set<Simple_DRS_Condition> equalsConditions = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { - if(c.getPredicate().equals("equal")) equalsConditions.add(c); + if(c.getPredicate().equals("equal") + && !c.getArguments().get(0).getValue().matches("[0-9]+") + && !c.getArguments().get(1).getValue().matches("[0-9]+")) + equalsConditions.add(c); } DiscourseReferent firstArg; @@ -426,7 +425,7 @@ secondIsInt = secondArg.getValue().matches("(\\?)?[0..9]+"); drs.removeCondition(c); - if (firstIsURI || firstIsInt) { + if (firstIsURI) { // firstIsURI || firstIsInt drs.replaceEqualRef(secondArg, firstArg, true); for (Slot s : slots) { if (s.getAnchor().equals(secondArg.getValue())) @@ -436,7 +435,7 @@ s.getWords().add(firstArg.getValue()); } } - } else if (secondIsURI || secondIsInt) { + } 
else if (secondIsURI) { // secondIsURI || secondIsInt drs.replaceEqualRef(firstArg, secondArg, true); for (Slot s : slots) { if (s.getAnchor().equals(firstArg.getValue())) Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-07-12 11:34:09 UTC (rev 3783) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-07-12 12:13:02 UTC (rev 3784) @@ -14,6 +14,7 @@ Set<SPARQL_Triple> conditions; Set<SPARQL_Term> orderBy; Set<SPARQL_Filter> filter; + Set<SPARQL_Having> having; Set<SPARQL_Union> unions; SPARQL_QueryType qt = SPARQL_QueryType.SELECT; @@ -28,6 +29,7 @@ conditions = new HashSet<SPARQL_Triple>(); orderBy = new HashSet<SPARQL_Term>(); filter = new HashSet<SPARQL_Filter>(); + having = new HashSet<SPARQL_Having>(); unions = new HashSet<SPARQL_Union>(); } @@ -38,6 +40,7 @@ this.prefixes = prefixes; this.conditions = conditions; filter = new HashSet<SPARQL_Filter>(); + having = new HashSet<SPARQL_Having>(); unions = new HashSet<SPARQL_Union>(); } @@ -50,6 +53,8 @@ this.orderBy = orderBy; this.limit = limit; this.offset = offset; + filter = new HashSet<SPARQL_Filter>(); + having = new HashSet<SPARQL_Having>(); unions = new HashSet<SPARQL_Union>(); } @@ -107,6 +112,7 @@ } } this.filter = filters; + this.having = having; this.unions = query.unions; // TODO copy unions this.limit = query.getLimit(); @@ -195,6 +201,10 @@ if(groupBy != null){ retVal += "GROUP BY " + groupBy + "\n"; } + + if (!having.isEmpty()) { + for (SPARQL_Having h : having) retVal += h.toString() + "\n"; + } if (orderBy != null && !orderBy.isEmpty()) { @@ -275,6 +285,10 @@ this.filter.add(f); } + public void addHaving(SPARQL_Having h) + { + this.having.add(h); + } public Set<SPARQL_Term> getOrderBy() { Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2012-07-12 11:34:09 UTC (rev 3783) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2012-07-12 12:13:02 UTC (rev 3784) @@ -79,7 +79,7 @@ public boolean isString() { - return name.startsWith("'") || name.matches("\\d+"); + return name.startsWith("'"); } public void setIsURI(boolean isURI){ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-07-13 09:50:44
|
Revision: 3788 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3788&view=rev Author: christinaunger Date: 2012-07-13 09:50:38 +0000 (Fri, 13 Jul 2012) Log Message: ----------- - again tried to the number problem :P - introduced distinction between equal and equals Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-07-13 08:39:06 UTC (rev 3787) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-07-13 09:50:38 UTC (rev 3788) @@ -319,7 +319,7 @@ out.addOrderBy(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_OrderBy.ASC)); out.setLimit(1); return out; - } else if (predicate.equals("equal")) { + } else if (predicate.equals("equals")) { out.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), @@ -347,9 +347,11 @@ out.addCondition(new SPARQL_Triple(term,new SPARQL_Property("type",new SPARQL_Prefix("rdf","")),prop)); } else if (arity == 2) { - String arg1 = simple.getArguments().get(0).getValue();SPARQL_Term term1 = new SPARQL_Term(arg1,false);term1.setIsVariable(true); - String arg2 = simple.getArguments().get(1).getValue();SPARQL_Term term2 = new SPARQL_Term(arg2,false);term2.setIsVariable(true); - out.addCondition(new SPARQL_Triple(term1, prop, term2)); + String arg1 = simple.getArguments().get(0).getValue(); + SPARQL_Term term1 = new SPARQL_Term(arg1,arg1.contains(":"),!arg1.matches("(\\?)?[0-9]+")); + String arg2 = simple.getArguments().get(1).getValue(); + SPARQL_Term term2 = new 
SPARQL_Term(arg2,arg2.contains(":"),!arg2.matches("(\\?)?[0-9]+")); + out.addCondition(new SPARQL_Triple(term1,prop,term2)); } else if (arity > 2) { // TODO @@ -371,11 +373,11 @@ if (s.getAnchor().equals(v1)) v1isSlotVar = true; if (s.getAnchor().equals(v2)) v2isSlotVar = true; } - if (!v1isSlotVar && !v1.matches("[0..9]+") && !v1.contains("count")) { + if (!v1isSlotVar && !v1.matches("(\\?)?[0-9]+") && !v1.contains("count")) { if (vs.containsKey(v1)) vs.put(v1,vs.get(v1)+1); else vs.put(v1,1); } - if (!v2isSlotVar && !v2.matches("[0..9]+") && !v2.contains("count")) { + if (!v2isSlotVar && !v2.matches("(\\?)?[0-9]+") && !v2.contains("count")) { if (vs.containsKey(v2)) vs.put(v2,vs.get(v2)+1); else vs.put(v2,1); } @@ -402,9 +404,7 @@ Set<Simple_DRS_Condition> equalsConditions = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { - if(c.getPredicate().equals("equal") - && !c.getArguments().get(0).getValue().matches("[0-9]+") - && !c.getArguments().get(1).getValue().matches("[0-9]+")) + if(c.getPredicate().equals("equal")) equalsConditions.add(c); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java 2012-07-13 08:39:06 UTC (rev 3787) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java 2012-07-13 09:50:38 UTC (rev 3788) @@ -312,7 +312,7 @@ next = true; } m_DiscourseReferents.remove(dr2); - if (!isInUpperUniverse) { + if (!isInUpperUniverse && !dr2.m_Referent.matches("[0-9]+")) { m_DiscourseReferents.add(new DiscourseReferent(dr2.m_Referent,marked,nonex)); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-16 12:52:49
|
Revision: 3794 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3794&view=rev Author: lorenz_b Date: 2012-07-16 12:52:38 +0000 (Mon, 16 Jul 2012) Log Message: ----------- Added class to compute PMI. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-16 07:10:22 UTC (rev 3793) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-16 12:52:38 UTC (rev 3794) @@ -155,6 +155,8 @@ private Set<String> relevantKeywords; + private boolean useDomainRangeRestriction = true; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -305,6 +307,10 @@ reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); } + public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { + this.useDomainRangeRestriction = useDomainRangeRestriction; + } + /* * Only for Evaluation useful. 
*/ @@ -619,31 +625,66 @@ Query q = new Query(query.getQuery()); boolean drop = false; - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - if(true){//reasoner.isObjectProperty(a.getUri())){ - Description range = reasoner.getRange(new ObjectProperty(a.getUri())); + if(useDomainRangeRestriction){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); +// System.out.println(triple); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ +// System.out.println(typeTriple); + if(true){//reasoner.isObjectProperty(a.getUri())){ + Description range = reasoner.getRange(new ObjectProperty(a.getUri())); +// System.out.println(a); + if(range != null){ + Set<Description> allRanges = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(range instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(range); + allRanges.addAll(superClasses); + } else { + for(Description nc : range.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allRanges.addAll(superClasses); + } + } + allRanges.add(range); + allRanges.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); + + 
if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } + } + } else { + drop = true; + } + + } + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ + Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); // System.out.println(a); - if(range != null){ - Set<Description> allRanges = new HashSet<Description>(); + if(domain != null){ + Set<Description> allDomains = new HashSet<Description>(); SortedSet<Description> superClasses; - if(range instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(range); - allRanges.addAll(superClasses); + if(domain instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(domain); + allDomains.addAll(superClasses); } else { - for(Description nc : range.getChildren()){ + for(Description nc : domain.getChildren()){ superClasses = reasoner.getSuperClasses(nc); - allRanges.addAll(superClasses); + allDomains.addAll(superClasses); } } - allRanges.add(range); - allRanges.remove(new NamedClass(Thing.instance.getURI())); - + allDomains.add(domain); + allDomains.remove(new NamedClass(Thing.instance.getURI())); + Set<Description> allTypes = new HashSet<Description>(); String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); Description type = new NamedClass(typeURI); @@ -651,46 +692,13 @@ allTypes.addAll(superClasses); allTypes.add(type); - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ drop = true; - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ - Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); -// System.out.println(a); - if(domain != null){ - Set<Description> allDomains = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(domain instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(domain); 
- allDomains.addAll(superClasses); - } else { - for(Description nc : domain.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allDomains.addAll(superClasses); + System.err.println("DROPPING: \n" + q.toString()); + } else { + } } - allDomains.add(domain); - allDomains.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; -// System.err.println("DROPPING: \n" + q.toString()); - } else { - - } } } } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java 2012-07-16 12:52:38 UTC (rev 3794) @@ -0,0 +1,187 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.HashMap; +import java.util.Map; + +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.Property; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; + +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; + +public class PMI { + + private SparqlEndpoint endpoint; + private ExtractionDBCache cache; + + public PMI(SparqlEndpoint endpoint, ExtractionDBCache cache) { + this.endpoint = endpoint; + this.cache = cache; + } + + public double getDirectedPMI(ObjectProperty prop, NamedClass cls){ + System.out.println(String.format("Computing PMI(%s, 
%s)", prop, cls)); + String query = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Class occurence: " + classOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName()); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Property occurence: " + propertyOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?o a <%s>}", prop.getName(), cls.getName()); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double coOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Co-occurence: " + coOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}"); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double total = rs.next().getLiteral("cnt").getInt(); + System.out.println("Total: " + total); + + if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){ + return 0; + } + + double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); + + return pmi; + } + + public double getDirectedPMI(NamedClass cls, Property prop){ + System.out.println(String.format("Computing PMI(%s, %s)", cls, prop)); + String query = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Class occurence: " + classOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", 
prop.getName()); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Property occurence: " + propertyOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName()); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double coOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Co-occurence: " + coOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}"); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double total = rs.next().getLiteral("cnt").getInt(); + System.out.println("Total: " + total); + + if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){ + return 0; + } + + double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); + + return pmi; + } + + /** + * Returns the direction of the given triple, computed by calculating the PMI values of each combination. + * @param subject + * @param predicate + * @param object + * @return -1 if the given triple should be reversed, else 1. 
+ */ + public int getDirection(NamedClass subject, ObjectProperty predicate, NamedClass object){ + System.out.println(String.format("Computing direction between [%s, %s, %s]", subject, predicate, object)); + double pmi_obj_pred = getDirectedPMI(object, predicate);System.out.println("PMI(OBJECT, PREDICATE): " + pmi_obj_pred); + double pmi_pred_subj = getDirectedPMI(predicate, subject);System.out.println("PMI(PREDICATE, SUBJECT): " + pmi_pred_subj); + double pmi_subj_pred = getDirectedPMI(subject, predicate);System.out.println("PMI(SUBJECT, PREDICATE): " + pmi_subj_pred); + double pmi_pred_obj = getDirectedPMI(predicate, object);System.out.println("PMI(PREDICATE, OBJECT): " + pmi_pred_obj); + + double threshold = 2.0; + + double value = ((pmi_obj_pred + pmi_pred_subj) - (pmi_subj_pred + pmi_pred_obj)); + System.out.println("(PMI(OBJECT, PREDICATE) + PMI(PREDICATE, SUBJECT)) - (PMI(SUBJECT, PREDICATE) + PMI(PREDICATE, OBJECT)) = " + value); + + if( value > threshold){ + System.out.println(object + "---" + predicate + "--->" + subject); + return -1; + } else { + System.out.println(subject + "---" + predicate + "--->" + object); + return 1; + } + } + + public Map<ObjectProperty, Integer> getMostFrequentProperties(NamedClass cls1, NamedClass cls2){ + Map<ObjectProperty, Integer> prop2Cnt = new HashMap<ObjectProperty, Integer>(); + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?x1 a <%s>. ?x2 a <%s>. 
?x1 ?p ?x2} GROUP BY ?p", cls1, cls2); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + ObjectProperty p = new ObjectProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + prop2Cnt.put(p, cnt); + } + return prop2Cnt; + } + + public static void main(String[] args) { + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + ExtractionDBCache cache = new ExtractionDBCache("cache"); + String NS = "http://dbpedia.org/ontology/"; + + PMI pmiGen = new PMI(endpoint, cache); + System.out.println(pmiGen.getDirectedPMI( + new ObjectProperty(NS + "author"), + new NamedClass(NS+ "Person"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getDirectedPMI( + new ObjectProperty(NS + "author"), + new NamedClass(NS+ "Writer"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getDirectedPMI( + new NamedClass(NS+ "Book"), + new ObjectProperty(NS + "author")) + ); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getDirection( + new NamedClass(NS+ "Writer"), + new ObjectProperty(NS + "author"), + new NamedClass(NS+ "Book"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getDirection( + new NamedClass(NS+ "Person"), + new ObjectProperty(NS + "starring"), + new NamedClass(NS+ "Film"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getMostFrequentProperties( + new NamedClass(NS+ "Person"), + new NamedClass(NS+ "Film"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getMostFrequentProperties( + new NamedClass(NS+ "Film"), + new NamedClass(NS+ "Actor"))); + + System.out.println("#########################################"); + + 
System.out.println(pmiGen.getMostFrequentProperties( + new NamedClass(NS+ "Film"), + new NamedClass(NS+ "Person"))); + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-18 13:08:53
|
Revision: 3797 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3797&view=rev Author: lorenz_b Date: 2012-07-18 13:08:45 +0000 (Wed, 18 Jul 2012) Log Message: ----------- Started metrics class for SPARQL endpoints. Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java Removed Paths: ------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-18 13:08:45 UTC (rev 3797) @@ -0,0 +1,1031 @@ +package org.dllearner.algorithm.tbsl.learning; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; +import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import 
org.dllearner.algorithm.tbsl.nlp.PlingStemmer; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.WordNet; +import org.dllearner.algorithm.tbsl.sparql.Allocation; +import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; +import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.SlotType; +import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; +import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.algorithm.tbsl.util.PopularityMap; +import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; +import org.dllearner.algorithm.tbsl.util.SPARQLEndpointMetrics; +import org.dllearner.algorithm.tbsl.util.Similarity; +import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper; +import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.IndexResultItem; +import org.dllearner.common.index.IndexResultSet; +import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SOLRIndex; +import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; +import org.dllearner.common.index.SPARQLObjectPropertiesIndex; +import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; +import 
org.dllearner.common.index.VirtuosoObjectPropertiesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.LearningProblem; +import org.dllearner.core.SparqlQueryLearningAlgorithm; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.Intersection; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.Thing; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.reasoning.SPARQLReasoner; +import org.ini4j.InvalidFileFormatException; +import org.ini4j.Options; +import org.semanticweb.HermiT.Configuration.DirectBlockingType; + +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.shared.UnknownPropertyException; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.vocabulary.RDFS; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +public class SPARQLTemplateBasedLearner3 implements SparqlQueryLearningAlgorithm{ + + + enum Mode{ + BEST_QUERY, BEST_NON_EMPTY_QUERY + } + + private Mode mode = Mode.BEST_QUERY; + + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3.class); + private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); + private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); + + private boolean useRemoteEndpointValidation; + private boolean stopIfQueryResultNotEmpty; + private int maxTestedQueriesPerTemplate = 50; + private int 
maxQueryExecutionTimeInSeconds; + private int maxTestedQueries = 200; + private int maxIndexResults; + + private SparqlEndpoint endpoint; + private Model model; + + private ExtractionDBCache cache = new ExtractionDBCache("cache"); + + private Index resourcesIndex; + private Index classesIndex; + private Index propertiesIndex; + + private Index datatypePropertiesIndex; + private Index objectPropertiesIndex; + + private MappingBasedIndex mappingIndex; + + private Templator templateGenerator; + private Lemmatizer lemmatizer; + private PartOfSpeechTagger posTagger; + private WordNet wordNet; + + private String question; + private int learnedPos = -1; + + private Set<Template> templates; + private Map<Template, Collection<? extends Query>> template2Queries; + private Map<Slot, List<String>> slot2URI; + + private Collection<WeightedQuery> sparqlQueryCandidates; + private SortedSet<WeightedQuery> learnedSPARQLQueries; + private SortedSet<WeightedQuery> generatedQueries; + + private SPARQLReasoner reasoner; + + private String currentlyExecutedQuery; + + private boolean dropZeroScoredQueries = true; + private boolean useManualMappingsIfExistOnly = true; + + private boolean multiThreaded = true; + + private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; + + private PopularityMap popularityMap; + + private Set<String> relevantKeywords; + + private boolean useDomainRangeRestriction = true; + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); + } + + public 
SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options, cache); + } + + public SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index){ + this(endpoint, index, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){ + this(endpoint, index, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index, WordNet wordNet){ + this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public 
SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this.endpoint = endpoint; + this.resourcesIndex = resourcesIndex; + this.classesIndex = classesIndex; + this.propertiesIndex = propertiesIndex; + this.posTagger = posTagger; + this.wordNet = wordNet; + this.cache = cache; + + setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); + } + + public 
SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ + this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ + this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner3(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this.model = model; + this.resourcesIndex = resourcesIndex; + this.classesIndex = classesIndex; + this.propertiesIndex = propertiesIndex; + this.posTagger = posTagger; + this.wordNet = wordNet; + this.cache = cache; + + setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new 
SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + } + + public void setGrammarFiles(String[] grammarFiles){ + templateGenerator.setGrammarFiles(grammarFiles); + } + + @Override + public void init() throws ComponentInitException { + templateGenerator = new Templator(posTagger, wordNet, grammarFiles); + lemmatizer = new LingPipeLemmatizer(); + } + + public void setMappingIndex(MappingBasedIndex mappingIndex) { + this.mappingIndex = mappingIndex; + } + + public void setKnowledgebase(Knowledgebase knowledgebase){ + this.endpoint = knowledgebase.getEndpoint(); + this.resourcesIndex = knowledgebase.getResourceIndex(); + this.classesIndex = knowledgebase.getClassIndex(); + this.propertiesIndex = knowledgebase.getPropertyIndex(); + this.mappingIndex = knowledgebase.getMappingIndex(); + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + } + + public void setCache(ExtractionDBCache cache) { + this.cache = cache; + } + + public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { + this.useDomainRangeRestriction = useDomainRangeRestriction; + } + + /* + * Only for Evaluation useful. 
+ */ + public void setUseIdealTagger(boolean value){ + templateGenerator.setUNTAGGED_INPUT(!value); + } + + private void setOptions(Options options){ + maxIndexResults = Integer.parseInt(options.get("solr.query.limit", "10")); + + maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "100")); + cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); + + useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? true : false; + stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); + maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); + + String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); + wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); + System.setProperty("wordnet.database.dir", wordnetPath); + } + + public void setEndpoint(SparqlEndpoint endpoint){ + this.endpoint = endpoint; + + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + reasoner.setCache(cache); + reasoner.prepareSubsumptionHierarchy(); + } + + public void setQuestion(String question){ + this.question = question; + } + + public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ + this.useRemoteEndpointValidation = useRemoteEndpointValidation; + } + + public int getMaxQueryExecutionTimeInSeconds() { + return maxQueryExecutionTimeInSeconds; + } + + public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) { + this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds; + } + + public int getMaxTestedQueriesPerTemplate() { + return maxTestedQueriesPerTemplate; + } + + public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { + this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; + } + + private void reset(){ + learnedSPARQLQueries = new 
TreeSet<WeightedQuery>(); + template2Queries = new HashMap<Template, Collection<? extends Query>>(); + slot2URI = new HashMap<Slot, List<String>>(); + relevantKeywords = new HashSet<String>(); + currentlyExecutedQuery = null; + +// templateMon.reset(); +// sparqlMon.reset(); + } + + public void learnSPARQLQueries() throws NoTemplateFoundException{ + reset(); + //generate SPARQL query templates + logger.info("Generating SPARQL query templates..."); + templateMon.start(); + if(multiThreaded){ + templates = templateGenerator.buildTemplatesMultiThreaded(question); + } else { + templates = templateGenerator.buildTemplates(question); + } + templateMon.stop(); + logger.info("Done in " + templateMon.getLastValue() + "ms."); + relevantKeywords.addAll(templateGenerator.getUnknownWords()); + if(templates.isEmpty()){ + throw new NoTemplateFoundException(); + + } + logger.info("Templates:"); + for(Template t : templates){ + logger.info(t); + } + + //get the weighted query candidates + generatedQueries = getWeightedSPARQLQueries(templates); + sparqlQueryCandidates = new ArrayList<WeightedQuery>(); + int i = 0; + for(WeightedQuery wQ : generatedQueries){ + System.out.println(wQ.explain()); + sparqlQueryCandidates.add(wQ); + if(i == maxTestedQueries){ + break; + } + i++; + } + + if(mode == Mode.BEST_QUERY){ + double bestScore = -1; + for(WeightedQuery candidate : generatedQueries){ + double score = candidate.getScore(); + if(score >= bestScore){ + bestScore = score; + learnedSPARQLQueries.add(candidate); + } else { + break; + } + } + } else if(mode == Mode.BEST_NON_EMPTY_QUERY){ + //test candidates + if(useRemoteEndpointValidation){ //on remote endpoint + validateAgainstRemoteEndpoint(sparqlQueryCandidates); + } else {//on local model + + } + } + } + + public SortedSet<WeightedQuery> getGeneratedQueries() { + return generatedQueries; + } + + public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { + SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); + int 
max = Math.min(topN, generatedQueries.size()); + for(WeightedQuery wQ : generatedQueries){ + topNQueries.add(wQ); + if(topNQueries.size() == max){ + break; + } + } + return topNQueries; + } + + public Set<Template> getTemplates(){ + return templates; + } + + public List<String> getGeneratedSPARQLQueries(){ + List<String> queries = new ArrayList<String>(); + for(WeightedQuery wQ : sparqlQueryCandidates){ + queries.add(wQ.getQuery().toString()); + } + + return queries; + } + + public Map<Template, Collection<? extends Query>> getTemplates2SPARQLQueries(){ + return template2Queries; + } + + public Map<Slot, List<String>> getSlot2URIs(){ + return slot2URI; + } + + private void normProminenceValues(Set<Allocation> allocations){ + double min = 0; + double max = 0; + for(Allocation a : allocations){ + if(a.getProminence() < min){ + min = a.getProminence(); + } + if(a.getProminence() > max){ + max = a.getProminence(); + } + } + for(Allocation a : allocations){ + double prominence = a.getProminence()/(max-min); + a.setProminence(prominence); + } + } + + private void computeScore(Set<Allocation> allocations){ + double alpha = 0.8; + double beta = 1 - alpha; + + for(Allocation a : allocations){ + double score = alpha * a.getSimilarity() + beta * a.getProminence(); + a.setScore(score); + } + + } + + public Set<String> getRelevantKeywords(){ + return relevantKeywords; + } + + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ + logger.info("Generating SPARQL query candidates..."); + + Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { + + @Override + public int compare(Slot o1, Slot o2) { + if(o1.getSlotType() == o2.getSlotType()){ + return o1.getToken().compareTo(o2.getToken()); + } else { + return -1; + } + } + }); + slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); + + + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + + 
Set<Allocation> allocations; + + for(Template t : templates){ + logger.info("Processing template:\n" + t.toString()); + allocations = new TreeSet<Allocation>(); + + ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); + List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); + + long startTime = System.currentTimeMillis(); + + for (Slot slot : t.getSlots()) { + if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); + Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); + Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); + list.add(submit); + } else { + System.out.println("CACHE HIT"); + } + } + + for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { + try { + Map<Slot, SortedSet<Allocation>> result = future.get(); + Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); + slot2Allocations.put(item.getKey(), item.getValue()); + } catch (InterruptedException e) { + e.printStackTrace(); + } catch (ExecutionException e) { + e.printStackTrace(); + } + } + + executor.shutdown(); + System.out.println("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); + + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); + Query cleanQuery = t.getQuery(); + queries.add(new WeightedQuery(cleanQuery)); + + Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); + List<Slot> sortedSlots = new ArrayList<Slot>(); + Set<Slot> classSlots = new HashSet<Slot>(); + for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.CLASS){ + sortedSlots.add(slot); + classSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY || slot.getSlotType() == SlotType.DATATYPEPROPERTY){ + sortedSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ + 
if(!sortedSlots.contains(slot)){ + sortedSlots.add(slot); + } + } + //add for each SYMPROPERTY Slot the reversed query + for(Slot slot : sortedSlots){ + for(WeightedQuery wQ : queries){ + if(slot.getSlotType() == SlotType.SYMPROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY){ + Query reversedQuery = new Query(wQ.getQuery()); + reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); + tmp.add(new WeightedQuery(reversedQuery)); + } + tmp.add(wQ); + } + queries.clear(); + queries.addAll(tmp); + tmp.clear(); + } + + for(Slot slot : sortedSlots){ + if(!slot2Allocations.get(slot).isEmpty()){ + for(Allocation a : slot2Allocations.get(slot)){ + for(WeightedQuery query : queries){ + Query q = new Query(query.getQuery()); + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + w.addAllocations(query.getAllocations()); + w.addAllocation(a); + tmp.add(w); + + + } + } + queries.clear(); + queries.addAll(tmp); + tmp.clear(); + + + } + + } + SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, cache); + for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { + WeightedQuery wQ = iterator.next(); + Query q = wQ.getQuery(); + for(SPARQL_Triple triple : q.getConditions()){ + SPARQL_Term subject = triple.getVariable(); + SPARQL_Property predicate = triple.getProperty(); + SPARQL_Value object = triple.getValue(); + + if(!predicate.isVariable() && !predicate.getName().equals("type")){ + if(subject.isVariable() && !object.isVariable()){ + String varName = triple.getVariable().getName(); + Set<String> types = new HashSet<String>(); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(varName)){ + types.add(typeTriple.getValue().getName().replace(">", "").replace("<", "")); + } + for(String type : types){System.out.println(type); + metrics.getGoodness(new NamedClass(type), + new 
ObjectProperty(predicate.getName().replace(">", "").replace("<", "")), + new Individual(object.getName().replace(">", "").replace("<", ""))); + } + } else if(object.isVariable() && !subject.isVariable()){ + String varName = triple.getVariable().getName(); + Set<String> types = new HashSet<String>(); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(varName)){ + types.add(typeTriple.getValue().getName().replace(">", "").replace("<", "")); + } + for(String type : types){ + metrics.getGoodness(new Individual(subject.getName().replace(">", "").replace("<", "")), + new ObjectProperty(predicate.getName().replace(">", "").replace("<", "")), + new NamedClass(type)); + } + } + } + } + + } + for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { + WeightedQuery wQ = iterator.next(); + if(dropZeroScoredQueries){ + if(wQ.getScore() <= 0){ + iterator.remove(); + } + } else { + wQ.setScore(wQ.getScore()/t.getSlots().size()); + } + + } + allQueries.addAll(queries); + List<Query> qList = new ArrayList<Query>(); + for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); + qList.add(wQ.getQuery()); + } + template2Queries.put(t, qList); + } + logger.info("...done in "); + return allQueries; + } + + private double getProminenceValue(String uri, SlotType type){ + Integer popularity = null; + if(popularityMap != null){ + if(type == SlotType.CLASS){ + popularity = popularityMap.getPopularity(uri, EntityType.CLASS); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ + popularity = popularityMap.getPopularity(uri, EntityType.PROPERTY); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + popularity = popularityMap.getPopularity(uri, EntityType.RESOURCE); + } + } + if(popularity == null){ + String query = null; + if(type == SlotType.CLASS){ + query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; + } else if(type == SlotType.PROPERTY || type == 
SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ + query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; + } + query = String.format(query, uri); + + ResultSet rs = executeSelect(query); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + popularity = qs.get(projectionVar).asLiteral().getInt(); + } + } + if(popularity == null){ + popularity = Integer.valueOf(0); + } + + +// if(cnt == 0){ +// return 0; +// } +// return Math.log(cnt); + return popularity; + } + + public void setPopularityMap(PopularityMap popularityMap) { + this.popularityMap = popularityMap; + } + + + private void validateAgainstRemoteEndpoint(Collection<WeightedQuery> queries){ + SPARQL_QueryType queryType = queries.iterator().next().getQuery().getQt(); + validate(queries, queryType); + } + + private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){ + logger.info("Testing candidate SPARQL queries on remote endpoint..."); + sparqlMon.start(); + if(queryType == SPARQL_QueryType.SELECT){ + for(WeightedQuery query : queries){ + learnedPos++; + List<String> results; + try { + logger.info("Testing query:\n" + query); + com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); + q.setLimit(1); + ResultSet rs = executeSelect(q.toString()); + + results = new ArrayList<String>(); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + if(qs.get(projectionVar).isLiteral()){ + results.add(qs.get(projectionVar).asLiteral().getLexicalForm()); + } else if(qs.get(projectionVar).isURIResource()){ + results.add(qs.get(projectionVar).asResource().getURI()); + } + + } + if(!results.isEmpty()){ + try{ + int cnt = Integer.parseInt(results.get(0)); + if(cnt 
> 0){ + learnedSPARQLQueries.add(query); + if(stopIfQueryResultNotEmpty){ + return; + } + } + } catch (NumberFormatException e){ + learnedSPARQLQueries.add(query); + if(stopIfQueryResultNotEmpty){ + return; + } + } + logger.info("Result: " + results); + } + } catch (Exception e) { + e.printStackTrace(); + } + + } + } else if(queryType == SPARQL_QueryType.ASK){ + for(WeightedQuery query : queries){ + learnedPos++; + logger.info("Testing query:\n" + query); + boolean result = executeAskQuery(query.getQuery().toString()); + learnedSPARQLQueries.add(query); +// if(stopIfQueryResultNotEmpty && result){ +// return; +// } + if(stopIfQueryResultNotEmpty){ + return; + } + logger.info("Result: " + result); + } + } + + sparqlMon.stop(); + logger.info("Done in " + sparqlMon.getLastValue() + "ms."); + } + + private boolean executeAskQuery(String query){ + currentlyExecutedQuery = query; + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + for(String uri : endpoint.getDefaultGraphURIs()){ + qe.addDefaultGraph(uri); + } + boolean ret = qe.execAsk(); + return ret; + } + + private ResultSet executeSelect(String query) { + currentlyExecutedQuery = query; + ResultSet rs; + if (model == null) { + if (cache == null) { + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + rs = qe.execSelect(); + } else { + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + } + } else { + rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model) + .execSelect(); + } + + return rs; + } + + public String getCurrentlyExecutedQuery() { + return currentlyExecutedQuery; + } + + public int getLearnedPosition() { + if(learnedPos >= 0){ + return learnedPos+1; + } + return learnedPos; + } + + @Override + public void start() { + } + + @Override + public List<String> getCurrentlyBestSPARQLQueries(int nrOfSPARQLQueries) { + List<String> 
bestQueries = new ArrayList<String>(); + for(WeightedQuery wQ : learnedSPARQLQueries){ + bestQueries.add(wQ.getQuery().toString()); + } + return bestQueries; + } + + @Override + public String getBestSPARQLQuery() { + if(!learnedSPARQLQueries.isEmpty()){ + return learnedSPARQLQueries.iterator().next().getQuery().toString(); + } else { + return null; + } + } + + public SortedSet<WeightedQuery> getLearnedSPARQLQueries() { + return learnedSPARQLQueries; + } + + @Override + public LearningProblem getLearningProblem() { + // TODO Auto-generated method stub + return null; + } + + @Override + public void setLearningProblem(LearningProblem learningProblem) { + // TODO Auto-generated method stub + + } + + class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ + + private Slot slot; + + public SlotProcessor(Slot slot) { + this.slot = slot; + } + + @Override + public Map<Slot, SortedSet<Allocation>> call() throws Exception { + Map<Slot, SortedSet<Allocation>> result = new HashMap<Slot, SortedSet<Allocation>>(); + result.put(slot, computeAllocations(slot)); + return result; + } + + private SortedSet<Allocation> computeAllocations(Slot slot){ + logger.info("Computing allocations for slot: " + slot); + SortedSet<Allocation> allocations = new TreeSet<Allocation>(); + + Index index = getIndexBySlotType(slot); + + IndexResultSet rs; + for(String word : slot.getWords()){ + rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + 
rs.add(mappingIndex.getResourcesWithScores(word)); + } + } + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 20)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + rs.add(index.getResourcesWithScores(word, 20)); + } + } + + + for(IndexResultItem item : rs.getItems()){ + double similarity = Similarity.getSimilarity(word, item.getLabel()); +// //get the labels of the redirects and compute the highest similarity +// if(slot.getSlotType() == SlotType.RESOURCE){ +// Set<String> labels = getRedirectLabels(item.getUri()); +// for(String label : labels){ +// double tmp = Similarity.getSimilarity(word, label); +// if(tmp > similarity){ +// similarity = tmp; +// } +// } +// } + double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); + allocations.add(new Allocation(item.getUri(), prominence, similarity)); + } + + } + + normProminenceValues(allocations); + + computeScore(allocations); + logger.info("Found " + allocations.size() + " allocations for slot " + slot); + return new TreeSet<Allocation>(allocations); + } + + private Index getIndexBySlotType(Slot slot){ + Index index = null; + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + index = classesIndex; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + index = propertiesIndex; + } else if(type == SlotType.DATATYPEPROPERTY){ + index = datatypePropertiesIndex; + } else if(type == SlotType.OBJECTPROPERTY){ + index = objectPropertiesIndex; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + index = resourcesIndex; + } + return index; + } + + } + + public String getTaggedInput(){ + return templateGenerator.getTaggedInput(); + } + + private boolean isDatatypeProperty(String uri){ + Boolean isDatatypeProperty = null; + 
if(mappingIndex != null){ + isDatatypeProperty = mappingIndex.isDataProperty(uri); + } + if(isDatatypeProperty == null){ + String query = String.format("ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty> .}", uri); + isDatatypeProperty = executeAskQuery(query); + } + return isDatatypeProperty; + } + + /** + * @param args + * @throws NoTemplateFoundException + * @throws IOException + * @throws FileNotFoundException + * @throws InvalidFileFormatException + */ + public static void main(String[] args) throws Exception { + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); + Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); + Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + + SPARQLTemplateBasedLearner3 learner = new SPARQLTemplateBasedLearner3(endpoint, resourcesIndex, classesIndex, propertiesIndex); + learner.init(); + + String question = "Give me all books written by Dan Brown"; + + learner.setQuestion(question); + learner.learnSPARQLQueries(); + System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); + System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); + System.out.println(learner.getLearnedPosition()); + + } + + + +} Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java 2012-07-17 14:27:32 UTC (rev 3796) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java 2012-07-18 13:08:45 UTC (rev 3797) @@ -1,187 +0,0 @@ -package org.dllearner.algorithm.tbsl.util; - -import java.util.HashMap; -import java.util.Map; - -import org.dllearner.core.owl.NamedClass; -import 
org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.Property; -import org.dllearner.kb.sparql.ExtractionDBCache; -import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; - -import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; - -public class PMI { - - private SparqlEndpoint endpoint; - private ExtractionDBCache cache; - - public PMI(SparqlEndpoint endpoint, ExtractionDBCache cache) { - this.endpoint = endpoint; - this.cache = cache; - } - - public double getDirectedPMI(ObjectProperty prop, NamedClass cls){ - System.out.println(String.format("Computing PMI(%s, %s)", prop, cls)); - String query = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName()); - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - double classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); - System.out.println("Class occurence: " + classOccurenceCnt); - - query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName()); - rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt(); - System.out.println("Property occurence: " + propertyOccurenceCnt); - - query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. 
?o a <%s>}", prop.getName(), cls.getName()); - rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - double coOccurenceCnt = rs.next().getLiteral("cnt").getInt(); - System.out.println("Co-occurence: " + coOccurenceCnt); - - query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}"); - rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - double total = rs.next().getLiteral("cnt").getInt(); - System.out.println("Total: " + total); - - if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){ - return 0; - } - - double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); - - return pmi; - } - - public double getDirectedPMI(NamedClass cls, Property prop){ - System.out.println(String.format("Computing PMI(%s, %s)", cls, prop)); - String query = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName()); - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - double classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); - System.out.println("Class occurence: " + classOccurenceCnt); - - query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName()); - rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt(); - System.out.println("Property occurence: " + propertyOccurenceCnt); - - query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. 
?s <%s> ?o}", cls.getName(), prop.getName()); - rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - double coOccurenceCnt = rs.next().getLiteral("cnt").getInt(); - System.out.println("Co-occurence: " + coOccurenceCnt); - - query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}"); - rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - double total = rs.next().getLiteral("cnt").getInt(); - System.out.println("Total: " + total); - - if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){ - return 0; - } - - double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); - - return pmi; - } - - /** - * Returns the direction of the given triple, computed by calculated the PMI values of each combination. - * @param subject - * @param predicate - * @param object - * @return -1 if the given triple should by reversed, else 1. - */ - public int getDirection(NamedClass subject, ObjectProperty predicate, NamedClass object){ - System.out.println(String.format("Computing direction between [%s, %s, %s]", subject, predicate, object)); - double pmi_obj_pred = getDirectedPMI(object, predicate);System.out.println("PMI(OBJECT, PREDICATE): " + pmi_obj_pred); - double pmi_pred_subj = getDirectedPMI(predicate, subject);System.out.println("PMI(PREDICATE, SUBJECT): " + pmi_pred_subj); - double pmi_subj_pred = getDirectedPMI(subject, predicate);System.out.println("PMI(SUBJECT, PREDICATE): " + pmi_subj_pred); - double pmi_pred_obj = getDirectedPMI(predicate, object);System.out.println("PMI(PREDICATE, OBJECT): " + pmi_pred_obj); - - double threshold = 2.0; - - double value = ((pmi_obj_pred + pmi_pred_subj) - (pmi_subj_pred + pmi_pred_obj)); - System.out.println("(PMI(OBJECT, PREDICATE) + PMI(PREDICATE, SUBJECT)) - (PMI(SUBJECT, PREDICATE) + PMI(PREDICATE, OBJECT)) = " + value); - - if( value > threshold){ - System.out.println(object + "---" + predicate + "--->" 
+ subject); - return -1; - } else { - System.out.println(subject + "---" + predicate + "--->" + object); - return 1; - } - } - - public Map<ObjectProperty, Integer> getMostFrequentProperties(NamedClass cls1, NamedClass cls2){ - Map<ObjectProperty, Integer> prop2Cnt = new HashMap<ObjectProperty, Integer>(); - String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?x1 a <%s>. ?x2 a <%s>. ?x1 ?p ?x2} GROUP BY ?p", cls1, cls2); - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - QuerySolution qs; - while(rs.hasNext()){ - qs = rs.next(); - ObjectProperty p = new ObjectProperty(qs.getResource("p").getURI()); - int cnt = qs.getLiteral("cnt").getInt(); - prop2Cnt.put(p, cnt); - } - return prop2Cnt; - } - - public static void main(String[] args) { - SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); - ExtractionDBCache cache = new ExtractionDBCache("cache"); - String NS = "http://dbpedia.org/ontology/"; - - PMI pmiGen = new PMI(endpoint, cache); - System.out.println(pmiGen.getDirectedPMI( - new ObjectProperty(NS + "author"), - new NamedClass(NS+ "Person"))); - - System.out.println("#########################################"); - - System.out.println(pmiGen.getDirectedPMI( - new ObjectProperty(NS + "author"), - new NamedClass(NS+ "Writer"))); - - System.out.println("#########################################"); - - System.out.println(pmiGen.getDirectedPMI( - new NamedClass(NS+ "Book"), - new ObjectProperty(NS + "author")) - ); - - System.out.println("#########################################"); - - System.out.println(pmiGen.getDirection( - new NamedClass(NS+ "Writer"), - new ObjectProperty(NS + "author"), - new NamedClass(NS+ "Book"))); - - System.out.println("#########################################"); - - System.out.println(pmiGen.getDirection( - new NamedClass(NS+ "Person"), - new ObjectProperty(NS + "starring"), - new NamedClass(NS+ "Film"))); - - 
System.out.println("#########################################"); - - System.out.println(pmiGen.getMostFrequentProperties( - new NamedClass(NS+ "Person"), - new NamedClass(NS+ "Film"))); - - System.out.println("#########################################"); - - System.out.println(pmiGen.getMostFrequentProperties( - new NamedClass(NS+ "Film"), - new NamedClass(NS+ "Actor"))); - - System.out.println("#########################################"); - - System.out.println(pmiGen.getMostFrequentProperties( - new NamedClass(NS+ "Film"), - new NamedClass(NS+ "Person"))); - - } - -} Copied: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java (from rev 3794, trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java) =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java 2012-07-18 13:08:45 UTC (rev 3797) @@ -0,0 +1,446 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.Property; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.reasoning.SPARQLReasoner; + +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; + +public class SPARQLEndpointMetrics { + + private static final Logger log = 
Logger.getLogger(SPARQLEndpointMetrics.class); + + private SparqlEndpoint endpoint; + private ExtractionDBCache cache; + private SPARQLReasoner reasoner; + + public SPARQLEndpointMetrics(SparqlEndpoint endpoint, ExtractionDBCache cache) { + this.endpoint = endpoint; + this.cache = cache; + + this.reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); + } + + /** + * Computes the directed Pointwise Mutual Information(PMI) measure. Formula: log( (f(prop, cls) * N) / (f(cls) * f(prop) ) ) + * @param cls + * @param prop + * @return + */ + public double getDirectedPMI(ObjectProperty prop, NamedClass cls){ + log.debug(String.format("Computing PMI(%s, %s)", prop, cls)); + + double classOccurenceCnt = getOccurencesInObjectPosition(cls); + double propertyOccurenceCnt = getOccurences(prop); + double coOccurenceCnt = getOccurencesPredicateObject(prop, cls); + double total = getTotalTripleCount(); + + double pmi = 0; + if(coOccurenceCnt > 0 && classOccurenceCnt > 0 && propertyOccurenceCnt > 0){ + pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); + } + log.info(String.format("PMI(%s, %s) = %f", prop, cls, pmi)); + return pmi; + } + + /** + * Computes the directed Pointwise Mutual Information(PMI) measure. 
Formula: log( (f(cls,prop) * N) / (f(cls) * f(prop) ) ) + * @param cls + * @param prop + * @return + */ + public double getDirectedPMI(NamedClass cls, Property prop){ + log.debug(String.format("Computing PMI(%s, %s)...", cls, prop)); + + double classOccurenceCnt = getOccurencesInSubjectPosition(cls); + double propertyOccurenceCnt = getOccurences(prop); + double coOccurenceCnt = getOccurencesSubjectPredicate(cls, prop); + double total = getTotalTripleCount(); + + double pmi = 0; + if(coOccurenceCnt > 0 && classOccurenceCnt > 0 && propertyOccurenceCnt > 0){ + pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); + } + log.info(String.format("PMI(%s, %s) = %f", cls, prop, pmi)); + return pmi; + } + + /** + * Computes the Pointwise Mutual Information(PMI) measure between a subject class and an object class. Formula: log( (f(subject,object) * N) / (f(subject) * f(object) ) ) + * @param subject + * @param object + * @return the PMI value, or 0 if any of the occurrence counts is not positive + */ + public double getPMI(NamedClass subject, NamedClass object){ + log.debug(String.format("Computing PMI(%s, %s)", subject, object)); + + double coOccurenceCnt = getOccurencesSubjectObject(subject, object); + double subjectOccurenceCnt = getOccurencesInSubjectPosition(subject); + double objectOccurenceCnt = getOccurencesInObjectPosition(object); + double total = getTotalTripleCount(); + + double pmi = 0; + if(coOccurenceCnt > 0 && subjectOccurenceCnt > 0 && objectOccurenceCnt > 0){ + pmi = Math.log( (coOccurenceCnt * total) / (subjectOccurenceCnt * objectOccurenceCnt) ); + } + log.info(String.format("PMI(%s, %s) = %f", subject, object, pmi)); + return pmi; + } + + /** + * Returns the direction of the given triple, computed by calculating the PMI values of each combination. + * @param subject + * @param predicate + * @param object + * @return -1 if the given triple should be reversed, else 1. 
+ */ + public int getDirection(NamedClass subject, ObjectProperty predicate, NamedClass object){ + log.info(String.format("Computing direction between [%s, %s, %s]", subject, predicate, object)); + double pmi_obj_pred = getDirectedPMI(object, predicate); + double pmi_pred_subj = getDirectedPMI(predicate, subject); + double pmi_subj_pred = getDirectedPMI(subject, predicate); + double pmi_pred_obj = getDirectedPMI(predicate, object); + + double threshold = 2.0; + + double value = ((pmi_obj_pred + pmi_pred_subj) - (pmi_subj_pred + pmi_pred_obj)); + log.info("(PMI(OBJECT, PREDICATE) + PMI(PREDICATE, SUBJECT)) - (PMI(SUBJECT, PREDICATE) + PMI(PREDICATE, OBJECT)) = " + value); + + if( value > threshold){ + log.info(object + "---" + predicate + "--->" + subject); + return -1; + } else { + log.info(subject + "---" + predicate + "--->" + object); + return 1; + } + } + + public Map<ObjectProperty, Integer> getMostFrequentProperties(NamedClass cls1, NamedClass cls2){ + Map<ObjectProperty, Integer> prop2Cnt = new HashMap<ObjectProperty, Integer>(); + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?x1 a <%s>. ?x2 a <%s>. ?x1 ?p ?x2} GROUP BY ?p", cls1, cls2); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + ObjectProperty p = new ObjectProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + prop2Cnt.put(p, cnt); + } + return prop2Cnt; + } + + /** + * Returns the number of triples with the given property as predicate and where the subject belongs to the given class. + * @param cls + * @return + */ + public int getOccurencesSubjectPredicate(NamedClass cls, Property prop){ + String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. 
?s <%s> ?o}", cls.getName(), prop.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int cnt = rs.next().getLiteral("cnt").getInt(); + return cnt; + } + + /** + * Returns the number of triples with the given property as predicate and where the object belongs to the given class. + * @param cls + * @return + */ + public int getOccurencesPredicateObject(Property prop, NamedClass cls){ + String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?o a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int cnt = rs.next().getLiteral("cnt").getInt(); + return cnt; + } + + /** + * Returns the number of triples with the first given class as subject and the second given class as object. + * @param cls + * @return + */ + public int getOccurencesSubjectObject(NamedClass subject, NamedClass object){ + String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s ?p ?o. ?o a <%s>}", subject.getName(), object.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int cnt = rs.next().getLiteral("cnt").getInt(); + return cnt; + } + + /** + * Returns the number of triples where the subject belongs to the given class. + * @param cls + * @return + */ + public int getOccurencesInSubjectPosition(NamedClass cls){ + String query = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?s a <%s>. ?s ?p ?o.}", cls.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + return classOccurenceCnt; + } + + /** + * Returns the number of triples where the object belongs to the given class. + * @param cls + * @return + */ + public int getOccurencesInObjectPosition(NamedClass cls){ + String query = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?o a <%s>. 
?s ?p ?o.}", cls.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + return classOccurenceCnt; + } + + /** + * Returns the number triples with the given property as predicate. + * @param prop + * @return + */ + public int getOccurences(Property prop){ + String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int propOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + return propOccurenceCnt; + } + + /** + * Returns the number of triples where the subject or object belongs to the given class. + * (This is not the same as computing the number of instances of the given class {@link SPARQLEndpointMetrics#getPopularity(NamedClass)}) + * @param cls + * @return + */ + public int getOccurences(NamedClass cls){ + String query = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?s a <%s>.{?s ?p1 ?o1.} UNION {?o2 ?p2 ?s} }", cls.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + return classOccurenceCnt; + } + + /** + * Returns the number of instances of the given class. + * @param cls + * @return + */ + public int getPopularity(NamedClass cls){ + String query = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?s a <%s>.{?s ?p1 ?o1.} UNION {?o2 ?p2 ?s} }", cls.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + return classOccurenceCnt; + } + + /** + * Returns the total number of triples in the endpoint. For now we return a fixed number 275494030(got from DBpedia Live 18. July 14:00). 
+ * @return + */ + public int getTotalTripleCount(){ + return 275494030; + /*String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}"); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int cnt = rs.next().getLiteral("cnt").getInt(); + return cnt;*/ + } + + public double getGoodness(NamedClass subject, ObjectProperty predicate, NamedClass object){ + + double pmi_subject_predicate = getDirectedPMI(subject, predicate); + double pmi_preciate_object = getDirectedPMI(predicate, object); + double pmi_subject_object = getPMI(subject, object); + + double goodness = pmi_subject_predicate + pmi_preciate_object + 2*pmi_subject_object; + + return goodness; + } + + public double getGoodness(Individual subject, ObjectProperty predicate, NamedClass object){ + //this is independent of the subject types + double pmi_preciate_object = getDirectedPMI(predicate, object); + + double goodness = Double.MIN_VALUE; + //get all asserted classes of subject and get the highest value + //TODO inference + Set<NamedClass> types = reasoner.getTypes(subject); + for(NamedClass type : types){ + double pmi_subject_predicate = getDirectedPMI(type, predicate); + double pmi_subject_object = getPMI(type, object); + double tmpGoodness = pmi_subject_predicate + pmi_preciate_object + 2*pmi_subject_object; + if(tmpGoodness >= goodness){ + goodness = tmpGoodness; + } + } + return goodness; + } + + public double getGoodness(NamedClass subject, ObjectProperty predicate, Individual object){ + //this is independent of the object types + double pmi_subject_predicate = getDirectedPMI(subject, predicate); + + double goodness = Double.MIN_VALUE; + //get all asserted classes of subject and get the highest value + //TODO inference + Set<NamedClass> types = reasoner.getTypes(object); + for(NamedClass type : types){ + double pmi_preciate_object = getDirectedPMI(predicate, type); + double pmi_subject_object = getPMI(subject, type); + double tmpGoodness = 
pmi_subject_predicate + pmi_preciate_object + 2*pmi_subject_object; + if(tmpGoodness >= goodness){ + goodness = tmpGoodness; + } + } + return goodness; + } + + public double getGoodnessConsideringSimilarity(NamedClass subject, ObjectProperty predicate, NamedClass object, + double subjectSim, double predicateSim, double objectSim){ + + double pmi_subject_predicate = getDirectedPMI(subject, predicate); + double pmi_preciate_object = getDirectedPMI(predicate, object); + double pmi_subject_object = getPMI(subject, object); + + double goodness = pmi_subject_predicate * subjectSim * predicateSim + + pmi_preciate_object * objectSim * predicateSim + + 2 * pmi_subject_object * subjectSim * objectSim; + + return goodness; + } + + public void precompute(){ + precompute(Collections.<String>emptySet()); + } + + public void precompute(Collection<String> namespaces){ + log.info("Precomputing..."); + long startTime = System.currentTimeMillis(); + SortedSet<NamedClass> classes = new TreeSet<NamedClass>(); + String query = "SELECT DISTINCT ?class WHERE {?s a ... [truncated message content] |
From: <lor...@us...> - 2012-07-25 10:34:00
|
Revision: 3804 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3804&view=rev Author: lorenz_b Date: 2012-07-25 10:33:43 +0000 (Wed, 25 Jul 2012) Log Message: ----------- Changed default log level to DEBUG. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-23 14:25:40 UTC (rev 3803) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-25 10:33:43 UTC (rev 3804) @@ -65,7 +65,6 @@ import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.Intersection; import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.ObjectProperty; import org.dllearner.core.owl.Thing; @@ -76,7 +75,6 @@ import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; -import org.semanticweb.HermiT.Configuration.DirectBlockingType; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -84,7 +82,6 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.shared.UnknownPropertyException; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import 
com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; @@ -92,7 +89,6 @@ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{ - enum Mode{ BEST_QUERY, BEST_NON_EMPTY_QUERY } @@ -383,7 +379,7 @@ public void learnSPARQLQueries() throws NoTemplateFoundException{ reset(); //generate SPARQL query templates - logger.info("Generating SPARQL query templates..."); + logger.debug("Generating SPARQL query templates..."); templateMon.start(); if(multiThreaded){ templates = templateGenerator.buildTemplatesMultiThreaded(question); @@ -391,15 +387,15 @@ templates = templateGenerator.buildTemplates(question); } templateMon.stop(); - logger.info("Done in " + templateMon.getLastValue() + "ms."); + logger.debug("Done in " + templateMon.getLastValue() + "ms."); relevantKeywords.addAll(templateGenerator.getUnknownWords()); if(templates.isEmpty()){ throw new NoTemplateFoundException(); } - logger.info("Templates:"); + logger.debug("Templates:"); for(Template t : templates){ - logger.info(t); + logger.debug(t); } //get the weighted query candidates @@ -407,7 +403,7 @@ sparqlQueryCandidates = new ArrayList<WeightedQuery>(); int i = 0; for(WeightedQuery wQ : generatedQueries){ - System.out.println(wQ.explain()); + logger.debug(wQ.explain()); sparqlQueryCandidates.add(wQ); if(i == maxTestedQueries){ break; @@ -506,7 +502,7 @@ } private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - logger.info("Generating SPARQL query candidates..."); + logger.debug("Generating SPARQL query candidates..."); Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @@ -527,7 +523,7 @@ Set<Allocation> allocations; for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); + logger.debug("Processing template:\n" + t.toString()); allocations = new TreeSet<Allocation>(); boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); @@ -541,9 +537,7 @@ 
Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); list.add(submit); - } else { - System.out.println("CACHE HIT"); - } + } } for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { @@ -582,7 +576,7 @@ } allocations.addAll(tmp); }*/ - System.out.println("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); + logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); Query cleanQuery = t.getQuery(); @@ -794,10 +788,8 @@ List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); for(SPARQL_Triple typeTriple : typeTriples){ String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); - System.out.println(typeURI + "---" + resourceURI); List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); for(Entry<String, Integer> property : mostFrequentProperties){ - System.out.println(property); wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); wQ.setScore(wQ.getScore() + 0.1); } @@ -859,7 +851,7 @@ } template2Queries.put(t, qList); } - logger.info("...done in "); + logger.debug("...done in "); return allQueries; } @@ -993,14 +985,14 @@ } private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){ - logger.info("Testing candidate SPARQL queries on remote endpoint..."); + logger.debug("Testing candidate SPARQL queries on remote endpoint..."); sparqlMon.start(); if(queryType == SPARQL_QueryType.SELECT){ for(WeightedQuery query : queries){ learnedPos++; List<String> results; try { - logger.info("Testing query:\n" + query); + logger.debug("Testing query:\n" + query); com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); q.setLimit(1); ResultSet rs = 
executeSelect(q.toString()); @@ -1033,7 +1025,7 @@ return; } } - logger.info("Result: " + results); + logger.debug("Result: " + results); } } catch (Exception e) { e.printStackTrace(); @@ -1043,7 +1035,7 @@ } else if(queryType == SPARQL_QueryType.ASK){ for(WeightedQuery query : queries){ learnedPos++; - logger.info("Testing query:\n" + query); + logger.debug("Testing query:\n" + query); boolean result = executeAskQuery(query.getQuery().toString()); learnedSPARQLQueries.add(query); // if(stopIfQueryResultNotEmpty && result){ @@ -1052,12 +1044,12 @@ if(stopIfQueryResultNotEmpty){ return; } - logger.info("Result: " + result); + logger.debug("Result: " + result); } } sparqlMon.stop(); - logger.info("Done in " + sparqlMon.getLastValue() + "ms."); + logger.debug("Done in " + sparqlMon.getLastValue() + "ms."); } private boolean executeAskQuery(String query){ @@ -1154,7 +1146,7 @@ } private SortedSet<Allocation> computeAllocations(Slot slot){ - logger.info("Computing allocations for slot: " + slot); + logger.debug("Computing allocations for slot: " + slot); SortedSet<Allocation> allocations = new TreeSet<Allocation>(); Index index = getIndexBySlotType(slot); @@ -1210,7 +1202,7 @@ normProminenceValues(allocations); computeScore(allocations); - logger.info("Found " + allocations.size() + " allocations for slot " + slot); + logger.debug("Found " + allocations.size() + " allocations for slot " + slot); return new TreeSet<Allocation>(allocations); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-07-23 14:25:40 UTC (rev 3803) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-07-25 10:33:43 UTC (rev 3804) @@ -145,7 +145,7 @@ internalParseMultiThreaded(parseGrammar.getDPInitTrees(), n); } - if (VERBOSE) 
logger.trace("Constructed " + derivationTrees.size() + " derivation trees.\n"); + if (VERBOSE) logger.debug("Constructed " + derivationTrees.size() + " derivation trees.\n"); return derivationTrees; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-07-23 14:25:40 UTC (rev 3803) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-07-25 10:33:43 UTC (rev 3804) @@ -102,67 +102,67 @@ m = compAdjPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJR"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/JJR"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJR"); } // m = superAdjPattern.matcher(condensedstring); // while (m.find()) { -// logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJS"); +// logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/JJS"); // condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJS"); // } m = howManyPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by how/WLEX many/WLEX"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by how/WLEX many/WLEX"); condensedstring = condensedstring.replaceFirst(m.group(1),"how/WLEX many/WLEX"); } m = howAdjPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJH"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/JJH"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJH"); } m = thesameasPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) 
logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/NNSAME"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/NNSAME"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/NNSAME"); } m = nprepPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/NPREP"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/NPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/NPREP"); } m = didPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by \"\""); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by \"\""); condensedstring = condensedstring.replaceFirst(m.group(1),""); } m = prepfrontPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by \"\""); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by \"\""); condensedstring = condensedstring.replaceFirst(m.group(1),""); } m = passivePattern1a.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(6)+"/PASSIVE"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(6)+"/PASSIVE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6)+"/PASSIVE"); } m = passivePattern1b.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(6)+m.group(7)+"/PASSIVE"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(6)+m.group(7)+"/PASSIVE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6) + m.group(7)+"/PASSIVE"); } m = passivePattern2a.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " 
by " + m.group(7)+"/PASSIVE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE"); } m = pseudopassPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/VPREP"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(7)+"/VPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/VPREP"); } m = pseudopwhPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+m.group(8)+"/VPREP"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(7)+m.group(8)+"/VPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+" "+m.group(8)+"/VPREP"); } m = saveIsThere.matcher(condensedstring); @@ -171,64 +171,64 @@ } m = passivePattern2b.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE"); } m = passpartPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/PASSPART"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/PASSPART"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/PASSPART"); } m = vpassPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASS"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASS"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASS"); } m = vpassinPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASSIN"); + if 
(VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASSIN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASSIN"); } m = gerundinPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/GERUNDIN"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/GERUNDIN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/GERUNDIN"); } m = vprepPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPREP"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"/VPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPREP"); } m = whenPattern.matcher(condensedstring); while (m.find()) { if (m.group(4).equals("VPREP")) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHENPREP"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHENPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHENPREP"); } else { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHEN"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHEN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHEN"); } } m = wherePattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHERE"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHERE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHERE"); } m = adjsPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + 
m.group(2)+"_"+m.group(3)+"/JJ"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJ"); } m = adjnounPattern.matcher(condensedstring); while (m.find()) { // if (!m.group(4).startsWith("NNP")) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN "); // } } m = adjnprepPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NPREP"); + if (VERBOSE) logger.debug("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NPREP"); } @@ -291,7 +291,7 @@ List<String> namedentities = ner.getNamedEntitites(untagged); List<String> usefulnamedentities = new ArrayList<String>(); - if (VERBOSE) logger.trace("Proposed NEs: " + namedentities); + if (VERBOSE) logger.debug("Proposed NEs: " + namedentities); // keep only longest matches (e.g. 
keep 'World of Warcraft' and forget about 'Warcraft') // containing at least one upper case letter (in order to filter out errors like 'software') @@ -309,7 +309,7 @@ } } - if (VERBOSE) logger.trace("Accepted NEs: " + usefulnamedentities); + if (VERBOSE) logger.debug("Accepted NEs: " + usefulnamedentities); // replace POS tags accordingly for (String ne : usefulnamedentities) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-07-23 14:25:40 UTC (rev 3803) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-07-25 10:33:43 UTC (rev 3804) @@ -164,7 +164,7 @@ if (UNTAGGED_INPUT) { s = pp.normalize(s); tagged = tagger.tag(s); - if (VERBOSE) logger.trace("Tagged input: " + tagged); + logger.debug("Tagged input: " + tagged); } else { tagged = s; @@ -178,7 +178,7 @@ else newtagged = pp.condenseNominals(tagged); newtagged = pp.condense(newtagged); - if (VERBOSE) logger.trace("Preprocessed: " + newtagged); + logger.debug("Preprocessed: " + newtagged); parser.parse(newtagged,g); @@ -219,10 +219,10 @@ if (!containsModuloRenaming(drses,drs)) { // // DEBUG if (VERBOSE) { - System.out.println(">>> DUDE:\n" + dude.toString()); - System.out.println("\n>>> DRS:\n"+ drs.toString()); + logger.debug(">>> DUDE:\n" + dude.toString()); + logger.debug("\n>>> DRS:\n"+ drs.toString()); for (Slot sl : slots) { - System.out.println(sl.toString()); + logger.debug(sl.toString()); } } // // @@ -310,7 +310,7 @@ if (UNTAGGED_INPUT) { s = pp.normalize(s); tagged = tagger.tag(s); - if (VERBOSE) logger.trace("Tagged input: " + tagged); + logger.debug("Tagged input: " + tagged); } else { tagged = s; @@ -324,20 +324,20 @@ else newtagged = pp.condenseNominals(tagged); newtagged = pp.condense(newtagged); - if (VERBOSE) logger.trace("Preprocessed: " 
+ newtagged); + logger.debug("Preprocessed: " + newtagged); parser.parseMultiThreaded(newtagged,g); if (parser.getDerivationTrees().isEmpty()) { parser.clear(g,parser.getTemps()); clearAgain = false; - if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); + logger.error("[Templator.java] '" + s + "' could not be parsed."); } else { try { parser.buildDerivedTreesMultiThreaded(g); } catch (ParseException e) { - if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); } } @@ -372,13 +372,11 @@ if (!containsModuloRenaming(drses,drs)) { // // DEBUG - if (VERBOSE) { - System.out.println(dude); - System.out.println(drs); + logger.debug(dude); + logger.debug(drs); for (Slot sl : slots) { - System.out.println(sl.toString()); + logger.debug(sl.toString()); } - } // // drses.add(drs); @@ -546,10 +544,10 @@ if (!containsModuloRenaming(drses,drs)) { // // DEBUG if (VERBOSE) { - System.out.println(dude); - System.out.println(drs); + logger.debug(dude); + logger.debug(drs); for (Slot sl : slots) { - System.out.println(sl.toString()); + logger.debug(sl.toString()); } } // // This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-31 10:36:17
|
Revision: 3811 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3811&view=rev Author: lorenz_b Date: 2012-07-31 10:36:11 +0000 (Tue, 31 Jul 2012) Log Message: ----------- Added synchronized POS tagger. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-30 13:54:13 UTC (rev 3810) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-31 10:36:11 UTC (rev 3811) @@ -630,7 +630,7 @@ } } - SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, cache); + SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, new ExtractionDBCache("/opt/tbsl/cache2")); for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { WeightedQuery wQ = iterator.next(); Query q = wQ.getQuery(); @@ -1009,7 +1009,7 @@ * @throws InvalidFileFormatException */ public static void main(String[] args) throws Exception { - SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java 2012-07-31 10:36:11 UTC (rev 3811) @@ -0,0 +1,10 @@ +package org.dllearner.algorithm.tbsl.nlp; + +public class SynchronizedStanfordPartOfSpeechTagger extends StanfordPartOfSpeechTagger { + + @Override + public synchronized String tag(String sentence) { + return super.tag(sentence); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-09 12:47:35
|
Revision: 3820 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3820&view=rev Author: lorenz_b Date: 2012-08-09 12:47:29 +0000 (Thu, 09 Aug 2012) Log Message: ----------- Got rid of annoying NPE. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-09 10:46:16 UTC (rev 3819) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-09 12:47:29 UTC (rev 3820) @@ -787,11 +787,11 @@ List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); for(SPARQL_Triple typeTriple : typeTriples){ String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); - List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); - for(Entry<String, Integer> property : mostFrequentProperties){ - wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); - wQ.setScore(wQ.getScore() + 0.1); - } +// List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); +// for(Entry<String, Integer> property : mostFrequentProperties){ +// wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); +// wQ.setScore(wQ.getScore() + 0.1); +// } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-08-09 10:46:16 UTC (rev 3819) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-08-09 12:47:29 UTC (rev 3820) @@ -51,7 +51,8 @@ if (clashing != null && s.type.equals(clashing)) { for (SPARQL_Triple triple : query.conditions) { if (triple.property.toString().equals("?"+s.anchor)) { - if (triple.value.toString().equals("?"+var)) return null; + if (triple.value.toString().equals("?"+var)) + return null; } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-08-09 10:46:16 UTC (rev 3819) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-08-09 12:47:29 UTC (rev 3820) @@ -382,8 +382,9 @@ try { Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); if (temp == null) {continue;} - temp = temp.checkandrefine(); + if (USE_WORDNET) { // find WordNet synonyms This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |