From: <lor...@us...> - 2011-03-28 10:20:30
|
Revision: 2737 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2737&view=rev Author: lorenz_b Date: 2011-03-28 10:20:23 +0000 (Mon, 28 Mar 2011) Log Message: ----------- Ranking of queries included. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Filter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/RatedQuery.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -4,15 +4,21 @@ import java.net.URL; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import 
java.util.Map; +import java.util.Map.Entry; import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import org.apache.log4j.Logger; import org.dllearner.algorithm.qtl.util.ModelGenerator; import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; import org.dllearner.algorithm.tbsl.search.SolrSearch; import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -38,6 +44,7 @@ private static final int TOP_K = 5; private static final String SOLR_SERVER_URL = "http://139.18.2.173:8080/apache-solr-1.4.1"; private static final int RECURSION_DEPTH = 2; + private boolean USE_LUCENE_RANKING = true; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); private ExtractionDBCache cache = new ExtractionDBCache("cache"); @@ -51,6 +58,8 @@ private String question; + + public SPARQLTemplateBasedLearner(){ resource_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_resources"); resource_index.setHitsPerPage(TOP_K); @@ -89,14 +98,19 @@ logger.info(t); } - //generate candidate SPQRL queries - List<String> possibleSPARQLQueries = getPossibleSPARQLQueries(templates); + //generate SPARQL query candidates + Set<? 
extends Query> sparqlQueryCandidates; + if(USE_LUCENE_RANKING){ + sparqlQueryCandidates = getRatedSPARQLQueryCandidates(templates); + } else { + sparqlQueryCandidates = getSPARQLQueryCandidates(templates); + } //test candidates on remote endpoint - validateAgainstRemoteEndpoint(possibleSPARQLQueries); + validateAgainstRemoteEndpoint(sparqlQueryCandidates); //test candidates on local model - validateAgainstLocalModel(possibleSPARQLQueries); + validateAgainstLocalModel(sparqlQueryCandidates); } @@ -115,21 +129,50 @@ return workingModel; } - private List<String> getPossibleSPARQLQueries(Set<Template> templates){ +// private List<String> getSPARQLQueryCandidates(Set<Template> templates){ +// logger.info("Generating candidate SPARQL queries..."); +// mon.start(); +// List<String> queries = new ArrayList<String>(); +// Query query; +// for(Template template : templates){ +// query = template.getQuery(); +// queries.add(query.toString()); +// for(Slot slot : template.getSlots()){ +// Set<String> tmp = new HashSet<String>(); +// String var = slot.getAnchor(); +// List<String> words = slot.getWords(); +// for(String uri : getCandidateURIs(slot)){ +// for(String q : queries){ +// tmp.add(q.replace("?" 
+ var, "<" + uri + ">")); +// } +// } +// if(!words.isEmpty()){ +// queries.clear(); +// queries.addAll(tmp); +// } +// } +// } +// mon.stop(); +// logger.info("Done in " + mon.getLastValue() + "ms."); +// return queries; +// } + + private Set<Query> getSPARQLQueryCandidates(Set<Template> templates){ logger.info("Generating candidate SPARQL queries..."); mon.start(); - List<String> queries = new ArrayList<String>(); - Query query; + Set<Query> queries = new HashSet<Query>(); + for(Template template : templates){ - query = template.getQuery(); - queries.add(query.toString()); + queries.add(template.getQuery()); for(Slot slot : template.getSlots()){ - Set<String> tmp = new HashSet<String>(); + Set<Query> tmp = new HashSet<Query>(); String var = slot.getAnchor(); List<String> words = slot.getWords(); - for(String uri : getCandidateURIs(slot)){ - for(String q : queries){ - tmp.add(q.replace("?" + var, "<" + uri + ">")); + for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(Query query : queries){ + Query newQuery = new Query(query); + newQuery.replaceVarWithURI(var, entry1.getKey()); + tmp.add(newQuery); } } if(!words.isEmpty()){ @@ -143,6 +186,67 @@ return queries; } + private Map<String, Float> getCandidateRatedSPARQLQueries(Set<Template> templates){ + logger.info("Generating candidate SPARQL queries..."); + mon.start(); + Map<String, Float> query2Score = new HashMap<String, Float>(); + + Query query; + for(Template template : templates){ + query = template.getQuery(); + query2Score.put(query.toString(), Float.valueOf(0)); + for(Slot slot : template.getSlots()){ + Map<String, Float> tmp = new HashMap<String, Float>(); + String var = slot.getAnchor(); + List<String> words = slot.getWords(); + for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(Entry<String, Float> entry2 : query2Score.entrySet()){ + tmp.put(entry2.getKey().replace("?" 
+ var, "<" + entry1.getKey() + ">"), Float.valueOf(entry1.getValue()+entry2.getValue())); + } + } + if(!words.isEmpty()){ + query2Score.clear(); + query2Score.putAll(tmp); + } + } + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + return query2Score; + } + + private Set<RatedQuery> getRatedSPARQLQueryCandidates(Set<Template> templates){ + logger.info("Generating candidate SPARQL queries..."); + mon.start(); + SortedSet<RatedQuery> ratedQueries = new TreeSet<RatedQuery>(); + + Query query; + for(Template template : templates){ + query = template.getQuery(); + ratedQueries.add(new RatedQuery(query, 0)); + for(Slot slot : template.getSlots()){ + Set<RatedQuery> tmp = new HashSet<RatedQuery>(); + String var = slot.getAnchor(); + List<String> words = slot.getWords(); + for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(RatedQuery rQ : ratedQueries){ + RatedQuery newRQ = new RatedQuery(rQ, rQ.getScore()); + newRQ.replaceVarWithURI(var, entry1.getKey()); + newRQ.setScore(newRQ.getScore()+entry1.getValue()); + tmp.add(newRQ); + } + } + if(!words.isEmpty()){ + ratedQueries.clear(); + ratedQueries.addAll(tmp); + } + } + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + return ratedQueries; + } + private Set<String> getCandidateURIs(Slot slot){ logger.info("Generating candidate URIs for " + slot.getWords() + "..."); mon.start(); @@ -161,10 +265,41 @@ } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("Candiate URIs: " + uris); + logger.info("Candidate URIs: " + uris); return uris; } + private Map<String, Float> getCandidateURIsWithScore(Slot slot){ + logger.info("Generating candidate URIs for " + slot.getWords() + "..."); + mon.start(); + SolrSearch index = null; + Map<String, Float> uri2Score = new HashMap<String, Float>(); + boolean sorted = false; + if(slot.getSlotType() == SlotType.CLASS){ + index = class_index; + } else if(slot.getSlotType() == 
SlotType.PROPERTY){ + index = property_index; + } else if(slot.getSlotType() == SlotType.RESOURCE){ + index = resource_index; + sorted = true; + } + for(String word : slot.getWords()){ + uri2Score.putAll(index.getResourcesWithScores("label:" + word, sorted)); + } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); + logger.info("Candidate URIs: " + uri2Score.keySet()); + return uri2Score; + } + + private void validateAgainstRemoteEndpoint(Set<? extends Query> queries){ + List<String> queryStrings = new ArrayList<String>(); + for(Query query : queries){ + queryStrings.add(query.toString()); + } + validateAgainstRemoteEndpoint(queryStrings); + } + private void validateAgainstRemoteEndpoint(List<String> queries){ logger.info("Testing candidate SPARQL queries on remote endpoint..."); mon.start(); @@ -177,21 +312,33 @@ logger.info("Done in " + mon.getLastValue() + "ms."); } + private void validateAgainstLocalModel(Set<? extends Query> queries){ + List<String> queryStrings = new ArrayList<String>(); + for(Query query : queries){ + queryStrings.add(query.toString()); + } + validateAgainstLocalModel(queryStrings); + } + private void validateAgainstLocalModel(List<String> queries){ + logger.info("Testing candidate SPARQL queries on remote endpoint..."); + mon.start(); List<String> resources = resource_index.getResources(question); Model model = getWorkingModel(resources); for(String query : queries){ - System.out.println("Testing query:\n" + query); + logger.info("Testing query:\n" + query); List<String> results = getResultFromLocalModel(query, model); - System.out.println("Result: " + results); + logger.info("Result: " + results); } + mon.stop(); + logger.info("Done in " + mon.getLastValue() + "ms."); } private List<String> getResultFromRemoteEndpoint(String query){ List<String> resources = new ArrayList<String>(); - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query + " LIMIT 1")); + ResultSet rs = 
SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query + " LIMIT 10")); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); @@ -226,5 +373,6 @@ learner.learnSPARQLQueries("Give me all soccer clubs in Premier League"); } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -321,8 +321,48 @@ System.out.println("Look at that, " + s + " has no POS tag!"); // DEBUG } } - + result = extractNominalPhrases(result); return result; } + + private static List<Pair<String,String>> extractNominalPhrases(List<Pair<String,String>> tokenPOSpairs){ + List<Pair<String,String>> test = new ArrayList<Pair<String,String>>(); + + String nounPhrase = ""; + String phraseTag = ""; + for(Pair<String,String> pair : tokenPOSpairs){ + if(pair.snd.startsWith("NNP")){ + if(phraseTag.equals("NN")){ + if(!nounPhrase.isEmpty()){ + test.add(new Pair<String, String>(phraseTag.trim(), "NN")); + nounPhrase = ""; + } + } + phraseTag = "NNP"; + nounPhrase += " " + pair.fst; + } else if(pair.snd.startsWith("NN")){ + if(phraseTag.equals("NNP")){ + if(!nounPhrase.isEmpty()){ + test.add(new Pair<String, String>(phraseTag.trim(), "NNP")); + nounPhrase = ""; + } + } + phraseTag = "NN"; + nounPhrase += " " + pair.fst; + } else { + if(!nounPhrase.isEmpty()){ + test.add(new Pair<String, String>(nounPhrase.trim(), phraseTag)); + nounPhrase = ""; + } + test.add(pair); + } + } + if(!nounPhrase.isEmpty()){ + test.add(new Pair<String, String>(nounPhrase.trim(), phraseTag)); + nounPhrase = ""; + } + + return test; + } } Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -61,11 +61,19 @@ return getResourcesWithScores(queryString, hitsPerPage); } + public Map<String, Float> getResourcesWithScores(String queryString, boolean sorted) { + return getResourcesWithScores(queryString, hitsPerPage); + } + public Map<String, Float> getResourcesWithScores(String queryString, int limit) { - return getResourcesWithScores(queryString, limit, 0); + return getResourcesWithScores(queryString, limit, 0, false); } - public Map<String, Float> getResourcesWithScores(String queryString, int limit, int offset) { + public Map<String, Float> getResourcesWithScores(String queryString, int limit, boolean sorted) { + return getResourcesWithScores(queryString, limit, 0, sorted); + } + + public Map<String, Float> getResourcesWithScores(String queryString, int limit, int offset, boolean sorted) { Map<String, Float> resource2ScoreMap = new HashMap<String, Float>(); QueryResponse response; @@ -75,9 +83,10 @@ query.setRows(hitsPerPage); query.setStart(offset); query.addField("score"); - query.addSortField("score", SolrQuery.ORDER.desc); - query.addSortField( "pagerank", SolrQuery.ORDER.desc ); - + if(sorted){ + query.addSortField("score", SolrQuery.ORDER.desc); + query.addSortField( "pagerank", SolrQuery.ORDER.desc ); + } response = server.query(query); SolrDocumentList docList = response.getResults(); lastTotalHits = (int) docList.getNumFound(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -46,7 +46,59 @@ this.limit = limit; this.offset = offset; } - + + //copy constructor + public Query(Query query){ + Set<SPARQL_Term> selTerms = new HashSet<SPARQL_Term>(); + for(SPARQL_Term term : query.getSelTerms()){ + SPARQL_Term newTerm = new SPARQL_Term(term.getName()); + newTerm.setIsVariable(term.isVariable()); + newTerm.setAggregate(term.getAggregate()); + newTerm.setOrderBy(term.getOrderBy()); + selTerms.add(newTerm); + } + this.selTerms = selTerms; + Set<SPARQL_Prefix> prefixes = new HashSet<SPARQL_Prefix>(); + for(SPARQL_Prefix prefix : query.getPrefixes()){ + SPARQL_Prefix newPrefix = new SPARQL_Prefix(prefix.getName(), prefix.getUrl()); + prefixes.add(newPrefix); + } + this.prefixes = prefixes; + Set<SPARQL_Triple> conditions = new HashSet<SPARQL_Triple>(); + for(SPARQL_Triple condition : query.getConditions()){ + SPARQL_Term variable = new SPARQL_Term(condition.getVariable().getName()); + variable.setIsVariable(condition.getVariable().isVariable()); + SPARQL_Property property = new SPARQL_Property(condition.getProperty().getName()); + property.setIsVariable(condition.getProperty().isVariable()); + property.setPrefix(condition.getProperty().getPrefix()); + SPARQL_Value value = new SPARQL_Term(condition.getValue().getName()); + value.setIsVariable(condition.getValue().isVariable()); + SPARQL_Triple newCondition = new SPARQL_Triple(variable, property, value); + conditions.add(newCondition); + } + this.conditions = conditions; + Set<SPARQL_Term> orderBy = new HashSet<SPARQL_Term>(); + for(SPARQL_Term order : query.getOrderBy()){ + SPARQL_Term newTerm = new SPARQL_Term(order.getName()); + newTerm.setIsVariable(order.isVariable()); + newTerm.setAggregate(order.getAggregate()); + newTerm.setOrderBy(order.getOrderBy()); + 
selTerms.add(newTerm); + } + //TODO add copy for filters + Set<SPARQL_Filter> filters = new HashSet<SPARQL_Filter>(); + for(SPARQL_Filter filter : query.getFilters()){ + for(SPARQL_Pair term : filter.getTerms()){ + + } + } + this.filter = filters; + + this.orderBy = orderBy; + this.limit = query.getLimit(); + this.offset = query.getOffset(); + } + public Set<Integer> getSlotInts() { Set<Integer> result = new HashSet<Integer>(); @@ -160,6 +212,10 @@ { return prefixes; } + + public Set<SPARQL_Filter> getFilters(){ + return filter; + } public void setPrefixes(Set<SPARQL_Prefix> prefixes) { @@ -267,5 +323,37 @@ { this.qt = qt; } + + public void replaceVarWithURI(String var, String uri){ + SPARQL_Value subject; + SPARQL_Value property; + SPARQL_Value object; + uri = "<" + uri + ">"; + + for(SPARQL_Triple triple : conditions){ + subject = triple.getVariable(); + property = triple.getProperty(); + object = triple.getValue(); + if(subject.isVariable()){ + if(subject.getName().equals(var)){ + subject.setName(uri); + subject.setIsVariable(false); + } + } + if(property.isVariable()){ + if(property.getName().equals(var)){ + property.setName(uri); + property.setIsVariable(false); + } + } + if(object.isVariable()){ + if(object.getName().equals(var)){ + object.setName(uri); + object.setIsVariable(false); + } + } + + } + } } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/RatedQuery.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/RatedQuery.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/RatedQuery.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -0,0 +1,54 @@ +package org.dllearner.algorithm.tbsl.sparql; + +public class RatedQuery extends Query implements Comparable<RatedQuery>{ + + private float score; + + public RatedQuery(Query query, float score){ + super(query); + this.score = score; + } + + public 
RatedQuery(float score){ + this.score = score; + } + + public float getScore() { + return score; + } + + public void setScore(float score) { + this.score = score; + } + +// @Override +// public String toString() { +// return super.toString() + "\nSCORE(" + score + ")"; +// } + + @Override + public boolean equals(Object obj) { + if(obj instanceof RatedQuery || obj == null){ + return false; + } + if(obj == this){ + return true; + } + RatedQuery other = (RatedQuery)obj; + return super.equals(other) && this.score == other.score; + } + + @Override + public int hashCode() { + return super.hashCode() + Float.valueOf(score).hashCode(); + } + + @Override + public int compareTo(RatedQuery o) { + if(o.getScore() < this.score){ + return -1; + } else if(o.getScore() > this.score){ + return 1; + } else return this.toString().compareTo(o.toString()); + } +} Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Filter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Filter.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Filter.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -40,6 +40,14 @@ terms.add(new SPARQL_Pair(term, o, t)); } + public Set<SPARQL_Pair> getTerms(){ + return terms; + } + + public boolean isAnd(){ + return and; + } + @Override public boolean equals(Object obj) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Property.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -3,8 +3,6 @@ public class SPARQL_Property extends SPARQL_Value { 
private SPARQL_Prefix prefix = null; - private boolean isVariable = false; - public SPARQL_Property(String name) { super(); @@ -17,10 +15,6 @@ this.prefix = prefix; } - public void setIsVariable(boolean b) { - isVariable = b; - } - public SPARQL_Prefix getPrefix() { return prefix; } @@ -31,7 +25,7 @@ @Override public String toString() { - if (isVariable) { + if (isVariable()) { return "?" + name; } if (prefix == null) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -5,7 +5,6 @@ SPARQL_OrderBy orderBy; SPARQL_Aggregate aggregate; SPARQL_Term as = null; - private boolean isVariable = false; public SPARQL_Term(String name) { super(name); @@ -18,7 +17,7 @@ this.name = name.replace("?","").replace("!",""); orderBy = SPARQL_OrderBy.NONE; aggregate = SPARQL_Aggregate.NONE; - isVariable = b; + setIsVariable(b); } public SPARQL_Term(String name, SPARQL_Aggregate aggregate) { @@ -28,7 +27,7 @@ public SPARQL_Term(String name, SPARQL_Aggregate aggregate,boolean b,SPARQL_Term t) { super(name); this.aggregate = aggregate; - isVariable = b; + setIsVariable(b); as = t; } @@ -37,10 +36,6 @@ this.orderBy = orderBy; } - public void setIsVariable(boolean b) { - isVariable = b; - } - @Override public boolean equals(Object obj) { if (!(obj instanceof SPARQL_Term)) return false; @@ -86,7 +81,7 @@ else return "DESC(?"+name.toLowerCase()+")"; } - if (isVariable) { + if (isVariable()) { return "?"+name.toLowerCase(); } else { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Value.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -15,6 +15,10 @@ public void setIsVariable(boolean b) { isVariable = b; } + + public boolean isVariable(){ + return isVariable; + } public SPARQL_Value(String name) { super(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -135,27 +135,6 @@ return false; } - private List<Pair<String,String>> extractNominalPhrases(List<Pair<String,String>> tokenPOSpairs){ - List<Pair<String,String>> test = new ArrayList<Pair<String,String>>(); - - String nounPhrase = ""; - String phraseTag = ""; - for(Pair<String,String> pair : tokenPOSpairs){ - if(pair.snd.startsWith("NNP")){ - if(phraseTag.equals("NN")){ - - } - phraseTag = "NNP"; - nounPhrase += " " + pair.snd; - } else if(pair.snd.startsWith("NN")){ - - } else { - test.add(pair); - } - } - - - return test; - } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -62,7 +62,7 @@ p.parse(tagged,g); if (p.getDerivationTrees().isEmpty()) { - p.clear(g,p.getTemps()); +// p.clear(g,p.getTemps()); System.out.println("'" + s + "' could not 
be parsed."); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java 2011-03-24 11:34:07 UTC (rev 2736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java 2011-03-28 10:20:23 UTC (rev 2737) @@ -3,6 +3,8 @@ import java.util.ArrayList; import java.util.List; +import scala.actors.threadpool.Arrays; + import edu.smu.tspell.wordnet.*; public class WordNet { @@ -44,6 +46,25 @@ return synonyms; } + public List<String> getHypernyms(String s) { + + List<String> hypernyms = new ArrayList<String>(); + + Synset[] synsets = database.getSynsets(s); + Synset[] hypsets = {}; + for(int i = 0; i < synsets.length; i++){ + if(synsets[i].getType() == SynsetType.NOUN){ + hypsets = ((NounSynset)synsets[i]).getHypernyms(); + } else if(synsets[i].getType() == SynsetType.VERB){ + hypsets = ((VerbSynset)synsets[i]).getHypernyms(); + } + for(Synset hypset : hypsets){ + hypernyms.addAll(Arrays.asList(hypset.getWordForms())); + } + } + return hypernyms; + } + public List<String> getAttributes(String s) { List<String> result = new ArrayList<String>(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-06-15 07:05:01
|
Revision: 2876 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2876&view=rev Author: christinaunger Date: 2011-06-15 07:04:54 +0000 (Wed, 15 Jun 2011) Log Message: ----------- [tbsl] fixed 'has' Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-06-14 20:10:31 UTC (rev 2875) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-06-15 07:04:54 UTC (rev 2876) @@ -358,14 +358,12 @@ Set<Simple_DRS_Condition> equalEqualsConditions = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { if(c.getPredicate().equals("equal") && c.getArguments().get(0).getValue().equals(c.getArguments().get(1).getValue())) { - System.out.println("Found " + c); // DEBUG equalEqualsConditions.add(c); } } for (Simple_DRS_Condition c : equalEqualsConditions) { drs.removeCondition(c); } - System.out.println("DRS: " + drs); } private boolean isUri(String arg) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-06-14 20:10:31 UTC (rev 2875) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-06-15 07:04:54 UTC (rev 2876) @@ -168,16 +168,23 @@ /* VERBS */ else if (equalsOneOf(pos,verb)) { - String 
slot = "SLOT_" + token + "/PROPERTY/" + token; - String symslot = "SLOT_" + token + "/SYMPROPERTY/" + token; - List<String> preds = wordnet.getAttributes(token); - for (Iterator<String> i = preds.iterator(); i.hasNext();) { - slot += i.next(); - symslot += i.next(); - if (i.hasNext()) { - slot += "^"; - symslot += "^"; - } + String slot; String symslot; + if (token.equals("has") || token.equals("have") || token.equals("had")) { + slot = "SLOT_" + token + "/PROPERTY/"; + symslot = "SLOT_" + token + "/SYMPROPERTY/"; + } + else { + slot = "SLOT_" + token + "/PROPERTY/" + token; + symslot = "SLOT_" + token + "/SYMPROPERTY/" + token; + List<String> preds = wordnet.getAttributes(token); + for (Iterator<String> i = preds.iterator(); i.hasNext();) { + slot += i.next(); + symslot += i.next(); + if (i.hasNext()) { + slot += "^"; + symslot += "^"; + } + } } if (pos.equals("PASSIVE")) { String[] passEntry1 = {token, Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-06-14 20:10:31 UTC (rev 2875) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-06-15 07:04:54 UTC (rev 2876) @@ -99,13 +99,13 @@ d2s.redundantEqualRenaming(drs); if (!containsModuloRenaming(drses,drs)) { - // DEBUG - System.out.println(dude); - System.out.println(drs); - for (Slot sl : slots) { - System.out.println(sl.toString()); - } - // +// // DEBUG +// System.out.println(dude); +// System.out.println(drs); +// for (Slot sl : slots) { +// System.out.println(sl.toString()); +// } +// // drses.add(drs); try { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-09-13 14:41:45
|
Revision: 3252 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3252&view=rev Author: christinaunger Date: 2011-09-13 14:41:35 +0000 (Tue, 13 Sep 2011) Log Message: ----------- [tbsl] updated BasicQueryTemplate generation Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2011-09-13 09:54:19 UTC (rev 3251) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2011-09-13 14:41:35 UTC (rev 3252) @@ -18,9 +18,6 @@ public static void main(String[] args) { - Templator templator = new Templator(); - BasicTemplator btemplator = new BasicTemplator(); - System.out.println("======= SPARQL Templator ================="); System.out.println("Running in " + MODE + " mode."); System.out.println("\nType ':q' to quit."); @@ -33,12 +30,14 @@ } if (MODE.equals("BASIC")) { + BasicTemplator btemplator = new BasicTemplator(); Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(s); for (BasicQueryTemplate temp : querytemps) { System.out.println(temp.toString()); } } else if (MODE.equals("LEIPZG")) { + Templator templator = new Templator(); Set<Template> temps 
= templator.buildTemplates(s); for (Template temp : temps) { System.out.println(temp.toString()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2011-09-13 09:54:19 UTC (rev 3251) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2011-09-13 14:41:35 UTC (rev 3252) @@ -13,6 +13,7 @@ import org.dllearner.algorithm.tbsl.sem.drs.Negated_DRS; import org.dllearner.algorithm.tbsl.sem.drs.Simple_DRS_Condition; import org.dllearner.algorithm.tbsl.sparql.BasicQueryTemplate; +import org.dllearner.algorithm.tbsl.sparql.Path; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Aggregate; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; import org.dllearner.algorithm.tbsl.sparql.SPARQL_OrderBy; @@ -69,7 +70,7 @@ } } - for (DiscourseReferent referent : drs.getDRs()) { + for (DiscourseReferent referent : drs.collectDRs()) { if (referent.isMarked()) { SPARQL_Term term = new SPARQL_Term(referent.toString().replace("?","")); term.setIsVariable(true); @@ -177,17 +178,31 @@ SPARQL_Property prop = new SPARQL_Property(predicate); prop.setIsVariable(true); - boolean noliteral = true; + boolean literal = false; if (simple.getArguments().size() > 1 && simple.getArguments().get(1).getValue().matches("\\d+")) { - noliteral = false; + literal = true; } - if (predicate.equals("p")) { - query.addConditions(simple.toString()); + if (predicate.equals("of")) { + if (simple.getArguments().size() == 2) { + Path p = new Path(); + p.setStart(simple.getArguments().get(1).getValue()); + p.setTarget(simple.getArguments().get(0).getValue()); + query.addConditions(p); + } } + if (predicate.startsWith("p")) { + if (simple.getArguments().size() == 2) { + Path p = new Path(); + 
p.setStart(simple.getArguments().get(0).getValue()); + p.setVia(simple.getPredicate()); + p.setTarget(simple.getArguments().get(1).getValue()); + query.addConditions(p); + } + } else if (predicate.equals("count")) { // COUNT(?x) AS ?c - if (noliteral) { + if (!literal) { query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, simple.getArguments().get(1).getValue())); return query; } @@ -208,28 +223,28 @@ query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.GT))); return query; } else if (predicate.equals("greaterorequal")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.GTEQ))); return query; } else if (predicate.equals("less")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.LT))); return query; } else if (predicate.equals("lessorequal")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.LTEQ))); return query; } else if (predicate.equals("maximum")) { @@ -256,7 +271,7 @@ query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new 
SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.EQ))); return query; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java 2011-09-13 09:54:19 UTC (rev 3251) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS.java 2011-09-13 14:41:35 UTC (rev 3252) @@ -151,8 +151,7 @@ } - public Set<DiscourseReferent> getDRs() { - + public Set<DiscourseReferent> getDRs() { return m_DiscourseReferents; } @@ -167,6 +166,18 @@ } return result; } + + public Set<DiscourseReferent> collectDRs() { + Set<DiscourseReferent> result = new HashSet<DiscourseReferent>(); + result.addAll(m_DiscourseReferents); + for (DRS_Condition c : m_DRS_Conditions) { + if (c.isComplexCondition()) { + result.addAll(((Complex_DRS_Condition) c).m_Restrictor.collectDRs()); + result.addAll(((Complex_DRS_Condition) c).m_Scope.collectDRs()); + } + } + return result; + } public Set<DRS_Condition> getConditions() { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java 2011-09-13 09:54:19 UTC (rev 3251) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java 2011-09-13 14:41:35 UTC (rev 3252) @@ -270,15 +270,20 @@ for (String var : variables) { String freshbase; - if (var.charAt(0) == '?') { - freshbase = "?"; + String varbase; + if (var.charAt(0) == '?') { + freshbase = "?"; + varbase = ""+var.charAt(1); } - else { + else { freshbase = ""; + varbase = ""+var.charAt(0); } - String fresh = freshbase + "v0"; - for (int i = 0; (allVariables.contains("v"+i) || 
allVariables.contains("?v"+i)); i++) { - fresh = freshbase + "v"+ (i+1); + + String fresh = freshbase + varbase + "0"; + + for (int i = 0; (allVariables.contains(varbase+i) || allVariables.contains("?"+varbase+i)); i++) { + fresh = freshbase + varbase + (i+1); } allVariables.add(fresh); dude.replaceReferent(var,fresh); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java 2011-09-13 09:54:19 UTC (rev 3251) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java 2011-09-13 14:41:35 UTC (rev 3252) @@ -10,7 +10,7 @@ Set<SPARQL_Term> selTerms; // SELECT ?x ?y Set<SPARQL_Prefix> prefixes; - Set<String> conditions; + Set<Path> conditions; Set<SPARQL_Term> orderBy; Set<SPARQL_Filter> filter; SPARQL_QueryType qt = SPARQL_QueryType.SELECT; @@ -24,7 +24,7 @@ super(); selTerms = new HashSet<SPARQL_Term>(); prefixes = new HashSet<SPARQL_Prefix>(); - conditions = new HashSet<String>(); + conditions = new HashSet<Path>(); orderBy = new HashSet<SPARQL_Term>(); filter = new HashSet<SPARQL_Filter>(); slots = new ArrayList<Slot>(); @@ -34,8 +34,8 @@ slots.add(s); } - public void addConditions(String s) { - conditions.add(s); + public void addConditions(Path p) { + conditions.add(p); } @Override @@ -61,8 +61,8 @@ retVal += "WHERE {\n"; - for (String s : conditions) { - retVal += "\t" + s + "\n"; + for (Path p : conditions) { + retVal += "\t" + p.toString() + "\n"; } for (SPARQL_Filter f : filter) Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java 
2011-09-13 14:41:35 UTC (rev 3252) @@ -0,0 +1,38 @@ +package org.dllearner.algorithm.tbsl.sparql; + +public class Path { + + String start; + String via; + String target; + + public Path() { + start = ""; + via = ""; + target = ""; + } + public Path(String s,String v,String t) { + start = s; + via = v; + target = t; + } + + public void setStart(String s) { + start = s; + } + public void setVia(String v) { + via = v; + } + public void setTarget(String t) { + target = t; + } + + public String toString() { + if (via.isEmpty()) { + return "?" + start + " -- " + "?" + target; + } + else { + return "?" + start + " -- ?" + via + " -- ?" + target; + } + } +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2011-09-13 09:54:19 UTC (rev 3251) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2011-09-13 14:41:35 UTC (rev 3252) @@ -89,13 +89,13 @@ else if (pos.equals("NPREP")) { String[] dpEntry1 = {token, "(DP (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; String[] dpEntry2 = {token, "(DP DET[det] (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + 
"]>"}; String[] npEntry = {token, "(NP " + treetoken + " DP[pobj])", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; result.add(dpEntry1); result.add(dpEntry2); result.add(npEntry); @@ -104,13 +104,13 @@ slot = "SLOT_" + tokenfluent + "/UNSPEC/" + token; String[] dpEntry1 = {token, "(DP (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(p) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>" }; + "<x,l1,<<e,t>,t>,[ l1:[ x,p | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>" }; String[] dpEntry2 = {token, "(DP DET[det] (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(p) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>" }; + "<x,l1,<<e,t>,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>" }; String[] npEntry = {token, "(NP " + treetoken + " DP[pobj])", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(p) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; result.add(dpEntry1); result.add(dpEntry2); result.add(npEntry); @@ -136,81 +136,81 @@ if (pos.equals("PASSIVE")) { String[] passEntry1 = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; String[] passEntry2 = {token, "(S DP[wh] (VP DP[dp] V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ | 
SLOT_" + token + "(p) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry1); result.add(passEntry2); } else if (pos.equals("PASSPART")) { String[] passpartEntry = {token, "(NP NP* (VP V:'" + token + "' DP[dp]))", - "<x,l1,t,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; result.add(passpartEntry); } else if (pos.equals("VPASS")) { String[] passEntry = {token, "(S DP[subj] (VP V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry); } else if (pos.equals("VPASSIN")) { String[] passEntry = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry); } else if (pos.equals("GERUNDIN")) { String[] gerundinEntry1 = {token, "(NP NP* V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; String[] gerundinEntry2 = {token, "(ADJ V:'" + token + "' DP[obj]))", - 
"<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; result.add(gerundinEntry1); result.add(gerundinEntry2); } else if (pos.equals("VPREP")) { String[] passEntry = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; String[] whEntry = {token, "(S DP[obj] (VP DP[subj] V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry); result.add(whEntry); } else if (pos.equals("VBD") || pos.equals("VBZ") || pos.equals("VBP")) { String[] vEntry = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(vEntry); } else if (pos.equals("VB")) { String[] whEntry = {token, "(S DP[obj] (VP DP[subj] V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] 
],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(whEntry); } else if (pos.equals("VBG") || pos.equals("VBN")) { String[] gerEntry = {token, "(NP NP* (VP V:'" + token + "' DP[dp]))", - "<x,l1,t,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; result.add(gerEntry); } else if (pos.equals("WHEN")) { slot = "SLOT_" + token + "/PROPERTY/" + token + "_date"; String[] whenEntry = {token, "(S DP[subj] (VP V:'" + token + "'))", - "<x,l1,t,[ l1:[ ?y | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; + "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; result.add(whenEntry); } else if (pos.equals("WHERE")) { slot = "SLOT_" + token + "/PROPERTY/" + token + "_place"; String[] whereEntry = {token, "(S DP[subj] (VP V:'" + token + "'))", - "<x,l1,t,[ l1:[ ?y | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; + "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; result.add(whereEntry); } @@ -246,7 +246,7 @@ result.add(compEntry1); String[] compEntry2 = {token, "(NP NP* (ADJ ADJ:'" + token.toLowerCase() + "' P:'than' DP[compobj]))", - "<x,l1,<e,t>,[ l1:[ p,j,i | SLOT_" + token + "(p), p(i), p(j), " + comp + "(i,j) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; + "<x,l1,<e,t>,[ l1:[ p,j,i | SLOT_" + token + "(p), p(x,i), p(y,j), " + comp + "(i,j) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; result.add(compEntry2); } /* SUPERLATIVE */ @@ -275,7 +275,7 @@ else if (equalsOneOf(pos,preps)) { String[] npAdjunct = {token, "(NP NP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; + 
"<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; result.add(npAdjunct); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-09-14 08:23:20
|
Revision: 3257 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3257&view=rev Author: christinaunger Date: 2011-09-14 08:23:09 +0000 (Wed, 14 Sep 2011) Log Message: ----------- [tbsl] repaired missing '?' in queries Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2011-09-14 07:43:41 UTC (rev 3256) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2011-09-14 08:23:09 UTC (rev 3257) @@ -14,7 +14,7 @@ public class TestFrontend { // MODE ::= BASIC | LEIPZIG - static String MODE = "BASIC"; + static String MODE = "LEIPZIG"; public static void main(String[] args) { @@ -22,6 +22,9 @@ System.out.println("Running in " + MODE + " mode."); System.out.println("\nType ':q' to quit."); + BasicTemplator btemplator = new BasicTemplator(); + Templator templator = new Templator(); + while (true) { String s = getStringFromUser("input > ").trim(); @@ -30,14 +33,12 @@ } if (MODE.equals("BASIC")) { - BasicTemplator btemplator = new BasicTemplator(); Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(s); for (BasicQueryTemplate temp : querytemps) { System.out.println(temp.toString()); } } - else if (MODE.equals("LEIPZG")) { - Templator templator = new Templator(); + else if (MODE.equals("LEIPZIG")) { Set<Template> temps = templator.buildTemplates(s); for (Template temp : temps) { System.out.println(temp.toString()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-09-14 07:43:41 UTC (rev 3256) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-09-14 08:23:09 UTC (rev 3257) @@ -231,9 +231,9 @@ SPARQL_Property prop = new SPARQL_Property(predicate); prop.setIsVariable(true); - boolean noliteral = true; + boolean literal = false; if (simple.getArguments().size() > 1 && simple.getArguments().get(1).getValue().matches("\\d+")) { - noliteral = false; + literal = true; } if (predicate.equals("count")) { @@ -247,28 +247,28 @@ query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.GT))); return query; } else if (predicate.equals("greaterorequal")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.GTEQ))); return query; } else if (predicate.equals("less")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.LT))); return query; } else if (predicate.equals("lessorequal")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.LTEQ))); return query; } 
else if (predicate.equals("maximum")) { @@ -285,19 +285,19 @@ query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),true), - new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.EQ))); return query; } if (arity == 1) { - SPARQL_Term term = new SPARQL_Term(simple.getArguments().get(0).getValue(),true); + SPARQL_Term term = new SPARQL_Term(simple.getArguments().get(0).getValue(),false); query.addCondition(new SPARQL_Triple(term,new SPARQL_Property("type",new SPARQL_Prefix("rdf","")),prop)); } else if (arity == 2) { String arg1 = simple.getArguments().get(0).getValue(); String arg2 = simple.getArguments().get(1).getValue(); - query.addCondition(new SPARQL_Triple(new SPARQL_Term(arg1,true),prop,new SPARQL_Term(arg2,true))); + query.addCondition(new SPARQL_Triple(new SPARQL_Term(arg1,false),prop,new SPARQL_Term(arg2,false))); } else if (arity > 2) { // TODO Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-09-14 07:43:41 UTC (rev 3256) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-09-14 08:23:09 UTC (rev 3257) @@ -53,7 +53,7 @@ List<String> words = new ArrayList<String>(); words.add(token); if (!pos.equals("NNP") && !pos.equals("NNPS") && !pos.equals("JJNN")) { - words.addAll(wordnet.getBestSynonyms(token)); +// words.addAll(wordnet.getBestSynonyms(token)); } String tokenfluent = token.replaceAll(" ","").replaceAll("_",""); @@ -175,15 +175,15 @@ else { slot = "SLOT_" + token + "/PROPERTY/" + token; symslot = "SLOT_" + token + "/SYMPROPERTY/" + token; - List<String> preds = wordnet.getAttributes(token); - for (Iterator<String> i = preds.iterator(); 
i.hasNext();) { - slot += i.next(); - symslot += i.next(); - if (i.hasNext()) { - slot += "^"; - symslot += "^"; - } - } +// List<String> preds = wordnet.getAttributes(token); +// for (Iterator<String> i = preds.iterator(); i.hasNext();) { +// slot += i.next(); +// symslot += i.next(); +// if (i.hasNext()) { +// slot += "^"; +// symslot += "^"; +// } +// } } if (pos.equals("PASSIVE")) { String[] passEntry1 = {token, @@ -283,13 +283,13 @@ else if (equalsOneOf(pos,adjective)) { String slot = "SLOT_" + token + "/PROPERTY/" + token; - List<String> preds = wordnet.getAttributes(token); - for (Iterator<String> i = preds.iterator(); i.hasNext();) { - slot += i.next(); - if (i.hasNext()) { - slot += "^"; - } - } +// List<String> preds = wordnet.getAttributes(token); +// for (Iterator<String> i = preds.iterator(); i.hasNext();) { +// slot += i.next(); +// if (i.hasNext()) { +// slot += "^"; +// } +// } /* ADJECTIVE */ if (pos.equals("JJ")) { String[] adjEntry = {token, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-09-14 12:58:17
|
Revision: 3261 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3261&view=rev Author: lorenz_b Date: 2011-09-14 12:58:07 +0000 (Wed, 14 Sep 2011) Log Message: ----------- Fixed bug in replacement of variables with URIs which is done during Query candidate generation. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-09-14 11:26:09 UTC (rev 3260) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-09-14 12:58:07 UTC (rev 3261) @@ -291,13 +291,13 @@ } if (arity == 1) { - SPARQL_Term term = new SPARQL_Term(simple.getArguments().get(0).getValue(),false); + SPARQL_Term term = new SPARQL_Term(simple.getArguments().get(0).getValue(),false);term.setIsVariable(true); query.addCondition(new SPARQL_Triple(term,new SPARQL_Property("type",new SPARQL_Prefix("rdf","")),prop)); } else if (arity == 2) { - String arg1 = simple.getArguments().get(0).getValue(); - String arg2 = simple.getArguments().get(1).getValue(); - query.addCondition(new SPARQL_Triple(new SPARQL_Term(arg1,false),prop,new SPARQL_Term(arg2,false))); + String arg1 = simple.getArguments().get(0).getValue();SPARQL_Term term1 = new SPARQL_Term(arg1,false);term1.setIsVariable(true); + String arg2 = simple.getArguments().get(1).getValue();SPARQL_Term term2 = new SPARQL_Term(arg2,false);term2.setIsVariable(true); + 
query.addCondition(new SPARQL_Triple(term1, prop, term2)); } else if (arity > 2) { // TODO Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-09-14 11:26:09 UTC (rev 3260) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-09-14 12:58:07 UTC (rev 3261) @@ -7,6 +7,7 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.net.URLDecoder; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -739,15 +740,17 @@ // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "Give me all books written by authors influenced by Ernest Hemingway."; - String question = "Give me all cities in Canada."; +// String question = "Give me all cities in Canada."; + + String question = "Give me all soccer clubs in Premier League?"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), Collections.<String>singletonList(""), Collections.<String>emptyList()); learner.setEndpoint(endpoint); learner.setQuestion(question); learner.learnSPARQLQueries(); - System.out.println(learner.getBestSPARQLQuery()); - System.out.println(learner.getTemplates().iterator().next().getLexicalAnswerType()); + System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); + System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-09-14 11:26:09 UTC (rev 3260) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-09-14 12:58:07 UTC (rev 3261) @@ -68,10 +68,14 @@ for(SPARQL_Triple condition : query.getConditions()){ SPARQL_Term variable = new SPARQL_Term(condition.getVariable().getName()); variable.setIsVariable(condition.getVariable().isVariable()); + variable.setIsURI(condition.getVariable().isURI); SPARQL_Property property = new SPARQL_Property(condition.getProperty().getName()); property.setIsVariable(condition.getProperty().isVariable()); property.setPrefix(condition.getProperty().getPrefix()); - SPARQL_Value value = new SPARQL_Term(condition.getValue().getName()); + SPARQL_Term value = new SPARQL_Term(condition.getValue().getName()); + if(condition.getValue() instanceof SPARQL_Term){ + value.setIsURI(((SPARQL_Term)condition.getValue()).isURI); + } value.setIsVariable(condition.getValue().isVariable()); SPARQL_Triple newCondition = new SPARQL_Triple(variable, property, value); conditions.add(newCondition); @@ -357,8 +361,8 @@ } public void replaceVarWithPrefixedURI(String var, String uri){ - SPARQL_Value subject; - SPARQL_Value property; + SPARQL_Term subject; + SPARQL_Property property; SPARQL_Value object; for(SPARQL_Triple triple : conditions){ @@ -369,6 +373,7 @@ if(subject.getName().equals(var)){ subject.setName(uri); subject.setIsVariable(false); + subject.setIsURI(true); } } if(property.isVariable()){ @@ -381,6 +386,9 @@ if(object.getName().equals(var)){ object.setName(uri); object.setIsVariable(false); + if(object instanceof SPARQL_Term){ + ((SPARQL_Term) object).setIsURI(true); + } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-09-14 11:26:09 UTC (rev 3260) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-09-14 12:58:07 UTC (rev 3261) @@ -69,6 +69,10 @@ { return name.startsWith("'") || name.matches("\\d+"); } + + public void setIsURI(boolean isURI){ + this.isURI = isURI; + } @Override public String toString() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <seb...@us...> - 2011-09-23 08:33:51
|
Revision: 3284 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3284&view=rev Author: sebastianwtr Date: 2011-09-23 08:33:44 +0000 (Fri, 23 Sep 2011) Log Message: ----------- [tbsl] added some new packages and functions for the QALD project at Uni Bielefeld Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Levenshtein.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Parsing.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxHandler.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxParser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java (rev 0) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,84 @@ +package org.dllearner.algorithm.tbsl.exploration.Sparql; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; + +import org.dllearner.algorithm.tbsl.exploration.sax.MySaxParser; + +public class GetRessourcePropertys { + + public ArrayList<String> getPropertys(String element) throws IOException{ + sendServerPropertyRequest(element); + return do_parsing("answer_property"); + + + } + /** + * Get an uri and saves the properties of this resource + * @param vergleich + * @throws IOException + */ + private void sendServerPropertyRequest(String vergleich) throws IOException{ + + String bla123 = vergleich; + //to get only the name + bla123=bla123.replace("http://dbpedia.org/resource/Category:",""); + bla123=bla123.replace("http://dbpedia.org/resource/",""); + vergleich=bla123; + String tmp="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3A"+vergleich+"+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3A"+vergleich+"+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}%0D%0A&format=text%2Fhtml&debug=on&timeout="; + URL url; + InputStream is; + InputStreamReader isr; + BufferedReader r; + String str; + String result=""; + + try { + url = new URL(tmp); + is = url.openStream(); + isr = new InputStreamReader(is); + r = new BufferedReader(isr); + do { + str = r.readLine(); + if (str != null) + result=result+str; + } while (str != null); + } catch (MalformedURLException e) { + 
System.out.println("Must enter a valid URL"); + } catch (IOException e) { + System.out.println("Can not connect"); + } + + FileWriter w = new FileWriter("answer_property"); + w.write(result); + w.close(); + } + + + private static ArrayList<String> do_parsing(String datei) + { + ArrayList<String> indexObject = null; + + File file = new File(datei); + try + { + MySaxParser parser = new MySaxParser(file); + parser.parse(); + indexObject = parser.getIndexObject(); + } + catch (Exception ex) + { + System.out.println("Another exciting error occured: " + ex.getLocalizedMessage()); + } + + return indexObject; + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlFilter.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlFilter.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,54 @@ +package org.dllearner.algorithm.tbsl.exploration.Sparql; + +import java.util.HashMap; + +public class SparqlFilter { + public void create_Sparql_who(String string,HashMap<String, String> hm){ + // string=string.replaceAll("?", ""); + String[] array= string.split(" "); + //schauen ob erstes Wort ein who ist! 
+ if(array[0].contains("who")){ + int position=0; + for(int i=0;i<array.length;i++){ + if (array[i].contains("of")){ + position=i; + break; + } + } + String vor_of=array[position-1]; + String nach_of=""; + //wenn nur ein element hinter of kommt + if(array.length-position-1==1){ + nach_of=array[position+1]; + } + else{ + for(int i=position+1; i<array.length;i++){ + //nach_of=nach_of+array[i]+" "; + nach_of=(nach_of.concat(array[i])).concat(" "); + } + + //letztes leerzeichen loeschen + nach_of = nach_of.substring(0, nach_of.length()-1); + } + String uri_vor_of=" "; + String uri_nach_of=" "; + + uri_vor_of=hm.get(vor_of); + uri_nach_of=hm.get(nach_of); + if(uri_vor_of!=null && uri_nach_of!=null){ + uri_nach_of=uri_nach_of.replace("Category:", ""); + uri_nach_of=uri_nach_of.replace("category:", ""); + + + String anfrage=null; + anfrage="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>select ?x where { <"+uri_nach_of+"> <"+uri_vor_of+"> ?x.}"; + + } + else{ + //System.out.println("Nothing to do"); + } + + } + + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,364 @@ +package org.dllearner.algorithm.tbsl.exploration.Sparql; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Set; + +import net.didion.jwnl.JWNLException; + +import org.dllearner.algorithm.tbsl.nlp.WordNet; +import org.dllearner.algorithm.tbsl.sparql.BasicQueryTemplate; +import 
org.dllearner.algorithm.tbsl.templator.BasicTemplator; +import org.dllearner.algorithm.tbsl.templator.Templator; + + + +public class SparqlObject { + //global Variable dict + + //start counting with 0 + static int iteration_deept=1; + static WordNet wordnet; + BasicTemplator btemplator; + Templator templator; + + /*Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(line); + for (BasicQueryTemplate temp : querytemps) { + System.out.println(temp.toString()); + } + + */ + //Konstruktor + public SparqlObject() throws MalformedURLException{ + wordnet = new WordNet(); + System.out.println("Loading SPARQL Templator"); + btemplator = new BasicTemplator(); + templator = new Templator(); + System.out.println("Loading SPARQL Templator Done\n"); + } + + + public void create_Sparql_query(String string,HashMap<String, String> hm) throws JWNLException{ + // string=string.replaceAll("?", ""); + String[] array= string.split(" "); + String teststring=""; + /*Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries("Who is the Mayor of Berlin?"); + for (BasicQueryTemplate temp : querytemps) { + //System.out.println(temp.toString()); + teststring=teststring.concat(temp.toString()); + } + + System.out.println("##################"); + System.out.println(teststring);*/ + + /** + * Cluster function + */ + /* + int length=array.length; + int [] result_array= new int[length]; + for(int p =0;p<length;p++){ + result_array[p]=0; + } + int zaehler=1; + + //looking for max 3 word as one index + for(int z=length-1;z>=0;z=z-1){ + if(z-2>=0){ + String tmp1 = array[z]; + String tmp2 = array[z-1]; + String tmp3 = array[z-2]; + + String tmpstring3=(((tmp3.concat(" ")).concat(tmp2)).concat(" ")).concat(tmp1); + String tmpstring2=(tmp2.concat(" ")).concat(tmp1); + String tmpstring1=tmp1; + + //always looking for the "biggest" match + if(hm.get(tmpstring3)!=null){ + result_array[z]=zaehler; + result_array[z-1]=zaehler; + result_array[z-2]=zaehler; + zaehler++; + } + else{ + 
if(hm.get(tmpstring2)!=null){ + result_array[z]=zaehler; + result_array[z-1]=zaehler; + zaehler++; + } + else{ + if(hm.get(tmpstring1)!=null){ + result_array[z]=zaehler; + zaehler++; + } + } + } + + } + else{ + if(z-1>=0){ + String tmp1 = array[z]; + String tmp2 = array[z-1]; + + String tmpstring2=(tmp2.concat(" ")).concat(tmp1); + String tmpstring1=tmp1; + + //always looking for the "biggest" match + + if(hm.get(tmpstring2)!=null){ + result_array[z]=zaehler; + result_array[z-1]=zaehler; + zaehler++; + } + else{ + if(hm.get(tmpstring1)!=null){ + result_array[z]=zaehler; + zaehler++; + } + } + } + if(z==0){ + if(hm.get(array[z])!=null){ + result_array[z]=zaehler; + zaehler++; + } + } + } + } + + System.out.println("###### Cluster ######"); + for(int p =0;p<length;p++){ + System.out.println(result_array[p]); + } + System.out.println("######"); + */ + + //look, if the first word is a who! + if(array[0].contains("who")){ + int position=0; + for(int i=0;i<array.length;i++){ + if (array[i].contains("of")){ + position=i; + break; + } + } + String vor_of=array[position-1]; + String nach_of=""; + //if there is only one element after of + if(array.length-position-1==1){ + nach_of=array[position+1]; + } + else{ + for(int i=position+1; i<array.length;i++){ + nach_of=(nach_of.concat(array[i])).concat(" "); + } + + //delete last emty space + nach_of = nach_of.substring(0, nach_of.length()-1); + } + String uri_vor_of=" "; + String uri_nach_of=" "; + + uri_vor_of=hm.get(vor_of); + uri_nach_of=hm.get(nach_of); + if(uri_vor_of!=null && uri_nach_of!=null){ + uri_nach_of=uri_nach_of.replace("Category:", ""); + uri_nach_of=uri_nach_of.replace("category:", ""); + + String anfrage; + anfrage="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>select ?x where { <"+uri_nach_of+"> <"+uri_vor_of+"> ?x.}"; + + //if there is no answer from the server, start searching with wordnet + String result=""; + result=sendServerQuestionRequest(anfrage); + if(result!="noanswer"){ + 
System.out.println(result); + } + else{ + long startTime = System.currentTimeMillis(); + + System.out.println("Get Propertys of "+nach_of); + + //contains uri AND string, every second is the string + ArrayList<String> properties = new ArrayList<String>(); + GetRessourcePropertys property = new GetRessourcePropertys(); + try { + //using uri now, not the string + properties=property.getPropertys(hm.get(nach_of)); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + System.out.println("Start Iterating Wordnet with "+vor_of+" and deept of "+iteration_deept); + ArrayList<String> semantics=new ArrayList<String>(); + ArrayList<String> tmp_semantics=new ArrayList<String>(); + ArrayList<String> result_SemanticsMatchProperties=new ArrayList<String>(); + semantics.add(vor_of); + tmp_semantics=semantics; + for(int i=0;i<=iteration_deept;i++){ + + try { + tmp_semantics=getSemantics(tmp_semantics); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + + } + //each word only one time + for(String k : tmp_semantics){ + if(!semantics.contains(k)) semantics.add(k); + } + + } + long endTime = System.currentTimeMillis(); + System.out.println("Getting Properties and Semantics took "+(endTime-startTime) +" ms\n"); + + //TODO: Try, if it works, if you use only one loop: (b.lowerCase).contains(properties.get(h)) + for(int h=1;h<properties.size()-2;h=h+2){ + for(String b : semantics){ + //System.out.println(properties.get(h)); + //System.out.println(b); + if(properties.get(h).contains(b.toLowerCase())){ + if(!result_SemanticsMatchProperties.contains(properties.get(h))) + result_SemanticsMatchProperties.add(properties.get(h)); + } + } + } + for(String b : result_SemanticsMatchProperties){ + vor_of=b.toLowerCase(); + uri_vor_of=hm.get(vor_of); + if(uri_vor_of!=null){ + anfrage="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>select ?x where { <"+uri_nach_of+"> <"+uri_vor_of+"> ?x.}"; + 
System.out.println("Answer with the property \" " + b + "\" :\n"+sendServerQuestionRequest(anfrage)); + } + } + long endTime2 = System.currentTimeMillis(); + System.out.println("Getting Properties, Semantics and Answer from server took "+(endTime2-startTime) +" ms"); + } + } + + } + + } + + private static ArrayList<String> getSemantics (ArrayList<String> semantics) throws IOException, JWNLException { + ArrayList<String> result = new ArrayList<String>(); + for(String id :semantics){ + List<String> array =wordnet.getRelatedNouns(id); + for(String i:array){ + if(!result.contains(i))result.add(i); + } + + + } + return result; + } + + + + + private String sendServerQuestionRequest(String query){ + //SPARQL-Endpoint of Semantic Computing Group + String tmp="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query="+createServerRequest(query)+"&format=text%2Fhtml&debug=on&timeout="; + URL url; + InputStream is; + InputStreamReader isr; + BufferedReader r; + String str=""; + String result=""; + + try { + url = new URL(tmp); + is = url.openStream(); + isr = new InputStreamReader(is); + r = new BufferedReader(isr); + int counter=0; + do { + str = r.readLine(); + if (str != null){ + result=result.concat(str); + counter=counter+1;} + } while (str != null); + + if(result.isEmpty()) System.out.println("HALOSHSS"); + //TODO:if counter = 5 or less, there is an empty answer from the Server! Still to Verify! 
+ if(counter<=5){ + System.out.println("Empty Answer from Server"); + return "noanswer"; + } + } catch (MalformedURLException e) { + System.out.println("Must enter a valid URL"); + } catch (IOException e) { + System.out.println("Can not connect"); + } + + + + return createAnswer(result); + } + + + private String createAnswer(String string){ + string=string.replace("table",""); + string=string.replace("<tr>", ""); + string=string.replace("</tr>", ""); + string=string.replace("</>",""); + string=string.replace("<th>l</th>",""); + string=string.replace("<th>x</th>",""); + string=string.replace("< class=\"sparql\" border=\"1\">",""); + string=string.replace("\n",""); + string=string.replace(" ",""); + string=string.replace("</td>",""); + string=string.replace("<td>",""); + return string; + + } + + + private String createServerRequest(String query){ + String anfrage=null; + anfrage=removeSpecialKeys(query); + anfrage=anfrage.replace("<","<"); + anfrage=anfrage.replace("%gt;",">"); + anfrage=anfrage.replace("&","&"); + //anfrage=anfrage.replaceAll("#>","%23%3E%0D%0A%"); + anfrage=anfrage.replace("#","%23"); + anfrage=anfrage.replace(" ","+"); + anfrage=anfrage.replace("/","%2F"); + anfrage=anfrage.replace(":","%3A"); + anfrage=anfrage.replace("?","%3F"); + anfrage=anfrage.replace("$","%24"); + //anfrage=anfrage.replaceAll("F>+","F%3E%0D%0A"); + anfrage=anfrage.replace(">","%3E"); + anfrage=anfrage.replace("<","%3C"); + anfrage=anfrage.replace("\"","%22"); + anfrage=anfrage.replace("\n","%0D%0A%09"); + anfrage=anfrage.replace("%%0D%0A%09","%09"); + anfrage=anfrage.replace("=","%3D"); + anfrage=anfrage.replace("@","%40"); + anfrage=anfrage.replace("&","%26"); + anfrage=anfrage.replace("(","%28"); + anfrage=anfrage.replace(")","%29"); + anfrage=anfrage.replace("%3E%0D%0A%25","%3E"); + //anfrage=anfrage.replaceAll("\n",".%0D%0A%09"); + return anfrage; + } + + private String removeSpecialKeys(String query){ + query=query.replace("\\",""); + //query=query.replaceAll("\a",""); 
+ query=query.replace("\b",""); + query=query.replace("\f",""); + query=query.replace("\r",""); + query=query.replace("\t",""); + // query=query.replaceAll("\v",""); + return query; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Levenshtein.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Levenshtein.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Levenshtein.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,60 @@ +package org.dllearner.algorithm.tbsl.exploration.exploration_main; + + +import java.lang.Math; +import java.math.BigDecimal; + +public class Levenshtein { + + + //http://de.wikipedia.org/wiki/Levenshtein-Distanz + public double nld(String orig, String eing){ + int result = diff(orig,eing); + int length=Math.max(orig.length(),eing.length()); + + + //if distance between both is zero, then the NLD must be one + if(result==0 ){ + return 1; + } + else{ + BigDecimal m = new BigDecimal(result); + BigDecimal n = new BigDecimal(length); + + BigDecimal c = new BigDecimal(0); + c=m.divide(n, 5, BigDecimal.ROUND_FLOOR); + + return c.doubleValue(); + } + + } + + public int diff(String orig, String eing) { + + int matrix[][] = new int[orig.length() + 1][eing.length() + 1]; + for (int i = 0; i < orig.length() + 1; i++) { + matrix[i][0] = i; + } + for (int i = 0; i < eing.length() + 1; i++) { + matrix[0][i] = i; + } + for (int a = 1; a < orig.length() + 1; a++) { + for (int b = 1; b < eing.length() + 1; b++) { + int right = 0; + if (orig.charAt(a - 1) != eing.charAt(b - 1)) { + right = 1; + } + int mini = matrix[a - 1][b] + 1; + if (matrix[a][b - 1] + 1 < mini) { + mini = matrix[a][b - 1] + 1; + } + if (matrix[a - 1][b - 1] + right < mini) { + mini = matrix[a - 1][b - 1] + right; + } + matrix[a][b] = mini; + } + } + + return 
matrix[orig.length()][eing.length()]; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Parsing.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Parsing.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Parsing.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,19 @@ +package org.dllearner.algorithm.tbsl.exploration.exploration_main; +import java.util.HashMap; + + +public class Parsing { + public static void do_parsing(HashMap<String, String> hm, String string){ + String [] array = string.split(" "); + + for(String name : hm.values()){ + //System.err.println(name); + for(String inhalt : array){ + if(name.equals(inhalt)){ + System.out.println("Super " + inhalt); + } + } + } + + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,144 @@ +package org.dllearner.algorithm.tbsl.exploration.exploration_main; +import java.io.BufferedReader; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URL; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.util.*; + +import net.didion.jwnl.JWNLException; + +import org.dllearner.algorithm.tbsl.exploration.Sparql.SparqlObject; +import org.dllearner.algorithm.tbsl.exploration.sax.ParseXmlHtml; + +/* + * + * As you need more than 512 MB 
Ram, increase usable RAM for Java + * in Eclipse Run -> RunConfigurations -> Arguments -> VM Arguments -> -Xmx1024m + */ + +// Sax example from http://www.bennyn.de/programmierung/java/java-xml-sax-parser.html + +/* + * + * eins:http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fx+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fx+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout= + * zwei:http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fc+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fc+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout= + * + */ +public class exploration_main { + + private static HashMap<String, String> hm = new HashMap<String, String>(); + private static String qaldEntity2="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fc+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fc+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout="; + private static String qaldEntity1="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fx+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fx+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout="; + /** + * @param args + * @throws IOException + * @throws JWNLException + * @throws InterruptedException + */ + public static void main(String[] args) throws IOException, JWNLException, InterruptedException { + + /** + * Do the starting initializing stuff + */ + long startInitTime = System.currentTimeMillis(); + + System.out.println("Start Indexing"); + + //For testing! 
+ hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/ressource/sparql_zwei",hm); + hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/ressource/sparql_eins",hm); + + + /* + * For real use! + */ + /* hm=ParseXmlHtml.parse_xml((getEntity(qaldEntity2,"/tmp/qaldEntity2")),hm); + System.out.println("Entity2 done"); + hm=ParseXmlHtml.parse_xml((getEntity(qaldEntity1,"/tmp/qaldEntity1")),hm); + System.out.println("Entity1 done");*/ + System.out.println("Done with indexing\n"); + System.out.println("Start generating Wordnet Dictionary"); + SparqlObject sparql = new SparqlObject(); + System.out.println("Generating Wordnet Dictionary Done"); + long stopInitTime = System.currentTimeMillis(); + System.out.println("Time for Initialising "+(stopInitTime-startInitTime)+" ms"); + + boolean schleife=true; + while(schleife==true){ + BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); + String line; + try { + System.out.println("\n\n"); + System.out.println("Please enter a Question:"); + line = in.readLine(); + if(line.contains("quit")){ + schleife=false; + System.out.println("Bye!"); + } + if(line.contains("text")&& schleife==true){ + TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + + + System.out.println("Please enter Path of txt. 
File:"); + line=in.readLine(); + + //Start Time measuring + long startTime = System.currentTimeMillis(); + String s=""; + BufferedReader in_file = new BufferedReader(new InputStreamReader(new FileInputStream(line))); + int anzahl=0; + while( null != (s = in_file.readLine()) ) { + System.out.println(s); + anzahl++; + //get each line and send it to the parser + s=s.replace("?",""); + sparql.create_Sparql_query(s.toLowerCase(),hm); + } + long timeNow = System.currentTimeMillis(); + long diff = timeNow-startTime; + + System.out.println("Time for "+anzahl+" questions = "+diff+" ms."); + + } + else if(schleife==true){ + long startTime = System.currentTimeMillis(); + line=line.replace("?",""); + /* Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(line); + for (BasicQueryTemplate temp : querytemps) { + System.out.println(temp.toString()); + }*/ + sparql.create_Sparql_query(line.toLowerCase(),hm); + long endTime= System.currentTimeMillis(); + System.out.println("\n The complete answering of the Question took "+(endTime-startTime)+" ms"); + } + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + } + + + + private static String getEntity(String query, String name) throws IOException, InterruptedException{ + + // String query_complete="wget "+"\""+query+"\""+" -O "+"\""+name+"\""; + URL url = new URL(query); + ReadableByteChannel rbc = Channels.newChannel(url.openStream()); + //System.out.println(rbc.toString()); + FileOutputStream fos = new FileOutputStream(name); + //max 200MB = 209715200 Byte + fos.getChannel().transferFrom(rbc, 0, 209715200 ); + + + return name; + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java (rev 0) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,131 @@ +package org.dllearner.algorithm.tbsl.exploration.exploration_main; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; + +import org.dllearner.algorithm.tbsl.exploration.sax.MySaxParser; + + +/* + * + * wget "http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3ABerlin+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3ABerlin+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}&format=text%2Fhtml&debug=on&timeout=" -O bla.txt + + */ +public class test_vergleich { + + public String DoVergleich(String suchbegriff, String vergleich) throws IOException{ + String ergebnis_string=""; + //sendServerRequest(vergleich); + sendServerRequest_new(vergleich); + ergebnis_string=do_parsing("answer",suchbegriff); + + return ergebnis_string; + + + } + + private void sendServerRequest(String vergleich) throws IOException{ + String tmp="wget -O answer \"http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3ABerlin+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3A"+vergleich+"+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}%0D%0A&format=text%2Fhtml&debug=on&timeout=\""; + //System.out.println(tmp); + Process p = Runtime.getRuntime().exec(tmp); + try { + p.waitFor(); + } catch 
(InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + private void sendServerRequest_new(String vergleich) throws IOException{ + String tmp="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3ABerlin+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3A"+vergleich+"+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}%0D%0A&format=text%2Fhtml&debug=on&timeout="; + URL url; + InputStream is; + InputStreamReader isr; + BufferedReader r; + String str; + String result=""; + + try { + url = new URL(tmp); + is = url.openStream(); + isr = new InputStreamReader(is); + r = new BufferedReader(isr); + do { + str = r.readLine(); + if (str != null) + result=result+str; + } while (str != null); + } catch (MalformedURLException e) { + System.out.println("Must enter a valid URL"); + } catch (IOException e) { + System.out.println("Can not connect"); + } + + FileWriter w = new FileWriter("answer"); + w.write(result); + w.close(); + } + + + private static String do_parsing(String datei, String suchbergriff) + { + ArrayList<String> indexObject = null; + String ergebnis_uri=""; + double zwischenwert=0; + double tmp=0; + Levenshtein levenshtein = new Levenshtein(); + + File file = new File(datei); + try + { + MySaxParser parser = new MySaxParser(file); + parser.parse(); + indexObject = parser.getIndexObject(); + for (int i = 1; i < indexObject.size(); i=i+2) + { + System.out.println((indexObject.get(i)).toLowerCase()); + tmp = levenshtein.nld(suchbergriff.toLowerCase(), (indexObject.get(i)).toLowerCase()); + System.out.println(tmp); + System.out.println("######"); + + String ergebnis_string; + if(tmp==1.0){ + zwischenwert=tmp; + System.out.println(tmp); + System.out.println("YEAH!!!!"); + ergebnis_string=indexObject.get(i); + + 
ergebnis_uri=indexObject.get(i-1); + System.out.println(ergebnis_uri); + i=indexObject.size(); + break; + } + if(tmp>zwischenwert){ + zwischenwert=tmp; + System.out.println(tmp); + ergebnis_string=indexObject.get(i); + + ergebnis_uri=indexObject.get(i-1); + System.out.println(ergebnis_uri); + } + } + indexObject.clear(); + + } + catch (Exception ex) + { + System.out.println("Another exciting error occured: " + ex.getLocalizedMessage()); + } + + return ergebnis_uri; + } + + + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxHandler.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxHandler.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxHandler.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,77 @@ +package org.dllearner.algorithm.tbsl.exploration.sax; + +import java.util.ArrayList; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +public class MySaxHandler extends DefaultHandler +{ + + private StringBuffer buffer; + private boolean buffering; + private ArrayList<String> indexObject; + + public MySaxHandler() + { + this.buffer = null; + this.buffering = false; + indexObject = new ArrayList<String>(); + } + + @Override + public void startDocument() throws SAXException + { + this.buffer = new StringBuffer(""); + } + + @Override + public void startElement(String namespaceURI, String localName, String tagName, Attributes attributes) throws SAXException + { + String tag = tagName; + //name=td + if (tag.equals("td")) + { + this.buffering = true; + } + } + + @Override + public void endElement(String namespaceURI, String localName, String tagName) throws SAXException + { + String tag = tagName; + String tagValue = null; + + //name=td + if (tag.equals("td")) + { + tagValue = this.buffer.toString(); + 
this.buffering = false; + this.buffer = new StringBuffer(); + } + + parseValue(tagValue); + } + + @Override + public void characters(char chars[], int start, int length) + { + if (this.buffering) + { + this.buffer = this.buffer.append(chars, start, length); + } + } + + private void parseValue(String value) + { + if (value != null) + { + this.indexObject.add(value); + } + } + + public ArrayList<String> getIndexObject() + { + return this.indexObject; + } +} \ No newline at end of file Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxParser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxParser.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxParser.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,51 @@ +package org.dllearner.algorithm.tbsl.exploration.sax; + + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +public class MySaxParser +{ + private URL url; + private ArrayList<String> indexObject; + + public MySaxParser() + { + super(); + } + + public MySaxParser(File file) throws MalformedURLException + { + this.url = file.toURI().toURL(); + } + + public void parse() throws ParserConfigurationException, SAXException, IOException + { + // Initialize SAX Parser: + SAXParserFactory factory = SAXParserFactory.newInstance(); + SAXParser parser = factory.newSAXParser(); + XMLReader reader = parser.getXMLReader(); + // Create SAX Handler: + MySaxHandler handler = new MySaxHandler(); + reader.setContentHandler(handler); + // Parse XML file: + 
InputSource input = new InputSource(url.openStream()); + reader.parse(input); + // Get the result: + this.indexObject = handler.getIndexObject(); + } + + public ArrayList<String> getIndexObject() + { + return this.indexObject; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,36 @@ +package org.dllearner.algorithm.tbsl.exploration.sax; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; + +public class ParseXmlHtml { + public static HashMap<String, String> parse_xml(String dateiname, HashMap<String, String> hm) + { + ArrayList<String> indexObject = null; + File file = new File(dateiname); + try + { + MySaxParser parser = new MySaxParser(file); + parser.parse(); + indexObject = parser.getIndexObject(); + /*for (int i = 0; i < indexObject.size(); i++) + { + hm.put((indexObject.get(i+1)).toLowerCase(), indexObject.get(i)); + }*/ + for (int i = 1; i < indexObject.size(); i=i+2) + { + hm.put((indexObject.get(i)).toLowerCase(), indexObject.get(i-1)); + } + indexObject.clear(); + + } + catch (Exception ex) + { + System.out.println("Another exciting error occured: " + ex.getLocalizedMessage()); + } + return hm; + } + + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-09-29 08:47:21
|
Revision: 3291 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3291&view=rev Author: lorenz_b Date: 2011-09-29 08:47:15 +0000 (Thu, 29 Sep 2011) Log Message: ----------- Integrated new indexes in learning process. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-09-28 18:15:59 UTC (rev 3290) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-09-29 08:47:15 UTC (rev 3291) @@ -26,7 +26,9 @@ import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; +import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; import org.dllearner.algorithm.tbsl.search.SolrSearch; +import org.dllearner.algorithm.tbsl.search.ThresholdSlidingSolrSearch; import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; @@ -129,20 +131,22 @@ private void init(Options options){ String resourcesIndexUrl = options.fetch("solr.resources.url"); String resourcesIndexSearchField = options.fetch("solr.resources.searchfield"); - resource_index = new SolrSearch(resourcesIndexUrl, resourcesIndexSearchField); + resource_index = new ThresholdSlidingSolrSearch(resourcesIndexUrl, resourcesIndexSearchField, 1.0, 0.1); String classesIndexUrl = 
options.fetch("solr.classes.url"); String classesIndexSearchField = options.fetch("solr.classes.searchfield"); - class_index = new SolrSearch(classesIndexUrl, classesIndexSearchField); + class_index = new ThresholdSlidingSolrSearch(classesIndexUrl, classesIndexSearchField, 1.0, 0.1); String propertiesIndexUrl = options.fetch("solr.properties.url"); String propertiesIndexSearchField = options.fetch("solr.properties.searchfield"); - property_index = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField); + SolrSearch labelBasedPropertyIndex = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField); String boaPatternIndexUrl = options.fetch("solr.boa.properties.url"); String boaPatternIndexSearchField = options.fetch("solr.boa.properties.searchfield"); - boa_pattern_property_index = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField); + SolrSearch patternBasedPropertyIndex = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField); + property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); + int maxIndexResults = Integer.parseInt(options.fetch("solr.query.limit"), 10); maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); @@ -507,41 +511,18 @@ words = pruneList(slot.getWords()); } - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(String word : words){ - tmp = new TreeSet<String>(new StringSimilarityComparator(word)); - uris = uriCache.get(word); - index = boa_pattern_property_index; - if(uris == null){ - uris = index.getResources(word); - uriCache.put(word, uris); - } - index = property_index; - if(uris.size() < 10){ - uris.addAll(index.getResources(word)); - } - if(uris.size() < 10){ - uris.addAll(index.getResources("" + word + "~0.8")); - } - tmp.addAll(uris); - sortedURIs.addAll(tmp); - tmp.clear(); + for(String word : words){ + tmp = new TreeSet<String>(new 
StringSimilarityComparator(word)); + uris = uriCache.get(word); + + if(uris == null){ + uris = index.getResources(word, 5); + uriCache.put(word, uris); } - } else { - for(String word : words){ - tmp = new TreeSet<String>(new StringSimilarityComparator(word)); - uris = uriCache.get(word); - if(uris == null){ - uris = index.getResources(word); - uriCache.put(word, uris); - } - if(uris.size() < 10){ - uris.addAll(index.getResources("" + word + "~0.7")); - } - tmp.addAll(uris); - sortedURIs.addAll(tmp); - tmp.clear(); - } + + tmp.addAll(uris); + sortedURIs.addAll(tmp); + tmp.clear(); } slot2URI.put(slot, sortedURIs); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-09-28 18:15:59 UTC (rev 3290) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-09-29 08:47:15 UTC (rev 3291) @@ -54,6 +54,10 @@ @Override public List<String> getResources(String queryString, int limit, int offset) { + return findResources(queryString, limit, offset); + } + + protected List<String> findResources(String queryString, int limit, int offset){ List<String> resources = new ArrayList<String>(); QueryResponse response; try { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-09-28 18:15:59 UTC (rev 3290) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-09-29 08:47:15 UTC (rev 3291) @@ -28,12 +28,14 @@ double threshold = 1; - while(resources.size() < limit){ - resources.addAll(getResources(queryString + "~" + threshold, limit - resources.size())); 
+ String queryWithThreshold = queryString; + while(resources.size() < limit && threshold >= minThreshold){ + if(threshold < 1){ + queryWithThreshold = queryString + "~" + threshold; + } + System.out.println(queryWithThreshold); + resources.addAll(findResources(queryWithThreshold, limit - resources.size(), 0)); threshold -= step; - if(threshold < minThreshold){ - break; - } } @@ -42,4 +44,4 @@ -} +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-09-30 07:17:24
|
Revision: 3294 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3294&view=rev Author: lorenz_b Date: 2011-09-30 07:17:18 +0000 (Fri, 30 Sep 2011) Log Message: ----------- Added comparator into separate class. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/StringSimilarityComparator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-09-30 06:57:38 UTC (rev 3293) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-09-30 07:17:18 UTC (rev 3294) @@ -1,17 +1,12 @@ package org.dllearner.algorithm.tbsl.learning; -import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.IOException; -import java.net.MalformedURLException; import java.net.URL; -import java.net.URLDecoder; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -37,7 +32,7 @@ import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Prefixes; -import org.dllearner.algorithm.tbsl.util.Similarity; +import org.dllearner.algorithm.tbsl.util.StringSimilarityComparator; import org.dllearner.core.ComponentInitException; import org.dllearner.core.Oracle; import org.dllearner.core.SparqlQueryLearningAlgorithm; @@ -53,9 +48,6 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import 
com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.sparql.vocabulary.FOAF; -import com.hp.hpl.jena.vocabulary.RDF; -import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -575,29 +567,6 @@ return pruned; } - class StringSimilarityComparator implements Comparator<String>{ - private String s; - - public StringSimilarityComparator(String s) { - this.s = s; - } - - @Override - public int compare(String s1, String s2) { - - double sim1 = Similarity.getSimilarity(s, s1); - double sim2 = Similarity.getSimilarity(s, s2); - - if(sim1 < sim2){ - return 1; - } else if(sim1 > sim2){ - return -1; - } else { - return s1.compareTo(s2); - } - } - - } private SolrSearch getIndexBySlotType(Slot slot){ SolrSearch index = null; @@ -756,9 +725,9 @@ // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "Give me all books written by authors influenced by Ernest Hemingway."; -// String question = "Give me all cities in Canada."; + String question = "Give me the highest mountain in Germany"; - String question = "Give me all books written by authors influenced by Ernest Hemingway."; +// String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), Collections.<String>singletonList(""), Collections.<String>emptyList()); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/StringSimilarityComparator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/StringSimilarityComparator.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/StringSimilarityComparator.java 2011-09-30 07:17:18 UTC 
(rev 3294) @@ -0,0 +1,27 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.Comparator; + +public class StringSimilarityComparator implements Comparator<String>{ + private String s; + + public StringSimilarityComparator(String s) { + this.s = s; + } + + @Override + public int compare(String s1, String s2) { + + double sim1 = Similarity.getSimilarity(s, s1); + double sim2 = Similarity.getSimilarity(s, s2); + + if(sim1 < sim2){ + return 1; + } else if(sim1 > sim2){ + return -1; + } else { + return s1.compareTo(s2); + } + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/StringSimilarityComparator.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-10-12 10:34:59
|
Revision: 3299 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3299&view=rev Author: christinaunger Date: 2011-10-12 10:34:52 +0000 (Wed, 12 Oct 2011) Log Message: ----------- [tbsl] fixed small bugs in template generation and DRS2SPARQL conversion Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-10-11 08:51:56 UTC (rev 3298) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-10-12 10:34:52 UTC (rev 3299) @@ -246,38 +246,38 @@ } else if (predicate.equals("greater")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( - new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.GT))); return query; } else if (predicate.equals("greaterorequal")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( - new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.GTEQ))); return query; } else if (predicate.equals("less")) { query.addFilter(new SPARQL_Filter( new SPARQL_Pair( - new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.LT))); return query; } else if (predicate.equals("lessorequal")) { 
query.addFilter(new SPARQL_Filter( new SPARQL_Pair( - new SPARQL_Term(simple.getArguments().get(0).getValue(),true), + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), SPARQL_PairType.LTEQ))); return query; } else if (predicate.equals("maximum")) { - query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(),true)); + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(),false)); query.addOrderBy(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_OrderBy.DESC)); query.setLimit(1); return query; } else if (predicate.equals("minimum")) { - query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(),true)); + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(),false)); query.addOrderBy(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_OrderBy.ASC)); query.setLimit(1); return query; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-10-11 08:51:56 UTC (rev 3298) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-10-12 10:34:52 UTC (rev 3299) @@ -116,7 +116,7 @@ // build pairs <String,POStag> from tagged Hashtable<String,String> postable = new Hashtable<String,String>(); for (String st : newtagged.split(" ")) { - postable.put(st.substring(0,st.indexOf("/")),st.substring(st.indexOf("/")+1));; + postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; } // @@ -136,11 +136,11 @@ if (!containsModuloRenaming(drses,drs)) { // // DEBUG -// System.out.println(dude); -// System.out.println(drs); -// for (Slot sl : slots) { -// System.out.println(sl.toString()); -// } + System.out.println(dude); + 
System.out.println(drs); + for (Slot sl : slots) { + System.out.println(sl.toString()); + } // // drses.add(drs); @@ -155,21 +155,23 @@ if (!slot.getWords().isEmpty()) { word = slot.getWords().get(0); - pos = postable.get(word.replace(" ","_")); + pos = postable.get(word.toLowerCase().replace(" ","_")); POS wordnetpos = null; - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } List<String> strings = new ArrayList<String>(); - if (wordnetpos.equals(POS.ADJECTIVE)) { + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { strings = wordnet.getAttributes(word); } @@ -177,9 +179,11 @@ newwords.add(word); newwords.addAll(strings); - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + if (wordnetpos != null) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } } if (newwords.isEmpty()) { newwords.add(slot.getWords().get(0)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-10-26 09:04:10
|
Revision: 3322 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3322&view=rev Author: lorenz_b Date: 2011-10-26 09:03:57 +0000 (Wed, 26 Oct 2011) Log Message: ----------- Some changes to get fuzzy search working. Added some methods to Tagger for POS Tagger evaluation. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 08:41:40 
UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -27,6 +27,7 @@ import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -48,6 +49,7 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -67,7 +69,7 @@ private Ranking ranking; private boolean useRemoteEndpointValidation; private boolean stopIfQueryResultNotEmpty; - private int maxTestedQueriesPerTemplate; + private int maxTestedQueriesPerTemplate = 50; private int maxQueryExecutionTimeInSeconds; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); @@ -88,7 +90,7 @@ private Map<String, List<String>> classesURICache; private Map<String, List<String>> propertiesURICache; - private Map<String, List<String>> learnedSPARQLQueries; + private Map<String, Object> learnedSPARQLQueries; private Set<Template> templates; private Collection<Query> sparqlQueryCandidates; private Map<Template, Collection<? extends Query>> template2Queries; @@ -120,6 +122,13 @@ templateGenerator = new Templator(); } + /* + * Only for Evaluation useful. 
+ */ + public void setUseIdealTagger(boolean value){ + templateGenerator.setUNTAGGED_INPUT(!value); + } + private void init(Options options){ String resourcesIndexUrl = options.fetch("solr.resources.url"); String resourcesIndexSearchField = options.fetch("solr.resources.searchfield"); @@ -127,8 +136,14 @@ String classesIndexUrl = options.fetch("solr.classes.url"); String classesIndexSearchField = options.fetch("solr.classes.searchfield"); - class_index = new ThresholdSlidingSolrSearch(classesIndexUrl, classesIndexSearchField, 1.0, 0.1); + SolrSearch dbpediaClassIndex = new SolrSearch(classesIndexUrl, classesIndexSearchField); + String yagoClassesIndexUrl = options.fetch("solr.yago.classes.url"); + String yagoClassesIndexSearchField = options.fetch("solr.yago.classes.searchfield"); + SolrSearch yagoClassIndex = new SolrSearch(yagoClassesIndexUrl, yagoClassesIndexSearchField); + + class_index = new ThresholdSlidingSolrSearch(dbpediaClassIndex);// new HierarchicalSolrSearch(dbpediaClassIndex, yagoClassIndex); + String propertiesIndexUrl = options.fetch("solr.properties.url"); String propertiesIndexSearchField = options.fetch("solr.properties.searchfield"); SolrSearch labelBasedPropertyIndex = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField); @@ -137,8 +152,12 @@ String boaPatternIndexSearchField = options.fetch("solr.boa.properties.searchfield"); SolrSearch patternBasedPropertyIndex = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField); - property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); + //first BOA pattern then label based +// property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); + //first label based then BOA pattern + property_index = new HierarchicalSolrSearch(labelBasedPropertyIndex, patternBasedPropertyIndex); + int maxIndexResults = Integer.parseInt(options.fetch("solr.query.limit"), 10); maxQueryExecutionTimeInSeconds = 
Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); @@ -191,7 +210,7 @@ } private void reset(){ - learnedSPARQLQueries = new HashMap<String, List<String>>(); + learnedSPARQLQueries = new HashMap<String, Object>(); resourcesURICache = new HashMap<String, List<String>>(); classesURICache = new HashMap<String, List<String>>(); propertiesURICache = new HashMap<String, List<String>>(); @@ -417,13 +436,13 @@ SPARQL_Prefix prefix = null; uriCandidates = getCandidateURIsSortedBySimilarity(slot); for(String uri : uriCandidates){ - for(Entry<String, String> uri2prefix : prefixMap.entrySet()){ - if(uri.startsWith(uri2prefix.getKey())){ - prefix = new SPARQL_Prefix(uri2prefix.getValue(), uri2prefix.getKey()); - uri = uri.replace(uri2prefix.getKey(), uri2prefix.getValue() + ":"); - break; - } - } +// for(Entry<String, String> uri2prefix : prefixMap.entrySet()){ +// if(uri.startsWith(uri2prefix.getKey())){ +// prefix = new SPARQL_Prefix(uri2prefix.getValue(), uri2prefix.getKey()); +// uri = uri.replace(uri2prefix.getKey(), uri2prefix.getValue() + ":"); +// break; +// } +// } for(Query query : queries){ if(slot.getSlotType() == SlotType.SYMPROPERTY){ Query reversedQuery = new Query(query); @@ -635,30 +654,57 @@ private void validateAgainstRemoteEndpoint(Collection<? 
extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); + SPARQL_QueryType queryType = SPARQL_QueryType.SELECT; for(Query query : queries){ + if(query.getQt() == SPARQL_QueryType.ASK){ + queryType = SPARQL_QueryType.ASK; + } else if(query.getQt() == SPARQL_QueryType.SELECT){ + queryType = SPARQL_QueryType.SELECT; + } queryStrings.add(query.toString()); } - validateAgainstRemoteEndpoint(queryStrings); + validateAgainstRemoteEndpoint(queryStrings, queryType); } - private void validateAgainstRemoteEndpoint(List<String> queries){ + private void validateAgainstRemoteEndpoint(List<String> queries, SPARQL_QueryType queryType){ logger.info("Testing candidate SPARQL queries on remote endpoint..."); mon.start(); - for(String query : queries){ - logger.info("Testing query:\n" + query); - List<String> results = getResultFromRemoteEndpoint(query); - if(!results.isEmpty()){ - learnedSPARQLQueries.put(query, results); - if(stopIfQueryResultNotEmpty){ + if(queryType == SPARQL_QueryType.SELECT){ + for(String query : queries){ + logger.info("Testing query:\n" + query); + List<String> results = getResultFromRemoteEndpoint(query); + if(!results.isEmpty()){ + learnedSPARQLQueries.put(query, results); + if(stopIfQueryResultNotEmpty){ + return; + } + } + logger.info("Result: " + results); + } + } else if(queryType == SPARQL_QueryType.ASK){ + for(String query : queries){ + logger.info("Testing query:\n" + query); + boolean result = executeAskQuery(query); + learnedSPARQLQueries.put(query, result); + if(stopIfQueryResultNotEmpty && result){ return; } + logger.info("Result: " + result); } - logger.info("Result: " + results); } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); } + private boolean executeAskQuery(String query){ + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + for(String uri : endpoint.getDefaultGraphURIs()){ + qe.addDefaultGraph(uri); + } + boolean ret = qe.execAsk(); + return ret; + } + private void 
validateAgainstLocalModel(Collection<? extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); for(Query query : queries){ @@ -686,7 +732,11 @@ private List<String> getResultFromRemoteEndpoint(String query){ List<String> resources = new ArrayList<String>(); try { - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query + " LIMIT 10")); + String queryString = query; + if(!query.contains("LIMIT") && !query.contains("ASK")){ + queryString = query + " LIMIT 10"; + } + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, queryString)); QuerySolution qs; String projectionVar; while(rs.hasNext()){ @@ -724,8 +774,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); -// String question = "Give me all books written by authors influenced by Ernest Hemingway."; - String question = "Give me all European Capitals!"; + String question = "Who wrote the book The pillars of the Earth?"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -7,6 +7,7 @@ import java.util.regex.Pattern; import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.nlp.DBpediaSpotlightNER; import org.dllearner.algorithm.tbsl.nlp.LingPipeNER; import org.dllearner.algorithm.tbsl.nlp.NER; import 
org.dllearner.algorithm.tbsl.templator.Templator; @@ -24,6 +25,7 @@ USE_NER = n; if (USE_NER) { ner = new LingPipeNER(true); //not case sensitive best solution? +// ner = new DBpediaSpotlightNER(); } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -3,12 +3,15 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import opennlp.tools.postag.POSModel; import opennlp.tools.postag.POSTaggerME; import opennlp.tools.util.Sequence; +import com.aliasi.tag.Tagging; + public class ApachePartOfSpeechTagger implements PartOfSpeechTagger{ private POSTaggerME tagger; @@ -41,6 +44,11 @@ tokenizer = new ApacheTokenizer(); } + + @Override + public String getName() { + return "Apache Open NLP POS Tagger"; + } @Override public String tag(String sentence) { @@ -49,6 +57,13 @@ return convert2TaggedSentence(tokens, tags); } + + public List<String> getTags(String sentence){ + String[] tokens = tokenizer.tokenize(sentence); + String[] tags = tagger.tag(tokens); + + return Arrays.asList(tags); + } @Override public List<String> tagTopK(String sentence) { @@ -61,6 +76,14 @@ return taggedSentences; } + @Override + public Tagging<String> getTagging(String sentence){ + String[] tokens = tokenizer.tokenize(sentence); + String[] tags = tagger.tag(tokens); + + return new Tagging<String>(Arrays.asList(tokens), Arrays.asList(tags)); + } + private String convert2TaggedSentence(String[] words, String[] tags){ StringBuilder sb = new StringBuilder(); for(int i = 0; i < words.length; i++){ Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -48,15 +48,17 @@ } rd.close(); JSONObject json = new JSONObject(sb.toString()); - JSONArray array = json.getJSONArray("Resources"); - JSONObject entityObject; - for(int i = 0; i < array.length(); i++){ - entityObject = array.getJSONObject(i); - System.out.println("Entity: " + entityObject.getString("@surfaceForm")); - System.out.println("DBpedia URI: " + entityObject.getString("@URI")); - System.out.println("Types: " + entityObject.getString("@types")); - namedEntities.add(entityObject.getString("@surfaceForm")); - + if(!json.isNull("Resources")){ + JSONArray array = json.getJSONArray("Resources"); + JSONObject entityObject; + for(int i = 0; i < array.length(); i++){ + entityObject = array.getJSONObject(i); + System.out.println("Entity: " + entityObject.getString("@surfaceForm")); + System.out.println("DBpedia URI: " + entityObject.getString("@URI")); + System.out.println("Types: " + entityObject.getString("@types")); + namedEntities.add(entityObject.getString("@surfaceForm")); + + } } } catch (MalformedURLException e) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -42,6 +42,11 @@ } @Override + public String getName() { + return "LingPipe 
POS Tagger"; + } + + @Override public String tag(String sentence) { com.aliasi.tokenizer.Tokenizer tokenizer = IndoEuropeanTokenizerFactory.INSTANCE.tokenizer(sentence.toCharArray(), 0, sentence.length()); // Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(cs,0,cs.length); @@ -66,5 +71,16 @@ } return taggedSentences; } + + @Override + public Tagging<String> getTagging(String sentence) { + com.aliasi.tokenizer.Tokenizer tokenizer = IndoEuropeanTokenizerFactory.INSTANCE.tokenizer(sentence.toCharArray(), 0, sentence.length()); +// Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(cs,0,cs.length); + String[] tokens = tokenizer.tokenize(); + List<String> tokenList = Arrays.asList(tokens); + Tagging<String> tagging = tagger.tag(tokenList); + + return tagging; + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -2,10 +2,17 @@ import java.util.List; +import com.aliasi.tag.Tagging; + public interface PartOfSpeechTagger { + String getName(); + String tag(String sentence); List<String> tagTopK(String sentence); + + Tagging<String> getTagging(String sentence); + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -4,9 +4,12 @@ import java.io.StringReader; import java.net.URISyntaxException; 
import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import com.aliasi.tag.Tagging; + import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.ling.TaggedWord; import edu.stanford.nlp.tagger.maxent.MaxentTagger; @@ -29,6 +32,11 @@ e.printStackTrace(); } } + + @Override + public String getName() { + return "Stanford POS Tagger"; + } @Override public String tag(String sentence) { @@ -53,5 +61,46 @@ public List<String> tagTopK(String sentence) { return Collections.singletonList(tag(sentence)); } + + public List<String> getTags(String sentence){ + List<String> tags = new ArrayList<String>(); + + ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); + + StringReader reader = new StringReader(sentence); + List<List<HasWord>> text = MaxentTagger.tokenizeText(reader); + + if (text.size() == 1) { + tagged = tagger.tagSentence(text.get(0)); + } + + for(TaggedWord tW : tagged){ + tags.add(tW.tag()); + } + + return tags; + } + + @Override + public Tagging<String> getTagging(String sentence){ + ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); + + StringReader reader = new StringReader(sentence); + List<List<HasWord>> text = MaxentTagger.tokenizeText(reader); + + if (text.size() == 1) { + tagged = tagger.tagSentence(text.get(0)); + } + + List<String> tokenList = new ArrayList<String>(); + List<String> tagList = new ArrayList<String>(); + + for(TaggedWord tW : tagged){ + tokenList.add(tW.word()); + tagList.add(tW.tag()); + } + + return new Tagging<String>(tokenList, tagList); + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -8,6 +8,8 @@ import 
org.annolab.tt4j.TreeTaggerException; import org.annolab.tt4j.TreeTaggerWrapper; +import com.aliasi.tag.Tagging; + public class TreeTagger implements PartOfSpeechTagger { TreeTaggerWrapper<String> tt; @@ -40,6 +42,17 @@ } return ""; } + + @Override + public String getName() { + return "Tree Tagger"; + } + + @Override + public Tagging<String> getTagging(String sentence) { + // TODO Auto-generated method stub + return null; + } @Override public List<String> tagTopK(String sentence) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -8,6 +8,9 @@ import net.didion.jwnl.JWNLException; import net.didion.jwnl.data.IndexWord; import net.didion.jwnl.data.POS; +import net.didion.jwnl.data.Pointer; +import net.didion.jwnl.data.PointerTarget; +import net.didion.jwnl.data.PointerType; import net.didion.jwnl.data.PointerUtils; import net.didion.jwnl.data.Synset; import net.didion.jwnl.data.Word; @@ -52,6 +55,24 @@ return synonyms; } + public List<String> getSisterTerms(POS pos, String s){ + List<String> sisterTerms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) +// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); + if(iw != null){ + Synset[] synsets = iw.getSenses();System.out.println(synsets[0]); + PointerTarget[] pointerArr = synsets[0].getTargets(); + System.out.println(pointerArr); + } + + } catch (JWNLException e) { + e.printStackTrace(); + } + return sisterTerms; + } + public List<String> getAttributes(String s) { List<String> result = new ArrayList<String>(); @@ -78,6 +99,7 @@ public static void main(String[] 
args) { System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); + System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); } /** Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -41,6 +41,14 @@ this(solrServerURL); this.searchField = searchField; } + + public String getServerURL() { + return server.getBaseURL(); + } + + public String getSearchField() { + return searchField; + } @Override public List<String> getResources(String queryString) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -1,13 +1,17 @@ package org.dllearner.algorithm.tbsl.search; +import java.text.NumberFormat; import java.util.ArrayList; import java.util.List; +import java.util.Locale; public class ThresholdSlidingSolrSearch extends SolrSearch { private double minThreshold = 0.8; private double step = 0.1; + private NumberFormat format = NumberFormat.getInstance(Locale.GERMAN); + public ThresholdSlidingSolrSearch(String solrServerURL) { super(solrServerURL); } @@ -22,26 +26,29 @@ this.step = step; } + public ThresholdSlidingSolrSearch(SolrSearch search){ + super(search.getServerURL(), search.getSearchField()); + } + + @Override public List<String> getResources(String queryString, int limit, 
int offset) { List<String> resources = new ArrayList<String>(); + double threshold = 1; String queryWithThreshold = queryString; while(resources.size() < limit && threshold >= minThreshold){ if(threshold < 1){ - queryWithThreshold = queryString + "~" + threshold; + queryWithThreshold = queryString + "~" + format.format(threshold); } resources.addAll(findResources(queryWithThreshold, limit - resources.size(), 0)); threshold -= step; } - return resources; } - - } \ No newline at end of file Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -49,6 +49,7 @@ //copy constructor public Query(Query query){ + this.qt = query.getQt(); Set<SPARQL_Term> selTerms = new HashSet<SPARQL_Term>(); for(SPARQL_Term term : query.getSelTerms()){ SPARQL_Term newTerm = new SPARQL_Term(term.getName()); @@ -56,6 +57,7 @@ newTerm.setIsURI(newTerm.isURI); newTerm.setAggregate(term.getAggregate()); newTerm.setOrderBy(term.getOrderBy()); + newTerm.setAlias(term.getAlias()); selTerms.add(newTerm); } this.selTerms = selTerms; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -73,6 +73,14 @@ public void setIsURI(boolean isURI){ this.isURI = isURI; } + + public String getAlias() { + return alias; + } + + public void setAlias(String alias) { + this.alias = alias; + } @Override public 
String toString() { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -39,10 +39,12 @@ } public List<String> getLexicalAnswerType(){ - String variable = query.getAnswerTypeVariable(); - for(Slot slot : slots){ - if(slot.getAnchor().equals(variable)){ - return slot.getWords(); + if(query.getQt() == SPARQL_QueryType.SELECT){ + String variable = query.getAnswerTypeVariable(); + for(Slot slot : slots){ + if(slot.getAnchor().equals(variable)){ + return slot.getWords(); + } } } return null; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -16,10 +16,9 @@ import org.dllearner.algorithm.tbsl.ltag.parser.LTAG_Lexicon_Constructor; import org.dllearner.algorithm.tbsl.ltag.parser.Parser; import org.dllearner.algorithm.tbsl.ltag.parser.Preprocessor; -import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sem.drs.DRS; import org.dllearner.algorithm.tbsl.sem.drs.UDRS; @@ -63,8 +62,8 @@ g = 
LTAG_Constructor.construct(grammarFiles); -// tagger = new StanfordPartOfSpeechTagger(); - tagger = new ApachePartOfSpeechTagger(); + tagger = new StanfordPartOfSpeechTagger(); +// tagger = new ApachePartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; @@ -93,6 +92,7 @@ } else { tagged = s; + s = extractSentence(tagged); } String newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); @@ -137,11 +137,11 @@ if (!containsModuloRenaming(drses,drs)) { // // DEBUG -// System.out.println(dude); -// System.out.println(drs); -// for (Slot sl : slots) { -// System.out.println(sl.toString()); -// } + System.out.println(dude); + System.out.println(drs); + for (Slot sl : slots) { + System.out.println(sl.toString()); + } // // drses.add(drs); @@ -186,6 +186,9 @@ newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); } } + if(newwords.isEmpty()){ + + } if (newwords.isEmpty()) { newwords.add(slot.getWords().get(0)); } @@ -250,5 +253,23 @@ } return false; } + + private String extractSentence(String taggedSentence){ + int pos = taggedSentence.indexOf("/"); + while(pos != -1){ + String first = taggedSentence.substring(0, pos); + int endPos = taggedSentence.substring(pos).indexOf(" "); + if(endPos == -1){ + endPos = taggedSentence.substring(pos).length(); + } + String rest = taggedSentence.substring(pos + endPos); + + taggedSentence = first + rest; + pos = taggedSentence.indexOf("/"); + + } + return taggedSentence; + + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-10-26 13:49:04
|
Revision: 3326 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3326&view=rev Author: lorenz_b Date: 2011-10-26 13:48:52 +0000 (Wed, 26 Oct 2011) Log Message: ----------- Added wrapper class for SOLR search result. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -22,6 +22,8 @@ import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; +import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; +import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; import org.dllearner.algorithm.tbsl.search.SolrSearch; import org.dllearner.algorithm.tbsl.search.ThresholdSlidingSolrSearch; import 
org.dllearner.algorithm.tbsl.sparql.Query; @@ -33,6 +35,7 @@ import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Prefixes; +import org.dllearner.algorithm.tbsl.util.SolrQueryResultStringSimilarityComparator; import org.dllearner.algorithm.tbsl.util.StringSimilarityComparator; import org.dllearner.core.ComponentInitException; import org.dllearner.core.Oracle; @@ -86,9 +89,9 @@ private Oracle oracle; - private Map<String, List<String>> resourcesURICache; - private Map<String, List<String>> classesURICache; - private Map<String, List<String>> propertiesURICache; + private Map<String, SolrQueryResultSet> resourcesURICache; + private Map<String, SolrQueryResultSet> classesURICache; + private Map<String, SolrQueryResultSet> propertiesURICache; private Map<String, Object> learnedSPARQLQueries; private Set<Template> templates; @@ -211,9 +214,9 @@ private void reset(){ learnedSPARQLQueries = new HashMap<String, Object>(); - resourcesURICache = new HashMap<String, List<String>>(); - classesURICache = new HashMap<String, List<String>>(); - propertiesURICache = new HashMap<String, List<String>>(); + resourcesURICache = new HashMap<String, SolrQueryResultSet>(); + classesURICache = new HashMap<String, SolrQueryResultSet>(); + propertiesURICache = new HashMap<String, SolrQueryResultSet>(); template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); } @@ -337,10 +340,10 @@ Set<Query> tmp = new HashSet<Query>(); String var = slot.getAnchor(); List<String> words = slot.getWords(); - for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ for(Query query : queries){ Query newQuery = new Query(query); - newQuery.replaceVarWithURI(var, entry1.getKey()); + newQuery.replaceVarWithURI(var, item.getUri()); tmp.add(newQuery); } } @@ -368,9 +371,9 @@ Map<String, Float> tmp = new HashMap<String, Float>(); String var = slot.getAnchor(); List<String> words = slot.getWords(); - for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ for(Entry<String, Float> entry2 : query2Score.entrySet()){ - tmp.put(entry2.getKey().replace("?" + var, "<" + entry1.getKey() + ">"), Float.valueOf(entry1.getValue()+entry2.getValue())); + tmp.put(entry2.getKey().replace("?" 
+ var, "<" + item.getUri() + ">"), item.getScore() + entry2.getValue()); } } if(!words.isEmpty()){ @@ -400,11 +403,11 @@ Set<RatedQuery> tmp = new HashSet<RatedQuery>(); String var = slot.getAnchor(); List<String> words = slot.getWords(); - for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ for(RatedQuery rQ : ratedQueries){ RatedQuery newRQ = new RatedQuery(rQ, rQ.getScore()); - newRQ.replaceVarWithURI(var, entry1.getKey()); - newRQ.setScore(newRQ.getScore()+entry1.getValue()); + newRQ.replaceVarWithURI(var, item.getUri()); + newRQ.setScore(newRQ.getScore() + item.getScore()); tmp.add(newRQ); } } @@ -508,10 +511,10 @@ //get the appropriate index based on slot type SolrSearch index = getIndexBySlotType(slot); //get the appropriate cache for URIs to avoid redundant queries to index - Map<String, List<String>> uriCache = getCacheBySlotType(slot); + Map<String, SolrQueryResultSet> uriCache = getCacheBySlotType(slot); - SortedSet<String> tmp; - List<String> uris; + SortedSet<SolrQueryResultItem> tmp; + SolrQueryResultSet rs; //prune the word list only when slot type is not RESOURCE List<String> words; @@ -523,16 +526,18 @@ } for(String word : words){ - tmp = new TreeSet<String>(new StringSimilarityComparator(word)); - uris = uriCache.get(word); + tmp = new TreeSet<SolrQueryResultItem>(new SolrQueryResultStringSimilarityComparator(word)); + rs = uriCache.get(word); - if(uris == null){ - uris = index.getResources(word, 5); - uriCache.put(word, uris); + if(rs == null){ + rs = index.getResourcesWithScores(word, 50); + uriCache.put(word, rs); } - tmp.addAll(uris); - sortedURIs.addAll(tmp); + tmp.addAll(rs.getItems()); + for(SolrQueryResultItem item : tmp){ + sortedURIs.add(item.getUri()); + } tmp.clear(); } @@ -600,8 +605,8 @@ return index; } - private Map<String, List<String>> getCacheBySlotType(Slot slot){ - Map<String, List<String>> cache = null; + private 
Map<String, SolrQueryResultSet> getCacheBySlotType(Slot slot){ + Map<String, SolrQueryResultSet> cache = null; SlotType type = slot.getSlotType(); if(type == SlotType.CLASS){ cache = classesURICache; @@ -613,7 +618,7 @@ return cache; } - private Map<String, Float> getCandidateURIsWithScore(Slot slot){ + private SolrQueryResultSet getCandidateURIsWithScore(Slot slot){ logger.info("Generating candidate URIs for " + slot.getWords() + "..."); mon.start(); SolrSearch index = null; @@ -627,13 +632,14 @@ index = resource_index; sorted = true; } + SolrQueryResultSet resultSet = null; for(String word : slot.getWords()){ - uri2Score.putAll(index.getResourcesWithScores("label:" + word, sorted)); + resultSet.add(index.getResourcesWithScores("label:" + word, sorted)); } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); logger.info("Candidate URIs: " + uri2Score.keySet()); - return uri2Score; + return resultSet; } private List<Query> getNBestQueryCandidatesForTemplates(Map<Template, Collection<? 
extends Query>> template2Queries){ @@ -774,7 +780,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Who wrote the book The pillars of the Earth?"; + String question = "Is Natalie Portman an actress?"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -24,8 +24,8 @@ public Preprocessor(boolean n) { USE_NER = n; if (USE_NER) { - ner = new LingPipeNER(true); //not case sensitive best solution? -// ner = new DBpediaSpotlightNER(); +// ner = new LingPipeNER(true); //not case sensitive best solution? 
+ ner = new DBpediaSpotlightNER(); } } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -0,0 +1,67 @@ +package org.dllearner.algorithm.tbsl.search; + +public class SolrQueryResultItem { + + private String label; + private String uri; + private float score; + + public SolrQueryResultItem(String label, String uri) { + this(label, uri, -1); + } + + public SolrQueryResultItem(String label, String uri, float score) { + super(); + this.label = label; + this.uri = uri; + this.score = score; + } + + public String getLabel() { + return label; + } + + public String getUri() { + return uri; + } + + public float getScore() { + return score; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((label == null) ? 0 : label.hashCode()); + result = prime * result + Float.floatToIntBits(score); + result = prime * result + ((uri == null) ? 
0 : uri.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SolrQueryResultItem other = (SolrQueryResultItem) obj; + if (label == null) { + if (other.label != null) + return false; + } else if (!label.equals(other.label)) + return false; + if (Float.floatToIntBits(score) != Float.floatToIntBits(other.score)) + return false; + if (uri == null) { + if (other.uri != null) + return false; + } else if (!uri.equals(other.uri)) + return false; + return true; + } + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -0,0 +1,32 @@ +package org.dllearner.algorithm.tbsl.search; + +import java.util.HashSet; +import java.util.Set; + +public class SolrQueryResultSet { + + private Set<SolrQueryResultItem> items; + + public SolrQueryResultSet() { + items = new HashSet<SolrQueryResultItem>(); + } + + public SolrQueryResultSet(Set<SolrQueryResultItem> items) { + this.items = items; + } + + public Set<SolrQueryResultItem> getItems() { + return items; + } + + public void addItems(Set<SolrQueryResultItem> items) { + this.items.addAll(items); + } + + public void add(SolrQueryResultSet rs) { + this.items.addAll(rs.getItems()); + } + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java 
___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -3,8 +3,10 @@ import java.net.MalformedURLException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; @@ -90,29 +92,54 @@ return resources; } - public Map<String, Float> getResourcesWithScores(String queryString) { + protected SolrQueryResultSet findResourcesWithScores(String queryString, int limit, int offset, boolean sorted){ + Set<SolrQueryResultItem> items = new HashSet<SolrQueryResultItem>(); + + QueryResponse response; + try { + SolrQuery query = new SolrQuery((searchField != null) ? 
searchField + ":" + queryString : queryString); + query.setRows(limit); + query.setStart(offset); + query.addField("score"); + if(sorted){ + query.addSortField("score", SolrQuery.ORDER.desc); + query.addSortField( "pagerank", SolrQuery.ORDER.desc ); + } + response = server.query(query); + SolrDocumentList docList = response.getResults(); + lastTotalHits = (int) docList.getNumFound(); + + for(SolrDocument d : docList){ + items.add(new SolrQueryResultItem((String) d.get("label"), (String) d.get("uri"), (Float) d.get("score"))); + } + } catch (SolrServerException e) { + e.printStackTrace(); + } + return new SolrQueryResultSet(items); + } + + public SolrQueryResultSet getResourcesWithScores(String queryString) { return getResourcesWithScores(queryString, hitsPerPage); } - public Map<String, Float> getResourcesWithScores(String queryString, boolean sorted) { + public SolrQueryResultSet getResourcesWithScores(String queryString, boolean sorted) { return getResourcesWithScores(queryString, hitsPerPage); } - public Map<String, Float> getResourcesWithScores(String queryString, int limit) { + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit) { return getResourcesWithScores(queryString, limit, 0, false); } - public Map<String, Float> getResourcesWithScores(String queryString, int limit, boolean sorted) { + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit, boolean sorted) { return getResourcesWithScores(queryString, limit, 0, sorted); } - public Map<String, Float> getResourcesWithScores(String queryString, int limit, int offset, boolean sorted) { - Map<String, Float> resource2ScoreMap = new HashMap<String, Float>(); + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit, int offset, boolean sorted) { + Set<SolrQueryResultItem> items = new HashSet<SolrQueryResultItem>(); QueryResponse response; try { - SolrQuery query = new SolrQuery(); - query.setQuery(queryString); + SolrQuery query = new 
SolrQuery((searchField != null) ? searchField + ":" + queryString : queryString); query.setRows(hitsPerPage); query.setStart(offset); query.addField("score"); @@ -123,13 +150,14 @@ response = server.query(query); SolrDocumentList docList = response.getResults(); lastTotalHits = (int) docList.getNumFound(); + for(SolrDocument d : docList){ - resource2ScoreMap.put((String) d.get("uri"), (Float) d.get("score")); + items.add(new SolrQueryResultItem((String) d.get("label"), (String) d.get("uri"), (Float) d.get("score"))); } } catch (SolrServerException e) { e.printStackTrace(); } - return resource2ScoreMap; + return new SolrQueryResultSet(items); } @Override @@ -141,5 +169,5 @@ public void setHitsPerPage(int hitsPerPage) { this.hitsPerPage = hitsPerPage; } - + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -51,4 +51,23 @@ return resources; } + @Override + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit, int offset, boolean sorted) { + SolrQueryResultSet rs = new SolrQueryResultSet(); + + double threshold = 1; + + String queryWithThreshold = queryString; + while(rs.getItems().size() < limit && threshold >= minThreshold){ + if(threshold < 1){ + queryWithThreshold = queryString + "~" + format.format(threshold); + } + + rs.add(findResourcesWithScores(queryWithThreshold, limit - rs.getItems().size(), 0, sorted)); + threshold -= step; + } + + return rs; + } + } \ No newline at end of file Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -97,7 +97,7 @@ Set<SPARQL_Filter> filters = new HashSet<SPARQL_Filter>(); for(SPARQL_Filter filter : query.getFilters()){ for(SPARQL_Pair term : filter.getTerms()){ - + filters.add(new SPARQL_Filter(term)); } } this.filter = filters; Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -0,0 +1,29 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.Comparator; + +import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; + +public class SolrQueryResultStringSimilarityComparator implements Comparator<SolrQueryResultItem>{ + private String s; + + public SolrQueryResultStringSimilarityComparator(String s) { + this.s = s; + } + + @Override + public int compare(SolrQueryResultItem item1, SolrQueryResultItem item2) { + + double sim1 = Similarity.getSimilarity(s, item1.getLabel()); + double sim2 = Similarity.getSimilarity(s, item2.getLabel()); + + if(sim1 < sim2){ + return 1; + } else if(sim1 > sim2){ + return -1; + } else { + return item1.getLabel().compareTo(item2.getLabel()); + } + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java ___________________________________________________________________ Added: svn:mime-type + text/plain This 
was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-10-26 16:05:25
|
Revision: 3329 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3329&view=rev Author: christinaunger Date: 2011-10-26 16:05:16 +0000 (Wed, 26 Oct 2011) Log Message: ----------- [tbsl] updated templates for name-constructions (e.g. "a video game called Battle Chess") Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-10-26 15:11:38 UTC (rev 3328) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-10-26 16:05:16 UTC (rev 3329) @@ -305,6 +305,13 @@ new SPARQL_Term("'^"+simple.getArguments().get(1).getValue()+"'",true), SPARQL_PairType.REGEX))); } + else if (predicate.equals("regex")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term(simple.getArguments().get(1).getValue(),true), + SPARQL_PairType.REGEX))); + } else { if (arity == 1) { SPARQL_Term term = new SPARQL_Term(simple.getArguments().get(0).getValue(),false);term.setIsVariable(true); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-10-26 15:11:38 UTC (rev 3328) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-10-26 16:05:16 UTC (rev 3329) @@ -298,9 +298,9 @@ } semName = semName.substring(1); - out.add(new Pair<String,String>("(NP NP* ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<e,t>,[ l1:[ | SLOT_title(x,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); - out.add(new Pair<String,String>("(DP DP* ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_title(x,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); - out.add(new Pair<String,String>("(ADJ ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<e,t>,[ l1:[ | SLOT_title(x,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); + out.add(new Pair<String,String>("(NP NP* ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<e,t>,[ l1:[ y | SLOT_title(x,y), regex(y,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); + out.add(new Pair<String,String>("(DP DP* ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<<e,t>,t>,[ l1:[ y | SLOT_title(x,y), regex(y,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); + out.add(new Pair<String,String>("(ADJ ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<e,t>,[ l1:[ y | SLOT_title(x,y), regex(y,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); return out; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 15:11:38 UTC (rev 3328) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 16:05:16 UTC (rev 3329) @@ -62,7 +62,7 @@ Matcher m; Pattern compAdjPattern = Pattern.compile("(\\w+/RBR.(\\w+)/JJ)"); - Pattern superAdjPattern = Pattern.compile("(\\w+/RBS.(\\w+)/JJ)"); +// Pattern superAdjPattern = 
Pattern.compile("(\\w+/RBS.(\\w+)/JJ)"); // TODO "(the most) official languages" vs "the (most official) languages" Pattern howAdjPattern = Pattern.compile("(\\w+/WRB.(\\w+)(?<!many)/JJ)"); Pattern nprepPattern = Pattern.compile("\\s((\\w+)/NNS?.of/IN)"); Pattern didPattern = Pattern.compile("(?i)(\\s((did)|(do)|(does))/VB.?)\\s"); @@ -90,11 +90,11 @@ logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJR"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJR"); } - m = superAdjPattern.matcher(condensedstring); - while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJS"); - condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJS"); - } +// m = superAdjPattern.matcher(condensedstring); +// while (m.find()) { +// logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJS"); +// condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJS"); +// } m = howAdjPattern.matcher(condensedstring); while (m.find()) { logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJH"); @@ -240,7 +240,6 @@ "VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN", "GERUNDIN","VPREP","WHEN","WHERE","IN","TO","DT"}; -// NER ner = new LingPipeNER(); List<String> namedentities = ner.getNamedEntitites(untagged); List<String> usefulnamedentities = new ArrayList<String>(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-10-26 15:11:38 UTC (rev 3328) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-10-26 16:05:16 UTC (rev 3329) @@ -53,6 +53,8 @@ boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; + boolean USE_NER = false; + public Templator() { List<InputStream> grammarFiles = new 
ArrayList<InputStream>(); @@ -71,7 +73,7 @@ p.CONSTRUCT_SEMANTICS = true; p.MODE = "LEIPZIG"; - pp = new Preprocessor(true); + pp = new Preprocessor(USE_NER); wordnet = new WordNet(); } @@ -79,6 +81,9 @@ public void setUNTAGGED_INPUT(boolean b) { UNTAGGED_INPUT = b; } + public void setUSE_NER(boolean b) { + USE_NER = b; + } public Set<Template> buildTemplates(String s) { @@ -95,7 +100,12 @@ s = extractSentence(tagged); } - String newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); + String newtagged; + if (USE_NER) { + newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); + } + else newtagged = pp.condenseNominals(tagged); + newtagged = pp.condense(newtagged); logger.trace("Preprocessed: " + newtagged); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-10-27 11:31:40
|
Revision: 3331 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3331&view=rev Author: lorenz_b Date: 2011-10-27 11:31:34 +0000 (Thu, 27 Oct 2011) Log Message: ----------- Fixed NPEs in Solr search. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/HierarchicalSolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 16:24:10 UTC (rev 3330) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-27 11:31:34 UTC (rev 3331) @@ -68,6 +68,7 @@ private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); private static final int RECURSION_DEPTH = 2; + private static final int MAX_URIS_PER_SLOT = 10; private Ranking ranking; private boolean useRemoteEndpointValidation; @@ -135,11 +136,11 @@ private void init(Options options){ String resourcesIndexUrl = options.fetch("solr.resources.url"); String resourcesIndexSearchField = options.fetch("solr.resources.searchfield"); - resource_index = new ThresholdSlidingSolrSearch(resourcesIndexUrl, resourcesIndexSearchField, 1.0, 0.1); + resource_index = new ThresholdSlidingSolrSearch(resourcesIndexUrl, resourcesIndexSearchField, "label", 1.0, 0.1); String classesIndexUrl = options.fetch("solr.classes.url"); String classesIndexSearchField = options.fetch("solr.classes.searchfield"); - SolrSearch dbpediaClassIndex = new SolrSearch(classesIndexUrl, classesIndexSearchField); + 
SolrSearch dbpediaClassIndex = new SolrSearch(classesIndexUrl, classesIndexSearchField, "label"); String yagoClassesIndexUrl = options.fetch("solr.yago.classes.url"); String yagoClassesIndexSearchField = options.fetch("solr.yago.classes.searchfield"); @@ -149,11 +150,11 @@ String propertiesIndexUrl = options.fetch("solr.properties.url"); String propertiesIndexSearchField = options.fetch("solr.properties.searchfield"); - SolrSearch labelBasedPropertyIndex = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField); + SolrSearch labelBasedPropertyIndex = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField, "label"); String boaPatternIndexUrl = options.fetch("solr.boa.properties.url"); String boaPatternIndexSearchField = options.fetch("solr.boa.properties.searchfield"); - SolrSearch patternBasedPropertyIndex = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField); + SolrSearch patternBasedPropertyIndex = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField, "nlr"); //first BOA pattern then label based // property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); @@ -535,10 +536,16 @@ } tmp.addAll(rs.getItems()); + int i = 0; for(SolrQueryResultItem item : tmp){ sortedURIs.add(item.getUri()); + if(i == MAX_URIS_PER_SLOT){ + break; + } + i++; } tmp.clear(); + } slot2URI.put(slot, sortedURIs); @@ -735,7 +742,7 @@ logger.info("Done in " + mon.getLastValue() + "ms."); } - private List<String> getResultFromRemoteEndpoint(String query){ + private List<String> getResultFromRemoteEndpoint(String query){System.out.println(query); List<String> resources = new ArrayList<String>(); try { String queryString = query; @@ -784,6 +791,8 @@ // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); +// SparqlEndpoint endpoint = new SparqlEndpoint(new 
URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), +// Collections.<String>singletonList(""), Collections.<String>emptyList()); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), Collections.<String>singletonList(""), Collections.<String>emptyList()); learner.setEndpoint(endpoint); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/HierarchicalSolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/HierarchicalSolrSearch.java 2011-10-26 16:24:10 UTC (rev 3330) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/HierarchicalSolrSearch.java 2011-10-27 11:31:34 UTC (rev 3331) @@ -32,5 +32,14 @@ } return resources; } + + @Override + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit, int offset, boolean sorted) { + SolrQueryResultSet rs = primarySearch.getResourcesWithScores(queryString, limit, offset, sorted); + if(rs.getItems().size() < limit){ + rs.add(secondarySearch.getResourcesWithScores(queryString, limit-rs.getItems().size(), offset, sorted)); + } + return rs; + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 16:24:10 UTC (rev 3330) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-27 11:31:34 UTC (rev 3331) @@ -25,23 +25,28 @@ private int lastTotalHits = 0; private String searchField; + private String labelField; public SolrSearch() { - // TODO Auto-generated constructor stub } public SolrSearch(String solrServerURL){ + this(solrServerURL, null, null); + } + + public SolrSearch(String solrServerURL, String searchField){ + 
this(solrServerURL, searchField, null); + } + + public SolrSearch(String solrServerURL, String searchField, String labelField){ try { server = new CommonsHttpSolrServer(solrServerURL); server.setRequestWriter(new BinaryRequestWriter()); } catch (MalformedURLException e) { e.printStackTrace(); } - } - - public SolrSearch(String solrServerURL, String searchField){ - this(solrServerURL); this.searchField = searchField; + this.labelField = labelField; } public String getServerURL() { @@ -51,6 +56,14 @@ public String getSearchField() { return searchField; } + + public void setLabelField(String labelField) { + this.labelField = labelField; + } + + public String getLabelField() { + return labelField; + } @Override public List<String> getResources(String queryString) { @@ -110,7 +123,7 @@ lastTotalHits = (int) docList.getNumFound(); for(SolrDocument d : docList){ - items.add(new SolrQueryResultItem((String) d.get("label"), (String) d.get("uri"), (Float) d.get("score"))); + items.add(new SolrQueryResultItem((String) d.get(labelField), (String) d.get("uri"), (Float) d.get("score"))); } } catch (SolrServerException e) { e.printStackTrace(); @@ -140,7 +153,7 @@ QueryResponse response; try { SolrQuery query = new SolrQuery((searchField != null) ? 
searchField + ":" + queryString : queryString); - query.setRows(hitsPerPage); + query.setRows(limit); query.setStart(offset); query.addField("score"); if(sorted){ @@ -152,7 +165,7 @@ lastTotalHits = (int) docList.getNumFound(); for(SolrDocument d : docList){ - items.add(new SolrQueryResultItem((String) d.get("label"), (String) d.get("uri"), (Float) d.get("score"))); + items.add(new SolrQueryResultItem((String) d.get(labelField), (String) d.get("uri"), (Float) d.get("score"))); } } catch (SolrServerException e) { e.printStackTrace(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 16:24:10 UTC (rev 3330) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-27 11:31:34 UTC (rev 3331) @@ -21,13 +21,17 @@ } public ThresholdSlidingSolrSearch(String solrServerURL, String searchField, double minThreshold, double step) { - super(solrServerURL, searchField); + this(solrServerURL, searchField, null, minThreshold, step); + } + + public ThresholdSlidingSolrSearch(String solrServerURL, String searchField, String labelField, double minThreshold, double step) { + super(solrServerURL, searchField, labelField); this.minThreshold = minThreshold; this.step = step; } public ThresholdSlidingSolrSearch(SolrSearch search){ - super(search.getServerURL(), search.getSearchField()); + super(search.getServerURL(), search.getSearchField(), search.getLabelField()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-10-31 22:11:44
|
Revision: 3345 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3345&view=rev Author: lorenz_b Date: 2011-10-31 22:11:38 +0000 (Mon, 31 Oct 2011) Log Message: ----------- Fixed small bug. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-31 14:14:31 UTC (rev 3344) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-31 22:11:38 UTC (rev 3345) @@ -150,7 +150,7 @@ String propertiesIndexUrl = options.fetch("solr.properties.url"); String propertiesIndexSearchField = options.fetch("solr.properties.searchfield"); - SolrSearch labelBasedPropertyIndex = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField, "label"); + SolrSearch labelBasedPropertyIndex = new ThresholdSlidingSolrSearch(propertiesIndexUrl, propertiesIndexSearchField, "label", 1.0, 0.1); String boaPatternIndexUrl = options.fetch("solr.boa.properties.url"); String boaPatternIndexSearchField = options.fetch("solr.boa.properties.searchfield"); @@ -536,13 +536,15 @@ } tmp.addAll(rs.getItems()); - int i = 0; + for(SolrQueryResultItem item : tmp){ - sortedURIs.add(item.getUri()); - if(i == MAX_URIS_PER_SLOT){ + if(!sortedURIs.contains(item.getUri())){ + sortedURIs.add(item.getUri()); + } + if(sortedURIs.size() == MAX_URIS_PER_SLOT){ break; } - i++; + } tmp.clear(); @@ -787,7 +789,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // 
Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Is Natalie Portman an actress?"; + String question = "Who developed the video game World of Warcraft?"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java 2011-10-31 14:14:31 UTC (rev 3344) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java 2011-10-31 22:11:38 UTC (rev 3345) @@ -62,6 +62,11 @@ return false; return true; } + + @Override + public String toString() { + return uri + "(label: " + label + ", score: " + score + ")"; + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java 2011-10-31 14:14:31 UTC (rev 3344) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java 2011-10-31 22:11:38 UTC (rev 3345) @@ -1,28 +1,48 @@ package org.dllearner.algorithm.tbsl.util; import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; public class SolrQueryResultStringSimilarityComparator implements Comparator<SolrQueryResultItem>{ private String s; + private Map<String, Double> cache; public SolrQueryResultStringSimilarityComparator(String s) { this.s = s; + cache = new HashMap<String, Double>(); } @Override 
public int compare(SolrQueryResultItem item1, SolrQueryResultItem item2) { - double sim1 = Similarity.getSimilarity(s, item1.getLabel()); - double sim2 = Similarity.getSimilarity(s, item2.getLabel()); - + double sim1 = 0; + if(cache.containsKey(item1.getLabel())){ + sim1 = cache.get(item1.getLabel()); + } else { + sim1 = Similarity.getSimilarity(s, item1.getLabel()); + cache.put(item1.getLabel(), sim1); + } + double sim2 = 0; + if(cache.containsKey(item2.getLabel())){ + sim2 = cache.get(item2.getLabel()); + } else { + sim2 = Similarity.getSimilarity(s, item2.getLabel()); + cache.put(item2.getLabel(), sim2); + } + if(sim1 < sim2){ return 1; } else if(sim1 > sim2){ return -1; } else { - return item1.getLabel().compareTo(item2.getLabel()); + int val = item1.getLabel().compareTo(item2.getLabel()); + if(val == 0){ + return item1.getUri().compareTo(item2.getUri()); + } + return val; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-01 13:26:21
|
Revision: 3346 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3346&view=rev Author: lorenz_b Date: 2011-11-01 13:26:11 +0000 (Tue, 01 Nov 2011) Log Message: ----------- Started new method to build the queries weighted by similarity and prominence. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-31 22:11:38 UTC (rev 3345) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-01 13:26:11 UTC (rev 3346) @@ -26,6 +26,7 @@ import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; import org.dllearner.algorithm.tbsl.search.SolrSearch; import org.dllearner.algorithm.tbsl.search.ThresholdSlidingSolrSearch; +import org.dllearner.algorithm.tbsl.sparql.Allocation; import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; @@ -33,8 +34,10 @@ import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Prefixes; +import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.algorithm.tbsl.util.SolrQueryResultStringSimilarityComparator; import 
org.dllearner.algorithm.tbsl.util.StringSimilarityComparator; import org.dllearner.core.ComponentInitException; @@ -238,10 +241,17 @@ logger.info(t); } - //generate SPARQL query candidates, but select only a fixed number per template - template2Queries = getSPARQLQueryCandidates(templates, ranking); - sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); +// //generate SPARQL query candidates, but select only a fixed number per template +// template2Queries = getSPARQLQueryCandidates(templates, ranking); +// sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); + //get the weighted query candidates + Set<WeightedQuery> weightedQueries = getWeightedSPARQLQueries(templates); + sparqlQueryCandidates = new ArrayList<Query>(); + for(WeightedQuery wQ : weightedQueries){ + sparqlQueryCandidates.add(wQ.getQuery()); + } + //test candidates if(useRemoteEndpointValidation){ //on remote endpoint validateAgainstRemoteEndpoint(sparqlQueryCandidates); @@ -328,6 +338,122 @@ } } + private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ + double alpha = 0.7; + double beta = 1 - alpha; + Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); + + Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + + Set<Allocation> allAllocations; + for(Template t : templates){ + allAllocations = new HashSet<Allocation>(); + for(Slot slot : t.getSlots()){ + Set<Allocation> allocations = computeAllocation(slot); + allAllocations.addAll(allocations); + slot2Allocations.put(slot, allocations); + } + + int min = Integer.MAX_VALUE; + int max = Integer.MIN_VALUE; + for(Allocation a : allAllocations){ + if(a.getInDegree() < min){ + min = a.getInDegree(); + } + if(a.getInDegree() > max){ + max = a.getInDegree(); + } + } + for(Allocation a : allAllocations){ + double prominence = a.getInDegree()/(max-min); + a.setProminence(prominence); + + double score = alpha * a.getSimilarity() + beta * 
a.getProminence(); + a.setScore(score); + + } +// System.out.println(allAllocations); + + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); + Query cleanQuery = t.getQuery(); + queries.add(new WeightedQuery(cleanQuery)); + + Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); + for(Slot slot : t.getSlots()){ + for(Allocation a : slot2Allocations.get(slot)){ + for(WeightedQuery query : queries){ + if(slot.getSlotType() == SlotType.SYMPROPERTY){ + Query reversedQuery = new Query(query.getQuery()); + reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); + reversedQuery.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(reversedQuery); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + tmp.add(w); + } + Query q = new Query(query.getQuery()); + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + tmp.add(w); + } + } + queries.clear(); + queries.addAll(tmp); + tmp.clear(); + } + for(WeightedQuery q : queries){ + q.setScore(q.getScore()/t.getSlots().size()); + } + allQueries.addAll(queries); + } + return allQueries; + } + + private Set<Allocation> computeAllocation(Slot slot){ + Set<Allocation> allocations = new HashSet<Allocation>(); + + SolrSearch index = getIndexBySlotType(slot); + + SolrQueryResultSet rs; + for(String word : slot.getWords()){ + rs = index.getResourcesWithScores(word, 10); + + for(SolrQueryResultItem item : rs.getItems()){ + int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); + double similarity = Similarity.getSimilarity(word, item.getLabel()); + allocations.add(new Allocation(item.getUri(), prominence, similarity)); + } + + } + + return allocations; + } + + private int getProminenceValue(String uri, SlotType type){ + int cnt = 1; + String query = null; + if(type == SlotType.CLASS){ + query = "SELECT COUNT(?s) WHERE 
{?s a <%s>}"; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; + } + query = String.format(query, uri); + + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + cnt = qs.get(projectionVar).asLiteral().getInt(); + } + return cnt; + } + private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates){ logger.info("Generating candidate SPARQL queries..."); mon.start(); @@ -483,11 +609,10 @@ return template2Queries; } - private Set<String> getCandidateURIs(Slot slot){ + private SolrQueryResultSet getCandidateURIs(Slot slot, int limit){ logger.info("Generating candidate URIs for " + slot.getWords() + "..."); mon.start(); SolrSearch index = null; - Set<String> uris = new HashSet<String>(); if(slot.getSlotType() == SlotType.CLASS){ index = class_index; } else if(slot.getSlotType() == SlotType.PROPERTY){ @@ -495,14 +620,13 @@ } else if(slot.getSlotType() == SlotType.RESOURCE){ index = resource_index; } + SolrQueryResultSet rs = new SolrQueryResultSet(); for(String word : slot.getWords()){ - uris.addAll(index.getResources("label:" + word)); - + rs.add(index.getResourcesWithScores(word, limit)); } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("Candidate URIs: " + uris); - return uris; + return rs; } private List<String> getCandidateURIsSortedBySimilarity(Slot slot){ @@ -641,7 +765,7 @@ index = resource_index; sorted = true; } - SolrQueryResultSet resultSet = null; + SolrQueryResultSet resultSet = new SolrQueryResultSet(); for(String word : slot.getWords()){ resultSet.add(index.getResourcesWithScores("label:" + word, sorted)); } @@ -777,6 +901,8 @@ return 
resources; } + + /** * @param args @@ -789,7 +915,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Who developed the video game World of Warcraft?"; + String question = "Give me all films produced by Hal Roach?"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-01 13:26:11 UTC (rev 3346) @@ -0,0 +1,57 @@ +package org.dllearner.algorithm.tbsl.sparql; + + +public class Allocation { + + private String uri; + private int inDegree; + + private double similarity; + private double prominence; + + private double score; + + public Allocation(String uri, int inDegree, double similarity) { + this.uri = uri; + this.inDegree = inDegree; + this.similarity = similarity; + } + + public String getUri() { + return uri; + } + + public int getInDegree() { + return inDegree; + } + + public double getSimilarity() { + return similarity; + } + + public double getProminence() { + return prominence; + } + + public void setProminence(double prominence) { + this.prominence = prominence; + } + + public double getScore() { + return score; + } + + public void setScore(double score) { + this.score = score; + } + + @Override + public String toString() { + return uri + "(similarity: " + similarity + "; prominence: " + inDegree + ")"; + } + + + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 
___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java 2011-11-01 13:26:11 UTC (rev 3346) @@ -0,0 +1,46 @@ +package org.dllearner.algorithm.tbsl.sparql; + +public class WeightedQuery implements Comparable<WeightedQuery>{ + + private double score; + private Query query; + + public WeightedQuery(Query query, double score) { + super(); + this.score = score; + this.query = query; + } + + public WeightedQuery(Query query) { + this(query, 0); + } + + public double getScore() { + return score; + } + + public void setScore(double score) { + this.score = score; + } + + public Query getQuery() { + return query; + } + + @Override + public int compareTo(WeightedQuery o) { + if(o.getScore() < this.score){ + return -1; + } else if(o.getScore() > this.score){ + return 1; + } else return query.toString().compareTo(o.getQuery().toString()); + } + + @Override + public String toString() { + return query.toString() + "\n(Score: " + score + ")"; + } + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-02 14:37:45
|
Revision: 3362 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3362&view=rev Author: lorenz_b Date: 2011-11-02 14:37:35 +0000 (Wed, 02 Nov 2011) Log Message: ----------- Changed handling of count results with value 0. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-02 14:07:30 UTC (rev 3361) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-02 14:37:35 UTC (rev 3362) @@ -7,6 +7,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -16,6 +17,7 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.commons.collections.SetUtils; import org.apache.log4j.Logger; import org.dllearner.algorithm.qtl.util.ModelGenerator; import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; @@ -31,6 +33,7 @@ import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -43,9 +46,13 @@ import org.dllearner.core.ComponentInitException; import org.dllearner.core.Oracle; 
import org.dllearner.core.SparqlQueryLearningAlgorithm; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; @@ -56,6 +63,7 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -109,6 +117,8 @@ private Lemmatizer lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); + private SPARQLReasoner reasoner; + public SPARQLTemplateBasedLearner() throws InvalidFileFormatException, FileNotFoundException, IOException{ this(OPTIONS_FILE); } @@ -188,6 +198,9 @@ predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); modelGenenerator = new ModelGenerator(endpoint, predicateFilters); + + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + reasoner.prepareSubsumptionHierarchy(); } public void setQuestion(String question){ @@ -251,7 +264,7 @@ Set<WeightedQuery> weightedQueries = getWeightedSPARQLQueries(templates); sparqlQueryCandidates = new ArrayList<Query>(); int i = 0; - for(WeightedQuery wQ : weightedQueries){ + for(WeightedQuery wQ : weightedQueries){System.out.println(wQ); sparqlQueryCandidates.add(wQ.getQuery()); if(i == maxTestedQueries){ break; @@ -346,7 +359,7 @@ } private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - double alpha = 0.7; + double alpha = 0.8; double beta = 1 - alpha; Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); @@ -355,6 +368,7 @@ Set<Allocation> 
allAllocations; for(Template t : templates){ allAllocations = new HashSet<Allocation>(); + for(Slot slot : t.getSlots()){ Set<Allocation> allocations = computeAllocation(slot); allAllocations.addAll(allocations); @@ -386,25 +400,71 @@ queries.add(new WeightedQuery(cleanQuery)); Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); + List<Slot> sortedSlots = new ArrayList<Slot>(); + Set<Slot> classSlots = new HashSet<Slot>(); for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.CLASS){ + sortedSlots.add(slot); + classSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ + if(!sortedSlots.contains(slot)){ + sortedSlots.add(slot); + } + } + for(Slot slot : sortedSlots){ if(!slot2Allocations.get(slot).isEmpty()){ for(Allocation a : slot2Allocations.get(slot)){ for(WeightedQuery query : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY){ Query reversedQuery = new Query(query.getQuery()); reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - reversedQuery.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(reversedQuery); + //check if the query is possible + if(slot.getSlotType() == SlotType.SYMPROPERTY){ + boolean drop = false; + for(SPARQL_Triple triple : query.getQuery().getTriplesWithVar(slot.getAnchor())){ + System.out.println(triple); + for(SPARQL_Triple typeTriple : query.getQuery().getRDFTypeTriples(triple.getValue().getName())){ + System.out.println(typeTriple); + Set<String> ranges = getRanges(a.getUri()); + System.out.println(a); + if(!ranges.isEmpty()){ + Set<String> allRanges = new HashSet<String>(); + for(String range : ranges){ + allRanges.addAll(getSuperClasses(range)); + } + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Set<String> allTypes = getSuperClasses(typeURI); + allTypes.add(typeTriple.getValue().getName()); + System.out.println("RANGES: " + ranges); + System.out.println("TYPES: " + allTypes); + + 
if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } + } + } + } + + if(!drop){ + reversedQuery.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(reversedQuery); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + tmp.add(w); + } + + + + + } + Query q = new Query(query.getQuery()); + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(q); double newScore = query.getScore() + a.getScore(); w.setScore(newScore); tmp.add(w); - } - Query q = new Query(query.getQuery()); - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); + } } queries.clear(); @@ -425,6 +485,29 @@ } return allQueries; } + +/* + * for(SPARQL_Triple triple : t.getQuery().getTriplesWithVar(slot.getAnchor())){System.out.println(triple); + for(SPARQL_Triple typeTriple : t.getQuery().getRDFTypeTriples(triple.getVariable().getName())){ + System.out.println(typeTriple); + for(Allocation a : allocations){ + Set<String> domains = getDomains(a.getUri()); + System.out.println(a); + System.out.println(domains); + for(Slot s : classSlots){ + if(s.getAnchor().equals(triple.getVariable().getName())){ + for(Allocation all : slot2Allocations.get(s)){ + if(!domains.contains(all.getUri())){ + System.out.println("DROP " + a); + } + } + } + } + } + + + } + */ private Set<Allocation> computeAllocation(Slot slot){ Set<Allocation> allocations = new HashSet<Allocation>(); @@ -829,10 +912,21 @@ logger.info("Testing query:\n" + query); List<String> results = getResultFromRemoteEndpoint(query); if(!results.isEmpty()){ - learnedSPARQLQueries.put(query, results); - if(stopIfQueryResultNotEmpty){ - return; + try{ + int cnt = Integer.parseInt(results.get(0)); + if(cnt > 0){ + learnedSPARQLQueries.put(query, results); + if(stopIfQueryResultNotEmpty){ + return; + } + 
} + } catch (NumberFormatException e){ + learnedSPARQLQueries.put(query, results); + if(stopIfQueryResultNotEmpty){ + return; + } } + } logger.info("Result: " + results); } @@ -884,7 +978,7 @@ logger.info("Done in " + mon.getLastValue() + "ms."); } - private List<String> getResultFromRemoteEndpoint(String query){System.out.println(query); + private List<String> getResultFromRemoteEndpoint(String query){ List<String> resources = new ArrayList<String>(); try { String queryString = query; @@ -897,7 +991,12 @@ while(rs.hasNext()){ qs = rs.next(); projectionVar = qs.varNames().next(); - resources.add(qs.get(projectionVar).toString()); + if(qs.get(projectionVar).isLiteral()){ + resources.add(qs.get(projectionVar).asLiteral().getLexicalForm()); + } else if(qs.get(projectionVar).isURIResource()){ + resources.add(qs.get(projectionVar).asResource().getURI()); + } + } } catch (Exception e) { logger.error("Query execution failed.", e); @@ -917,8 +1016,42 @@ return resources; } + private Set<String> getDomains(String property){ + Set<String> domains = new HashSet<String>(); + String query = String.format("SELECT ?domain WHERE {<%s> <%s> ?domain}", property, RDFS.domain.getURI()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + domains.add(qs.getResource("domain").getURI()); + } + + return domains; + } + private Set<String> getRanges(String property){ + Set<String> domains = new HashSet<String>(); + String query = String.format("SELECT ?range WHERE {<%s> <%s> ?range}", property, RDFS.range.getURI()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + domains.add(qs.getResource("range").getURI()); + } + + return domains; + } + private Set<String> getSuperClasses(String cls){ + Set<String> superClasses = new HashSet<String>(); + for(Description d : 
reasoner.getClassHierarchy().getSuperClasses(new NamedClass(cls))){ + superClasses.add(((NamedClass)d).getName()); + } + return superClasses; + } + + + /** * @param args @@ -932,7 +1065,9 @@ // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "In which programming language is GIMP written?"; - String question = "Who/WP are/VBP the/DT presidents/NNS of/IN the/DT United/NNP States/NNPS"; +// String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; + String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; +// String question = "Give/VB me/PRP all/DT soccer/NN clubs/NNS in/IN the/DT Premier/NNP League/NNP"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-11-02 14:07:30 UTC (rev 3361) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-11-02 14:37:35 UTC (rev 3362) @@ -423,6 +423,28 @@ } return triples; } + + public List<SPARQL_Triple> getRDFTypeTriples(){ + List<SPARQL_Triple> triples = new ArrayList<SPARQL_Triple>(); + + for(SPARQL_Triple triple : conditions){ + if(triple.getProperty().equals("rdf:type")){ + triples.add(triple); + } + } + return triples; + } + + public List<SPARQL_Triple> getRDFTypeTriples(String var){ + List<SPARQL_Triple> triples = new ArrayList<SPARQL_Triple>(); + + for(SPARQL_Triple triple : conditions){ + if(triple.getProperty().toString().equals("rdf:type") && triple.getVariable().getName().equals(var)){ + triples.add(triple); + } + } + return triples; + } @Override public int hashCode() { 
Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java 2011-11-02 14:07:30 UTC (rev 3361) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java 2011-11-02 14:37:35 UTC (rev 3362) @@ -33,7 +33,16 @@ return -1; } else if(o.getScore() > this.score){ return 1; - } else return query.toString().compareTo(o.getQuery().toString()); + } else { + int filter = Boolean.valueOf(query.getFilters().isEmpty()).compareTo(Boolean.valueOf(o.getQuery().getFilters().isEmpty())); + if(filter == 0){ + return query.toString().compareTo(o.getQuery().toString()); + } else { + return filter; + } + } + + } @Override This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-02 22:28:40
|
Revision: 3364 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3364&view=rev Author: lorenz_b Date: 2011-11-02 22:28:33 +0000 (Wed, 02 Nov 2011) Log Message: ----------- Added simple type restriction to ignore invalid queries. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-02 19:32:53 UTC (rev 3363) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-02 22:28:33 UTC (rev 3364) @@ -200,6 +200,7 @@ modelGenenerator = new ModelGenerator(endpoint, predicateFilters); reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + reasoner.setCache(cache); reasoner.prepareSubsumptionHierarchy(); } @@ -358,6 +359,7 @@ } } + /* private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ double alpha = 0.8; double beta = 1 - alpha; @@ -417,17 +419,20 @@ if(!slot2Allocations.get(slot).isEmpty()){ for(Allocation a : slot2Allocations.get(slot)){ for(WeightedQuery query : queries){ - Query reversedQuery = new Query(query.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); //check if the query is possible if(slot.getSlotType() == SlotType.SYMPROPERTY){ + Query reversedQuery = new Query(query.getQuery()); + reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); + boolean drop = false; - for(SPARQL_Triple triple : query.getQuery().getTriplesWithVar(slot.getAnchor())){ - System.out.println(triple); - for(SPARQL_Triple typeTriple : 
query.getQuery().getRDFTypeTriples(triple.getValue().getName())){ - System.out.println(typeTriple); + for(SPARQL_Triple triple : reversedQuery.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); +// System.out.println(triple); + for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(objectVar)){ +// System.out.println(typeTriple); Set<String> ranges = getRanges(a.getUri()); - System.out.println(a); +// System.out.println(a); if(!ranges.isEmpty()){ Set<String> allRanges = new HashSet<String>(); for(String range : ranges){ @@ -436,14 +441,38 @@ String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); Set<String> allTypes = getSuperClasses(typeURI); allTypes.add(typeTriple.getValue().getName()); - System.out.println("RANGES: " + ranges); - System.out.println("TYPES: " + allTypes); +// System.out.println("RANGES: " + ranges); +// System.out.println("TYPES: " + allTypes); if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ drop = true; + } else { + System.out.println("DROPPING: \n" + reversedQuery.toString()); } } } + for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(subjectVar)){ +// System.out.println(typeTriple); + Set<String> domains = getDomains(a.getUri()); +// System.out.println(a); + if(!domains.isEmpty()){ + Set<String> allDomains = new HashSet<String>(); + for(String domain : domains){ + allDomains.addAll(getSuperClasses(domain)); + } + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Set<String> allTypes = getSuperClasses(typeURI); + allTypes.add(typeTriple.getValue().getName()); +// System.out.println("DOMAINS: " + domains); +// System.out.println("TYPES: " + allTypes); + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ + drop = true; + } else { + System.out.println("DROPPING: \n" + 
reversedQuery.toString()); + } + } + } } if(!drop){ @@ -454,21 +483,245 @@ tmp.add(w); } - + } + Query q = new Query(query.getQuery()); + boolean drop = false; + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); +// System.out.println(triple); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ +// System.out.println(typeTriple); + Set<String> ranges = getRanges(a.getUri()); +// System.out.println(a); + if(!ranges.isEmpty()){ + Set<String> allRanges = new HashSet<String>(); + for(String range : ranges){ + allRanges.addAll(getSuperClasses(range)); + } + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Set<String> allTypes = getSuperClasses(typeURI); + allTypes.add(typeTriple.getValue().getName()); +// System.out.println("RANGES: " + ranges); +// System.out.println("TYPES: " + allTypes); + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } else { + System.out.println("DROPPING: \n" + q.toString()); + } + } + } + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ +// System.out.println(typeTriple); + Set<String> domains = getDomains(a.getUri()); +// System.out.println(a); + if(!domains.isEmpty()){ + Set<String> allDomains = new HashSet<String>(); + for(String domain : domains){ + allDomains.addAll(getSuperClasses(domain)); + } + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Set<String> allTypes = getSuperClasses(typeURI); + allTypes.add(typeTriple.getValue().getName()); +// System.out.println("DOMAINS: " + domains); +// System.out.println("TYPES: " + allTypes); + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ + drop = true; + } else { + 
System.out.println("DROPPING: \n" + q.toString()); + } + } + } + } + } - } + + if(!drop){ + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + tmp.add(w); + } + + + } + } + queries.clear(); + queries.addAll(tmp);System.out.println(tmp); + tmp.clear(); + } + + } + for(WeightedQuery q : queries){ + q.setScore(q.getScore()/t.getSlots().size()); + } + allQueries.addAll(queries); + List<Query> qList = new ArrayList<Query>(); + for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); + qList.add(wQ.getQuery()); + } + template2Queries.put(t, qList); + } + return allQueries; + } + */ + + private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ + double alpha = 0.8; + double beta = 1 - alpha; + Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); + + Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + + Set<Allocation> allAllocations; + for(Template t : templates){ + allAllocations = new HashSet<Allocation>(); + + for(Slot slot : t.getSlots()){ + Set<Allocation> allocations = computeAllocation(slot); + allAllocations.addAll(allocations); + slot2Allocations.put(slot, allocations); + } + + int min = Integer.MAX_VALUE; + int max = Integer.MIN_VALUE; + for(Allocation a : allAllocations){ + if(a.getInDegree() < min){ + min = a.getInDegree(); + } + if(a.getInDegree() > max){ + max = a.getInDegree(); + } + } + for(Allocation a : allAllocations){ + double prominence = a.getInDegree()/(max-min); + a.setProminence(prominence); + + double score = alpha * a.getSimilarity() + beta * a.getProminence(); + a.setScore(score); + + } +// System.out.println(allAllocations); + + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); + Query cleanQuery = t.getQuery(); + queries.add(new WeightedQuery(cleanQuery)); + + Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); + List<Slot> sortedSlots = 
new ArrayList<Slot>(); + Set<Slot> classSlots = new HashSet<Slot>(); + for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.CLASS){ + sortedSlots.add(slot); + classSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ + if(!sortedSlots.contains(slot)){ + sortedSlots.add(slot); + } + } + //add for each SYMPROPERTY Slot the reversed query + for(Slot slot : sortedSlots){ + for(WeightedQuery wQ : queries){ + if(slot.getSlotType() == SlotType.SYMPROPERTY){ + Query reversedQuery = new Query(wQ.getQuery()); + reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); + tmp.add(new WeightedQuery(reversedQuery)); + } + tmp.add(wQ); + } + queries.clear(); + queries.addAll(tmp); + tmp.clear(); + } + + for(Slot slot : sortedSlots){ + if(!slot2Allocations.get(slot).isEmpty()){ + for(Allocation a : slot2Allocations.get(slot)){ + for(WeightedQuery query : queries){ Query q = new Query(query.getQuery()); - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); + + boolean drop = false; + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); +// System.out.println(triple); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ +// System.out.println(typeTriple); + Set<String> ranges = getRanges(a.getUri()); +// System.out.println(a); + if(!ranges.isEmpty()){ + Set<String> allRanges = new HashSet<String>(); + for(String range : ranges){ + allRanges.addAll(getSuperClasses(range)); + } + allRanges.addAll(ranges); + allRanges.remove("http://www.w3.org/2002/07/owl#Thing"); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Set<String> allTypes = 
getSuperClasses(typeURI); + allTypes.add(typeTriple.getValue().getName()); +// if(typeURI.equals("http://dbpedia.org/ontology/Film") && a.getUri().equals("http://dbpedia.org/ontology/starring")){ +// System.out.println("RANGES: " + allRanges); +// System.out.println("TYPES: " + allTypes); +// } + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } else { + System.out.println("DROPPING: \n" + q.toString()); + } + } + } + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ +// System.out.println(typeTriple); + Set<String> domains = getDomains(a.getUri());System.out.println(a.getUri() + ":" + domains); +// System.out.println(a); + if(!domains.isEmpty()){ + Set<String> allDomains = new HashSet<String>(); + for(String domain : domains){ + allDomains.addAll(getSuperClasses(domain)); + } + allDomains.addAll(domains); + allDomains.remove("http://www.w3.org/2002/07/owl#Thing"); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Set<String> allTypes = getSuperClasses(typeURI); + allTypes.add(typeTriple.getValue().getName()); +// if(typeURI.equals("http://dbpedia.org/ontology/Film") && a.getUri().equals("http://dbpedia.org/ontology/starring")){ +// System.out.println("DOMAINS: " + allDomains); +// System.out.println("TYPES: " + allTypes); +// } + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ + drop = true; + } else { + System.out.println("DROPPING: \n" + q.toString()); + } + } + } + } + } + + + if(!drop){ + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + tmp.add(w); + } + } } queries.clear(); - queries.addAll(tmp); + queries.addAll(tmp);//System.out.println(tmp); tmp.clear(); } @@ -478,7 +731,7 @@ } allQueries.addAll(queries); List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){ + 
for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); qList.add(wQ.getQuery()); } template2Queries.put(t, qList); @@ -516,7 +769,7 @@ SolrQueryResultSet rs; for(String word : slot.getWords()){ - rs = index.getResourcesWithScores(word, 10); + rs = index.getResourcesWithScores(word, 3); for(SolrQueryResultItem item : rs.getItems()){ int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-02 19:32:53 UTC (rev 3363) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-02 22:28:33 UTC (rev 3364) @@ -49,6 +49,36 @@ public String toString() { return uri + "(similarity: " + similarity + "; prominence: " + inDegree + ")"; } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + long temp; + temp = Double.doubleToLongBits(score); + result = prime * result + (int) (temp ^ (temp >>> 32)); + result = prime * result + ((uri == null) ? 0 : uri.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Allocation other = (Allocation) obj; + if (Double.doubleToLongBits(score) != Double.doubleToLongBits(other.score)) + return false; + if (uri == null) { + if (other.uri != null) + return false; + } else if (!uri.equals(other.uri)) + return false; + return true; + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-03 08:26:23
|
Revision: 3365 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3365&view=rev Author: lorenz_b Date: 2011-11-03 08:26:17 +0000 (Thu, 03 Nov 2011) Log Message: ----------- Continued type checking. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-02 22:28:33 UTC (rev 3364) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-03 08:26:17 UTC (rev 3365) @@ -667,7 +667,7 @@ String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); Set<String> allTypes = getSuperClasses(typeURI); allTypes.add(typeTriple.getValue().getName()); -// if(typeURI.equals("http://dbpedia.org/ontology/Film") && a.getUri().equals("http://dbpedia.org/ontology/starring")){ +// if(typeURI.equals("http://dbpedia.org/ontology/Actor") && a.getUri().equals("http://dbpedia.org/ontology/birthPlace")){ // System.out.println("RANGES: " + allRanges); // System.out.println("TYPES: " + allTypes); // } @@ -693,7 +693,7 @@ String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); Set<String> allTypes = getSuperClasses(typeURI); allTypes.add(typeTriple.getValue().getName()); -// if(typeURI.equals("http://dbpedia.org/ontology/Film") && a.getUri().equals("http://dbpedia.org/ontology/starring")){ +// if(typeURI.equals("http://dbpedia.org/ontology/Actor") && a.getUri().equals("http://dbpedia.org/ontology/birthPlace")){ // System.out.println("DOMAINS: " + allDomains); // 
System.out.println("TYPES: " + allTypes); // } @@ -769,7 +769,7 @@ SolrQueryResultSet rs; for(String word : slot.getWords()){ - rs = index.getResourcesWithScores(word, 3); + rs = index.getResourcesWithScores(word, 10);System.out.println(word + "->" + rs); for(SolrQueryResultItem item : rs.getItems()){ int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); @@ -1297,8 +1297,13 @@ private Set<String> getSuperClasses(String cls){ Set<String> superClasses = new HashSet<String>(); - for(Description d : reasoner.getClassHierarchy().getSuperClasses(new NamedClass(cls))){ + + for(Description d : reasoner.getClassHierarchy().getSuperClasses((new NamedClass(cls)))){ superClasses.add(((NamedClass)d).getName()); + for(Description sup : reasoner.getClassHierarchy().getSuperClasses(d)){ + superClasses.add(((NamedClass)sup).getName()); + } + } return superClasses; } @@ -1319,8 +1324,8 @@ // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "In which programming language is GIMP written?"; // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; - String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; -// String question = "Give/VB me/PRP all/DT soccer/NN clubs/NNS in/IN the/DT Premier/NNP League/NNP"; +// String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; + String question = "Which/WDT actors/NNS were/VBD born/VBN in/IN Germany/NNP"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java 2011-11-02 22:28:33 UTC (rev 3364) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java 2011-11-03 08:26:17 UTC (rev 3365) @@ -27,6 +27,9 @@ this.items.addAll(rs.getItems()); } - + @Override + public String toString() { + return items.toString(); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-03 14:02:10
|
Revision: 3366 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3366&view=rev Author: lorenz_b Date: 2011-11-03 14:02:03 +0000 (Thu, 03 Nov 2011) Log Message: ----------- Added dbprop: hack. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-03 08:26:17 UTC (rev 3365) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-03 14:02:03 UTC (rev 3366) @@ -7,7 +7,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -17,7 +16,7 @@ import java.util.SortedSet; import java.util.TreeSet; -import org.apache.commons.collections.SetUtils; +import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.dllearner.algorithm.qtl.util.ModelGenerator; import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; @@ -42,7 +41,6 @@ import org.dllearner.algorithm.tbsl.util.Prefixes; import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.algorithm.tbsl.util.SolrQueryResultStringSimilarityComparator; -import org.dllearner.algorithm.tbsl.util.StringSimilarityComparator; import org.dllearner.core.ComponentInitException; import org.dllearner.core.Oracle; import org.dllearner.core.SparqlQueryLearningAlgorithm; @@ -63,6 +61,7 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import 
com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -571,43 +570,54 @@ } */ - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ + private void normProminenceValues(Set<Allocation> allocations){ + double min = 0; + double max = 0; + for(Allocation a : allocations){ + if(a.getProminence() < min){ + min = a.getProminence(); + } + if(a.getProminence() > max){ + max = a.getProminence(); + } + } + for(Allocation a : allocations){ + double prominence = a.getProminence()/(max-min); + a.setProminence(prominence); + } + } + + private void computeScore(Set<Allocation> allocations){ double alpha = 0.8; double beta = 1 - alpha; + + for(Allocation a : allocations){ + double score = alpha * a.getSimilarity() + beta * a.getProminence(); + a.setScore(score); + } + } + + private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ + Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - Set<Allocation> allAllocations; + Set<Allocation> allocations; for(Template t : templates){ - allAllocations = new HashSet<Allocation>(); + allocations = new HashSet<Allocation>(); for(Slot slot : t.getSlots()){ - Set<Allocation> allocations = computeAllocation(slot); - allAllocations.addAll(allocations); - slot2Allocations.put(slot, allocations); - } - - int min = Integer.MAX_VALUE; - int max = Integer.MIN_VALUE; - for(Allocation a : allAllocations){ - if(a.getInDegree() < min){ - min = a.getInDegree(); - } - if(a.getInDegree() > max){ - max = a.getInDegree(); - } - } - for(Allocation a : allAllocations){ - double prominence = a.getInDegree()/(max-min); - a.setProminence(prominence); + allocations = computeAllocations(slot); - double score = alpha * a.getSimilarity() + beta * a.getProminence(); - a.setScore(score); + normProminenceValues(allocations); + 
computeScore(allocations); + + slot2Allocations.put(slot, allocations); } -// System.out.println(allAllocations); + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); Query cleanQuery = t.getQuery(); queries.add(new WeightedQuery(cleanQuery)); @@ -642,11 +652,13 @@ } for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ + if(!slot2Allocations.get(slot).isEmpty()){System.out.println(slot2Allocations.get(slot)); for(Allocation a : slot2Allocations.get(slot)){ for(WeightedQuery query : queries){ Query q = new Query(query.getQuery()); - + if(a.getUri().equals("http://dbpedia.org/ontology/developer") && q.toString().contains("/Organisation>")){ + System.out.println("YES"); + } boolean drop = false; if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ @@ -655,33 +667,38 @@ // System.out.println(triple); for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ // System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); + if(isObjectProperty(a.getUri())){ + Set<String> ranges = getRanges(a.getUri()); +// System.out.println(a); + if(!ranges.isEmpty()){ + Set<String> allRanges = new HashSet<String>(); + for(String range : ranges){ + allRanges.addAll(getSuperClasses(range)); + } + allRanges.addAll(ranges); + allRanges.remove("http://www.w3.org/2002/07/owl#Thing"); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Set<String> allTypes = getSuperClasses(typeURI); + allTypes.add(typeTriple.getValue().getName()); + if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")){ + System.out.println("RANGES: " + allRanges); + 
System.out.println("TYPES: " + allTypes); + } + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } else { +// System.out.println("DROPPING: \n" + q.toString()); + } } - allRanges.addAll(ranges); - allRanges.remove("http://www.w3.org/2002/07/owl#Thing"); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// if(typeURI.equals("http://dbpedia.org/ontology/Actor") && a.getUri().equals("http://dbpedia.org/ontology/birthPlace")){ -// System.out.println("RANGES: " + allRanges); -// System.out.println("TYPES: " + allTypes); -// } - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } + } else { + drop = true; } + } for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ // System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri());System.out.println(a.getUri() + ":" + domains); + Set<String> domains = getDomains(a.getUri()); // System.out.println(a); if(!domains.isEmpty()){ Set<String> allDomains = new HashSet<String>(); @@ -693,15 +710,15 @@ String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); Set<String> allTypes = getSuperClasses(typeURI); allTypes.add(typeTriple.getValue().getName()); -// if(typeURI.equals("http://dbpedia.org/ontology/Actor") && a.getUri().equals("http://dbpedia.org/ontology/birthPlace")){ -// System.out.println("DOMAINS: " + allDomains); -// System.out.println("TYPES: " + allTypes); -// } + if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")){ + System.out.println("DOMAINS: " + allDomains); + System.out.println("TYPES: " + allTypes); + } if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, 
allTypes)){ drop = true; } else { - System.out.println("DROPPING: \n" + q.toString()); +// System.out.println("DROPPING: \n" + q.toString()); } } } @@ -762,15 +779,24 @@ } */ - private Set<Allocation> computeAllocation(Slot slot){ + private Set<Allocation> computeAllocations(Slot slot){ Set<Allocation> allocations = new HashSet<Allocation>(); SolrSearch index = getIndexBySlotType(slot); SolrQueryResultSet rs; for(String word : slot.getWords()){ - rs = index.getResourcesWithScores(word, 10);System.out.println(word + "->" + rs); - + rs = index.getResourcesWithScores(word, 10); + //for tests add the property URI with http://dbpedia.org/property/ namespace + Set<SolrQueryResultItem> tmp = new HashSet<SolrQueryResultItem>(); + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SolrQueryResultItem i : rs.getItems()){ + String uri = "http://dbpedia.org/property/" + i.getUri().substring(i.getUri().lastIndexOf("/")+1); + tmp.add(new SolrQueryResultItem(i.getLabel(), uri)); + } + } + rs.addItems(tmp); +// System.out.println(word + "->" + rs); for(SolrQueryResultItem item : rs.getItems()){ int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); double similarity = Similarity.getSimilarity(word, item.getLabel()); @@ -1295,6 +1321,15 @@ return domains; } + private boolean isObjectProperty(String property){ + String query = String.format("SELECT * WHERE {<%s> a <%s>}", property, OWL.ObjectProperty.getURI()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + if(rs.hasNext()){ + return true; + } + return false; + } + private Set<String> getSuperClasses(String cls){ Set<String> superClasses = new HashSet<String>(); @@ -1325,7 +1360,8 @@ // String question = "In which programming language is GIMP written?"; // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; // String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; - String 
question = "Which/WDT actors/NNS were/VBD born/VBN in/IN Germany/NNP"; + String question = "Which/WDT country/NN does/VBZ the/DT Airedale/NNP Terrier/NNP come/VBP from/IN"; +// String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-03 08:26:17 UTC (rev 3365) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-03 14:02:03 UTC (rev 3366) @@ -4,16 +4,15 @@ public class Allocation { private String uri; - private int inDegree; private double similarity; private double prominence; private double score; - public Allocation(String uri, int inDegree, double similarity) { + public Allocation(String uri, int prominence, double similarity) { this.uri = uri; - this.inDegree = inDegree; + this.prominence = prominence; this.similarity = similarity; } @@ -21,10 +20,6 @@ return uri; } - public int getInDegree() { - return inDegree; - } - public double getSimilarity() { return similarity; } @@ -47,7 +42,7 @@ @Override public String toString() { - return uri + "(similarity: " + similarity + "; prominence: " + inDegree + ")"; + return uri + "(score: " + score + "; similarity: " + similarity + "; prominence: " + prominence + ")"; } @Override This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-03 14:29:38
|
Revision: 3368 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3368&view=rev Author: lorenz_b Date: 2011-11-03 14:29:32 +0000 (Thu, 03 Nov 2011) Log Message: ----------- Small changes in dbprop handling. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-03 14:04:11 UTC (rev 3367) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-03 14:29:32 UTC (rev 3368) @@ -595,6 +595,7 @@ double score = alpha * a.getSimilarity() + beta * a.getProminence(); a.setScore(score); } + } private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ @@ -615,6 +616,18 @@ computeScore(allocations); slot2Allocations.put(slot, allocations); + + //for tests add the property URI with http://dbpedia.org/property/ namespace + Set<Allocation> tmp = new HashSet<Allocation>(); + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(Allocation a : allocations){ + String uri = "http://dbpedia.org/property/" + a.getUri().substring(a.getUri().lastIndexOf("/")+1); + Allocation newA = new Allocation(uri, a.getSimilarity(), a.getProminence()); + newA.setScore(a.getScore()-0.000001); + tmp.add(newA); + } + } + allocations.addAll(tmp); } @@ -787,15 +800,7 @@ SolrQueryResultSet rs; for(String word : slot.getWords()){ rs = index.getResourcesWithScores(word, 10); - //for tests add the property URI with http://dbpedia.org/property/ namespace - Set<SolrQueryResultItem> tmp = new HashSet<SolrQueryResultItem>(); - 
if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SolrQueryResultItem i : rs.getItems()){ - String uri = "http://dbpedia.org/property/" + i.getUri().substring(i.getUri().lastIndexOf("/")+1); - tmp.add(new SolrQueryResultItem(i.getLabel(), uri)); - } - } - rs.addItems(tmp); + // System.out.println(word + "->" + rs); for(SolrQueryResultItem item : rs.getItems()){ int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); @@ -1359,9 +1364,10 @@ // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "In which programming language is GIMP written?"; // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; -// String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; - String question = "Which/WDT country/NN does/VBZ the/DT Airedale/NNP Terrier/NNP come/VBP from/IN"; + String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; +// String question = "Which/WDT country/NN does/VBZ the/DT Airedale/NNP Terrier/NNP come/VBP from/IN"; // String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP"; +// String question = "How/WRB many/JJ films/NNS did/VBD Leonardo/NNP DiCaprio/NNP star/VB in/IN"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-03 14:04:11 UTC (rev 3367) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-03 14:29:32 UTC (rev 3368) @@ -10,7 +10,7 @@ private double score; - public Allocation(String uri, int 
prominence, double similarity) { + public Allocation(String uri, double prominence, double similarity) { this.uri = uri; this.prominence = prominence; this.similarity = similarity; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-04 12:51:40
|
Revision: 3372 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3372&view=rev Author: lorenz_b Date: 2011-11-04 12:51:33 +0000 (Fri, 04 Nov 2011) Log Message: ----------- Added constructors with POS tagger. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-03 19:32:25 UTC (rev 3371) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-04 12:51:33 UTC (rev 3372) @@ -5,10 +5,12 @@ import java.io.IOException; import java.net.URL; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -22,6 +24,7 @@ import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; @@ -68,6 +71,10 @@ public class SPARQLTemplateBasedLearner implements SparqlQueryLearningAlgorithm{ + //for debugging + List<String> exclusions = Arrays.asList(new String[]{"http://dbpedia.org/ontology/GeopoliticalOrganisation", + "http://dbpedia.org/ontology/Non-ProfitOrganisation"}); + enum Ranking{ 
LUCENE, SIMILARITY, NONE } @@ -140,6 +147,20 @@ templateGenerator = new Templator(); } + public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger){ + init(options); + + Set<String> predicateFilters = new HashSet<String>(); + predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); + predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); + + prefixMap = Prefixes.getPrefixes(); + + modelGenenerator = new ModelGenerator(endpoint, predicateFilters); + + templateGenerator = new Templator(tagger); + } + /* * Only for Evaluation useful. */ @@ -635,7 +656,7 @@ Query cleanQuery = t.getQuery(); queries.add(new WeightedQuery(cleanQuery)); - Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); + Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); List<Slot> sortedSlots = new ArrayList<Slot>(); Set<Slot> classSlots = new HashSet<Slot>(); for(Slot slot : t.getSlots()){ @@ -669,8 +690,8 @@ for(Allocation a : slot2Allocations.get(slot)){ for(WeightedQuery query : queries){ Query q = new Query(query.getQuery()); - if(a.getUri().equals("http://dbpedia.org/ontology/developer") && q.toString().contains("/Organisation>")){ - System.out.println("YES"); + if(a.getUri().equals("http://dbpedia.org/ontology/developer") && q.toString().contains("/Organisation>") && q.toString().contains("/Software>")){ + System.out.println("YES:\n" + query); } boolean drop = false; if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ @@ -701,7 +722,8 @@ if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ drop = true; } else { -// System.out.println("DROPPING: \n" + q.toString()); + if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")) + System.out.println("DROPPING: \n" + q.toString()); } } } else { @@ -731,7 +753,8 @@ if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ drop = true; } else { -// 
System.out.println("DROPPING: \n" + q.toString()); + if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")) + System.out.println("DROPPING: \n" + q.toString()); } } } @@ -801,6 +824,14 @@ for(String word : slot.getWords()){ rs = index.getResourcesWithScores(word, 10); + //debugging +// for(Iterator<SolrQueryResultItem> iter = rs.getItems().iterator();iter.hasNext();){ +// SolrQueryResultItem item = iter.next(); +// if(exclusions.contains(item.getUri())){ +// iter.remove(); +// } +// } + // System.out.println(word + "->" + rs); for(SolrQueryResultItem item : rs.getItems()){ int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); @@ -1364,12 +1395,11 @@ // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "In which programming language is GIMP written?"; // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; - String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; +// String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; // String question = "Which/WDT country/NN does/VBZ the/DT Airedale/NNP Terrier/NNP come/VBP from/IN"; -// String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP"; + String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP"; // String question = "How/WRB many/JJ films/NNS did/VBD Leonardo/NNP DiCaprio/NNP star/VB in/IN"; - -// String question = "Give me all books written by authors influenced by Ernest Hemingway."; +// String question = "Which/WDT music/NN albums/NNS contain/VBP the/DT song/NN Last/NNP Christmas/NNP"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), // 
Collections.<String>singletonList(""), Collections.<String>emptyList()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-03 19:32:25 UTC (rev 3371) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-04 12:51:33 UTC (rev 3372) @@ -41,6 +41,7 @@ PartOfSpeechTagger tagger; LTAGLexicon g; LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); + Parser p; Preprocessor pp; @@ -53,9 +54,14 @@ boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; - boolean USE_NER = false; + boolean USE_NER = true; public Templator() { + this(new StanfordPartOfSpeechTagger()); + } + + public Templator(final PartOfSpeechTagger tagger) { + this.tagger = tagger; List<InputStream> grammarFiles = new ArrayList<InputStream>(); for(int i = 0; i < GRAMMAR_FILES.length; i++){ @@ -64,8 +70,6 @@ g = LTAG_Constructor.construct(grammarFiles); - tagger = new StanfordPartOfSpeechTagger(); -// tagger = new ApachePartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-06 21:28:54
|
Revision: 3382 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3382&view=rev Author: lorenz_b Date: 2011-11-06 21:28:47 +0000 (Sun, 06 Nov 2011) Log Message: ----------- Added new method to use redirects. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-06 20:15:33 UTC (rev 3381) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-06 21:28:47 UTC (rev 3382) @@ -8,9 +8,9 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -18,7 +18,6 @@ import java.util.SortedSet; import java.util.TreeSet; -import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.dllearner.algorithm.qtl.util.ModelGenerator; import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; @@ -287,7 +286,8 @@ generatedQueries = getWeightedSPARQLQueries(templates); sparqlQueryCandidates = new ArrayList<Query>(); int i = 0; - for(WeightedQuery wQ : generatedQueries){System.out.println(wQ); + for(WeightedQuery wQ : generatedQueries){ + System.out.println(wQ.explain()); sparqlQueryCandidates.add(wQ.getQuery()); if(i == maxTestedQueries){ break; @@ -648,12 +648,8 @@ allocations = new TreeSet<Allocation>(); for(Slot slot : t.getSlots()){ - allocations = computeAllocations(slot); + allocations = computeAllocations(slot, 50); - 
normProminenceValues(allocations); - - computeScore(allocations); - slot2Allocations.put(slot, allocations); //for tests add the property URI with http://dbpedia.org/property/ namespace @@ -788,6 +784,8 @@ WeightedQuery w = new WeightedQuery(q); double newScore = query.getScore() + a.getScore(); w.setScore(newScore); + w.addAllocations(query.getAllocations()); + w.addAllocation(a); tmp.add(w); } @@ -836,15 +834,20 @@ } */ - private Set<Allocation> computeAllocations(Slot slot){ - Set<Allocation> allocations = new TreeSet<Allocation>(); + private SortedSet<Allocation> computeAllocations(Slot slot){ + SortedSet<Allocation> allocations = new TreeSet<Allocation>(); SolrSearch index = getIndexBySlotType(slot); SolrQueryResultSet rs; for(String word : slot.getWords()){ - rs = index.getResourcesWithScores(word, 30); + if(slot.getSlotType() == SlotType.RESOURCE){ + rs = index.getResourcesWithScores(word, 250); + } else { + rs = index.getResourcesWithScores(word, 30); + } + //debugging // for(Iterator<SolrQueryResultItem> iter = rs.getItems().iterator();iter.hasNext();){ // SolrQueryResultItem item = iter.next(); @@ -855,16 +858,68 @@ System.out.println(word + "->" + rs); for(SolrQueryResultItem item : rs.getItems()){ + double similarity = Similarity.getSimilarity(word, item.getLabel()); + //get the labels of the redirects and compute the highest similarity + if(slot.getSlotType() == SlotType.RESOURCE){ + Set<String> labels = getRedirectLabels(item.getUri()); + for(String label : labels){ + double tmp = Similarity.getSimilarity(word, label); + if(tmp > similarity){ + similarity = tmp; + } + } + } int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); - double similarity = Similarity.getSimilarity(word, item.getLabel()); allocations.add(new Allocation(item.getUri(), prominence, similarity)); } } - return allocations; + normProminenceValues(allocations); + + computeScore(allocations); + return new TreeSet<Allocation>(allocations); } + private Set<Allocation> 
computeAllocations(Slot slot, int limit){ + SortedSet<Allocation> allocations = computeAllocations(slot); + + if(allocations.isEmpty()){ + return allocations; + } + + ArrayList<Allocation> l = new ArrayList<Allocation>(allocations); + Collections.sort(l, new Comparator<Allocation>() { + + @Override + public int compare(Allocation o1, Allocation o2) { + double dif = o1.getScore() - o2.getScore(); + if(dif < 0){ + return 1; + } else if(dif > 0){ + return -1; + } else { + return o1.getUri().compareTo(o2.getUri()); + } + } + }); + + return new TreeSet<Allocation>(l.subList(0, Math.min(limit, allocations.size()))); + } + + private Set<String> getRedirectLabels(String uri){ + Set<String> labels = new HashSet<String>(); + String query = String.format("SELECT ?label WHERE {?s <http://dbpedia.org/ontology/wikiPageRedirects> <%s>. ?s <%s> ?label.}", uri, RDFS.label.getURI()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + labels.add(qs.getLiteral("label").getLexicalForm()); + + } + return labels; + } + private int getProminenceValue(String uri, SlotType type){ int cnt = 1; String query = null; @@ -1417,15 +1472,17 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); -// String question = "In which programming language is GIMP written?"; // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; // String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; // String question = "Which/WDT country/NN does/VBZ the/DT Airedale/NNP Terrier/NNP come/VBP from/IN"; // String question = "When/WRB was/VBD Capcom/NNP founded/VBD"; - String question = "Is/VBZ there/RB a/DT video/NN game/NN called/VBN Battle/NNP Chess/NNP"; +// String question = "Which/WDT organizations/NNS were/VBD founded/VBN in/IN 1950/CD"; 
+// String question = "Is/VBZ there/RB a/DT video/NN game/NN called/VBN Battle/NNP Chess/NNP"; // String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP"; // String question = "How/WRB many/JJ films/NNS did/VBD Leonardo/NNP DiCaprio/NNP star/VB in/IN"; // String question = "Which/WDT music/NN albums/NNS contain/VBP the/DT song/NN Last/NNP Christmas/NNP"; +// String question = "Which/WDT companies/NNS are/VBP located/VBN in/IN California/NNP USA/NNP"; + String question = "Who/WP wrote/VBD the/DT book/NN The/NNP pillars/NNP of/NNP the/NNP Earth/NNP"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), // Collections.<String>singletonList(""), Collections.<String>emptyList()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-06 20:15:33 UTC (rev 3381) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-06 21:28:47 UTC (rev 3382) @@ -78,9 +78,9 @@ @Override public int compareTo(Allocation o) { if(o.getScore() < this.score){ + return 1; + } else if(o.getScore() > this.score){ return -1; - } else if(o.getScore() > this.score){ - return 1; } else { return this.uri.compareTo(o.getUri()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-08 12:48:02
|
Revision: 3388 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3388&view=rev Author: lorenz_b Date: 2011-11-08 12:47:55 +0000 (Tue, 08 Nov 2011) Log Message: ----------- Added constructors. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-07 18:14:29 UTC (rev 3387) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-08 12:47:55 UTC (rev 3388) @@ -24,6 +24,7 @@ import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; @@ -163,6 +164,20 @@ templateGenerator = new Templator(tagger); } + public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger, WordNet wordNet){ + init(options); + + Set<String> predicateFilters = new HashSet<String>(); + predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); + predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); + + prefixMap = Prefixes.getPrefixes(); + + modelGenenerator = new ModelGenerator(endpoint, predicateFilters); + + templateGenerator = new Templator(tagger, wordNet); + } + /* * Only for Evaluation useful. 
*/ @@ -870,7 +885,7 @@ } } } - int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); + double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); allocations.add(new Allocation(item.getUri(), prominence, similarity)); } @@ -921,7 +936,7 @@ return labels; } - private int getProminenceValue(String uri, SlotType type){ + private double getProminenceValue(String uri, SlotType type){ int cnt = 1; String query = null; if(type == SlotType.CLASS){ @@ -941,6 +956,10 @@ projectionVar = qs.varNames().next(); cnt = qs.get(projectionVar).asLiteral().getInt(); } +// if(cnt == 0){ +// return 0; +// } +// return Math.log(cnt); return cnt; } @@ -1395,7 +1414,7 @@ } } - } catch (Exception e) { + } catch (Exception e) {e.printStackTrace(); logger.error("Query execution failed.", e); } return resources; @@ -1499,7 +1518,8 @@ // String question = "How/WRB many/JJ films/NNS did/VBD Leonardo/NNP DiCaprio/NNP star/VB in/IN"; // String question = "Which/WDT music/NN albums/NNS contain/VBP the/DT song/NN Last/NNP Christmas/NNP"; // String question = "Which/WDT companies/NNS are/VBP located/VBN in/IN California/NNP USA/NNP"; - String question = "Who/WP wrote/VBD the/DT book/NN The/NNP pillars/NNP of/NNP the/NNP Earth/NNP"; +// String question = "Who/WP wrote/VBD the/DT book/NN The/NNP pillars/NNP of/NNP the/NNP Earth/NNP"; + String question = "Who/WP is/VBZ called/VBN Dana/NNP"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), // Collections.<String>singletonList(""), Collections.<String>emptyList()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2011-11-07 18:14:29 UTC (rev 3387) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2011-11-08 12:47:55 UTC (rev 3388) @@ -31,6 +31,15 @@ } } + public WordNet(String configPath) { + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream(configPath)); + dict = Dictionary.getInstance(); + } catch (JWNLException e) { + e.printStackTrace(); + } + } + public List<String> getBestSynonyms(POS pos, String s) { List<String> synonyms = new ArrayList<String>(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-07 18:14:29 UTC (rev 3387) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-08 12:47:55 UTC (rev 3388) @@ -54,14 +54,19 @@ boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; - boolean USE_NER = true; + boolean USE_NER = false; public Templator() { - this(new StanfordPartOfSpeechTagger()); + this(new StanfordPartOfSpeechTagger(), new WordNet()); } public Templator(final PartOfSpeechTagger tagger) { + this(tagger, new WordNet()); + } + + public Templator(final PartOfSpeechTagger tagger, WordNet wordnet) { this.tagger = tagger; + this.wordnet = wordnet; List<InputStream> grammarFiles = new ArrayList<InputStream>(); for(int i = 0; i < GRAMMAR_FILES.length; i++){ @@ -78,8 +83,6 @@ p.MODE = "LEIPZIG"; pp = new Preprocessor(USE_NER); - - wordnet = new WordNet(); } public void setUNTAGGED_INPUT(boolean b) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-11-14 10:56:48
|
Revision: 3400 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3400&view=rev Author: christinaunger Date: 2011-11-14 10:56:41 +0000 (Mon, 14 Nov 2011) Log Message: ----------- [tbsl] enabled parsing of NERs enclosed in "..." and of genitives (...'s) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-11-12 23:10:28 UTC (rev 3399) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-11-14 10:56:41 UTC (rev 3400) @@ -33,6 +33,8 @@ static List<Integer> usedInts = new ArrayList<Integer>(); static ArrayList<String> doubles = new ArrayList<String>(); + public static boolean VERBOSE = true; + static ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps, String mode) { // DISAM: CLEAR @@ -191,7 +193,7 @@ start++; } - logger.trace("\ncovered tokens: " + coveredTokens); + if (VERBOSE) logger.trace("\ncovered tokens: " + coveredTokens); /* construct slots for all unknown tokens */ @@ -211,7 +213,7 @@ } } } - logger.trace("unknown words: " + unknownWords); + if (VERBOSE) logger.trace("unknown words: " + unknownWords); List<Pair<String,String>> buildSlotFor = new ArrayList<Pair<String,String>>(); @@ -238,7 +240,7 @@ System.out.println("Oh no, " + s + " has no POS tag!"); } } - logger.trace("build
slot for: " + buildSlotFor + "\n"); + if (VERBOSE) logger.trace("build slot for: " + buildSlotFor + "\n"); List<String[]> entries; if (mode.equals("LEIPZIG")) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2011-11-12 23:10:28 UTC (rev 3399) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2011-11-14 10:56:41 UTC (rev 3400) @@ -18,6 +18,7 @@ public boolean USE_LESS_MEMORY = false; public boolean SHOW_GRAMMAR = false; public boolean SHOW_LEXICAL_COVERAGE = false; + public boolean VERBOSE = true; public String MODE = "BASIC"; // MODE ::= BASIC | LEIPZIG (set by Templator and BasicTemplator) private String[] input; @@ -50,6 +51,8 @@ derivedTrees.clear(); dudes.clear(); temporaryEntries.clear(); + + if (!VERBOSE) GrammarFilter.VERBOSE = false; /* * create a local copy of the grammar with own treeIDs. 
This is @@ -64,7 +67,7 @@ inputNoTags += s.substring(0,s.indexOf("/")) + " "; } - this.input = ("# ".concat(inputNoTags.trim())).split(" "); + this.input = ("# ".concat(inputNoTags.replaceAll("'","").trim())).split(" "); int n = this.input.length; @@ -84,7 +87,7 @@ internalParse(parseGrammar.getDPInitTrees(), n); } - logger.trace("Constructed " + derivationTrees.size() + " derivation trees.\n"); + if (VERBOSE) logger.trace("Constructed " + derivationTrees.size() + " derivation trees.\n"); return derivationTrees; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-11-12 23:10:28 UTC (rev 3399) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-11-14 10:56:41 UTC (rev 3400) @@ -16,19 +16,25 @@ private static final Logger logger = Logger.getLogger(Preprocessor.class); - static final String[] genericReplacements = { "\"", "", "'", "", "[!?.,;]", "" }; + static final String[] genericReplacements = { "[!?.,;]", "" }; static final String[] englishReplacements = { "don't", "do not", "doesn't", "does not" }; static boolean USE_NER; + static boolean VERBOSE; static NER ner; public Preprocessor(boolean n) { USE_NER = n; + VERBOSE = true; if (USE_NER) { // ner = new LingPipeNER(true); //not case sensitive best solution? ner = new DBpediaSpotlightNER(); } } + public void setVERBOSE(boolean b) { + VERBOSE = b; + } + public String normalize(String s) { return normalize(s, new String[0]); } @@ -58,7 +64,7 @@ * nn/RBR of/IN > nn/NPREP * usw. 
* */ - String condensedstring = taggedstring; + String condensedstring = taggedstring.replaceAll("``/``","").replaceAll("''/''","").replaceAll(" "," "); Matcher m; Pattern compAdjPattern = Pattern.compile("(\\w+/RBR.(\\w+)/JJ)"); @@ -89,7 +95,7 @@ m = compAdjPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJR"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJR"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJR"); } // m = superAdjPattern.matcher(condensedstring); @@ -99,57 +105,57 @@ // } m = howManyPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by how/WLEX many/WLEX"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by how/WLEX many/WLEX"); condensedstring = condensedstring.replaceFirst(m.group(1),"how/WLEX many/WLEX"); } m = howAdjPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJH"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/JJH"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJH"); } m = thesameasPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/NNSAME"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/NNSAME"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/NNSAME"); } m = nprepPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/NPREP"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/NPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/NPREP"); } m = didPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by \"\""); + if (VERBOSE) logger.trace("Replacing " + 
m.group(1) + " by \"\""); condensedstring = condensedstring.replaceFirst(m.group(1),""); } m = prepfrontPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by \"\""); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by \"\""); condensedstring = condensedstring.replaceFirst(m.group(1),""); } m = passivePattern1a.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(6)+"/PASSIVE"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(6)+"/PASSIVE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6)+"/PASSIVE"); } m = passivePattern1b.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(6)+m.group(7)+"/PASSIVE"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(6)+m.group(7)+"/PASSIVE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6) + m.group(7)+"/PASSIVE"); } m = passivePattern2a.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE"); } m = pseudopassPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/VPREP"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/VPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/VPREP"); } m = pseudopwhPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+m.group(8)+"/VPREP"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+m.group(8)+"/VPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+" "+m.group(8)+"/VPREP"); } m = 
saveIsThere.matcher(condensedstring); @@ -158,57 +164,57 @@ } m = passivePattern2b.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE"); } m = passpartPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/PASSPART"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/PASSPART"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/PASSPART"); } m = vpassPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASS"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASS"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASS"); } m = vpassinPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASSIN"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASSIN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASSIN"); } m = gerundinPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/GERUNDIN"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/GERUNDIN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/GERUNDIN"); } m = vprepPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPREP"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"/VPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPREP"); } m = whenPattern.matcher(condensedstring); while (m.find()) { - 
logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHEN"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHEN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHEN"); } m = wherePattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHERE"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHERE"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHERE"); } m = adjsPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJ"); } m = adjnounPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN"); } m = adjnprepPattern.matcher(condensedstring); while (m.find()) { - logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNPREP"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNPREP"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNPREP"); } @@ -259,7 +265,7 @@ List<String> namedentities = ner.getNamedEntitites(untagged); List<String> usefulnamedentities = new ArrayList<String>(); - logger.trace("Proposed NEs: " + namedentities); + if (VERBOSE) logger.trace("Proposed NEs: " + namedentities); // keep only longest matches (e.g. 
keep 'World of Warcraft' and forget about 'Warcraft') // containing at least one upper case letter (in order to filter out errors like 'software') @@ -277,7 +283,7 @@ } } - logger.trace("Accepted NEs: " + usefulnamedentities); + if (VERBOSE) logger.trace("Accepted NEs: " + usefulnamedentities); // replace POS tags accordingly for (String ne : usefulnamedentities) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-11-12 23:10:28 UTC (rev 3399) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-11-14 10:56:41 UTC (rev 3400) @@ -34,8 +34,23 @@ String type = "UNSPEC"; + /* 's */ + if (token.equals("'s")) { + String slot = "SLOT_of/SYMPROPERTY/of"; + String[] npAdjunct = {token, + "(NP NP* PART:'s' NP[obj]))", + "<x,l1,<e,t>,[ l1:[ y | SLOT_of(x,y) ] ],[(l2,y,obj,<e,t>)],[l2=l1],["+slot+"]>" + + " ;; <x,l1,<e,t>,[ l1:[ y | empty(x,y) ] ],[(l2,y,obj,<e,t>)],[l2=l1],[]>"}; + String[] dpAdjunct = {token, + "(DP DP* PART:'s' NP[obj]))", + "<x,l1,<<e,t>,t>,[ l1:[ y | SLOT_of(x,y) ] ],[(l2,y,obj,<e,t>)],[l2=l1],["+slot+"]>" + + " ;; <x,l1,<<e,t>,t>,[ l1:[ y | empty(x,y) ] ],[(l2,y,obj,<e,t>)],[l2=l1],[]>"}; + result.add(npAdjunct); + result.add(dpAdjunct); + } + /* NOUNS */ - if (equalsOneOf(pos,noun)) { + else if (equalsOneOf(pos,noun)) { if (pos.equals("NN") || pos.equals("NNS")) { type = "CLASS"; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-12 23:10:28 UTC (rev 3399) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-14 10:56:41 UTC (rev 
3400) @@ -53,8 +53,9 @@ boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; - boolean USE_NER = false; + boolean USE_WORDNET = true; + boolean VERBOSE = true; public Templator() { this(new StanfordPartOfSpeechTagger(), new WordNet()); @@ -74,7 +75,6 @@ } g = LTAG_Constructor.construct(grammarFiles); - p = new Parser(); p.SHOW_GRAMMAR = true; @@ -85,12 +85,41 @@ pp = new Preprocessor(USE_NER); } + public Templator(boolean b) { + this.tagger = new StanfordPartOfSpeechTagger(); + this.USE_WORDNET = false; + VERBOSE = b; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + + p = new Parser(); + p.SHOW_GRAMMAR = false; + p.VERBOSE = b; + p.USE_DPS_AS_INITTREES = true; + p.CONSTRUCT_SEMANTICS = true; + p.MODE = "LEIPZIG"; + + pp = new Preprocessor(USE_NER); + pp.setVERBOSE(b); + } + public void setUNTAGGED_INPUT(boolean b) { UNTAGGED_INPUT = b; } public void setUSE_NER(boolean b) { USE_NER = b; } + public void setVERBOSE(boolean b) { + VERBOSE = b; + } + public void setGrammarFiles(String[] gf) { + GRAMMAR_FILES = gf; + } public Set<Template> buildTemplates(String s) { @@ -100,7 +129,7 @@ if (UNTAGGED_INPUT) { s = pp.normalize(s); tagged = tagger.tag(s); - logger.trace("Tagged input: " + tagged); + if (VERBOSE) logger.trace("Tagged input: " + tagged); } else { tagged = s; @@ -114,20 +143,20 @@ else newtagged = pp.condenseNominals(tagged); newtagged = pp.condense(newtagged); - logger.trace("Preprocessed: " + newtagged); + if (VERBOSE) logger.trace("Preprocessed: " + newtagged); p.parse(newtagged,g); if (p.getDerivationTrees().isEmpty()) { p.clear(g,p.getTemps()); clearAgain = false; - logger.error("[Templator.java] '" + s + "' could not be parsed."); + if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); } else { try { 
p.buildDerivedTrees(g); } catch (ParseException e) { - logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); } } @@ -154,10 +183,12 @@ if (!containsModuloRenaming(drses,drs)) { // // DEBUG - System.out.println(dude); - System.out.println(drs); - for (Slot sl : slots) { - System.out.println(sl.toString()); + if (VERBOSE) { + System.out.println(dude); + System.out.println(drs); + for (Slot sl : slots) { + System.out.println(sl.toString()); + } } // // drses.add(drs); @@ -168,54 +199,55 @@ continue; } - // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + 
newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; + if(newwords.isEmpty()){ + } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if(newwords.isEmpty()){ - - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } + } } // This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-12-20 11:43:32
|
Revision: 3511 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3511&view=rev Author: christinaunger Date: 2011-12-20 11:43:21 +0000 (Tue, 20 Dec 2011) Log Message: ----------- [tbsl] revised BasicTemplator Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2011-12-19 16:07:11 UTC (rev 3510) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2011-12-20 11:43:21 UTC (rev 3511) @@ -33,6 +33,7 @@ List<Slot> slots; // BasicQueryTemplate query; List<Integer> usedInts; + List<Simple_DRS_Condition> unusedConditions; public DRS2BasicSPARQL_Converter() { // query = new BasicQueryTemplate(); @@ -54,7 +55,7 @@ // query = new BasicQueryTemplate(); slots = ls; - + return convert(drs, new BasicQueryTemplate(), false); } @@ -65,8 +66,10 @@ if (!restructureEmpty(drs)) { return null; } - System.out.println("--- DRS (after) : " + drs); // DEBUG - + System.out.println("DRS:\n" + drs); // DEBUG + + unusedConditions = new ArrayList<Simple_DRS_Condition>(); + for (DRS_Condition condition : drs.getConditions()) { convertCondition(condition,temp); if (negate) { @@ -77,6 +80,21 @@ } } } + + for (Simple_DRS_Condition c : unusedConditions) { + if (!temp.getVariablesInConditions().contains(c.getArguments().get(0))) { + String v = c.getArguments().get(0).getValue(); + for (Slot s : slots) { + if (s.getAnchor().equals(v) && !s.getSlotType().equals(SlotType.RESOURCE)) { + 
String fresh = v+createFresh(); + s.setAnchor(fresh); + temp.addConditions(new Path(v,"isA",fresh)); + temp.addSlot(s); + break; + } + } + } + } for (DiscourseReferent referent : drs.collectDRs()) { if (referent.isMarked()) { @@ -91,6 +109,7 @@ f.addNotBound(term); temp.addFilter(f); } + for (Slot s : slots) { if (s.getAnchor().equals(referent.getValue())) { temp.addSlot(s); // query @@ -311,15 +330,14 @@ new SPARQL_Term(simple.getArguments().get(1).getValue(),true), SPARQL_PairType.REGEX))); } - else if (predicate.equals("ISA")) { - temp.addConditions(new Path(simple.getArguments().get(0).getValue(),"isA",simple.getArguments().get(1).getValue())); + else { + if (simple.getArguments().size() == 1) { + unusedConditions.add((Simple_DRS_Condition) condition); + } } -// else { -// if (simple.getArguments().size() == 1) { -// temp.addConditions(new Path(simple.getArguments().get(0).getValue(),"rdf:type",simple.getPredicate())); -// } -// } + } + return temp; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java 2011-12-19 16:07:11 UTC (rev 3510) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java 2011-12-20 11:43:21 UTC (rev 3511) @@ -106,6 +106,16 @@ } return result; } + + public Set<String> getVariablesInConditions() { + Set<String> vars = new HashSet<String>(); + for (Path p : conditions) { + vars.add(p.start); + vars.add(p.via); + vars.add(p.target); + } + return vars; + } public Set<SPARQL_Term> getSelTerms() { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2011-12-19 16:07:11 UTC 
(rev 3510) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2011-12-20 11:43:21 UTC (rev 3511) @@ -59,19 +59,19 @@ /* DP */ String[] dpEntry1 = {token, "(DP (NP " + treetoken + "))", - "<x,l1,<<e,t>,t>,[ l1:[ x,p | SLOT_" + tokenfluent + "(p), ISA(x,p) ] ],[],[],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; String[] dpEntry2 = {token, "(DP (NP " + treetoken + " DP[name]))", - "<x,l1,<<e,t>,t>,[ l1:[ x,p | SLOT_" + tokenfluent + "(p), ISA(x,p), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; result.add(dpEntry1); result.add(dpEntry2); /* NP */ String[] npEntry1 = {token, "(NP " + treetoken + ")", - "<x,l1,<e,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), ISA(x,p) ] ],[],[],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; String[] npEntry2 = {token, "(NP " + treetoken + " DP[name])", - "<x,l1,<e,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), ISA(x,p), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; result.add(npEntry1); result.add(npEntry2); } @@ -89,13 +89,13 @@ else if (pos.equals("NPREP")) { String[] dpEntry1 = {token, "(DP (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; String[] dpEntry2 = {token, "(DP DET[det] (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ | 
SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>"}; String[] npEntry = {token, "(NP " + treetoken + " DP[pobj])", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; result.add(dpEntry1); result.add(dpEntry2); result.add(npEntry); @@ -104,13 +104,13 @@ slot = "SLOT_" + tokenfluent + "/UNSPEC/" + token; String[] dpEntry1 = {token, "(DP (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x,p | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>" }; + "<x,l1,<<e,t>,t>,[ l1:[ x,p | SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>" }; String[] dpEntry2 = {token, "(DP DET[det] (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>" }; + "<x,l1,<<e,t>,t>,[ l1:[ p | SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>" }; String[] npEntry = {token, "(NP " + treetoken + " DP[pobj])", - "<x,l1,<e,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ p | SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; result.add(dpEntry1); result.add(dpEntry2); result.add(npEntry); @@ -119,7 +119,7 @@ slot = "SLOT_" + tokenfluent + "/UNSPEC/" + token; String[] npEntry = {token, "(NP " + treetoken + " )", - "<x,l1,<e,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), ISA(x,p) ] ],[],[],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; result.add(npEntry); } @@ -136,11 +136,11 @@ if (pos.equals("PASSIVE")) { String[] passEntry1 = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", - 
"<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; String[] passEntry2 = {token, "(S DP[wh] (VP DP[dp] V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(y,x) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; result.add(passEntry1); result.add(passEntry2); @@ -148,35 +148,35 @@ else if (pos.equals("PASSPART")) { String[] passpartEntry = {token, "(NP NP* (VP V:'" + token + "' DP[dp]))", - "<x,l1,t,[ l1:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>" + + "<x,l1,t,[ l1:[ p | SLOT_" + token + "(y,x) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[ | empty(y,x) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[]>"}; result.add(passpartEntry); } else if (pos.equals("VPASS")) { String[] passEntry = {token, "(S DP[subj] (VP V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(y,x) ] 
],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; result.add(passEntry); } else if (pos.equals("VPASSIN")) { String[] passEntry1 = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; String[] passEntry2 = {token, "(S DP[dp] (VP V:'" + token + "' NUM[num]))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,z) ] ],[(l2,x,dp,<<e,t>,t>),(l3,z,num,e)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(y,z) ] ],[(l2,x,dp,<<e,t>,t>),(l3,z,num,e)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry1); result.add(passEntry2); } else if (pos.equals("GERUNDIN")) { String[] gerundinEntry1 = {token, "(NP NP* V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>" + + "<x,l1,t,[ l1:[ p | SLOT_" + token + "(y,x) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[]>"}; String[] gerundinEntry2 = {token, "(ADJ V:'" + token + "' DP[obj]))", - "<x,l1,<e,t>,[ l1:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>" + + "<x,l1,<e,t>,[ l1:[ p | SLOT_" + token + "(y,x) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>" + " ;; <x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[]>"}; result.add(gerundinEntry1); result.add(gerundinEntry2); @@ -184,14 +184,14 @@ else if (pos.equals("VPREP")) { String[] passEntry1 = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + 
token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; String[] passEntry2 = {token, "(S DP[subj] (VP V:'" + token + "' NUM[num]))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y), DATE(y,z) ] ],[(l2,x,subj,<<e,t>,t>),(l3,z,num,e)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(x,y), DATE(y,z) ] ],[(l2,x,subj,<<e,t>,t>),(l3,z,num,e)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; String[] whEntry = {token, "(S DP[obj] (VP DP[subj] V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; result.add(passEntry1); result.add(passEntry2); @@ -200,7 +200,7 @@ else if (pos.equals("VBD") || pos.equals("VBZ") || pos.equals("VBP")) { String[] vEntry = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] 
],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; result.add(vEntry); @@ -208,18 +208,18 @@ else if (pos.equals("VB")) { String[] whEntry = {token, "(S DP[obj] (VP DP[subj] V:'" + token + "'))", - "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; result.add(whEntry); } else if (pos.equals("VBG") || pos.equals("VBN")) { String[] gerEntry = {token, "(NP NP* (VP V:'" + token + "' DP[dp]))", - "<x,l1,t,[ l1:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>" + + "<x,l1,t,[ l1:[ p | SLOT_" + token + "(x,y) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>" + ";; <x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[]>"}; String[] wasGerEntry = {token, "(S DP[comp] (VP V:'was' DP[subject] V:'" + token + "'))", - "<y,l1,t,[ l1:[ | SLOT_" + token + "(p), p(y,z) ] ],[(l2,y,comp,<<e,t>,t>), (l3,z,subject,<<e,t>,t>) ],[ l2=l1, l3=l1 ],[" + slot + "]>"}; + "<y,l1,t,[ l1:[ | SLOT_" + token + "(y,z) ] ],[(l2,y,comp,<<e,t>,t>), (l3,z,subject,<<e,t>,t>) ],[ l2=l1, l3=l1 ],[" + slot + "]>"}; result.add(wasGerEntry); result.add(gerEntry); } @@ -227,14 +227,14 @@ slot = "SLOT_" + token + "/PROPERTY/" + token + "_date"; String[] whenEntry = {token, "(S DP[subj] (VP V:'" + token + "'))", - "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; + "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + 
"(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; result.add(whenEntry); } else if (pos.equals("WHERE")) { slot = "SLOT_" + token + "/PROPERTY/" + token + "_place"; String[] whereEntry = {token, "(S DP[subj] (VP V:'" + token + "'))", - "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; + "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; result.add(whereEntry); } @@ -247,13 +247,13 @@ if (pos.equals("JJ")) { String[] adjEntry = {token, "(NP ADJ:'" + token.toLowerCase() + "' NP*)", - "<x,l1,<e,t>,[ l1:[ j | SLOT_" + token + "(p), p(x,j) ] ],[],[],["+slot+"]>"}; + "<x,l1,<e,t>,[ l1:[ j | SLOT_" + token + "(x,j) ] ],[],[],["+slot+"]>"}; result.add(adjEntry); } if (pos.equals("JJH")) { String[] howEntry = {"how "+token, "(DP ADJ:'" + token.toLowerCase() + "')", - "<x,l1,<<e,t>,t>,[ l1:[ ?j,x,p | SLOT_" + token + "(p), p(x,j) ] ],[],[],["+slot+"]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ ?j,x,p | SLOT_" + token + "(x,j) ] ],[],[],["+slot+"]>"}; result.add(howEntry); } /* COMPARATIVE */ @@ -266,11 +266,11 @@ String[] compEntry1 = {token, "(ADJ ADJ:'" + token.toLowerCase() + "' P:'than' DP[compobj])", - "<x,l1,<e,t>,[ l1:[ p,j,i | SLOT_" + token + "(p), p(x,i), p(y,j), " + comp + "(i,j) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; + "<x,l1,<e,t>,[ l1:[ p,j,i | SLOT_" + token + "(x,i), SLOT_" + token + "(y,j), " + comp + "(i,j) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; result.add(compEntry1); String[] compEntry2 = {token, "(NP NP* (ADJ ADJ:'" + token.toLowerCase() + "' P:'than' DP[compobj]))", - "<x,l1,<e,t>,[ l1:[ p,j,i | SLOT_" + token + "(p), p(x,i), p(y,j), " + comp + "(i,j) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; + "<x,l1,<e,t>,[ l1:[ p,j,i | SLOT_" + token + "(x,i), SLOT_" + token + "(y,j), " + comp + "(i,j) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; result.add(compEntry2); } /* SUPERLATIVE */ @@ 
-283,28 +283,29 @@ String[] superEntry1 = {token, "(DET DET:'the' ADJ:'" + token.toLowerCase() + "')", - "<x,l1,e,[ l1:[ p,x,j | SLOT_" + token + "(p), p(x,j), " + comp + "(j) ] ],[],[],["+slot+"]>"}; + "<x,l1,e,[ l1:[ p,x,j | SLOT_" + token + "(x,j), " + comp + "(j) ] ],[],[],["+slot+"]>"}; result.add(superEntry1); String[] superEntry2 = {token, "(DP (NP DET:'the' ADJ:'" + token.toLowerCase() + "'))", - "<x,l1,<<e,t>,t>,[ l1:[ p,x,j | SLOT_" + token + "(p), p(x,j), " + comp + "(j) ] ],[],[],["+slot+"]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ p,x,j | SLOT_" + token + "(x,j), " + comp + "(j) ] ],[],[],["+slot+"]>"}; result.add(superEntry2); String[] superEntry3 = {token, "(DP (NP DET:'the' ADJ:'" + token.toLowerCase() + "' NP[noun]))", - "<x,l1,<<e,t>,t>,[ l1:[ p,x,j | SLOT_" + token + "(p), p(x,j), " + comp + "(j) ] ],[ (l2,x,noun,<e,t>) ],[l2=l1],["+slot+"]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ p,x,j | SLOT_" + token + "(x,j), " + comp + "(j) ] ],[ (l2,x,noun,<e,t>) ],[l2=l1],["+slot+"]>"}; result.add(superEntry3); } } /* PREPOSITIONS */ else if (equalsOneOf(pos,preps)) { + slot = "SLOT_" + token + "/PROPERTY/"; String[] npAdjunct = {token, "(NP NP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>" + - " ;; <x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; + // "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + + "<x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; String[] vpAdjunct = {token, "(VP VP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))", - "<x,l1,t,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>" + - " ;; <x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; + // "<x,l1,t,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + + "<x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; 
result.add(npAdjunct); result.add(vpAdjunct); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |