From: <lor...@us...> - 2011-10-26 09:04:10
|
Revision: 3322 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3322&view=rev Author: lorenz_b Date: 2011-10-26 09:03:57 +0000 (Wed, 26 Oct 2011) Log Message: ----------- Some changes to get fuzzy search working. Added some methods to Tagger for POS Tagger evaluation. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 08:41:40
UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -27,6 +27,7 @@ import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -48,6 +49,7 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -67,7 +69,7 @@ private Ranking ranking; private boolean useRemoteEndpointValidation; private boolean stopIfQueryResultNotEmpty; - private int maxTestedQueriesPerTemplate; + private int maxTestedQueriesPerTemplate = 50; private int maxQueryExecutionTimeInSeconds; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); @@ -88,7 +90,7 @@ private Map<String, List<String>> classesURICache; private Map<String, List<String>> propertiesURICache; - private Map<String, List<String>> learnedSPARQLQueries; + private Map<String, Object> learnedSPARQLQueries; private Set<Template> templates; private Collection<Query> sparqlQueryCandidates; private Map<Template, Collection<? extends Query>> template2Queries; @@ -120,6 +122,13 @@ templateGenerator = new Templator(); } + /* + * Only for Evaluation useful. 
+ */ + public void setUseIdealTagger(boolean value){ + templateGenerator.setUNTAGGED_INPUT(!value); + } + private void init(Options options){ String resourcesIndexUrl = options.fetch("solr.resources.url"); String resourcesIndexSearchField = options.fetch("solr.resources.searchfield"); @@ -127,8 +136,14 @@ String classesIndexUrl = options.fetch("solr.classes.url"); String classesIndexSearchField = options.fetch("solr.classes.searchfield"); - class_index = new ThresholdSlidingSolrSearch(classesIndexUrl, classesIndexSearchField, 1.0, 0.1); + SolrSearch dbpediaClassIndex = new SolrSearch(classesIndexUrl, classesIndexSearchField); + String yagoClassesIndexUrl = options.fetch("solr.yago.classes.url"); + String yagoClassesIndexSearchField = options.fetch("solr.yago.classes.searchfield"); + SolrSearch yagoClassIndex = new SolrSearch(yagoClassesIndexUrl, yagoClassesIndexSearchField); + + class_index = new ThresholdSlidingSolrSearch(dbpediaClassIndex);// new HierarchicalSolrSearch(dbpediaClassIndex, yagoClassIndex); + String propertiesIndexUrl = options.fetch("solr.properties.url"); String propertiesIndexSearchField = options.fetch("solr.properties.searchfield"); SolrSearch labelBasedPropertyIndex = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField); @@ -137,8 +152,12 @@ String boaPatternIndexSearchField = options.fetch("solr.boa.properties.searchfield"); SolrSearch patternBasedPropertyIndex = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField); - property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); + //first BOA pattern then label based +// property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); + //first label based then BOA pattern + property_index = new HierarchicalSolrSearch(labelBasedPropertyIndex, patternBasedPropertyIndex); + int maxIndexResults = Integer.parseInt(options.fetch("solr.query.limit"), 10); maxQueryExecutionTimeInSeconds = 
Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); @@ -191,7 +210,7 @@ } private void reset(){ - learnedSPARQLQueries = new HashMap<String, List<String>>(); + learnedSPARQLQueries = new HashMap<String, Object>(); resourcesURICache = new HashMap<String, List<String>>(); classesURICache = new HashMap<String, List<String>>(); propertiesURICache = new HashMap<String, List<String>>(); @@ -417,13 +436,13 @@ SPARQL_Prefix prefix = null; uriCandidates = getCandidateURIsSortedBySimilarity(slot); for(String uri : uriCandidates){ - for(Entry<String, String> uri2prefix : prefixMap.entrySet()){ - if(uri.startsWith(uri2prefix.getKey())){ - prefix = new SPARQL_Prefix(uri2prefix.getValue(), uri2prefix.getKey()); - uri = uri.replace(uri2prefix.getKey(), uri2prefix.getValue() + ":"); - break; - } - } +// for(Entry<String, String> uri2prefix : prefixMap.entrySet()){ +// if(uri.startsWith(uri2prefix.getKey())){ +// prefix = new SPARQL_Prefix(uri2prefix.getValue(), uri2prefix.getKey()); +// uri = uri.replace(uri2prefix.getKey(), uri2prefix.getValue() + ":"); +// break; +// } +// } for(Query query : queries){ if(slot.getSlotType() == SlotType.SYMPROPERTY){ Query reversedQuery = new Query(query); @@ -635,30 +654,57 @@ private void validateAgainstRemoteEndpoint(Collection<? 
extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); + SPARQL_QueryType queryType = SPARQL_QueryType.SELECT; for(Query query : queries){ + if(query.getQt() == SPARQL_QueryType.ASK){ + queryType = SPARQL_QueryType.ASK; + } else if(query.getQt() == SPARQL_QueryType.SELECT){ + queryType = SPARQL_QueryType.SELECT; + } queryStrings.add(query.toString()); } - validateAgainstRemoteEndpoint(queryStrings); + validateAgainstRemoteEndpoint(queryStrings, queryType); } - private void validateAgainstRemoteEndpoint(List<String> queries){ + private void validateAgainstRemoteEndpoint(List<String> queries, SPARQL_QueryType queryType){ logger.info("Testing candidate SPARQL queries on remote endpoint..."); mon.start(); - for(String query : queries){ - logger.info("Testing query:\n" + query); - List<String> results = getResultFromRemoteEndpoint(query); - if(!results.isEmpty()){ - learnedSPARQLQueries.put(query, results); - if(stopIfQueryResultNotEmpty){ + if(queryType == SPARQL_QueryType.SELECT){ + for(String query : queries){ + logger.info("Testing query:\n" + query); + List<String> results = getResultFromRemoteEndpoint(query); + if(!results.isEmpty()){ + learnedSPARQLQueries.put(query, results); + if(stopIfQueryResultNotEmpty){ + return; + } + } + logger.info("Result: " + results); + } + } else if(queryType == SPARQL_QueryType.ASK){ + for(String query : queries){ + logger.info("Testing query:\n" + query); + boolean result = executeAskQuery(query); + learnedSPARQLQueries.put(query, result); + if(stopIfQueryResultNotEmpty && result){ return; } + logger.info("Result: " + result); } - logger.info("Result: " + results); } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); } + private boolean executeAskQuery(String query){ + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + for(String uri : endpoint.getDefaultGraphURIs()){ + qe.addDefaultGraph(uri); + } + boolean ret = qe.execAsk(); + return ret; + } + private void 
validateAgainstLocalModel(Collection<? extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); for(Query query : queries){ @@ -686,7 +732,11 @@ private List<String> getResultFromRemoteEndpoint(String query){ List<String> resources = new ArrayList<String>(); try { - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query + " LIMIT 10")); + String queryString = query; + if(!query.contains("LIMIT") && !query.contains("ASK")){ + queryString = query + " LIMIT 10"; + } + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, queryString)); QuerySolution qs; String projectionVar; while(rs.hasNext()){ @@ -724,8 +774,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); -// String question = "Give me all books written by authors influenced by Ernest Hemingway."; - String question = "Give me all European Capitals!"; + String question = "Who wrote the book The pillars of the Earth?"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -7,6 +7,7 @@ import java.util.regex.Pattern; import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.nlp.DBpediaSpotlightNER; import org.dllearner.algorithm.tbsl.nlp.LingPipeNER; import org.dllearner.algorithm.tbsl.nlp.NER; import 
org.dllearner.algorithm.tbsl.templator.Templator; @@ -24,6 +25,7 @@ USE_NER = n; if (USE_NER) { ner = new LingPipeNER(true); //not case sensitive best solution? +// ner = new DBpediaSpotlightNER(); } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -3,12 +3,15 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import opennlp.tools.postag.POSModel; import opennlp.tools.postag.POSTaggerME; import opennlp.tools.util.Sequence; +import com.aliasi.tag.Tagging; + public class ApachePartOfSpeechTagger implements PartOfSpeechTagger{ private POSTaggerME tagger; @@ -41,6 +44,11 @@ tokenizer = new ApacheTokenizer(); } + + @Override + public String getName() { + return "Apache Open NLP POS Tagger"; + } @Override public String tag(String sentence) { @@ -49,6 +57,13 @@ return convert2TaggedSentence(tokens, tags); } + + public List<String> getTags(String sentence){ + String[] tokens = tokenizer.tokenize(sentence); + String[] tags = tagger.tag(tokens); + + return Arrays.asList(tags); + } @Override public List<String> tagTopK(String sentence) { @@ -61,6 +76,14 @@ return taggedSentences; } + @Override + public Tagging<String> getTagging(String sentence){ + String[] tokens = tokenizer.tokenize(sentence); + String[] tags = tagger.tag(tokens); + + return new Tagging<String>(Arrays.asList(tokens), Arrays.asList(tags)); + } + private String convert2TaggedSentence(String[] words, String[] tags){ StringBuilder sb = new StringBuilder(); for(int i = 0; i < words.length; i++){ Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/DBpediaSpotlightNER.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -48,15 +48,17 @@ } rd.close(); JSONObject json = new JSONObject(sb.toString()); - JSONArray array = json.getJSONArray("Resources"); - JSONObject entityObject; - for(int i = 0; i < array.length(); i++){ - entityObject = array.getJSONObject(i); - System.out.println("Entity: " + entityObject.getString("@surfaceForm")); - System.out.println("DBpedia URI: " + entityObject.getString("@URI")); - System.out.println("Types: " + entityObject.getString("@types")); - namedEntities.add(entityObject.getString("@surfaceForm")); - + if(!json.isNull("Resources")){ + JSONArray array = json.getJSONArray("Resources"); + JSONObject entityObject; + for(int i = 0; i < array.length(); i++){ + entityObject = array.getJSONObject(i); + System.out.println("Entity: " + entityObject.getString("@surfaceForm")); + System.out.println("DBpedia URI: " + entityObject.getString("@URI")); + System.out.println("Types: " + entityObject.getString("@types")); + namedEntities.add(entityObject.getString("@surfaceForm")); + + } } } catch (MalformedURLException e) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -42,6 +42,11 @@ } @Override + public String getName() { + return "LingPipe 
POS Tagger"; + } + + @Override public String tag(String sentence) { com.aliasi.tokenizer.Tokenizer tokenizer = IndoEuropeanTokenizerFactory.INSTANCE.tokenizer(sentence.toCharArray(), 0, sentence.length()); // Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(cs,0,cs.length); @@ -66,5 +71,16 @@ } return taggedSentences; } + + @Override + public Tagging<String> getTagging(String sentence) { + com.aliasi.tokenizer.Tokenizer tokenizer = IndoEuropeanTokenizerFactory.INSTANCE.tokenizer(sentence.toCharArray(), 0, sentence.length()); +// Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(cs,0,cs.length); + String[] tokens = tokenizer.tokenize(); + List<String> tokenList = Arrays.asList(tokens); + Tagging<String> tagging = tagger.tag(tokenList); + + return tagging; + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/PartOfSpeechTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -2,10 +2,17 @@ import java.util.List; +import com.aliasi.tag.Tagging; + public interface PartOfSpeechTagger { + String getName(); + String tag(String sentence); List<String> tagTopK(String sentence); + + Tagging<String> getTagging(String sentence); + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -4,9 +4,12 @@ import java.io.StringReader; import java.net.URISyntaxException; 
import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import com.aliasi.tag.Tagging; + import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.ling.TaggedWord; import edu.stanford.nlp.tagger.maxent.MaxentTagger; @@ -29,6 +32,11 @@ e.printStackTrace(); } } + + @Override + public String getName() { + return "Stanford POS Tagger"; + } @Override public String tag(String sentence) { @@ -53,5 +61,46 @@ public List<String> tagTopK(String sentence) { return Collections.singletonList(tag(sentence)); } + + public List<String> getTags(String sentence){ + List<String> tags = new ArrayList<String>(); + + ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); + + StringReader reader = new StringReader(sentence); + List<List<HasWord>> text = MaxentTagger.tokenizeText(reader); + + if (text.size() == 1) { + tagged = tagger.tagSentence(text.get(0)); + } + + for(TaggedWord tW : tagged){ + tags.add(tW.tag()); + } + + return tags; + } + + @Override + public Tagging<String> getTagging(String sentence){ + ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); + + StringReader reader = new StringReader(sentence); + List<List<HasWord>> text = MaxentTagger.tokenizeText(reader); + + if (text.size() == 1) { + tagged = tagger.tagSentence(text.get(0)); + } + + List<String> tokenList = new ArrayList<String>(); + List<String> tagList = new ArrayList<String>(); + + for(TaggedWord tW : tagged){ + tokenList.add(tW.word()); + tagList.add(tW.tag()); + } + + return new Tagging<String>(tokenList, tagList); + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -8,6 +8,8 @@ import 
org.annolab.tt4j.TreeTaggerException; import org.annolab.tt4j.TreeTaggerWrapper; +import com.aliasi.tag.Tagging; + public class TreeTagger implements PartOfSpeechTagger { TreeTaggerWrapper<String> tt; @@ -40,6 +42,17 @@ } return ""; } + + @Override + public String getName() { + return "Tree Tagger"; + } + + @Override + public Tagging<String> getTagging(String sentence) { + // TODO Auto-generated method stub + return null; + } @Override public List<String> tagTopK(String sentence) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -8,6 +8,9 @@ import net.didion.jwnl.JWNLException; import net.didion.jwnl.data.IndexWord; import net.didion.jwnl.data.POS; +import net.didion.jwnl.data.Pointer; +import net.didion.jwnl.data.PointerTarget; +import net.didion.jwnl.data.PointerType; import net.didion.jwnl.data.PointerUtils; import net.didion.jwnl.data.Synset; import net.didion.jwnl.data.Word; @@ -52,6 +55,24 @@ return synonyms; } + public List<String> getSisterTerms(POS pos, String s){ + List<String> sisterTerms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) +// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); + if(iw != null){ + Synset[] synsets = iw.getSenses();System.out.println(synsets[0]); + PointerTarget[] pointerArr = synsets[0].getTargets(); + System.out.println(pointerArr); + } + + } catch (JWNLException e) { + e.printStackTrace(); + } + return sisterTerms; + } + public List<String> getAttributes(String s) { List<String> result = new ArrayList<String>(); @@ -78,6 +99,7 @@ public static void main(String[] 
args) { System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); + System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); } /** Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -41,6 +41,14 @@ this(solrServerURL); this.searchField = searchField; } + + public String getServerURL() { + return server.getBaseURL(); + } + + public String getSearchField() { + return searchField; + } @Override public List<String> getResources(String queryString) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -1,13 +1,17 @@ package org.dllearner.algorithm.tbsl.search; +import java.text.NumberFormat; import java.util.ArrayList; import java.util.List; +import java.util.Locale; public class ThresholdSlidingSolrSearch extends SolrSearch { private double minThreshold = 0.8; private double step = 0.1; + private NumberFormat format = NumberFormat.getInstance(Locale.GERMAN); + public ThresholdSlidingSolrSearch(String solrServerURL) { super(solrServerURL); } @@ -22,26 +26,29 @@ this.step = step; } + public ThresholdSlidingSolrSearch(SolrSearch search){ + super(search.getServerURL(), search.getSearchField()); + } + + @Override public List<String> getResources(String queryString, int limit, 
int offset) { List<String> resources = new ArrayList<String>(); + double threshold = 1; String queryWithThreshold = queryString; while(resources.size() < limit && threshold >= minThreshold){ if(threshold < 1){ - queryWithThreshold = queryString + "~" + threshold; + queryWithThreshold = queryString + "~" + format.format(threshold); } resources.addAll(findResources(queryWithThreshold, limit - resources.size(), 0)); threshold -= step; } - return resources; } - - } \ No newline at end of file Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -49,6 +49,7 @@ //copy constructor public Query(Query query){ + this.qt = query.getQt(); Set<SPARQL_Term> selTerms = new HashSet<SPARQL_Term>(); for(SPARQL_Term term : query.getSelTerms()){ SPARQL_Term newTerm = new SPARQL_Term(term.getName()); @@ -56,6 +57,7 @@ newTerm.setIsURI(newTerm.isURI); newTerm.setAggregate(term.getAggregate()); newTerm.setOrderBy(term.getOrderBy()); + newTerm.setAlias(term.getAlias()); selTerms.add(newTerm); } this.selTerms = selTerms; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -73,6 +73,14 @@ public void setIsURI(boolean isURI){ this.isURI = isURI; } + + public String getAlias() { + return alias; + } + + public void setAlias(String alias) { + this.alias = alias; + } @Override public 
String toString() { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -39,10 +39,12 @@ } public List<String> getLexicalAnswerType(){ - String variable = query.getAnswerTypeVariable(); - for(Slot slot : slots){ - if(slot.getAnchor().equals(variable)){ - return slot.getWords(); + if(query.getQt() == SPARQL_QueryType.SELECT){ + String variable = query.getAnswerTypeVariable(); + for(Slot slot : slots){ + if(slot.getAnchor().equals(variable)){ + return slot.getWords(); + } } } return null; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-10-26 08:41:40 UTC (rev 3321) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-10-26 09:03:57 UTC (rev 3322) @@ -16,10 +16,9 @@ import org.dllearner.algorithm.tbsl.ltag.parser.LTAG_Lexicon_Constructor; import org.dllearner.algorithm.tbsl.ltag.parser.Parser; import org.dllearner.algorithm.tbsl.ltag.parser.Preprocessor; -import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sem.drs.DRS; import org.dllearner.algorithm.tbsl.sem.drs.UDRS; @@ -63,8 +62,8 @@ g = 
LTAG_Constructor.construct(grammarFiles); -// tagger = new StanfordPartOfSpeechTagger(); - tagger = new ApachePartOfSpeechTagger(); + tagger = new StanfordPartOfSpeechTagger(); +// tagger = new ApachePartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; @@ -93,6 +92,7 @@ } else { tagged = s; + s = extractSentence(tagged); } String newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); @@ -137,11 +137,11 @@ if (!containsModuloRenaming(drses,drs)) { // // DEBUG -// System.out.println(dude); -// System.out.println(drs); -// for (Slot sl : slots) { -// System.out.println(sl.toString()); -// } + System.out.println(dude); + System.out.println(drs); + for (Slot sl : slots) { + System.out.println(sl.toString()); + } // // drses.add(drs); @@ -186,6 +186,9 @@ newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); } } + if(newwords.isEmpty()){ + + } if (newwords.isEmpty()) { newwords.add(slot.getWords().get(0)); } @@ -250,5 +253,23 @@ } return false; } + + private String extractSentence(String taggedSentence){ + int pos = taggedSentence.indexOf("/"); + while(pos != -1){ + String first = taggedSentence.substring(0, pos); + int endPos = taggedSentence.substring(pos).indexOf(" "); + if(endPos == -1){ + endPos = taggedSentence.substring(pos).length(); + } + String rest = taggedSentence.substring(pos + endPos); + + taggedSentence = first + rest; + pos = taggedSentence.indexOf("/"); + + } + return taggedSentence; + + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |