From: <lor...@us...> - 2011-11-04 12:51:40
|
Revision: 3372 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3372&view=rev Author: lorenz_b Date: 2011-11-04 12:51:33 +0000 (Fri, 04 Nov 2011) Log Message: ----------- Added constructors with POS tagger. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-03 19:32:25 UTC (rev 3371) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-04 12:51:33 UTC (rev 3372) @@ -5,10 +5,12 @@ import java.io.IOException; import java.net.URL; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -22,6 +24,7 @@ import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; @@ -68,6 +71,10 @@ public class SPARQLTemplateBasedLearner implements SparqlQueryLearningAlgorithm{ + //for debugging + List<String> exclusions = Arrays.asList(new String[]{"http://dbpedia.org/ontology/GeopoliticalOrganisation", + "http://dbpedia.org/ontology/Non-ProfitOrganisation"}); + enum Ranking{ LUCENE, SIMILARITY, NONE } @@ -140,6 +147,20 @@ templateGenerator = new Templator(); } + public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger){ + init(options); + + Set<String> predicateFilters = new HashSet<String>(); + predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); + predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); + + prefixMap = Prefixes.getPrefixes(); + + modelGenenerator = new ModelGenerator(endpoint, predicateFilters); + + templateGenerator = new Templator(tagger); + } + /* * Only for Evaluation useful. */ @@ -635,7 +656,7 @@ Query cleanQuery = t.getQuery(); queries.add(new WeightedQuery(cleanQuery)); - Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); + Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); List<Slot> sortedSlots = new ArrayList<Slot>(); Set<Slot> classSlots = new HashSet<Slot>(); for(Slot slot : t.getSlots()){ @@ -669,8 +690,8 @@ for(Allocation a : slot2Allocations.get(slot)){ for(WeightedQuery query : queries){ Query q = new Query(query.getQuery()); - if(a.getUri().equals("http://dbpedia.org/ontology/developer") && q.toString().contains("/Organisation>")){ - System.out.println("YES"); + if(a.getUri().equals("http://dbpedia.org/ontology/developer") && q.toString().contains("/Organisation>") && q.toString().contains("/Software>")){ + System.out.println("YES:\n" + query); } boolean drop = false; if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ @@ -701,7 +722,8 @@ if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ drop = true; } else { -// System.out.println("DROPPING: \n" + q.toString()); + if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")) + System.out.println("DROPPING: \n" + q.toString()); } } } else { @@ -731,7 +753,8 @@ if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ drop = true; } else { -// System.out.println("DROPPING: \n" + q.toString()); + if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")) + System.out.println("DROPPING: \n" + q.toString()); } } } @@ -801,6 +824,14 @@ for(String word : slot.getWords()){ rs = index.getResourcesWithScores(word, 10); + //debugging +// for(Iterator<SolrQueryResultItem> iter = rs.getItems().iterator();iter.hasNext();){ +// SolrQueryResultItem item = iter.next(); +// if(exclusions.contains(item.getUri())){ +// iter.remove(); +// } +// } + // System.out.println(word + "->" + rs); for(SolrQueryResultItem item : rs.getItems()){ int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); @@ -1364,12 +1395,11 @@ // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "In which programming language is GIMP written?"; // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; - String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; +// String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; // String question = "Which/WDT country/NN does/VBZ the/DT Airedale/NNP Terrier/NNP come/VBP from/IN"; -// String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP"; + String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP"; // String question = "How/WRB many/JJ films/NNS did/VBD Leonardo/NNP DiCaprio/NNP star/VB in/IN"; - -// String question = "Give me all books written by authors influenced by Ernest Hemingway."; +// String question = "Which/WDT music/NN albums/NNS contain/VBP the/DT song/NN Last/NNP Christmas/NNP"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), // Collections.<String>singletonList(""), Collections.<String>emptyList()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-03 19:32:25 UTC (rev 3371) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-04 12:51:33 UTC (rev 3372) @@ -41,6 +41,7 @@ PartOfSpeechTagger tagger; LTAGLexicon g; LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); + Parser p; Preprocessor pp; @@ -53,9 +54,14 @@ boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; - boolean USE_NER = false; + boolean USE_NER = true; public Templator() { + this(new StanfordPartOfSpeechTagger()); + } + + public Templator(final PartOfSpeechTagger tagger) { + this.tagger = tagger; List<InputStream> grammarFiles = new ArrayList<InputStream>(); for(int i = 0; i < GRAMMAR_FILES.length; i++){ @@ -64,8 +70,6 @@ g = LTAG_Constructor.construct(grammarFiles); - tagger = new StanfordPartOfSpeechTagger(); -// tagger = new ApachePartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |