From: <lor...@us...> - 2012-07-03 14:16:39
|
Revision: 3770 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3770&view=rev Author: lorenz_b Date: 2012-07-03 14:16:28 +0000 (Tue, 03 Jul 2012) Log Message: ----------- Some changes to get relevant keywords for a question. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -24,6 +24,7 @@ import java.util.concurrent.Future; import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.ltag.parser.GrammarFilter; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; @@ -143,6 +144,8 @@ private PopularityMap popularityMap; + private Set<String> relevantKeywords; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -349,6 +352,7 @@ learnedSPARQLQueries = new HashMap<String, Object>(); template2Queries = new HashMap<Template, Collection<? extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); + relevantKeywords = new HashSet<String>(); currentlyExecutedQuery = null; // templateMon.reset(); @@ -367,8 +371,10 @@ } templateMon.stop(); logger.info("Done in " + templateMon.getLastValue() + "ms."); + relevantKeywords.addAll(templateGenerator.getUnknownWords()); if(templates.isEmpty()){ throw new NoTemplateFoundException(); + } logger.info("Templates:"); for(Template t : templates){ @@ -463,13 +469,7 @@ } public Set<String> getRelevantKeywords(){ - Set<String> keywords = new HashSet<String>(); - for(Template t : templates){ - for (Slot slot : t.getSlots()) { - keywords.add(slot.getWords().get(0)); - } - } - return keywords; + return relevantKeywords; } private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -21,18 +21,20 @@ * grammar contains the .+ wildcard the input n-gram "a b x y c" matches the * anchor "a b .+ c". */ -class GrammarFilter { +public class GrammarFilter { private static final Logger logger = Logger.getLogger(GrammarFilter.class); final static String[] NAMED_Strings = {"named", "called"}; // DISAM - static List<Integer> usedInts = new ArrayList<Integer>(); - static ArrayList<String> doubles = new ArrayList<String>(); + private List<Integer> usedInts = new ArrayList<Integer>(); + private List<String> doubles = new ArrayList<String>(); public static boolean VERBOSE = true; - static ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps, String mode) { + private List<String> unknownWords; + + public ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps, String mode) { // DISAM: CLEAR usedInts = new ArrayList<Integer>(); @@ -208,7 +210,7 @@ } } - List<String> unknownWords = new ArrayList<String>(); + unknownWords = new ArrayList<String>(); for (String t : unknownTokens) { String[] tParts = t.split(" "); for (String s : tParts) { @@ -280,8 +282,12 @@ return parseG; } + + public List<String> getUnknownWords(){ + return unknownWords; + } - private static List<Pair<String,String>> checkForNamedString(String token) { + private List<Pair<String,String>> checkForNamedString(String token) { String[] split; if (token.contains(" ")) { @@ -366,7 +372,7 @@ return result; } - private static int createFresh() { + private int createFresh() { int fresh = 0; for (int i = 0; usedInts.contains(i); i++) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -32,6 +32,8 @@ private List<Dude> dudes = new ArrayList<Dude>(); private ParseGrammar parseGrammar = null; private List<Integer> temporaryEntries = new ArrayList<Integer>(); + + private GrammarFilter grammarFilter = new GrammarFilter(); @SuppressWarnings("unchecked") private final Class[] operations = { Scanner.class, MoveDotDown.class, @@ -65,7 +67,7 @@ * times, a tree for each token is added. Both trees need to have * different treeIDs for the parser to work correctly. */ - parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); + parseGrammar = grammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); String inputNoTags = ""; for (String s : taggeduserinput.split(" ")) { @@ -97,6 +99,10 @@ } + public List<String> getUnknownWords(){ + return grammarFilter.getUnknownWords(); + } + public List<DerivationTree> parseMultiThreaded(String taggeduserinput, LTAGLexicon grammar) { derivationTrees.clear(); @@ -112,7 +118,7 @@ * times, a tree for each token is added. Both trees need to have * different treeIDs for the parser to work correctly. */ - parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); + parseGrammar = grammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); String inputNoTags = ""; for (String s : taggeduserinput.split(" ")) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -44,7 +44,7 @@ LTAGLexicon g; LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); - Parser p; + Parser parser; Preprocessor pp; WordNet wordnet; @@ -83,11 +83,11 @@ g = LTAG_Constructor.construct(grammarFiles); - p = new Parser(); - p.SHOW_GRAMMAR = true; - p.USE_DPS_AS_INITTREES = true; - p.CONSTRUCT_SEMANTICS = true; - p.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; pp = new Preprocessor(USE_NER); } @@ -104,11 +104,11 @@ g = LTAG_Constructor.construct(grammarFiles); - p = new Parser(); - p.SHOW_GRAMMAR = true; - p.USE_DPS_AS_INITTREES = true; - p.CONSTRUCT_SEMANTICS = true; - p.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; pp = new Preprocessor(USE_NER); } @@ -125,12 +125,12 @@ g = LTAG_Constructor.construct(grammarFiles); - p = new Parser(); - p.SHOW_GRAMMAR = false; - p.VERBOSE = b; - p.USE_DPS_AS_INITTREES = true; - p.CONSTRUCT_SEMANTICS = true; - p.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = false; + parser.VERBOSE = b; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; pp = new Preprocessor(USE_NER); pp.setVERBOSE(b); @@ -180,16 +180,16 @@ newtagged = pp.condense(newtagged); if (VERBOSE) logger.trace("Preprocessed: " + newtagged); - p.parse(newtagged,g); + parser.parse(newtagged,g); - if (p.getDerivationTrees().isEmpty()) { - p.clear(g,p.getTemps()); + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); clearAgain = false; if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); } else { try { - p.buildDerivedTrees(g); + parser.buildDerivedTrees(g); } catch (ParseException e) { if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); } @@ -205,7 +205,7 @@ Set<DRS> drses = new HashSet<DRS>(); Set<Template> templates = new HashSet<Template>(); - for (Dude dude : p.getDudes()) { + for (Dude dude : parser.getDudes()) { UDRS udrs = d2u.convert(dude); if (udrs != null) { @@ -295,7 +295,7 @@ } if (clearAgain) { - p.clear(g,p.getTemps()); + parser.clear(g,parser.getTemps()); } // System.gc(); @@ -326,16 +326,16 @@ newtagged = pp.condense(newtagged); if (VERBOSE) logger.trace("Preprocessed: " + newtagged); - p.parseMultiThreaded(newtagged,g); + parser.parseMultiThreaded(newtagged,g); - if (p.getDerivationTrees().isEmpty()) { - p.clear(g,p.getTemps()); + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); clearAgain = false; if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); } else { try { - p.buildDerivedTreesMultiThreaded(g); + parser.buildDerivedTreesMultiThreaded(g); } catch (ParseException e) { if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); } @@ -358,7 +358,7 @@ // threadPool.shutdown(); // while(!threadPool.isTerminated()){} - for (Dude dude : p.getDudes()) { + for (Dude dude : parser.getDudes()) { UDRS udrs = d2u.convert(dude); if (udrs != null) { @@ -451,7 +451,7 @@ if (clearAgain) { - p.clear(g,p.getTemps()); + parser.clear(g,parser.getTemps()); } // System.gc(); @@ -462,6 +462,10 @@ return taggedInput; } + public List<String> getUnknownWords(){ + return parser.getUnknownWords(); + } + private List<String> getLemmatizedWords(List<String> words){ List<String> stemmed = new ArrayList<String>(); for(String word : words){ Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -49,6 +49,7 @@ SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.setMappingIndex(mappingIndex); learner.init(); + learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); int learnedQuestions = 0; Map<String, String> question2QueryMap = new HashMap<String, String>(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -114,11 +114,12 @@ SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.setMappingIndex(mappingIndex); learner.init(); + learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; question = "Give me all Victorian houses in Oxfordshire"; - question = "houses with more than 3 bedrooms"; + question = "Edwardian houses close to supermarket for less than 1,000,000 in Oxfordshire"; // question = "Give me all family houses with more than 2 bathrooms and more than 4 bedrooms"; learner.setQuestion(question); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |