From: <chr...@us...> - 2011-09-07 17:09:56
Revision: 3244 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3244&view=rev Author: christinaunger Date: 2011-09-07 17:09:48 +0000 (Wed, 07 Sep 2011) Log Message: ----------- [tbsl] added a simpler version of templates without triples (for template filling by means of graph exploration) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicSlot.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term_deprecated.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -5,17 +5,24 @@ import java.io.InputStreamReader; import java.util.Set; +import org.dllearner.algorithm.tbsl.sparql.BasicQueryTemplate; import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.templator.BasicTemplator; import org.dllearner.algorithm.tbsl.templator.Templator; public class TestFrontend { + + // MODE ::= BASIC | LEIPZIG + static String MODE = "BASIC"; public static void main(String[] args) { Templator templator = new Templator(); + BasicTemplator btemplator = new BasicTemplator(); - System.out.println("======= SPARQL Templator v0.1 ============="); + System.out.println("======= SPARQL Templator ================="); + System.out.println("Running in " + MODE + " mode."); System.out.println("\nType ':q' to quit."); while (true) { @@ -24,13 +31,19 @@ if (s.equals(":q")) { System.exit(0); } - - Set<Template> temps = templator.buildTemplates(s); - for (Template temp : temps) { - System.out.println(temp.toString()); + if (MODE.equals("BASIC")) { + Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(s); + for (BasicQueryTemplate temp : querytemps) { + 
System.out.println(temp.toString()); + } } - + else if (MODE.equals("LEIPZG")) { + Set<Template> temps = templator.buildTemplates(s); + for (Template temp : temps) { + System.out.println(temp.toString()); + } + } } } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -0,0 +1,321 @@ +package org.dllearner.algorithm.tbsl.converter; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.sem.drs.Complex_DRS_Condition; +import org.dllearner.algorithm.tbsl.sem.drs.DRS; +import org.dllearner.algorithm.tbsl.sem.drs.DRS_Condition; +import org.dllearner.algorithm.tbsl.sem.drs.DRS_Quantifier; +import org.dllearner.algorithm.tbsl.sem.drs.DiscourseReferent; +import org.dllearner.algorithm.tbsl.sem.drs.Negated_DRS; +import org.dllearner.algorithm.tbsl.sem.drs.Simple_DRS_Condition; +import org.dllearner.algorithm.tbsl.sparql.BasicQueryTemplate; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Aggregate; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_OrderBy; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; +import org.dllearner.algorithm.tbsl.sparql.Slot; + + +public class DRS2BasicSPARQL_Converter { + + List<Slot> slots; + BasicQueryTemplate query; + List<Integer> usedInts; + + public DRS2BasicSPARQL_Converter() { + query = new BasicQueryTemplate(); + usedInts = new ArrayList<Integer>(); + } + + public void setSlots(List<Slot> ls) { + slots = ls; + } + + // TODO ?? 
+ public List<SPARQL_Property> getProperties(Complex_DRS_Condition cond) { + List<SPARQL_Property> retVal = new ArrayList<SPARQL_Property>(); + + return retVal; + } + + public BasicQueryTemplate convert(DRS drs,List<Slot> ls) { + + query = new BasicQueryTemplate(); + slots = ls; + + return convert(drs, new BasicQueryTemplate(), false); + } + + private BasicQueryTemplate convert(DRS drs, BasicQueryTemplate query, boolean negate) { + + redundantEqualRenaming(drs); + + for (DRS_Condition condition : drs.getConditions()) { + convertCondition(condition,query); + if (negate) { + for (SPARQL_Term term : query.getSelTerms()) { + SPARQL_Filter f = new SPARQL_Filter(); + f.addNotBound(term); + query.addFilter(f); + } + } + } + + for (DiscourseReferent referent : drs.getDRs()) { + if (referent.isMarked()) { + SPARQL_Term term = new SPARQL_Term(referent.toString().replace("?","")); + term.setIsVariable(true); + query.addSelTerm(term); + } + if (referent.isNonexistential()) { + SPARQL_Term term = new SPARQL_Term(referent.getValue()); + term.setIsVariable(true); + SPARQL_Filter f = new SPARQL_Filter(); + f.addNotBound(term); + query.addFilter(f); + } + for (Slot s : slots) { + if (s.getAnchor().equals(referent.getValue())) { + query.addSlot(s); + break; + } + } + } + + if (query.getSelTerms().size() == 0) + query.setQt(SPARQL_QueryType.ASK); + + return query; + } + + private BasicQueryTemplate convertCondition(DRS_Condition condition, BasicQueryTemplate query) { + + if (condition.isComplexCondition()) { + + Complex_DRS_Condition complex = (Complex_DRS_Condition) condition; + + DRS restrictor = complex.getRestrictor(); + DRS_Quantifier quant = complex.getQuantifier(); + DRS scope = complex.getScope(); + + // call recursively + for (DRS_Condition cond : restrictor.getConditions()) { + query = convertCondition(cond, query); + } + for (DRS_Condition cond : scope.getConditions()) { + query = convertCondition(cond, query); + } + // add the quantifier at last + DiscourseReferent ref = complex.getReferent(); + String sref = ref.getValue(); + String fresh; + + switch (quant) { + case HOWMANY: + query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT)); + break; + case EVERY: + // probably save to ignore // TODO unless in cases like "which actor starred in every movie by spielberg?" 
+ // query.addFilter(new SPARQL_Filter(new SPARQL_Term(sref))); + break; + case NO: + SPARQL_Filter f = new SPARQL_Filter(); + f.addNotBound(new SPARQL_Term(sref)); + query.addFilter(f); + break; + case FEW: // + break; + case MANY: // + break; + case MOST: // + break; + case SOME: // + break; + case THELEAST: + fresh = "c"+createFresh(); + query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT,fresh)); + query.addOrderBy(new SPARQL_Term(fresh, SPARQL_OrderBy.ASC)); + query.setLimit(1); + break; + case THEMOST: + fresh = "c"+createFresh(); + query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT,fresh)); + query.addOrderBy(new SPARQL_Term(fresh, SPARQL_OrderBy.DESC)); + query.setLimit(1); + break; + } + } else if (condition.isNegatedCondition()) { + Negated_DRS neg = (Negated_DRS) condition; + query = convert(neg.getDRS(), query, true); + + } else { + Simple_DRS_Condition simple = (Simple_DRS_Condition) condition; + + String predicate = simple.getPredicate(); + if (predicate.startsWith("SLOT")) { + for (Slot s : slots) { + if (s.getAnchor().equals(predicate)) { + s.setToken(predicate); + predicate = "p" + createFresh(); + s.setAnchor(simple.getArguments().get(0).getValue()); + break; + } + else if (s.getToken().equals(predicate)) { + predicate = s.getAnchor(); + } + } + } + + SPARQL_Property prop = new SPARQL_Property(predicate); + prop.setIsVariable(true); + + boolean noliteral = true; + if (simple.getArguments().size() > 1 && simple.getArguments().get(1).getValue().matches("\\d+")) { + noliteral = false; + } + + if (predicate.equals("p")) { + query.addConditions(simple.toString()); + } + else if (predicate.equals("count")) { + // COUNT(?x) AS ?c + if (noliteral) { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, simple.getArguments().get(1).getValue())); + return query; + } + else { + String fresh = "c"+createFresh(); + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, fresh)); + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(fresh), + new SPARQL_Term(simple.getArguments().get(1).getValue(),true), + SPARQL_PairType.EQ))); + return query; + } + } else if (predicate.equals("sum")) { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(1).getValue(), SPARQL_Aggregate.SUM)); + return query; + } else if (predicate.equals("greater")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + SPARQL_PairType.GT))); + return query; + } else if (predicate.equals("greaterorequal")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + SPARQL_PairType.GTEQ))); + return query; + } else if (predicate.equals("less")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + SPARQL_PairType.LT))); + return query; + } else if (predicate.equals("lessorequal")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + SPARQL_PairType.LTEQ))); + return query; + } else if (predicate.equals("maximum")) { + 
query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue())); + query.addOrderBy(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_OrderBy.DESC)); + query.setLimit(1); + return query; + } else if (predicate.equals("minimum")) { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue())); + query.addOrderBy(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_OrderBy.ASC)); + query.setLimit(1); + return query; + } else if (predicate.equals("countmaximum")) { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, "c")); + query.addOrderBy(new SPARQL_Term("c", SPARQL_OrderBy.DESC)); + query.setLimit(1); + return query; + } else if (predicate.equals("countminimum")) { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, "c")); + query.addOrderBy(new SPARQL_Term("c", SPARQL_OrderBy.DESC)); + query.setLimit(1); + return query; + } else if (predicate.equals("equal")) { + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term(simple.getArguments().get(1).getValue(),noliteral), + SPARQL_PairType.EQ))); + return query; + } + } + return query; + } + + + public void redundantEqualRenaming(DRS drs) { + + Set<Simple_DRS_Condition> equalsConditions = new HashSet<Simple_DRS_Condition>(); + for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { + if(c.getPredicate().equals("equal")) { + equalsConditions.add(c); + } + } + + DiscourseReferent firstArg; + DiscourseReferent secondArg; + + for (Simple_DRS_Condition c : equalsConditions) { + + firstArg = c.getArguments().get(0); + secondArg = c.getArguments().get(1); + + boolean oneArgIsInt = firstArg.getValue().matches("\\d+") || secondArg.getValue().matches("\\d+"); + + drs.removeCondition(c); + if (!oneArgIsInt) { + drs.replaceEqualRef(firstArg, secondArg, false); + } else { + drs.replaceEqualRef(firstArg, secondArg, true); + } + for (Slot s : slots) { + if (s.getAnchor().equals(firstArg.getValue())) { + s.setAnchor(secondArg.getValue()); + } + } + } + + // finally remove all conditions that ended up of form equal(y,y) + Set<Simple_DRS_Condition> equalEqualsConditions = new HashSet<Simple_DRS_Condition>(); + for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { + if(c.getPredicate().equals("equal") && c.getArguments().get(0).getValue().equals(c.getArguments().get(1).getValue())) { + equalEqualsConditions.add(c); + } + } + for (Simple_DRS_Condition c : equalEqualsConditions) { + drs.removeCondition(c); + } + } + + private int createFresh() { + + int fresh = 0; + for (int i = 0; usedInts.contains(i); i++) { + fresh = i+1 ; + } + usedInts.add(fresh); + return fresh; + } +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -187,13 +187,13 @@ break; case THELEAST: fresh = "c"+createFresh(); - 
query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT,true, new SPARQL_Term(fresh))); + query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT,fresh)); query.addOrderBy(new SPARQL_Term(fresh, SPARQL_OrderBy.ASC)); query.setLimit(1); break; case THEMOST: fresh = "c"+createFresh(); - query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT,true, new SPARQL_Term(fresh))); + query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT,fresh)); query.addOrderBy(new SPARQL_Term(fresh, SPARQL_OrderBy.DESC)); query.setLimit(1); break; @@ -238,7 +238,7 @@ if (predicate.equals("count")) { // COUNT(?x) AS ?c - query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, true, new SPARQL_Term(simple.getArguments().get(1).getValue(),true))); + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, simple.getArguments().get(1).getValue())); return query; } else if (predicate.equals("sum")) { query.addSelTerm(new SPARQL_Term(simple.getArguments().get(1).getValue(), SPARQL_Aggregate.SUM)); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -10,6 +10,7 @@ import org.dllearner.algorithm.tbsl.ltag.data.TreeNode; import org.dllearner.algorithm.tbsl.ltag.reader.ParseException; import org.dllearner.algorithm.tbsl.sem.util.Pair; +import org.dllearner.algorithm.tbsl.templator.BasicSlotBuilder; import org.dllearner.algorithm.tbsl.templator.SlotBuilder; /** @@ -29,13 +30,14 @@ static List<Integer> usedInts = new ArrayList<Integer>(); static ArrayList<String> doubles = new ArrayList<String>(); - static ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps) { + static ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps, String mode) { // DISAM: CLEAR usedInts = new ArrayList<Integer>(); doubles = new ArrayList<String>(); SlotBuilder slotbuilder = new SlotBuilder(); + BasicSlotBuilder basicslotbuilder = new BasicSlotBuilder(); List<String> input = getWordList(taggedinput.trim()); input.add(0,"#"); // This is important. Don't mess with the parser! @@ -235,7 +237,16 @@ } System.out.println("build slot for: " + buildSlotFor + "\n"); - List<String[]> entries = slotbuilder.build(taggedinput,buildSlotFor); + List<String[]> entries; + if (mode.equals("LEIPZIG")) { + entries = slotbuilder.build(taggedinput,buildSlotFor); + } + else if (mode.equals("BASIC")) { + entries = basicslotbuilder.build(taggedinput,buildSlotFor); + } + else { // should never happen! 
+ entries = new ArrayList<String[]>(); + } try { for (String[] entry : entries) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -15,6 +15,7 @@ public boolean USE_LESS_MEMORY = false; public boolean SHOW_GRAMMAR = false; public boolean SHOW_LEXICAL_COVERAGE = false; + public String MODE = "BASIC"; // MODE ::= BASIC | LEIPZIG private String[] input; private List<DerivationTree> derivationTrees = new ArrayList<DerivationTree>(); @@ -53,7 +54,7 @@ * times, a tree for each token is added. Both trees need to have * different treeIDs for the parser to work correctly. */ - parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries); + parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); String inputNoTags = ""; for (String s : taggeduserinput.split(" ")) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -13,17 +13,21 @@ static final String[] genericReplacements = { "\"", "", "'", "", "[!?.,;]", "" }; static final String[] englishReplacements = { "don't", "do not", "doesn't", "does not" }; + static boolean USE_NER; + static NER ner; - static NER ner = new LingPipeNER(true);//not case sensitive best solution? - - public Preprocessor() { + public Preprocessor(boolean n) { + USE_NER = n; + if (USE_NER) { + ner = new LingPipeNER(true); //not case sensitive best solution? + } } - public static String normalize(String s) { + public String normalize(String s) { return normalize(s, new String[0]); } - public static String normalize(String s, String... repl) { + public String normalize(String s, String... 
repl) { if (repl.length % 2 != 0 || genericReplacements.length % 2 != 0 || englishReplacements.length % 2 != 0) { throw new IllegalArgumentException(); @@ -41,7 +45,7 @@ return s; } - public static String condense(String taggedstring) { + public String condense(String taggedstring) { /* condense: * x/RBR adj/JJ > adj/JJR, x/RBS adj/JJ > adj/JJS, x/WRB adj/JJ > x/JJH @@ -187,7 +191,7 @@ return condensedstring; } - public static String condenseNominals(String s) { + public String condenseNominals(String s) { String flat = s; @@ -216,7 +220,7 @@ return flat; } - public static String findNEs(String tagged,String untagged) { + public String findNEs(String tagged,String untagged) { String out = tagged; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -12,6 +12,7 @@ import org.dllearner.algorithm.tbsl.sem.util.Label; import org.dllearner.algorithm.tbsl.sem.util.SemanticRepresentation; import org.dllearner.algorithm.tbsl.sem.util.Type; +import org.dllearner.algorithm.tbsl.sparql.BasicSlot; import org.dllearner.algorithm.tbsl.sparql.Slot; public class Dude implements SemanticRepresentation{ @@ -221,6 +222,7 @@ List<Argument> args = new ArrayList<Argument>(); List<DominanceConstraint> dcs = new ArrayList<DominanceConstraint>(); List<Slot> ls = new ArrayList<Slot>(); + List<BasicSlot> lbs = new ArrayList<BasicSlot>(); for (DRS component : components) { cs.add(component.clone()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2011-09-07 17:09:48 UTC (rev 3244) @@ -62,7 +62,7 @@ List<DRS> drs_list = null; List<DominanceConstraint> constraints = null; List<Argument> arg_list = null; - List<Slot> slots = null; + List<Slot> slots = null; } { "<" referent = dr() "," label=<LABEL> "," type=Type() "," "[" (drs_list=DRS_List())? "]" "," "[" (arg_list = Arg_List())? 
"]" Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -0,0 +1,284 @@ +package org.dllearner.algorithm.tbsl.sparql; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class BasicQueryTemplate +{ + + Set<SPARQL_Term> selTerms; // SELECT ?x ?y + Set<SPARQL_Prefix> prefixes; + Set<String> conditions; + Set<SPARQL_Term> orderBy; + Set<SPARQL_Filter> filter; + SPARQL_QueryType qt = SPARQL_QueryType.SELECT; + List<Slot> slots; + + int limit; + int offset; + + public BasicQueryTemplate() + { + super(); + selTerms = new HashSet<SPARQL_Term>(); + prefixes = new HashSet<SPARQL_Prefix>(); + conditions = new HashSet<String>(); + orderBy = new HashSet<SPARQL_Term>(); + filter = new HashSet<SPARQL_Filter>(); + slots = new ArrayList<Slot>(); + } + + public void addSlot(Slot s) { + slots.add(s); + } + + public void addConditions(String s) { + conditions.add(s); + } + + @Override + public String toString() + { + + String retVal = ""; + for (SPARQL_Prefix prefix : prefixes) + { + retVal += prefix.toString() + "\n"; + } + + if (qt == SPARQL_QueryType.SELECT) + { + retVal += "\nSELECT "; + + for (SPARQL_Term term : selTerms) + { + retVal += term.toString() + " "; + } + } + else retVal += "\nASK "; + + retVal += "WHERE {\n"; + + for (String s : conditions) { + retVal += "\t" + s + "\n"; + } + + for (SPARQL_Filter f : filter) + { + retVal += "\t" + f.toString() + " .\n"; + } + + retVal += "}\n"; + + if (orderBy != null && !orderBy.isEmpty()) + { + retVal += "ORDER BY "; + for (SPARQL_Term term : orderBy) + { + retVal += term.toString() + " "; + } + retVal += "\n"; + } + + if (limit != 0 || offset != 0) + { + retVal += "LIMIT " + limit + " OFFSET " + offset + "\n"; + } + + retVal += "\n"; + + for (Slot s : slots) { + retVal += s.toString() + "\n"; + } + + return retVal; + + } + + public List<String> getVariablesAsStringList() + { + List<String> result = new ArrayList<String>(); + for (SPARQL_Term term : selTerms) + { + result.add(term.toString()); + } + return result; + } + + public Set<SPARQL_Term> getSelTerms() + { + return selTerms; + } + + public void setSelTerms(Set<SPARQL_Term> selTerms) + { + this.selTerms = selTerms; + } + + public Set<SPARQL_Prefix> getPrefixes() + { + return prefixes; + } + + public Set<SPARQL_Filter> getFilters(){ + return filter; + } + + public void setPrefixes(Set<SPARQL_Prefix> prefixes) + { + this.prefixes = prefixes; + } + + public void addFilter(SPARQL_Filter f) + { + for (int i = 0; i < filter.size(); ++i) + if (f.equals(filter.toArray()[i])) return; + + this.filter.add(f); + } + + public Set<SPARQL_Term> getOrderBy() + { + return orderBy; + } + + public void addOrderBy(SPARQL_Term term) + { + if (term.orderBy == SPARQL_OrderBy.NONE) + term.orderBy = SPARQL_OrderBy.ASC; + + orderBy.add(term); + } + + public void addPrefix(SPARQL_Prefix prefix) + { + prefixes.add(prefix); + } + + public void addSelTerm(SPARQL_Term term) + { + for (int i = 0; i < selTerms.size(); ++i) + if (term.equals(selTerms.toArray()[i])) return; + + selTerms.add(term); + } + + public boolean isSelTerm(SPARQL_Term term) + { + for (int i = 0; i < selTerms.size(); ++i) // TODO: have to figure 
out + // while .remove doesn't + // call .equals + { + if (term.equals(selTerms.toArray()[i])) return true; + } + return false; + } + + public void removeSelTerm(SPARQL_Term term) + { + Set<SPARQL_Term> newSelTerms = new HashSet<SPARQL_Term>(); + for (int i = 0; i < selTerms.size(); ++i) // TODO: have to figure out + // while .remove doesn't + // call .equals + { + if (!term.equals(selTerms.toArray()[i])) newSelTerms.add((SPARQL_Term) selTerms.toArray()[i]); + } + selTerms = newSelTerms; + } + + public int getLimit() + { + return limit; + } + + public void setLimit(int limit) + { + this.limit = limit; + } + + public int getOffset() + { + return offset; + } + + public void setOffset(int offset) + { + this.offset = offset; + } + + public SPARQL_QueryType getQt() + { + return qt; + } + + public void setQt(SPARQL_QueryType qt) + { + this.qt = qt; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((filter == null) ? 0 : filter.hashCode()); + result = prime * result + limit; + result = prime * result + offset; + result = prime * result + ((orderBy == null) ? 0 : orderBy.hashCode()); + result = prime * result + + ((prefixes == null) ? 0 : prefixes.hashCode()); + result = prime * result + ((qt == null) ? 0 : qt.hashCode()); + result = prime * result + + ((selTerms == null) ? 0 : selTerms.hashCode()); + result = prime * result + ((slots == null) ? 0 : slots.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + BasicQueryTemplate other = (BasicQueryTemplate) obj; + if (filter == null) { + if (other.filter != null) + return false; + } else if (!filter.equals(other.filter)) + return false; + if (limit != other.limit) + return false; + if (offset != other.offset) + return false; + if (orderBy == null) { + if (other.orderBy != null) + return false; + } else if (!orderBy.equals(other.orderBy)) + return false; + if (prefixes == null) { + if (other.prefixes != null) + return false; + } else if (!prefixes.equals(other.prefixes)) + return false; + if (qt == null) { + if (other.qt != null) + return false; + } else if (!qt.equals(other.qt)) + return false; + if (selTerms == null) { + if (other.selTerms != null) + return false; + } else if (!selTerms.equals(other.selTerms)) + return false; + if (slots == null) { + if (other.slots != null) + return false; + } else if (!slots.equals(other.slots)) + return false; + return true; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicQueryTemplate.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicSlot.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicSlot.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicSlot.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -0,0 +1,100 @@ +package org.dllearner.algorithm.tbsl.sparql; + + +public class BasicSlot { + + String anchor; + String token; + SlotType type; + + public BasicSlot(String a,String t) { + anchor = a; + token = t; + type = SlotType.UNSPEC; + replaceUnderscores(); + } + public BasicSlot(String a,SlotType t,String s) { + anchor = a; + token = s; + type = t; + 
replaceUnderscores(); + } + + public void setSlotType(SlotType st) { + type = st; + } + + public SlotType getSlotType(){ + return type; + } + + public String getAnchor() { + return anchor; + } + public void setAnchor(String s) { + anchor = s; + } + public String getToken() { + return token; + } + public void setToken(String t) { + token = t; + } + + public void replaceReferent(String ref1,String ref2) { + if (anchor.equals(ref1)) { + anchor = ref2; + } + } + + public void replaceUnderscores() { + token = token.replaceAll("_"," "); + } + + @Override + public String toString() { + return anchor + ": " + type + " {" + token + "}"; + } + @Override + public BasicSlot clone() { + return new BasicSlot(anchor,type,token); + } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((anchor == null) ? 0 : anchor.hashCode()); + result = prime * result + ((token == null) ? 0 : token.hashCode()); + result = prime * result + ((type == null) ? 0 : type.hashCode()); + return result; + } + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + BasicSlot other = (BasicSlot) obj; + if (anchor == null) { + if (other.anchor != null) + return false; + } else if (!anchor.equals(other.anchor)) + return false; + if (token == null) { + if (other.token != null) + return false; + } else if (!token.equals(other.token)) + return false; + if (type == null) { + if (other.type != null) + return false; + } else if (!type.equals(other.type)) + return false; + return true; + } + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/BasicSlot.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -1,42 +1,45 @@ package org.dllearner.algorithm.tbsl.sparql; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Aggregate; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_OrderBy; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; + public class SPARQL_Term extends SPARQL_Value { SPARQL_OrderBy orderBy = SPARQL_OrderBy.NONE; SPARQL_Aggregate aggregate = SPARQL_Aggregate.NONE; - SPARQL_Term as = null; + boolean isURI = false; + String alias; public SPARQL_Term(String name) { super(name); this.name = name.replace("?","").replace("!",""); + alias = name; } - public SPARQL_Term(String name,boolean b) { + public SPARQL_Term(String name, boolean uri) { super(name); this.name = name.replace("?","").replace("!",""); - setIsVariable(b); + isURI = uri; + alias = name; } public SPARQL_Term(String name, SPARQL_Aggregate aggregate) { super(name); this.aggregate = aggregate; + alias = name; } - public SPARQL_Term(String name, SPARQL_Aggregate aggregate,boolean b,SPARQL_Term t) { + public SPARQL_Term(String name, SPARQL_Aggregate aggregate, String as) { super(name); this.aggregate = aggregate; - setIsVariable(b); - as = t; + alias = as; } - public SPARQL_Term(String name, SPARQL_OrderBy orderBy) { + public 
SPARQL_Term(String name, SPARQL_OrderBy ob) { super(name); - this.orderBy = orderBy; + orderBy = ob; + alias = name; } - public SPARQL_Term(String name, SPARQL_OrderBy orderBy,boolean b,SPARQL_Term t) { - super(name); - this.orderBy = orderBy; - setIsVariable(b); - as = t; - } @Override public boolean equals(Object obj) { @@ -50,8 +53,8 @@ return orderBy; } - public void setOrderBy(SPARQL_OrderBy orderBy) { - this.orderBy = orderBy; + public void setOrderBy(SPARQL_OrderBy ob) { + orderBy = ob; } public SPARQL_Aggregate getAggregate() { @@ -64,35 +67,29 @@ public boolean isString() { - return name.startsWith("'"); + return name.startsWith("'") || name.matches("\\d+"); } @Override public String toString() { +// System.err.println("SPARQL_Term: name="+name+",alias="+alias+",agg="+aggregate+",orderBy="+orderBy); // DEBUG if (aggregate != SPARQL_Aggregate.NONE) { - if (as != null) { - return aggregate+"(?"+name.toLowerCase()+") AS " + as.toString(); - } - else { + if (alias != null && !alias.equals(name)) + return aggregate+"(?"+name.toLowerCase()+") AS ?" + alias; + else return aggregate+"(?"+name.toLowerCase()+")"; - } } if (orderBy != SPARQL_OrderBy.NONE) { - String n; - if (as != null) { n = as.name; } else { n = name; } if (orderBy == SPARQL_OrderBy.ASC) - return "ASC(?"+n.toLowerCase()+")"; - else - return "DESC(?"+n.toLowerCase()+")"; + return "ASC(?"+alias.toLowerCase()+")"; + else + return "DESC(?"+alias.toLowerCase()+")"; } - if (isVariable() && !isString()) { - return "?"+name.toLowerCase(); - } - else { + if (isString() || isURI) { return name; } + else return "?"+name.toLowerCase(); } - } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term_deprecated.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term_deprecated.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term_deprecated.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -0,0 +1,98 @@ +package org.dllearner.algorithm.tbsl.sparql; + +public class SPARQL_Term_deprecated extends SPARQL_Value { + + SPARQL_OrderBy orderBy = SPARQL_OrderBy.NONE; + SPARQL_Aggregate aggregate = SPARQL_Aggregate.NONE; + SPARQL_Term_deprecated as = null; + + public SPARQL_Term_deprecated(String name) { + super(name); + this.name = name.replace("?","").replace("!",""); + } + public SPARQL_Term_deprecated(String name,boolean b) { + super(name); + this.name = name.replace("?","").replace("!",""); + setIsVariable(b); + } + + public SPARQL_Term_deprecated(String name, SPARQL_Aggregate aggregate) { + super(name); + this.aggregate = aggregate; + } + public SPARQL_Term_deprecated(String name, SPARQL_Aggregate aggregate,boolean b,SPARQL_Term_deprecated t) { + super(name); + this.aggregate = aggregate; + setIsVariable(b); + as = t; + } + + public SPARQL_Term_deprecated(String name, SPARQL_OrderBy orderBy) { + super(name); + this.orderBy = orderBy; + } + public SPARQL_Term_deprecated(String name, SPARQL_OrderBy orderBy,boolean b,SPARQL_Term_deprecated t) { + super(name); + this.orderBy = orderBy; + setIsVariable(b); + as = t; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof SPARQL_Term_deprecated)) return false; + + SPARQL_Term_deprecated f = (SPARQL_Term_deprecated) obj; + return f.getName().toLowerCase().equals(this.getName().toLowerCase()) && f.getAggregate() == aggregate && f.getOrderBy() == orderBy; + } + + public SPARQL_OrderBy getOrderBy() { + 
return orderBy; + } + + public void setOrderBy(SPARQL_OrderBy orderBy) { + this.orderBy = orderBy; + } + + public SPARQL_Aggregate getAggregate() { + return aggregate; + } + + public void setAggregate(SPARQL_Aggregate aggregate) { + this.aggregate = aggregate; + } + + public boolean isString() + { + return name.startsWith("'"); + } + + @Override + public String toString() { + if (aggregate != SPARQL_Aggregate.NONE) { + if (as != null) { + return aggregate+"(?"+name.toLowerCase()+") AS " + as.toString(); + } + else { + return aggregate+"(?"+name.toLowerCase()+")"; + } + } + if (orderBy != SPARQL_OrderBy.NONE) { + String n; + if (as != null) { n = as.name; } else { n = name; } + if (orderBy == SPARQL_OrderBy.ASC) + return "ASC(?"+n.toLowerCase()+")"; + else + return "DESC(?"+n.toLowerCase()+")"; + } + if (isVariable() && !isString()) { + return "?"+name.toLowerCase(); + } + else { + return name; + } + } + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term_deprecated.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java 2011-09-03 02:53:32 UTC (rev 3243) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -1,5 +1,5 @@ package org.dllearner.algorithm.tbsl.sparql; public enum SlotType { - CLASS, PROPERTY, SYMPROPERTY, RESOURCE, UNSPEC + CLASS, PROPERTY, SYMPROPERTY, RESOURCE, LITERAL, UNSPEC } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -0,0 +1,318 @@ +package org.dllearner.algorithm.tbsl.templator; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; + +import org.dllearner.algorithm.tbsl.sem.util.Pair; + +public class BasicSlotBuilder { + + private String[] noun = {"NN","NNS","NNP","NNPS","NPREP","JJNN","JJNPREP"}; + private String[] adjective = {"JJ","JJR","JJS","JJH"}; + private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; + private String[] preps = {"IN","TO"}; + + public BasicSlotBuilder() { + } + + /** + * gets synonyms, attribute etc. 
from WordNet and construct grammar entries + * INPUT: array of tokens and array of POStags, from which preprocessor constructs a list of pairs (token,pos) + * OUTPUT: list of (treestring,dude) + **/ + public List<String[]> build(String taggedstring,List<Pair<String,String>> tokenPOSpairs) { + + List<String[]> result = new ArrayList<String[]>(); + + for (Pair<String,String> pair : tokenPOSpairs) { + + String token = pair.fst; + String tokenfluent = token.replaceAll(" ","").replaceAll("_",""); + String pos = pair.snd; + + String type = "UNSPEC"; + String slot; + + /* NOUNS */ + if (equalsOneOf(pos,noun)) { + + if (pos.equals("NNP") || pos.equals("NNPS")) { + type = "RESOURCE"; + } + + slot = "SLOT_" + tokenfluent + "/" + type + "/" + token; + + // treetoken + String treetoken = "N:'" + token.toLowerCase() + "'"; + if (token.trim().contains(" ")) { + String[] tokenParts = token.split(" "); + treetoken = ""; + for (String t : tokenParts) { + treetoken += " N:'" + t.toLowerCase() + "'"; + } + treetoken = treetoken.trim(); + } + // + if (pos.equals("NN") || pos.equals("NNS")) { + /* DP */ + String[] dpEntry1 = {token, + "(DP (NP " + treetoken + "))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; + String[] dpEntry2 = {token, + "(DP (NP " + treetoken + " DP[name]))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; + result.add(dpEntry1); + result.add(dpEntry2); + /* NP */ + String[] npEntry1 = {token, + "(NP " + treetoken + ")", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; + String[] npEntry2 = {token, + "(NP " + treetoken + " DP[name])", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; + result.add(npEntry1); + result.add(npEntry2); + } + else if (pos.equals("NNP") || pos.equals("NNPS")) { + /* DP */ + String[] dpEntry1 = {token, + "(DP (NP " + treetoken + "))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; + String[] dpEntry2 = {token, + "(DP DET[det] (NP " + treetoken + "))", + "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[(l2,x,det,e)],[l2=l1],[" + slot + "]>"}; + result.add(dpEntry1); + result.add(dpEntry2); + } + else if (pos.equals("NPREP")) { + String[] dpEntry1 = {token, + "(DP (NP " + treetoken + " DP[pobj]))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + String[] dpEntry2 = {token, + "(DP DET[det] (NP " + treetoken + " DP[pobj]))", + "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>"}; + String[] npEntry = {token, + "(NP " + treetoken + " DP[pobj])", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + result.add(dpEntry1); + result.add(dpEntry2); + result.add(npEntry); + } + else if (pos.equals("JJNPREP")) { + slot = "SLOT_" + tokenfluent + "/UNSPEC/" + token; + String[] dpEntry1 = {token, + "(DP (NP " + treetoken + " DP[pobj]))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(p) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>" }; + String[] dpEntry2 = {token, + "(DP DET[det] (NP " + treetoken + " DP[pobj]))", + "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(p) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>" }; + String[] npEntry = {token, + "(NP " + treetoken + " 
DP[pobj])", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(p) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + result.add(dpEntry1); + result.add(dpEntry2); + result.add(npEntry); + } + else if(pos.equals("JJNN") && token.contains("_")) { + slot = "SLOT_" + tokenfluent + "/USNPEC/" + token; + String[] npEntry = {token, + "(NP " + treetoken + " )", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; + result.add(npEntry); + } + + } + /* VERBS */ + else if (equalsOneOf(pos,verb)) { + + if (token.equals("has") || token.equals("have") || token.equals("had")) { + slot = ""; + } + else { + slot = "SLOT_" + token + "/PROPERTY/" + token; + } + if (pos.equals("PASSIVE")) { + String[] passEntry1 = {token, + "(S DP[subj] (VP V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + String[] passEntry2 = {token, + "(S DP[wh] (VP DP[dp] V:'" + token + "'))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(passEntry1); + result.add(passEntry2); + } + else if (pos.equals("PASSPART")) { + String[] passpartEntry = {token, + "(NP NP* (VP V:'" + token + "' DP[dp]))", + "<x,l1,t,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + result.add(passpartEntry); + } + else if (pos.equals("VPASS")) { + String[] passEntry = {token, + "(S DP[subj] (VP V:'" + token + "'))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(passEntry); + } + else if (pos.equals("VPASSIN")) { + String[] passEntry = {token, + "(S DP[subj] (VP V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(passEntry); + } + else if (pos.equals("GERUNDIN")) { + String[] gerundinEntry1 = {token, + "(NP NP* V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + String[] gerundinEntry2 = {token, + "(ADJ V:'" + token + "' DP[obj]))", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + result.add(gerundinEntry1); + result.add(gerundinEntry2); + } + else if (pos.equals("VPREP")) { + String[] passEntry = {token, + "(S DP[subj] (VP V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + String[] whEntry = {token, + "(S DP[obj] (VP DP[subj] V:'" + token + "'))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(passEntry); + result.add(whEntry); + } + else if (pos.equals("VBD") || pos.equals("VBZ") || pos.equals("VBP")) { + String[] vEntry = {token, + "(S DP[subj] (VP V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(vEntry); + } + else if (pos.equals("VB")) { + String[] whEntry = {token, + "(S DP[obj] (VP DP[subj] V:'" + token + "'))", + 
"<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(whEntry); + } + else if (pos.equals("VBG") || pos.equals("VBN")) { + String[] gerEntry = {token, + "(NP NP* (VP V:'" + token + "' DP[dp]))", + "<x,l1,t,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + result.add(gerEntry); + } + else if (pos.equals("WHEN")) { + slot = "SLOT_" + token + "/PROPERTY/" + token + "_date"; + String[] whenEntry = {token, + "(S DP[subj] (VP V:'" + token + "'))", + "<x,l1,t,[ l1:[ ?y | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; + result.add(whenEntry); + } + else if (pos.equals("WHERE")) { + slot = "SLOT_" + token + "/PROPERTY/" + token + "_place"; + String[] whereEntry = {token, + "(S DP[subj] (VP V:'" + token + "'))", + "<x,l1,t,[ l1:[ ?y | SLOT_" + token + "(p) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; + result.add(whereEntry); + } + + } + /* ADJECTIVES */ + else if (equalsOneOf(pos,adjective)) { + + slot = "SLOT_" + token + "/PROPERTY/" + token; + /* ADJECTIVE */ + if (pos.equals("JJ")) { + String[] adjEntry = {token, + "(NP ADJ:'" + token.toLowerCase() + "' NP*)", + "<x,l1,<e,t>,[ l1:[ j | SLOT_" + token + "(j) ] ],[],[],["+slot+"]>"}; + result.add(adjEntry); + } + if (pos.equals("JJH")) { + String[] howEntry = {"how "+token, + "(DP WH:'how' ADJ:'" + token.toLowerCase() + "')", + "<x,l1,<<e,t>,t>,[ l1:[ ?j,x | SLOT_" + token + "(j) ] ],[],[],["+slot+"]>"}; + result.add(howEntry); + } + /* COMPARATIVE */ + else if (pos.equals("JJR")) { + String pol = polarity(token); + String comp; + if (pol.equals("POS")) { + comp = "greater"; + } else { comp = "less"; } + + String[] compEntry1 = {token, + "(ADJ ADJ:'" + token.toLowerCase() + "' P:'than' DP[compobj])", + "<x,l1,<e,t>,[ l1:[ p,j,i | SLOT_" + token + "(p), p(x,i), p(y,j), " + comp + "(i,j) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; + result.add(compEntry1); + String[] compEntry2 = {token, + "(NP NP* (ADJ ADJ:'" + token.toLowerCase() + "' P:'than' DP[compobj]))", + "<x,l1,<e,t>,[ l1:[ p,j,i | SLOT_" + token + "(p), p(i), p(j), " + comp + "(i,j) ] ],[ (l2,y,compobj,<<e,t>,t>) ],[l1=l2],["+slot+"]>"}; + result.add(compEntry2); + } + /* SUPERLATIVE */ + else if (pos.equals("JJS")) { + String pol = polarity(token); + String comp; + if (pol.equals("POS")) { + comp = "maximum"; + } else { comp = "minimum"; } + + String[] superEntry1 = {token, + "(DET DET:'the' ADJ:'" + token.toLowerCase() + "')", + "<x,l1,e,[ l1:[ p,x,j | SLOT_" + token + "(p), p(x,j), " + comp + "(j) ] ],[],[],["+slot+"]>"}; + result.add(superEntry1); + String[] superEntry2 = {token, + "(DP (NP DET:'the' ADJ:'" + token.toLowerCase() + "'))", + "<x,l1,<<e,t>,t>,[ l1:[ p,x,j | SLOT_" + token + "(p), p(x,j), " + comp + "(j) ] ],[],[],["+slot+"]>"}; + result.add(superEntry2); + String[] superEntry3 = {token, + "(DP (NP DET:'the' ADJ:'" + token.toLowerCase() + "' NP[noun]))", + "<x,l1,<<e,t>,t>,[ l1:[ p,x,j | SLOT_" + token + "(p), p(x,j), " + comp + "(j) ] ],[ (l2,x,noun,<e,t>) ],[l2=l1],["+slot+"]>"}; + result.add(superEntry3); + } + } + /* PREPOSITIONS */ + else if (equalsOneOf(pos,preps)) { + String[] npAdjunct = {token, + "(NP NP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; + result.add(npAdjunct); + } + } + + return result; + } + + private boolean equalsOneOf(String string,String[] 
strings) { + for (String s : strings) { + if (string.equals(s)) { + return true; + } + } + return false; + } + + private String polarity(String adj) { + + String polarity = "POS"; + + BufferedReader in; + try { + in = new BufferedReader(new InputStreamReader(this.getClass().getClassLoader().getResourceAsStream("tbsl/lexicon/adj_list.txt"))); + String line; + while ((line = in.readLine()) != null ) { + if (line.contains(adj)) { + polarity = line.split(" ")[0]; + break; + } + } + in.close(); + } catch (Exception e) { + e.printStackTrace(); + } + + return polarity; + } + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2011-09-07 17:09:48 UTC (rev 3244) @@ -0,0 +1,150 @@ +package org.dllearner.algorithm.tbsl.templator; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.converter.DRS2BasicSPARQL_Converter; +import org.dllearner.algorithm.tbsl.converter.DUDE2UDRS_Converter; +import org.dllearner.algorithm.tbsl.ltag.parser.LTAGLexicon; +import org.dllearner.algorithm.tbsl.ltag.parser.LTAG_Lexicon_Constructor; +import org.dllearner.algorithm.tbsl.ltag.parser.Parser; +import org.dllearner.algorithm.tbsl.ltag.parser.Preprocessor; +import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.sem.drs.DRS; +import org.dllearner.algorithm.tbsl.sem.drs.UDRS; +import org.dllearner.algorithm.tbsl.sem.dudes.data.Dude; +import org.dllearner.algorithm.tbsl.sem.dudes.reader.ParseException; +import org.dllearner.algorithm.tbsl.sparql.BasicQueryTemplate; +import org.dllearner.algorithm.tbsl.sparql.Slot; + +public class BasicTemplator { + + String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex"}; + + PartOfSpeechTagger tagger; + LTAGLexicon g; + LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); + Parser p; + Preprocessor pp; + + boolean ONE_SCOPE_ONLY = true; + boolean UNTAGGED_INPUT = true; + + public BasicTemplator() { + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + +// tagger = new StanfordPartOfSpeechTagger(); + tagger = new ApachePartOfSpeechTagger(); + + p = new Parser(); + p.SHOW_GRAMMAR = true; + p.USE_DPS_AS_INITTREES = true; + p.CONSTRUCT_SEMANTICS = true; + p.MODE = "BASIC"; + + pp = new Preprocessor(false); + } + + public void setUNTAGGED_INPUT(boolean b) { + UNTAGGED_INPUT = b; + } + + public Set<BasicQueryTemplate> buildBasicQueries(String s) { + + DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); + DRS2BasicSPARQL_Converter d2s = new DRS2BasicSPARQL_Converter(); + boolean clearAgain = true; + + String tagged; + if (UNTAGGED_INPUT) { + s = pp.normalize(s); + tagged = tagger.tag(s); + 
System.out.println("Tagged input: " + tagged); + } + else { + tagged = s; + } + + String newtagged = pp.condenseNominals(tagged); + newtagged = pp.condense(newtagged); + System.out.println("Preprocessed: " + newtagg... [truncated message content] |